JelfsMaterialsGroup / stko

A collection of molecular optimisers and property calculators for use with stk.
https://stko-docs.readthedocs.io/en/latest/
MIT License
21 stars 8 forks source link

Rewrite atom typer for UFF in Gulp. #59

Open andrewtarzia opened 3 years ago

andrewtarzia commented 3 years ago

Found issues with the handling of conjugation in stko.GulpUFF.assign_FF().

The method was written based on rdkit code, but the rdkit code also uses the bond order to set bond types, which we do not do here, leading to conjugated atoms having resonant bonds in Gulp/UFF.

andrewtarzia commented 3 years ago

I have begun working on this in uff_typer_tests branch on my fork, where I have added test molecules and made those tests pass and match the expected result.

However, a full rewrite is ideal.

andrewtarzia commented 3 years ago

Does this gulp wrapper have a better solution to atom typing? https://github.com/learningmatter-mit/gulpy

stevenkbennett commented 3 years ago

It sounds like it could be a good option. Would you want to include it as a dependency?

andrewtarzia commented 2 years ago

Prepared a test script with some molecules optimised with stko.UFF() [uses rdkit engine] and stko.GulpUFFOptimizer() that currently plots parities of bonds, angles and dihedrals.

The effect of the optimisation engine could be at play.

Bond distances are reasonable, save a few deviations (perhaps incorrect aromaticity assignment). dists.pdf

Angles are also reasonable - although I would expect better agreement. angles.pdf

Dihedrals show a horrible scatter - unsure what to make of this. dihedrals.pdf

Need to go further into this in the future.

import stk
import stko
import matplotlib.pyplot as plt
import numpy as np 
from scipy.spatial.distance import euclidean
import rdkit.Chem.AllChem as rdkit

def molecules():

    smiles = (
        'C#C',
        'C1=CC=CC=C1',
        'C1=CC=C2C=CC=CC2=C1',
        'c1cncc(c1)C#Cc2cc(cc(c2)c3ccc(cc3)c4cc(cc(c4)C#Cc5cccnc5)C#Cc6cccnc6)C#Cc7cccnc7',
        'C1=CC(=CC(=C1)C#CC2=CN=CC=C2)C#CC3=CN=CC=C3',
        'CS[CH]=[C]1C[CH2]C1',
        'CC1CCCCC(C1)c2cc(C)cc(O)c2',
        'CN1C=NC2=C1C(=O)N(C(=O)N2C)C',
        'CCC(C)C(C(=O)O)N',
        'CC(C)CC(C(=O)O)N',
        'CC(C)C(C(=O)O)N',
        'CC(C(C(=O)O)N)O',
        'CSCCC(C(=O)O)N',
        'C1=CC=C(C=C1)CC(C(=O)O)N',
        'C1=CC=C2C(=C1)C(=CN2)CC(C(=O)O)N',
        'C(CCN)CC(C(=O)O)N',

    )

    for smi in smiles:
        yield  stk.BuildingBlock(smi)

def parity(uff, gulp, filename, lim):

    fig, ax = plt.subplots(figsize=(5, 5))
    ax.scatter(uff, gulp, c='gold', edgecolors='k',
               alpha=1.0, s=40)
    ax.plot(np.linspace(min(lim) - 1, max(lim) + 1, 2),
            np.linspace(min(lim) - 1, max(lim) + 1, 2),
            c='k', alpha=0.4)
    # Set number of ticks for x-axis
    ax.tick_params(axis='both', which='major', labelsize=16)
    ax.set_xlabel('uff', fontsize=16)
    ax.set_ylabel('gulp', fontsize=16)
    ax.set_xlim(lim)
    ax.set_ylim(lim)
    fig.tight_layout()
    fig.savefig(
        f'{filename}.pdf',
        dpi=720,
        bbox_inches='tight'
    )

def get_bond_length(atom1_id, atom2_id, position_matrix):
    distance = euclidean(
        u=position_matrix[atom1_id],
        v=position_matrix[atom2_id]
    )

    return float(distance)

def get_dihedral(pt1, pt2, pt3, pt4):
    p0 = np.asarray(pt1)
    p1 = np.asarray(pt2)
    p2 = np.asarray(pt3)
    p3 = np.asarray(pt4)

    b0 = -1.0 * (p1 - p0)
    b1 = p2 - p1
    b2 = p3 - p2

    # normalize b1 so that it does not influence magnitude of vector
    # rejections that come next
    b1 /= np.linalg.norm(b1)

    # vector rejections
    # v = projection of b0 onto plane perpendicular to b1
    #   = b0 minus component that aligns with b1
    # w = projection of b2 onto plane perpendicular to b1
    #   = b2 minus component that aligns with b1
    v = b0 - np.dot(b0, b1) * b1
    w = b2 - np.dot(b2, b1) * b1

    # angle between v and w in a plane is the torsion angle
    # v and w may not be normalized but that's fine since tan is y/x
    x = np.dot(v, w)
    y = np.dot(np.cross(b1, v), w)
    return np.degrees(np.arctan2(y, x))

def unit_vector(vector):
    return vector / np.linalg.norm(vector)

def angle_between(v1, v2, normal=None):
    v1_u = unit_vector(v1)
    v2_u = unit_vector(v2)
    angle = np.arccos(np.clip(np.dot(v1_u, v2_u), -1.0, 1.0))

    if normal is not None:
        # Get normal vector and cross product to determine sign.
        cross = np.cross(v1_u, v2_u)
        if np.dot(normal, cross) < 0:
            angle = -angle
    return angle

def main():

    u_dists = []
    u_angles = []
    u_dihedrals = []
    g_dists = []
    g_angles = []
    g_dihedrals = []
    for i, molecule in enumerate(molecules()):
        print(f'doing {molecule}')
        # Run optimisations.
        uff = stko.UFF()
        u = uff.optimize(molecule)
        u.write(f'uff_{i}.mol')
        gulp = stko.GulpUFFOptimizer(
            gulp_path='/home/atarzia/software/gulp-5.1/Src/gulp/gulp',
            output_dir=f'gulp_{i}',
            conjugate_gradient=False,
        )
        # Assign the force field.
        gulp.assign_FF(molecule)
        g = gulp.optimize(molecule)
        g.write(f'gulp_{i}.mol')

        for bond in molecule.get_bonds():
            u_dists.append(get_bond_length(bond.get_atom1().get_id(), bond.get_atom2().get_id(), u.get_position_matrix()))
            g_dists.append(get_bond_length(bond.get_atom1().get_id(), bond.get_atom2().get_id(), g.get_position_matrix()))

        paths = rdkit.FindAllPathsOfLengthN(
            mol=molecule.to_rdkit_mol(),
            length=3,
            useBonds=False,
            useHs=True,
        )
        for atom_ids in paths:
            pos_mat = u.get_position_matrix()
            vector1 = pos_mat[atom_ids[1]]-pos_mat[atom_ids[0]]
            vector2 = pos_mat[atom_ids[1]]-pos_mat[atom_ids[2]]
            u_angles.append(np.degrees(
                angle_between(vector1, vector2)
            ))

            pos_mat = g.get_position_matrix()
            vector1 = pos_mat[atom_ids[1]]-pos_mat[atom_ids[0]]
            vector2 = pos_mat[atom_ids[1]]-pos_mat[atom_ids[2]]
            g_angles.append(np.degrees(
                angle_between(vector1, vector2)
            ))

        paths = rdkit.FindAllPathsOfLengthN(
            mol=molecule.to_rdkit_mol(),
            length=4,
            useBonds=False,
            useHs=True,
        )
        for atom_ids in paths:
            u_dihedrals.append(abs(get_dihedral(
                pt1=tuple(
                    u.get_atomic_positions(atom_ids[0])
                )[0],
                pt2=tuple(
                    u.get_atomic_positions(atom_ids[1])
                )[0],
                pt3=tuple(
                    u.get_atomic_positions(atom_ids[2])
                )[0],
                pt4=tuple(
                    u.get_atomic_positions(atom_ids[3])
                )[0],                                
            )))
            g_dihedrals.append(abs(get_dihedral(
                pt1=tuple(
                    g.get_atomic_positions(atom_ids[0])
                )[0],
                pt2=tuple(
                    g.get_atomic_positions(atom_ids[1])
                )[0],
                pt3=tuple(
                    g.get_atomic_positions(atom_ids[2])
                )[0],
                pt4=tuple(
                    g.get_atomic_positions(atom_ids[3])
                )[0],                                
            )))

    parity(u_dists, g_dists, 'dists', (min([min(u_dists), min(g_dists)]), max([max(u_dists), max(g_dists)])))
    parity(u_angles, g_angles, 'angles', (min([min(u_angles), min(g_angles)]), max([max(u_angles), max(g_angles)])))
    parity(u_dihedrals, g_dihedrals, 'dihedrals', (min([min(u_dihedrals), min(g_dihedrals)]), max([max(u_dihedrals), max(g_dihedrals)])))

if __name__ == "__main__":
    main()
andrewtarzia commented 4 months ago

Previous commits of this file contain test cases that failed due to this issue: https://github.com/JelfsMaterialsGroup/stko/blob/0a7c74ab2ce2bbef2469478474363dc70fc3ef30/tests/optimizers/gulp/test_uff_assign_ff.py

Removed as part of #175