Open proteneer opened 4 years ago
The current SDF files contain about 40 molecules that are non-neutral. Here's a script that regenerates correct ones.
"""Regenerate ``freesolv.sdf`` from the semicolon-delimited ``database.txt``.

Each data row is parsed from SMILES, hydrogens are added, a 3D conformer is
embedded and MMFF-optimized, and the molecule is written to an SDF file with
its name and experimental dG / dG error attached as properties. Every
molecule written is checked to carry a net formal charge of zero.
"""
import csv
import os


def is_neutral(mol):
    """Return True when the formal charges of all atoms in *mol* sum to zero.

    *mol* only needs a ``GetAtoms()`` method yielding objects that expose
    ``GetFormalCharge()``.
    """
    return sum(atom.GetFormalCharge() for atom in mol.GetAtoms()) == 0


def main(database_path='database.txt', sdf_path='freesolv.sdf'):
    """Build 3D structures for every database row and write them to an SDF.

    Args:
        database_path: semicolon-delimited input table. Column 0 is read as
            the molecule name, column 1 as its SMILES, column 3 as the
            experimental dG and column 4 as the dG error.
        sdf_path: destination SDF file (overwritten).

    Raises:
        ValueError: a SMILES string cannot be parsed, or the resulting
            molecule is not neutral.
        RuntimeError: conformer embedding fails for a molecule.
    """
    # Imported here rather than at module level so that is_neutral() can be
    # imported and exercised without RDKit being installed.
    from rdkit import Chem
    from rdkit.Chem import AllChem

    mols = []
    mmff_fail_count = 0

    with open(database_path, newline='') as csvfile:
        reader = csv.reader(csvfile, delimiter=';', quotechar='|')
        for line, row in enumerate(reader):
            # The first three rows are skipped -- presumably header/comment
            # lines of the database file (TODO confirm against the input).
            if line <= 2:
                continue

            name = row[0]
            smiles = row[1]

            mol = Chem.MolFromSmiles(smiles)
            if mol is None:
                # MolFromSmiles signals a parse failure by returning None;
                # fail here with context instead of deep inside AddHs.
                raise ValueError(
                    f"row {line} ({name}): could not parse SMILES {smiles!r}"
                )
            mol = Chem.AddHs(mol)
            print(smiles)

            # Explicit checks instead of `assert`, which is removed when
            # Python runs with -O and would silently let bad molecules in.
            res = AllChem.EmbedMolecule(mol)
            if res != 0:
                raise RuntimeError(
                    f"row {line} ({name}): conformer embedding failed "
                    f"(EmbedMolecule returned {res})"
                )

            # A non-zero return is only counted, not treated as fatal: the
            # molecule is still written with whatever geometry it has.
            res = AllChem.MMFFOptimizeMolecule(mol)
            if res != 0:
                mmff_fail_count += 1

            exp_dG = float(row[3])
            exp_dG_err = float(row[4])

            mol.SetProp('_Name', name)
            mol.SetProp('dG', str(exp_dG))
            mol.SetProp('dG_err', str(exp_dG_err))

            if not is_neutral(mol):
                raise ValueError(
                    f"row {line} ({name}): molecule {smiles!r} is not neutral"
                )

            mols.append(mol)

    print("mm_fail", mmff_fail_count)

    writer = Chem.SDWriter(sdf_path)
    try:
        for m in mols:
            writer.write(m)
    finally:
        # close() flushes pending output and releases the file handle even
        # if one of the writes raises.
        writer.close()

    print("wrote", len(mols), "mols")


if __name__ == "__main__":
    main()
The current SDF files contain about 40 molecules that are non-neutral. Here's a script that regenerates correct ones.