I tried to use CDPL Chem to read an SDF file written by RDKit. However, I encountered an error:
mol = CDPL_readSDF(temp_file)
File "~/csv2sdf.py", line 462, in CDPL_readSDF
reader = CDPLChem.MoleculeReader(input_sd_file)
Boost.Python.ArgumentError: Python argument types in
MoleculeReader.__init__(MoleculeReader, str)
did not match C++ signature:
__init__(_object* self, std::istream {lvalue} is, CDPL::Base::DataFormat fmt)
__init__(_object* self, std::istream {lvalue} is, std::string fmt)
__init__(_object* self, std::string file_name, CDPL::Base::DataFormat fmt, std::_Ios_Openmode mode=CDPL.Base._base.OpenMode(12))
__init__(_object* self, std::string file_name, std::string fmt, std::_Ios_Openmode mode=CDPL.Base._base.OpenMode(12))
__init__(_object* self, std::string file_name, std::_Ios_Openmode mode=CDPL.Base._base.OpenMode(12))
My import:
import CDPL.Chem as CDPLChem
import CDPL.ConfGen as CCfGen
from rdkit import Chem
from rdkit.Chem import AllChem
My output RDKit SDF file:
def prepare_mol_for_SDPL(smiles: str, columns: list, row: pd.Series, temp_file: str):
    """
    Build an RDKit molecule from a SMILES string and write it to an SD file.

    The molecule gets explicit hydrogens and 2D coordinates, one property per
    entry of ``columns`` (taken from ``row``), and three identifier properties
    (``_InputInChIKey``, ``_SMILES``, ``_InChIKey``) before being written.

    Parameters:
    - smiles (str): SMILES string of the input structure.
    - columns (list): Labels of the ``row`` entries to copy onto the molecule.
    - row (pd.Series): Record holding the values for ``columns``.
    - temp_file (str): Path of the SD file to write.

    Raises:
    - ValueError: if RDKit cannot parse ``smiles``.
    """
    parsed = Chem.MolFromSmiles(smiles)
    if parsed is None:
        # MolFromSmiles signals a parse failure by returning None; fail here
        # with a clear message instead of an opaque error inside AddHs.
        raise ValueError(f"Could not parse SMILES: {smiles!r}")

    # InChIKey of the structure exactly as parsed, before hydrogens are added.
    input_inchikey = Chem.inchi.InchiToInchiKey(Chem.inchi.MolToInchi(parsed))

    mol = Chem.AddHs(parsed)
    AllChem.Compute2DCoords(mol)

    for col in columns:
        # Property names and values are passed to SetProp as strings.
        mol.SetProp(str(col), str(row[col]))

    # NOTE(review): RDKit treats property names starting with "_" as private;
    # confirm that SDWriter actually emits these three into the SD file.
    mol.SetProp("_InputInChIKey", str(input_inchikey))
    mol.SetProp("_SMILES", str(Chem.MolToSmiles(mol)))
    mol.SetProp("_InChIKey", str(Chem.inchi.InchiToInchiKey(Chem.inchi.MolToInchi(mol))))

    writer = Chem.SDWriter(temp_file)
    try:
        writer.write(mol)
    finally:
        # Close the writer even when write() fails so the file handle is released.
        writer.close()
My read function:
def CDPL_readSDF(input_sd_file: str) -> "CDPLChem.BasicMolecule | None":
    """
    Read the first molecule from the provided SD file with CDPL.

    Parameters:
    - input_sd_file (str): Path to the input SD file.

    Returns:
    - The first molecule of the file as a ``CDPLChem.BasicMolecule``, or
      ``None`` when the reader does not yield a molecule.

    Raises:
    - Exception: if reading the molecule fails; the original error is chained
      as the cause. Errors raised while constructing the reader propagate
      unchanged.

    Refs:
    - https://cdpkit.org/cdpl_python_tutorial/cdpl_python_tutorial.html#retrieving-structure-data-from-mdl-sd-files
    """
    # Generic reader constructed from the file path alone. A format-specific
    # reader (CDPLChem.FileSDFMoleculeReader) was tried here previously.
    reader = CDPLChem.MoleculeReader(input_sd_file)

    # Instance of the default implementation of the Chem.Molecule interface
    # that the reader fills in.
    mol = CDPLChem.BasicMolecule()

    try:
        # Only the first record is needed, so a single read replaces the loop.
        if reader.read(mol):
            return mol
    except Exception as e:  # handle exception raised in case of severe read errors
        raise Exception('Error: reading molecule failed: ' + str(e)) from e

    return None
My main function:
# Per-row driver: write each SMILES to a temporary SD file with RDKit, then
# read that file back with CDPL. (Fragment: the handler of the outer try is
# not part of this excerpt.)
for ind in df.index:
smiles = df.loc[ind, args.smi_column]
mol = None
try:
try:
# Best-effort removal of a temporary file left over from an earlier pass.
# NOTE(review): temp_file is only assigned a few lines below; if it is not
# defined before this loop, the first pass raises NameError here, which the
# bare except swallows along with any other error.
os.remove(temp_file)
except:
print()
# Temporary SD file path derived from the output file name.
temp_file = output_file.replace('.sdf', '_temp.sdf')
# Write the molecule (with all columns of this row as properties) to temp_file.
prepare_mol_for_SDPL(smiles, df.columns, df.loc[ind], temp_file)
# Read the molecule back through CDPL; this is the call that appears in the traceback.
mol = CDPL_readSDF(temp_file)
# Clean up the temporary file once the molecule has been read.
os.remove(temp_file)
I tried to use CDPL Chem to read an SDF file written by RDKit. However, I encountered an error:
My import:
My output RDKit SDF file:
My read function:
My main function:
P.S. I took a look at some references; they both work:
Please help me to solve it. Thank you very much.