Closed lipelopesoliveira closed 1 year ago
O código SMILES de cada bloco de construção deverá ser salvo no formato ChemAxon Extended SMILES (CXSMILES, aqui chamado de XSMILES; suportado pelo RDKit). Esse formato permite que as imagens dos blocos de construção sejam geradas com os labels Q, Rx e X:
Nesse formato, o bloco de construção acima possui o código:
[*]C1=C([*])C([*])=C([*])C([*])=C1[*] |$Q;;;R1;;R2;;Q;;R1;;R2$|
Será necessário desenvolver uma forma de automatizar o processo de geração desses códigos, uma vez que fazer isso manualmente será extremamente trabalhoso.
Tendo o código SMILES da molécula com os átomos especiais, o código abaixo gera o XSMILES com os labels:
# Example input: a SMILES string that uses the special atoms [Q] and [R1].
smiles_string = '[Q]C1=C([Q])C([R1])=C([Q])C([Q])=C1[R1]'
def smiles_to_xsmiles(smiles_string: str) -> str:
    """
    Convert a SMILES string with special atoms to a labelled extended SMILES.

    The special atoms ``Q``, ``R`` and ``X`` (optionally followed by an index
    when written as a bracket atom, e.g. ``[R12]``) are replaced by the
    wildcard atom ``*``. Their names are collected, in atom order, in the
    label block appended to the result. Every other atom contributes an empty
    label so that labels and atoms stay aligned.

    Parameters
    ----------
    smiles_string : str
        SMILES string to be converted.

    Returns
    -------
    xsmiles : str
        Extended SMILES string in the form ``'<smiles> |$<labels>$|'``, where
        ``<labels>`` holds one ``;``-separated entry per atom.
    """
    import re

    # Bracket content that denotes a special atom: Q, R, R1, R12, X3, ...
    # A full match is required so real elements such as Ru or Xe are untouched.
    special_atom = re.compile(r'[QRX]\d*')

    # One token per match; the alternatives are tried in this order:
    #   bracket - a whole bracket atom, e.g. [R1], [N+], [H], [*]
    #   special - a bare special atom; a bare R keeps at most ONE trailing
    #             digit, because further digits are ring-closure numbers
    #   atom    - an unbracketed atom; Cl/Br come first so they count once,
    #             and a bare H is accepted for backward compatibility
    #   other   - bonds, branches, ring-closure digits, anything else
    tokenizer = re.compile(
        r'\[(?P<bracket>[^\]]*)\]'
        r'|(?P<special>R\d?|[QX])'
        r'|(?P<atom>Cl|Br|[BCNOPSFIHbcnops*])'
        r'|(?P<other>.)',
        re.DOTALL,
    )

    pieces = []
    labels = []
    for token in tokenizer.finditer(smiles_string):
        kind = token.lastgroup
        text = token.group()
        if kind == 'bracket':
            inner = token.group('bracket')
            if special_atom.fullmatch(inner):
                pieces.append('[*]')
                labels.append(inner)
            else:
                # Regular bracket atom: exactly one atom, hence one empty label.
                pieces.append(text)
                labels.append('')
        elif kind == 'special':
            pieces.append('*')
            labels.append(text)
        elif kind == 'atom':
            pieces.append(text)
            labels.append('')
        else:
            pieces.append(text)

    return ''.join(pieces) + ' |$' + ';'.join(labels) + '$|'
> smiles_to_xsmiles(smiles_string)
'[*]C1=C([*])C([*])=C([*])C([*])=C1[*] |$Q;;;Q;;R1;;Q;;Q;;R1$|'
from rdkit import Chem
from rdkit.Chem import Draw
# Molecules to render: two labelled building blocks followed by three plain SMILES.
SMILES_LIST = [
    '[*]C1=C([*])C([*])=C([*])C([*])=C1[*] |$Q;;;R1;;R2;;Q;;R1;;R2$|',
    '[*]C1=C([*])C([*])=C(C2=C([*])C([*])=C([*])C([*])=C2[*])C([*])=C1[*] |$Q;;;R1;;R4;;;;R4;;R1;;Q;;R2;;R3;;R3;;R2$|',
    'c1c(O)cccn1',
    'c1c(F)c(C)ccn1',
    'c1cc(Cl)c(F)cn1',
]

# Passed as `legends`, one entry per item of SMILES_LIST, in the same order.
NAMES_LIST = [
    'Benzene',
    "1-1'-biphenyl",
    'c1c(O)cccn1',
    'c1c(F)c(C)ccn1',
    'c1cc(Cl)c(F)cn1',
]

# Parse every entry with RDKit, then lay the molecules out in a grid.
mols = list(map(Chem.MolFromSmiles, SMILES_LIST))
Draw.MolsToGridImage(
    mols,
    useSVG=True,
    subImgSize=(300, 200),
    legends=NAMES_LIST,
    molsPerRow=3,
)
Esse código gera uma imagem com todas as moléculas.
smiles_C2 = [{'name': 'benzene',
'smiles': '[Q]C1=C([R2])C([R1])=C([Q])C([R2])=C1[R1]',
'code': 'BENZ',
'xsmiles': '[*]C1=C([*])C([*])=C([*])C([*])=C1[*]',
'xsmiles_label': '|$Q;;;R2;;R1;;Q;;R2;;R1$|'},
{'name': 'naphthalene',
'smiles': '[Q]C1=C([R3])C([R2])=C2C(C([R2])=C([R3])C([Q])=C2[R1])=C1[R1]',
'code': 'NAPT',
'xsmiles': '[*]C1=C([*])C([*])=C2C(C([*])=C([*])C([*])=C2[*])=C1[*]',
'xsmiles_label': '|$Q;;;R3;;R2;;;;R2;;R3;;Q;;R1;;R1$|'},
{'name': "1,1'-biphenyl",
'smiles': '[Q]C1=C([R1])C([R3])=C(C2=C([R4])C([R2])=C([Q])C([R1])=C2[R3])C([R4])=C1[R2]',
'code': 'BPNY',
'xsmiles': '[*]C1=C([*])C([*])=C(C2=C([*])C([*])=C([*])C([*])=C2[*])C([*])=C1[*]',
'xsmiles_label': '|$Q;;;R1;;R3;;;;R4;;R2;;Q;;R1;;R3;;R4;;R2$|'},
{'name': 'anthracene',
'smiles': '[R1]C1=C2C(C([R2])=C([R4])C([Q])=C2[R3])=C([R1])C3=C([R3])C([Q])=C([R4])C([R2])=C31',
'code': 'ANTR',
'xsmiles': '[*]C1=C2C(C([*])=C([*])C([*])=C2[*])=C([*])C3=C([*])C([*])=C([*])C([*])=C31',
'xsmiles_label': '|$R1;;;;;R2;;R4;;Q;;R3;;R1;;;R3;;Q;;R4;;R2;$|'},
{'name': '1,7-dihydro-s-indacene',
'smiles': '[R1]C1=C2C(C([R2])=C([Q])C2[R3])=C([R1])C3=C1C([R2])C([Q])=C3[R3]',
'code': 'DHSI',
'xsmiles': '[*]C1=C2C(C([*])=C([*])C2[*])=C([*])C3=C1C([*])C([*])=C3[*]',
'xsmiles_label': '|$R1;;;;;R2;;Q;;R3;;R1;;;;R2;;Q;;R3$|'},
{'name': 'thieno[3,2-b]thiophene',
'smiles': '[Q]C1=C([R])C2=C(S1)C([R])=C([Q])S2',
'code': 'TTPH',
'xsmiles': '[*]C1=C([*])C2=C(S1)C([*])=C([*])S2',
'xsmiles_label': '|$Q;;;R;;;;;R;;Q;$|'},
{'name': "3,3'-bipyridine",
'smiles': '[Q]C1=C([R1])C([R3])=C(C2=C([R2])N=C([Q])C([R1])=C2[R3])C([R2])=N1',
'code': '3BPD',
'xsmiles': '[*]C1=C([*])C([*])=C(C2=C([*])N=C([*])C([*])=C2[*])C([*])=N1',
'xsmiles_label': '|$Q;;;R1;;R3;;;;R2;;;Q;;R1;;R3;;R2;$|'},
{'name': "2,2'-bithiophene",
'smiles': '[Q]C1=C([R2])C([R1])=C(C2=C([R1])C([R2])=C([Q])S2)S1',
'code': 'BTPH',
'xsmiles': '[*]C1=C([*])C([*])=C(C2=C([*])C([*])=C([*])S2)S1',
'xsmiles_label': '|$Q;;;R2;;R1;;;;R1;;R2;;Q;;$|'},
{'name': "1,1':4',1''-terphenyl",
'smiles': '[Q]C(C([R5])=C1[R6])=C([R4])C([R3])=C1C(C([R1])=C2[R2])=C([R2])C([R1])=C2C3=C([R3])C([R4])=C([Q])C([R5])=C3[R6]',
'code': 'TPNY',
'xsmiles': '[*]C(C([*])=C1[*])=C([*])C([*])=C1C(C([*])=C2[*])=C([*])C([*])=C2C3=C([*])C([*])=C([*])C([*])=C3[*]',
'xsmiles_label': '|$Q;;;R5;;R6;;R4;;R3;;;;R1;;R2;;R2;;R1;;;;R3;;R4;;Q;;R5;;R6$|'},
{'name': '1,2-diphenylethyne',
'smiles': '[Q]C(C([R4])=C1[R1])=C([R3])C([R2])=C1C#CC2=C([R1])C([R4])=C([Q])C([R3])=C2[R2]',
'code': 'DPEY',
'xsmiles': '[*]C(C([*])=C1[*])=C([*])C([*])=C1C#CC2=C([*])C([*])=C([*])C([*])=C2[*]',
'xsmiles_label': '|$Q;;;R4;;R1;;R3;;R2;;;;;;R1;;R4;;Q;;R3;;R2$|'},
{'name': "2,2'-bipyridine",
'smiles': '[Q]C1=C([R1])C([R3])=C(C2=NC([R2])=C([Q])C([R1])=C2[R3])N=C1[R2]',
'code': '2BPD',
'xsmiles': '[*]C1=C([*])C([*])=C(C2=NC([*])=C([*])C([*])=C2[*])N=C1[*]',
'xsmiles_label': '|$Q;;;R1;;R3;;;;;R2;;Q;;R1;;R3;;;R2$|'},
{'name': 'pyrene',
'smiles': '[Q]C1=C([R3])C2=C(C(C([R2])=C3[R1])=C1[R4])C(C3=C([R3])C([Q])=C4[R4])=C4C([R2])=C2[R1]',
'code': 'PYRN',
'xsmiles': '[*]C1=C([*])C2=C(C(C([*])=C3[*])=C1[*])C(C3=C([*])C([*])=C4[*])=C4C([*])=C2[*]',
'xsmiles_label': '|$Q;;;R3;;;;;R2;;R1;;R4;;;;R3;;Q;;R4;;;R2;;R1$|'},
{'name': 'pyrene-1,3,6,8(2H,7H)-tetraone',
'smiles': 'O=C([Q]C1=O)C2=C([R1])C([R2])=C3C4=C2C1=C([R2])C([R1])=C4C([Q]C3=O)=O',
'code': 'PYTO',
'xsmiles': 'O=C([*]C1=O)C2=C([*])C([*])=C3C4=C2C1=C([*])C([*])=C4C([*]C3=O)=O',
'xsmiles_label': '|$;;Q;;;;;R1;;R2;;;;;;R2;;R1;;;Q;;;$|'},
{'name': '1,4-bis(phenylethynyl)benzene',
'smiles': '[Q]C(C([R4])=C1[R1])=C([R3])C([R2])=C1C#CC(C([R6])=C2[R5])=C([R5])C([R6])=C2C#CC3=C([R1])C([R4])=C([Q])C([R3])=C3[R2]',
'code': 'BPYB',
'xsmiles': '[*]C(C([*])=C1[*])=C([*])C([*])=C1C#CC(C([*])=C2[*])=C([*])C([*])=C2C#CC3=C([*])C([*])=C([*])C([*])=C3[*]',
'xsmiles_label': '|$Q;;;R4;;R1;;R3;;R2;;;;;;R6;;R5;;R5;;R6;;;;;;R1;;R4;;Q;;R3;;R2$|'},
{'name': '(E)-1,2-diphenylethene',
'smiles': '[Q]C(C([R4])=C1[R2])=C([R1])C([R3])=C1/C=C/C2=C([R2])C([R4])=C([Q])C([R1])=C2[R3]',
'code': 'DPEL',
'xsmiles': '[*]C(C([*])=C1[*])=C([*])C([*])=C1/C=C/C2=C([*])C([*])=C([*])C([*])=C2[*]',
'xsmiles_label': '|$Q;;;R4;;R2;;R1;;R3;;;;;;R2;;R4;;Q;;R1;;R3$|'},
{'name': '(E)-1,2-diphenyldiazene',
'smiles': '[Q]C(C([R4])=C1[R2])=C([R1])C([R3])=C1/N=N/C2=C([R2])C([R4])=C([Q])C([R1])=C2[R3]',
'code': 'DPDA',
'xsmiles': '[*]C(C([*])=C1[*])=C([*])C([*])=C1/N=N/C2=C([*])C([*])=C([*])C([*])=C2[*]',
'xsmiles_label': '|$Q;;;R4;;R2;;R1;;R3;;;;;;R2;;R4;;Q;;R1;;R3$|'},
{'name': "benzo[1,2-b:4,5-b']dithiophene",
'smiles': '[Q]C(S1)=C([R2])C2=C1C([R1])=C(C([R2])=C([Q])S3)C3=C2[R1]',
'code': 'BDTP',
'xsmiles': '[*]C(S1)=C([*])C2=C1C([*])=C(C([*])=C([*])S3)C3=C2[*]',
'xsmiles_label': '|$Q;;;;R2;;;;R1;;;R2;;Q;;;;R1$|'},
{'name': "benzo[1,2-d:4,5-d']bis(thiazole)",
'smiles': '[Q]C(S1)=NC2=C1C([R1])=C(N=C([Q])S3)C3=C2[R1]',
'code': 'BBTZ',
'xsmiles': '[*]C(S1)=NC2=C1C([*])=C(N=C([*])S3)C3=C2[*]',
'xsmiles_label': '|$Q;;;;;;;R1;;;;Q;;;;R1$|'},
{'name': '1,4-diphenylbuta-1,3-diyne',
'smiles': '[Q]C(C([R4])=C1[R1])=C([R2])C([R3])=C1C#CC#CC2=C([R1])C([R3])=C([Q])C([R2])=C2[R3]',
'code': 'DPBY',
'xsmiles': '[*]C(C([*])=C1[*])=C([*])C([*])=C1C#CC#CC2=C([*])C([*])=C([*])C([*])=C2[*]',
'xsmiles_label': '|$Q;;;R4;;R1;;R2;;R3;;;;;;;;R1;;R3;;Q;;R2;;R3$|'},
{'name': 's-indacene-1,3,5,7(2H,6H)-tetraone',
'smiles': 'O=C([Q]C1=O)C2=C1C([R])=C(C([Q]C3=O)=O)C3=C2[R]',
'code': 'INTO',
'xsmiles': 'O=C([*]C1=O)C2=C1C([*])=C(C([*]C3=O)=O)C3=C2[*]',
'xsmiles_label': '|$;;Q;;;;;;R;;;Q;;;;;;R$|'},
{'name': "benzo[1,2-b:4,5-b']difuran",
'smiles': '[Q]C(O1)=C([R2])C2=C1C([R1])=C(C([R2])=C([Q])O3)C3=C2[R1]',
'code': 'BDFN',
'xsmiles': '[*]C(O1)=C([*])C2=C1C([*])=C(C([*])=C([*])O3)C3=C2[*]',
'xsmiles_label': '|$Q;;;;R2;;;;R1;;;R2;;Q;;;;R1$|'},
{'name': '1,5-dihydropyrrolo[2,3-f]indole',
'smiles': '[Q]C(N1[H])=C([R2])C2=C1C([R1])=C(C([R2])=C([Q])N3[H])C3=C2[R1]',
'code': 'DHPI',
'xsmiles': '[*]C(N1[H])=C([*])C2=C1C([*])=C(C([*])=C([*])N3[H])C3=C2[*]',
'xsmiles_label': '|$Q;;;;;R2;;;;R1;;;R2;;Q;;;;;R1$|'},
{'name': '1,7-dihydro-s-indacene',
'smiles': '[R1]C1=C2C(C([R2])=C([Q])C2[R3])=C([R1])C3=C1C([R2])C([Q])=C3[R3]',
'code': 'DHSI',
'xsmiles': '[*]C1=C2C(C([*])=C([*])C2[*])=C([*])C3=C1C([*])C([*])=C3[*]',
'xsmiles_label': '|$R1;;;;;R2;;Q;;R3;;R1;;;;R2;;Q;;R3$|'},
{'name': 'hydrazine',
'smiles': '[Q][Q]',
'code': 'HDZN',
'xsmiles': '[*][*]',
'xsmiles_label': '|$Q;Q$|'},
{'name': "naphtho[1,2-b:5,6-b']dithiophene",
'smiles': '[Q]C(S1)=C([R1])C2=C1C(C([R3])=C([R2])C3=C4SC([Q])=C3[R1])=C4C([R3])=C2[R2]',
'code': 'NDTP',
'xsmiles': '[*]C(S1)=C([*])C2=C1C(C([*])=C([*])C3=C4SC([*])=C3[*])=C4C([*])=C2[*]',
'xsmiles_label': '|$Q;;;;R1;;;;;R3;;R2;;;;;Q;;R1;;;R3;;R2$|'},
{'name': "3a,7a-dihydroanthra[2,1,9-def:6,5,10-d'e'f']diisochromene-1,3,8,10-tetraone",
'smiles': 'O=C([Q]C1=O)C2=C(C1C([R1])=C3[R3])C4=C3C(C([R4])=C([R2])C5C([Q]C6=O)=O)=C(C5=C6C([R1])=C7[R3])C7=C4C([R4])=C2[R2]',
'code': 'PTCD',
'xsmiles': 'O=C([*]C1=O)C2=C(C1C([*])=C3[*])C4=C3C(C([*])=C([*])C5C([*]C6=O)=O)=C(C5=C6C([*])=C7[*])C7=C4C([*])=C2[*]',
'xsmiles_label': '|$;;Q;;;;;;;R1;;R3;;;;;R4;;R2;;;Q;;;;;;;;R1;;R3;;;;R4;;R2$|'},
{'name': "(E)-4,4'-dimethyl-[6,6'-bithieno[3,2-b]pyrrolylidene]-5,5'(4H,4'H)-dione",
'smiles': 'O=C1N(C)C2=C(SC([Q])=C2[R1])/C1=C3C(SC([Q])=C4[R1])=C4N(C)C/3=O',
'code': 'TIDA',
'xsmiles': 'O=C1N(C)C2=C(SC([*])=C2[*])/C1=C3C(SC([*])=C4[*])=C4N(C)C/3=O',
'xsmiles_label': '|$;;;;;;;;Q;;R1;;;;;;Q;;R1;;;;;$|'},
{'name': "indeno[2,1-a]indene",
'code': 'INDE',
'smiles': '[R1]C(C([Q])=C1[R2])=C([R3])C2=C1C([R4])=C3C2=C([R4])C4=C([R2])C([Q])=C([R1])C([R3])=C43',
'xsmiles': '[*]C(C([*])=C1[*])=C([*])C2=C1C([*])=C3C2=C([*])C4=C([*])C([*])=C([*])C([*])=C43',
'xsmiles_label': '|$R1;;;Q;;R2;;R3;;;;R4;;;;R4;;;R2;;Q;;R1;;R3;$|'},
{'name': "indeno[1,2-b]fluorene",
'code': 'INFL',
'smiles': '[Q]C1=C([R2])C2=C(C([R3])=C1[R1])C3=C([R5])C4=C([R4])C5=C(C([R3])=C([R1])C([Q])=C5[R2])C4=C([R5])C3=C2[R4]',
'xsmiles': '[*]C1=C([*])C2=C(C([*])=C1[*])C3=C([*])C4=C([*])C5=C(C([*])=C([*])C([*])=C5[*])C4=C([*])C3=C2[*]',
'xsmiles_label': '|$Q;;;R2;;;;R3;;R1;;;R5;;;R4;;;;R3;;R1;;Q;;R2;;;R5;;;R4$|'}
]
O código:
from rdkit import Chem
from rdkit.Chem import Draw
# The building-block table defined above is named `smiles_C2`; the previous
# reference to `BB_C2` pointed at a name that is never defined here.
NAME = [block['name'] for block in smiles_C2]
# RDKit reads the labels from the extension appended after a single space.
SMILES = [block['xsmiles'] + ' ' + block['xsmiles_label'] for block in smiles_C2]
CODE = [block['code'] for block in smiles_C2]

# Parse every labelled SMILES and draw the grid, using the codes as legends.
mols = [Chem.MolFromSmiles(smi) for smi in SMILES]
Draw.MolsToGridImage(
    mols,
    molsPerRow=4,
    legends=CODE,
    subImgSize=(500, 250),
    useSVG=True,
)
Gera a imagem:
Blocos de construção C3:
smiles_C3 = [
{'name': 'benzene',
'code': 'BENZ',
'smiles': '[Q]C1=C([R1])C([Q])=C([R1])C([Q])=C1[R1]',
'xsmiles': '[*]C1=C([*])C([*])=C([*])C([*])=C1[*]',
'xsmiles_label': '|$Q;;;R1;;Q;;R1;;Q;;R1$|'},
{'name': "5'-phenyl-1,1':3',1''-terphenyl",
'code': 'TPBZ',
'smiles': '[Q]C(C([R4])=C1[R5])=C([R3])C([R2])=C1C2=C([R1])C(C3=C([R2])C([R3])=C([Q])C([R4])=C3[R5])=C([R1])C(C4=C([R2])C([R3])=C([Q])C([R4])=C4[R5])=C2[R1]',
'xsmiles': '[*]C(C([*])=C1[*])=C([*])C([*])=C1C2=C([*])C(C3=C([*])C([*])=C([*])C([*])=C3[*])=C([*])C(C4=C([*])C([*])=C([*])C([*])=C4[*])=C2[*]',
'xsmiles_label': '|$Q;;;R4;;R5;;R3;;R2;;;;R1;;;;R2;;R3;;Q;;R4;;R5;;R1;;;;R2;;R3;;Q;;R4;;R5;;R1$|'},
{'name': 'triphenylamine',
'code': 'TPAM',
'smiles': '[Q]C(C([R2])=C1)=C([R1])C=C1N(C2=CC([R1])=C([Q])C([R2])=C2)C3=CC([R1])=C([Q])C([R2])=C3',
'xsmiles': '[*]C(C([*])=C1)=C([*])C=C1N(C2=CC([*])=C([*])C([*])=C2)C3=CC([*])=C([*])C([*])=C3',
'xsmiles_label': '|$Q;;;R2;;;R1;;;;;;;R1;;Q;;R2;;;;;R1;;Q;;R2;$|'},
{'name': "10,15-dihydro-5H-diindolo[3,2-a:3',2'-c]carbazole",
'code': 'DICZ',
'smiles': '[Q]C1=C([R1])C2=C(C3=C(N2)C(C4=C([R3])C([R2])=C([Q])C([R1])=C4N5)=C5C6=C3NC7=C([R1])C([Q])=C([R2])C([R3])=C76)C([R3])=C1[R2]',
'xsmiles': '[*]C1=C([*])C2=C(C3=C(N2)C(C4=C([*])C([*])=C([*])C([*])=C4N5)=C5C6=C3NC7=C([*])C([*])=C([*])C([*])=C76)C([*])=C1[*]',
'xsmiles_label': '|$Q;;;R1;;;;;;;;;R3;;R2;;Q;;R1;;;;;;;;;R1;;Q;;R2;;R3;;;R3;;R2$|'},
{'name': 'triphenylene',
'code': 'TPNY',
'smiles': '[Q]C1=C([R1])C2=C(C([R2])=C1)C3=C(C([R2])=CC([Q])=C3[R1])C4=C2C([R2])=CC([Q])=C4[R1]',
'xsmiles': '[*]C1=C([*])C2=C(C([*])=C1)C3=C(C([*])=CC([*])=C3[*])C4=C2C([*])=CC([*])=C4[*]',
'xsmiles_label': '|$Q;;;R1;;;;R2;;;;;R2;;;Q;;R1;;;;R2;;;Q;;R1$|'},
{'name': '2,4,6-triphenoxy-1,3,5-triazine',
'code': 'TPOB',
'smiles': '[Q]C(C([R2])=C1)=C([R1])C=C1OC2=NC(OC3=CC([R2])=C([Q])C([R1])=C3)=NC(OC4=CC([R2])=C([Q])C([R1])=C4)=N2',
'xsmiles': '[*]C(C([*])=C1)=C([*])C=C1OC2=NC(OC3=CC([*])=C([*])C([*])=C3)=NC(OC4=CC([*])=C([*])C([*])=C4)=N2',
'xsmiles_label': '|$Q;;;R2;;;R1;;;;;;;;;;;R2;;Q;;R1;;;;;;;;R2;;Q;;R1;;$|'},
{'name': '1,3,5-triphenoxybenzene',
'code': 'TPTA',
'smiles': '[Q]C(C([R2])=C1)=C([R1])C=C1OC2=C([R3])C(OC3=CC([R2])=C([Q])C([R1])=C3)=C([R3])C(OC4=CC([R2])=C([Q])C([R1])=C4)=C2[R3]',
'xsmiles': '[*]C(C([*])=C1)=C([*])C=C1OC2=C([*])C(OC3=CC([*])=C([*])C([*])=C3)=C([*])C(OC4=CC([*])=C([*])C([*])=C4)=C2[*]',
'xsmiles_label': '|$Q;;;R2;;;R1;;;;;;R3;;;;;;R2;;Q;;R1;;;R3;;;;;;R2;;Q;;R1;;;R3$|'},
{'name': '2,4,6-triphenyl-1,3,5-triazine',
'code': 'TPTZ',
'smiles': '[Q]C(C([R3])=C1[R4])=C([R2])C([R1])=C1C2=NC(C3=C([R1])C([R2])=C([Q])C([R3])=C3[R4])=NC(C4=C([R1])C([R2])=C([Q])C([R3])=C4[R4])=N2',
'xsmiles': '[*]C(C([*])=C1[*])=C([*])C([*])=C1C2=NC(C3=C([*])C([*])=C([*])C([*])=C3[*])=NC(C4=C([*])C([*])=C([*])C([*])=C4[*])=N2',
'xsmiles_label': '|$Q;;;R3;;R4;;R2;;R1;;;;;;;R1;;R2;;Q;;R3;;R4;;;;;R1;;R2;;Q;;R3;;R4;$|'},
{'name': 'None',
'code': 'DBA1',
'smiles': '[Q]C1=C([R3])C(C#CC2=C3C([R3])=C([Q])C([R2])=C2[R1])=C(C#CC(C([R3])=C([Q])C([R2])=C4[R1])=C4C#C3)C([R1])=C1[R2]',
'xsmiles': '[*]C1=C([*])C(C#CC2=C3C([*])=C([*])C([*])=C2[*])=C(C#CC(C([*])=C([*])C([*])=C4[*])=C4C#C3)C([*])=C1[*]',
'xsmiles_label': '|$Q;;;R3;;;;;;;R3;;Q;;R2;;R1;;;;;;R3;;Q;;R2;;R1;;;;;R1;;R2$|'},
{'name': 'None',
'code': 'DBA2',
'smiles': '[Q]C1=C([R3])C(C#CC#CC2=C3C([R3])=C([Q])C([R2])=C2[R1])=C(C#CC#CC4=C([R3])C([Q])=C([R2])C([R1])=C4C#CC#C3)C([R1])=C1[R2]',
'xsmiles': '[*]C1=C([*])C(C#CC#CC2=C3C([*])=C([*])C([*])=C2[*])=C(C#CC#CC4=C([*])C([*])=C([*])C([*])=C4C#CC#C3)C([*])=C1[*]',
'xsmiles_label': '|$Q;;;R3;;;;;;;;;R3;;Q;;R2;;R1;;;;;;;;R3;;Q;;R2;;R1;;;;;;;R1;;R2$|'},
{'name': '13,14-dihydro-1,2,3(3,6)-triphenanthrenacyclopropaphane',
'code': 'STAR',
'smiles': '[Q]C1=C([R1])C2=C(C3=C1C([R2])=C([R3])C(C4=C([R5])C(C(C([R6])C(C5=C([R5])C(C(C([R6])=C6C([R3])=C7[R2])=C7C([Q])=C8[R1])=C8C([R5])=C5[R4])C([R3])=C9[R2])=C9C([Q])=C%10[R1])=C%10C([R5])=C4[R4])=C3[R6])C([R5])=C6C([R4])=C2[R5]',
'xsmiles': '[*]C1=C([*])C2=C(C3=C1C([*])=C([*])C(C4=C([*])C(C(C([*])C(C5=C([*])C(C(C([*])=C6C([*])=C7[*])=C7C([*])=C8[*])=C8C([*])=C5[*])C([*])=C9[*])=C9C([*])=C%10[*])=C%10C([*])=C4[*])=C3[*])C([*])=C6C([*])=C2[*]',
'xsmiles_label': '|$Q;;;R1;;;;;;R2;;R3;;;;R5;;;;R6;;;;R5;;;;R6;;;R3;;R2;;;Q;;R1;;;R5;;R4;;R3;;R2;;;Q;;R1;;;R5;;R4;;R6;;R5;;;R4;;R5$|'},
{'name': 'None',
'code': 'STAR1',
'smiles': '[Q]C1=C([R1])C2=C(C3=C1C([R2])=C([R3])C(C#CC4=C([R5])C(C(C([R6])C(C#C5)C([R3])=C6[R2])=C6C([Q])=C7[R1])=C7C([R5])=C4[R4])=C3[R6])C([R5])=C(C#CC(C([R3])=C8[R2])=C([R5])C9=C8C([Q])=C([R1])C%10=C9C([R5])=C5C([R4])=C%10[R5])C([R4])=C2[R5]',
'xsmiles': '[*]C1=C([*])C2=C(C3=C1C([*])=C([*])C(C#CC4=C([*])C(C(C([*])C(C#C5)C([*])=C6[*])=C6C([*])=C7[*])=C7C([*])=C4[*])=C3[*])C([*])=C(C#CC(C([*])=C8[*])=C([*])C9=C8C([*])=C([*])C%10=C9C([*])=C5C([*])=C%10[*])C([*])=C2[*]',
'xsmiles_label': '|$Q;;;R1;;;;;;R2;;R3;;;;;;R5;;;;R6;;;;;R3;;R2;;;Q;;R1;;;R5;;R4;;R6;;R5;;;;;;R3;;R2;;R5;;;;Q;;R1;;;;R5;;;R4;;R5;;R4;;R5$|'},
{'name': "benzo[1,2-b:3,4-b':5,6-b'']trithiophene",
'code': 'BTTP',
'smiles': '[Q]C1=C([R1])C2=C(S1)C(C([R1])=C([Q])S3)=C3C4=C2SC([Q])=C4[R1]',
'xsmiles': '[*]C1=C([*])C2=C(S1)C(C([*])=C([*])S3)=C3C4=C2SC([*])=C4[*]',
'xsmiles_label': '|$Q;;;R1;;;;;;R1;;Q;;;;;;;Q;;R1$|'},
{'name': "5''-([1,1'-biphenyl]-4-yl)-1,1':4',1'':3'',1''':4''',1''''-quinquephenyl",
'code': 'TBBZ',
'smiles': '[Q]C(C([R2])=C1[R4])=C([R1])C([R3])=C1C(C([R6])=C2[R8])=C([R5])C([R7])=C2C3=C([R9])C(C4=C([R7])C([R5])=C(C5=C([R3])C([R1])=C([Q])C([R2])=C5[R4])C([R6])=C4[R8])=C([R9])C(C6=C([R7])C([R5])=C(C7=C([R3])C([R1])=C([Q])C([R2])=C7[R4])C([R6])=C6[R8])=C3[R9]',
'xsmiles': '[*]C(C([*])=C1[*])=C([*])C([*])=C1C(C([*])=C2[*])=C([*])C([*])=C2C3=C([*])C(C4=C([*])C([*])=C(C5=C([*])C([*])=C([*])C([*])=C5[*])C([*])=C4[*])=C([*])C(C6=C([*])C([*])=C(C7=C([*])C([*])=C([*])C([*])=C7[*])C([*])=C6[*])=C3[*]',
'xsmiles_label': '|$Q;;;R2;;R4;;R1;;R3;;;;R6;;R8;;R5;;R7;;;;R9;;;;R7;;R5;;;;R3;;R1;;Q;;R2;;R4;;R6;;R8;;R9;;;;R7;;R5;;;;R3;;R1;;Q;;R2;;R4;;R6;;R8;;R9$|'},
{'name': "1,3,5-triazine",
'code': 'TRZN',
'smiles': '[Q]C1=NC([Q])=NC([Q])=N1',
'xsmiles': '[*]C1=NC([*])=NC([*])=N1',
'xsmiles_label': '|$Q;;;;Q;;;Q;$|'}
]
Grupos R:
smiles_R = [
{'name': 'hydrogen',
'code': 'H',
'smiles': '[R][H]',
'xsmiles': '[*][H]',
'xsmiles_label': '|$R;$|'},
{'name': 'hydroxyl',
'code': 'OH',
'smiles': '[R]O',
'xsmiles': '[*]O',
'xsmiles_label': '|$R;$|'},
{'name': 'methyl',
'code': 'CH3',
'smiles': '[R]C',
'xsmiles': '[*]C',
'xsmiles_label': '|$R;$|'},
{'name': 'tert-butyl',
'code': 'tBu',
'smiles': '[R]C(C)(C)C',
'xsmiles': '[*]C(C)(C)C',
'xsmiles_label': '|$R;;;;$|'},
{'name': 'methoxy',
'code': 'OMe',
'smiles': '[R]OC',
'xsmiles': '[*]OC',
'xsmiles_label': '|$R;;$|'},
{'name': 'ethoxy',
'code': 'OEt',
'smiles': '[R]OCC',
'xsmiles': '[*]OCC',
'xsmiles_label': '|$R;;;$|'},
{'name': 'amine',
'code': 'NH2',
'smiles': '[R]N',
'xsmiles': '[*]N',
'xsmiles_label': '|$R;$|'},
{'name': 'nitro',
'code': 'NO2',
'smiles': '[R][N+]([O-])=O',
'xsmiles': '[*][N+]([O-])=O',
'xsmiles_label': '|$R;;;$|'},
{'name': 'cyano',
'code': 'CN',
'smiles': '[R]C#N',
'xsmiles': '[*]C#N',
'xsmiles_label': '|$R;;$|'},
{'name': 'formyl',
'code': 'CHO',
'smiles': '[R]C([H])=O',
'xsmiles': '[*]C([H])=O',
'xsmiles_label': '|$R;;;$|'},
{'name': 'carboxy',
'code': 'COOH',
'smiles': '[R]C(O)=O',
'xsmiles': '[*]C(O)=O',
'xsmiles_label': '|$R;;;$|'},
{'name': 'acetoxy',
'code': 'OCOCH3',
'smiles': '[R]OC(C)=O',
'xsmiles': '[*]OC(C)=O',
'xsmiles_label': '|$R;;;;$|'},
{'name': 'thyol',
'code': 'SH',
'smiles': '[R]S',
'xsmiles': '[*]S',
'xsmiles_label': '|$R;$|'},
{'name': 'keto',
'code': 'O',
'smiles': '[R]=O',
'xsmiles': '[*]=O',
'xsmiles_label': '|$R;$|'},
{'name': 'nitroso',
'code': 'NO',
'smiles': '[R]N=O',
'xsmiles': '[*]N=O',
'xsmiles_label': '|$R;;$|'},
{'name': 'fluorine',
'code': 'F',
'smiles': '[R]F',
'xsmiles': '[*]F',
'xsmiles_label': '|$R$|'},
{'name': 'chlorine',
'code': 'Cl',
'smiles': '[R]Cl',
'xsmiles': '[*]Cl',
'xsmiles_label': '|$R;$|'},
{'name': 'bromine',
'code': 'Br',
'smiles': '[R]Br',
'xsmiles': '[*]Br',
'xsmiles_label': '|$R$|'},
{'name': 'iodine',
'code': 'I',
'smiles': '[R]I',
'xsmiles': '[*]I',
'xsmiles_label': '|$R$|'},
{'name': 'sulfinic acid',
'code': 'SO2H',
'smiles': '[R]S(O)=O',
'xsmiles': '[*]S(O)=O',
'xsmiles_label': '|$R;;;$|'},
{'name': 'sulfonic acid',
'code': 'SO3H',
'smiles': '[R]S(=O)(O)=O',
'xsmiles': '[*]S(=O)(O)=O',
'xsmiles_label': '|$R;;;;$|'},
{'name': 'thial',
'code': 'CHS',
'smiles': '[R]C([H])=S',
'xsmiles': '[*]C([H])=S',
'xsmiles_label': '|$R;;;$|'},
{'name': 'epoxide',
'code': 'EPO',
'smiles': '[R]C1CO1',
'xsmiles': '[*]C1CO1',
'xsmiles_label': '|$R;;;$|'},
{'name': 'methyl epoxide',
'code': 'MEPO',
'smiles': '[R]CC1CO1',
'xsmiles': '[*]CC1CO1',
'xsmiles_label': '|$R;;;;$|'},
{'name': 'ethyl epoxide',
'code': 'EEPO',
'smiles': '[R]CCC1CO1',
'xsmiles': '[*]CCC1CO1',
'xsmiles_label': '|$R;;;;;$|'},
{'name': 'ethoxymethyl epoxide',
'code': 'EMEPO',
'smiles': '[R]COCC1CO1',
'xsmiles': '[*]COCC1CO1',
'xsmiles_label': '|$R;;;;;;$|'},
{'name': 'oxiethyl epoxide',
'code': 'OEEPO',
'smiles': '[R]OCC1CO1',
'xsmiles': '[*]OCC1CO1',
'xsmiles_label': '|$R;;;;;$|'},
{'name': 'benzene',
'code': 'Ph',
'smiles': '[R]C1=CC=CC=C1',
'xsmiles': '[*]C1=CC=CC=C1',
'xsmiles_label': '|$R;;;;;;$|'}
]
Grupos Q:
smiles_Q = [
{'name': 'amine',
'code': 'NH2',
'smiles': '[Q]N',
'xsmiles': '[*]N',
'xsmiles_label': '|$Q;$|'},
{'name': 'aldehyde',
'code': 'CHO',
'smiles': '[Q]C([H])=O',
'xsmiles': '[*]C([H])=O',
'xsmiles_label': '|$Q;;;$|'},
{'name': 'boronic acid',
'code': 'BOH2',
'smiles': '[Q]B(O)O',
'xsmiles': '[*]B(O)O',
'xsmiles_label': '|$Q;;$|'},
{'name': 'acetohydrazide',
'code': 'CONHNH2',
'smiles': '[Q]C(NN)=O',
'xsmiles': '[*]C(NN)=O',
'xsmiles_label': '|$Q;;;;$|'},
{'name': 'methylhydrazine',
'code': 'NHNH2',
'smiles': '[Q]NN',
'xsmiles': '[*]NN',
'xsmiles_label': '|$Q;;$|'},
{'name': 'nitrile',
'code': 'CN',
'smiles': '[Q]C#N',
'xsmiles': '[*]C#N',
'xsmiles_label': '|$Q;;$|'},
{'name': 'bromine',
'code': 'Br',
'smiles': '[Q]Br',
'xsmiles': '[*]Br',
'xsmiles_label': '|$Q;$|'},
{'name': 'chlorine',
'code': 'Cl',
'smiles': '[Q]Cl',
'xsmiles': '[*]Cl',
'xsmiles_label': '|$Q;$|'},
{'name': 'oxigem',
'code': 'O',
'smiles': '[Q]O',
'xsmiles': '[*]O',
'xsmiles_label': '|$Q;$|'},
{'name': 'dihydroxy',
'code': 'OH2',
'smiles': '[Q]O[B]O1',
'xsmiles': '[*]O[B]O1',
'xsmiles_label': '|$Q;;B;$|'},
]
Motivação
Atualmente os blocos de construção e grupos químicos são armazenados no formato xyz. É possível adicionar também as informações no formato SMILES. Para o benzeno tripodal: Organic core: C1=CC=CC=C1; Building Block: [Q]C1=C([Q])C([R1])=C([Q])C([Q])=C1[R1]
O que deve ser feito?
Alterar a forma como as informações são salvas internamente para um formato json que contenha as posições atômicas, mas também outras informações sobre os blocos de construção. Considerar utilizar um formato Chemical JSON. Os códigos para ler o BuildingBlock e criar o Retículo devem utilizar esses arquivos, além de herdar a informação do código SMILES.
Etapas para a realização desta tarefa:
O que será obtido ao final dessa tarefa?
Possui prazo limite de conclusão?
Mais alguma outra coisa?