Global-Chem / global-chem

A Knowledge Graph of Common Chemical Names to their Molecular Definition
https://globalchemistry.org/
Mozilla Public License 2.0
153 stars 20 forks source link

SS-184: Predict New Rings to Pass FDA Trials Phase III #315

Open Sulstice opened 2 months ago

Sulstice commented 2 months ago

Hi,

In this study, we take the rings-in-drugs list and apply generative AI to it in order to predict the ring systems most likely to appear in future drugs.

Read these two papers:

1.) https://arxiv.org/abs/1704.07555 2.) https://pubs.acs.org/doi/10.1021/jm4017625

Ask questions and share some thoughts on how you can apply these two together.

Sulstice commented 1 month ago

Read this:

https://plotly.com/python/distplot/ https://sharifsuliman.medium.com/understanding-drug-likeness-filters-with-rdkit-and-exploring-the-withdrawn-database-ebd6b8b2921e https://www.rdkit.org/docs/index.html

Code to Process Rings in Drugs:

from rdkit import Chem
from rdkit.Chem import Descriptors

import progressbar

if __name__ == '__main__':

    # SMILES strings to screen. Left empty here as a placeholder: fill it with
    # the molecules of interest before running.
    smiles_list = []

    # Chem.MolFromSmiles returns None for a SMILES it cannot parse. Such
    # entries stay in the list (so they count towards the reported database
    # length) and are skipped in the loop below.
    molecules = [Chem.MolFromSmiles(smiles) for smiles in smiles_list]

    # Number of molecules passing each individual filter, plus the number
    # passing every filter at once.
    results = {
        "Lipinski Rule of 5": 0,
        "Ghose Filter": 0,
        "Veber Filter": 0,
        "Rule of 3 Filter": 0,
        "REOS Filter": 0,
        "Drug-like Filter": 0,
        "Passes All Filters": 0,
    }

    print("Molecule Database Length: " + str(len(molecules)))

    for molecule in progressbar.progressbar(molecules):

        if molecule is None:
            continue

        # Descriptors shared by the filters below. Everything is taken from
        # the already-imported Descriptors module (or from the molecule
        # itself) so the script does not depend on rdkit submodules that were
        # never explicitly imported.
        molecular_weight = Descriptors.ExactMolWt(molecule)
        logp = Descriptors.MolLogP(molecule)
        h_bond_donor = Descriptors.NumHDonors(molecule)
        h_bond_acceptors = Descriptors.NumHAcceptors(molecule)
        rotatable_bonds = Descriptors.NumRotatableBonds(molecule)
        number_of_atoms = molecule.GetNumAtoms()
        molar_refractivity = Descriptors.MolMR(molecule)
        topological_polar_surface_area = Descriptors.TPSA(molecule)
        formal_charge = Chem.rdmolops.GetFormalCharge(molecule)
        heavy_atoms = molecule.GetNumHeavyAtoms()
        num_of_rings = Descriptors.RingCount(molecule)

        # Lipinski Rule of 5: MW <= 500, logP <= 5, at most 5 H-bond donors
        # and at most 10 H-bond acceptors. The rule has no rotatable-bond
        # criterion, and the acceptor limit is 10 rather than 5.
        lipinski = (
            molecular_weight <= 500
            and logp <= 5
            and h_bond_donor <= 5
            and h_bond_acceptors <= 10
        )

        # Ghose Filter
        # NOTE(review): number_of_atoms comes from GetNumAtoms() with its
        # default arguments; for molecules parsed from SMILES this presumably
        # leaves out implicit hydrogens. Confirm whether the 20-70 atom window
        # is meant to include hydrogens.
        ghose_filter = (
            160 <= molecular_weight <= 480
            and 0.4 <= logp <= 5.6
            and 20 <= number_of_atoms <= 70
            and 40 <= molar_refractivity <= 130
        )

        # Veber Filter
        veber_filter = (
            rotatable_bonds <= 10
            and topological_polar_surface_area <= 140
        )

        # Rule of 3
        rule_of_3 = (
            molecular_weight <= 300
            and logp <= 3
            and h_bond_donor <= 3
            and h_bond_acceptors <= 3
            and rotatable_bonds <= 3
        )

        # REOS Filter
        reos_filter = (
            200 <= molecular_weight <= 500
            and -5 <= logp <= 5
            and 0 <= h_bond_donor <= 5
            and 0 <= h_bond_acceptors <= 10
            and -2 <= formal_charge <= 2
            and 0 <= rotatable_bonds <= 8
            and 15 <= heavy_atoms <= 50
        )

        # Drug Like Filter
        drug_like_filter = (
            molecular_weight < 400
            and num_of_rings > 0
            and rotatable_bonds < 5
            and h_bond_donor <= 5
            and h_bond_acceptors <= 10
            and logp < 5
        )

        # Tally every filter this molecule passed.
        passed = {
            "Lipinski Rule of 5": lipinski,
            "Ghose Filter": ghose_filter,
            "Veber Filter": veber_filter,
            "Rule of 3 Filter": rule_of_3,
            "REOS Filter": reos_filter,
            "Drug-like Filter": drug_like_filter,
        }
        for filter_name, passed_filter in passed.items():
            if passed_filter:
                results[filter_name] += 1

        if all(passed.values()):
            results["Passes All Filters"] += 1

    print(results)
Lyq322 commented 1 month ago

I looked over the smiles list in the rings in drugs file and I corrected some errors.

smiles_list = {
    "benzene": "C1=CC=CC=C1",
    "pyridine": "C1=CC=CN=C1",
    "piperidine": "N1CCCCC1",
    "piperazine": "N1CCNCC1",
    "cyclohexane": "C1CCCCC1",
    "oxane": "O1CCCCC1",
    "imidazole": "C1=NC=CN1",
    "pyrrolidine": "C1CCNC1",
    "(R)-5-thia-1-azabicyclo[4.2.0]oct-2-en-8-one": "O=C1C[C@@H]2N1C=CCS2",
    "cyclopropane": "C1CC1",
    "tetrahydrofuran" : "C1CCOC1",
    "thiazole": "C1=NC=CS1",
    "indole": "C12=CC=CC=C1C=CN2",
    "diazine": "C1=NC=CC=N1",
    "(R)-4-thia-1-azabicyclo[3.2.0]heptan-7-one": "O=C1N2CCS[C@@H]2C1",
    "6,7,8,9,10,11,12,13,14,15,16,17-dodecahydro-3H-cyclopenta[a]phenanthren-3-one": "O=C1C=C2C(C3[C@@H](CC2)[C@H](CCC4)C4CC3)C=C1",
    "tetrazole": "N1=NN=CN1",
    "cyclopentane": "C1CCCC1",
    "thiophenyl" : "C1=CC=CS1",
    "naphthalene": "C12=CC=CC=C1C=CC=C2",
    "1H-benzo[d]imidazole" : "C12=CC=CC=C1N=CN2",
    "quinoline": "C12=CC=CC=C1C=CC=N2",
    "1H-purine": "C12=CNC=NC1=NC=N2",
    "1,2,6,7,8,9,10,11,12,13,14,15,16,17-tetradecahydro-3H-cyclopenta[a]phenanthren-3-one": "O=C1C=C2C([C@@H]3[C@@H](CC2)[C@H](CCC4)C4CC3)CC1",
    "furan" : "C1=CC=CO1",
    "1H-1,2,4-Triazole": "N1=CN=CN1",
    "10H-Phenothiazine" : "C12=CC=CC=C1NC3=C(C=CC=C3)S2",
    "quinazoline" : "C12=CC=CC=C1C=NC=N2",
    "morpholine": "C1CNCCO1",
    "pyrimidin-2(1H)-one" : "O=C1N=CC=CN1",
    "quinolin-4(1H)-one": "O=C1C2=C(C=CC=C2)NC=C1",
    "(9S,14R)-6,7,8,9,10,11,12,13,14,15,16,17-dodecahydro-3H-cyclopenta[a]phenanthren-3-one": "O=C1C=C2C([C@@H]3[C@@H](CC2)[C@H](CCC4)C4CC3)C=C1",
    "isoxazole": "C1=CC=NO1",
    "imidazoline": "C1=NCCN1",
    "1,4-dihydropyridine": "C1=CCC=CN1",
    "pyrimidine-2,4(1H,3H)-dione": "O=C(N1)NC=CC1=O",
    "3,4-dihydro-2H-benzo[e][1,4]diazepin-2-one": "O=C1N=C2C=CC=CC2=CNC1",
    "cyclohexene": "C1=CCCCC1",
    "pyrrolidin-2-one": "O=C1NCCC1",
    "imidazolidine-2,4-dione": "O=C(CN1)NC1=O",
    "1,2,3,4-tetrahydroisoquinoline": "C1(C=CC=C2)=C2CCNC1",
    "3,4-dihydro-2H-benzo[e][1,2,4]thiadiazine 1,1-dioxide": "O=S1(NCNC2=C1C=CC=C2)=O",
    "7,8,9,11,12,13,14,15,16,17-decahydro-6H-cyclopenta[a]phenanthrene": "C1=CC2=C([C@@H]3[C@@H](CC2)[C@H](CCC4)C4CC3)C=C1",
    "1H-pyrazole": "N1=CC=CN1",
    "quinuclidine": "C1(CC2)CCN2CC1",
    "epoxide": "C1CO1",
    "pyrazine": "C1=CN=CC=N1",
    "oxazolidinone": "O=C1OCCN1",
    "tetrahydronaphthalene":"C1(C=CC=C2)=C2CCCC1",
    "adamantane": "C1(CC(C2)C3)CC2CC3C1",
    "1,8-naphthyridin-4(1H)-one":"O=C(C=CN1)C2=C1N=CC=C2",
    "3,7-dihydro-1H-purine-2,6-dione": "O=C(C(NC=N1)=C1N2)NC2=O",
    "hexadecahydro-1H-cyclopenta[a]phenanthrene": "C1C[C@H]2C([C@@H]3[C@@H](CC2)[C@H](CCC4)C4CC3)CC1",
    "7,8,9,10-tetrahydrotetracene-5,12-dione": "O=C(C(C=C(CCCC1)C1=C2)=C2C3=O)C4=C3C=CC=C4",
    "cyclobutane": "C1CCC1",
    "1,2-dihydro-3H-1,2,4-triazol-3-one": "O=C1NNC=N1",
    "1,3,4-thiadiazole": "C1=NN=CS1",
    "azepane": "C1NCCCCC1",
    "8-azabicyclo[3.2.1]octane": "C12CCCC(CC2)N1",
    "piperidine-2,6-dione":"O=C(N1)CCCC1=O",
    "2,3-dihydro-1H-indene":"C1=CC=CC2=C1CCC2",
    "benzo[d]isoxazole":"C12=CC=CC=C1C=NO2",
    "1,9-dihydro-6H-purin-6-one":"O=C1C2=C(NC=N2)N=CN1",
    "9H-fluorene":"C12=CC=CC=C1C3=C(C=CC=C3)C2",
    "10,11-dihydro-5H-dibenzo[b,f]azepine":"C12=CC=CC=C1CCC3=C(C=CC=C3)N2",
    "(6aR,10aR)-4,6,6a,7,8,9,10,10a-octahydroindolo[4,3-fg]quinoline":"C12=CC=CC3=C1C(C[C@@H]4[C@@H]2CCCN4)=CN3",
    "placeholder1": "O=C1C=C2[C@@H]([C@@H]3[C@@H](CC2)[C@H](CCC4)C4CC3)CC1",
    "placeholder2": "O=C1C=C2C(C3[C@@H](CC2)[C@H](C[C@@H]4C5(OCO4))C5CC3)C=C1",
    "1H-pyrrole": "C1=CC=CN1",
    "1,3-dioxolane":"O1CCOC1",
    "(1R,5S)-3-azabicyclo[3.1.0]hexane": "[C@H]1(C2)[C@@H]2CNC1",
    "cyclopentanone": "O=C1CCCC1",
    "pyrrolidine-2,5-dione":"O=C(N1)CCC1=O",
    "pyrazolidine":"O=C(NN1)CC1=O",
    "(R)-1-azabicyclo[3.2.0]hept-2-en-7-one":"O=C1N2C=CC[C@@H]2C1",
    "placeholder3": "C1CC2OCCC2O1",
    "thiazolidine-2,4-dione":"O=C(CS1)NC1=O",
    "benzofuran":"C12=CC=CC=C1C=CO2",
    "1H-indazole":"C12=CC=CC=C1C=NN2",
    "placeholder4": "O=C1CC(=O)NC(=O)N1",
    "indolin-2-one":"O=C1NC2=CC=CC=C2C1",
    "benzo[b]thiophene":"C12=CC=CC=C1C=CS2",
    "(R)-1,2,3,7,8,8a-hexahydronaphthalene":"C12=CCCC[C@@H]1CCC=C2",
    "4,5,6,7-tetrahydrothieno[3,2-c]pyridine":"C1(C=CS2)=C2CCNC1",
    "4H-chromen-4-one":"O=C(C=CO1)C2=C1C=CC=C2",
    "3,4-dihydroquino-2(1H)-one":"O=C(CC1)NC2=C1C=CC=C2",
    "napthalene-1,4-dione": "O=C(C=CC1=O)C2=C1C=CC=C2",
    "2H-benzo[e][1,2,4]thiadiazine 1,1-dioxide":"O=S(C1=C2C=CC=C1)(NC=N2)=O",
    "4H-benzo[f][1,2,4]triazolo[4,3-a][1,4]diazepine":"C1(N2C(CN=C3)=NN=C2)=C3C=CC=C1",
    "9H-thioxanthene":"C12=CC=CC=C1CC3=C(C=CC=C3)S2",
    "placeholder5": "C1C=C2[C@@H]([C@@H]3[C@@H](CC2)[C@H](CCC4)C4CC3)CC1",
    "(5aR,8aR)-5,8,8a,9-tetrahydrofuro[3',4':6,7]naphtho[2,3-d][1,3]dioxol-6(5aH)-one":"O=C(OC1)[C@H]2[C@H]1CC3=CC4=C(C=C3C2)OCO4",
    "placeholder6": "O=C1C=C2C(=C3[C@@H](CC2)[C@H](CCC4)C4CC3)CC1",
    "(3a1S,5aS,10bS)-3a,3a1,4,5,5a,6,11,12-octahydro-1H-indolizino[8,1-cd]carbazole":"C12=CC=CC=C1[C@@]34[C@H](CCC5[C@@H]3N(CC=C5)CC4)N2",
    "(4aR,5aR)-4a,5a,6,12a-tetrahydrotetracene-1,11(4H,5H)-dione":"O=C1C2=CC3[C@H](CC=CC3=O)C[C@@H]2CC4=CC=CC=C41",
    "placeholder7": "C1CNCCC1=C2C3=CC=CC=C3CCC4=CC=CN=C24",
    "placeholder8": "O=C1C=C2C([C@@H]3[C@@H](CC2)[C@H](C[C@@H]4C5(OCO4))C5CC3)C=C1",
    "1H-1,2,3-triazole": "N1=NC=CN1",
    "azetidin-2-one":"O=C1NCC1",
    "oxetan-2-one":"O=C1OCC1"
}

Corrected the SMILES of

I also saw that some molecules were missing so I added those (# 67, 68, 76, 80, 91, 93, 96, 97), but I don't know where to find the IUPAC name for them other than learning the rules myself... I've looked online and I only found paid ones like chemdraw. I did find a website that can turn IUPAC -> structure image, so I can check all the molecule names, but I don't have a way to get a correct name for it.

I also ran the drug likeness filters file and I got this:

{'Lipinski Rule of 5': 99, 'Ghose Filter': 5, 'Veber Filter': 100, 'Rule of 3 Filter': 82, 'REOS Filter': 15, 'Drug-like Filter': 99, 'Passes All Filters': 2}

I think most didn't pass Ghose or REOS filters just because the molecules were too small.

I also plotted the distribution of the properties using plotly:

Screenshot 2024-07-23 at 12 15 05 AM Screenshot 2024-07-23 at 12 15 13 AM Screenshot 2024-07-23 at 12 15 21 AM Screenshot 2024-07-23 at 12 15 28 AM Screenshot 2024-07-23 at 12 15 37 AM Screenshot 2024-07-23 at 12 15 44 AM Screenshot 2024-07-23 at 12 15 51 AM Screenshot 2024-07-23 at 12 15 59 AM Screenshot 2024-07-23 at 12 16 06 AM

Formal charges and rotatable bonds are 0 for all molecules.

Sulstice commented 1 month ago

@Lyq322

I looked at the cost and it's around $1,400. I would rather use that money elsewhere, so let's try an AI approach. For SMILES where the name is ambiguous, can we use a Recurrent Neural Network trained on SMILES/IUPAC pairs? https://github.com/Kohulan/Smiles-TO-iUpac-Translator

Install:


pip install STOUT-pypi

Code to Run


from STOUT import translate_forward, translate_reverse

# Predict an IUPAC name for every entry of smiles_list with STOUT and print it
# next to the name currently stored for that entry, so the two can be compared.
for name, smiles in smiles_list.items():
    # Translate the SMILES of this entry rather than a fixed example molecule.
    iupac_name = translate_forward(smiles)
    print('Name: %s, SMILES: %s, Predicted IUPAC: %s' % (name, smiles, iupac_name))

Try that out and let me know.

Ghose was really interesting because it included molar refractivity. Why do you think Ghose included this as a filter for drug-like molecules?

For plotly, give your plots a white background and show the curves:


# Styling for an existing plotly figure bound to the name `figure`.

# Legend: constant-size markers first, then position, font and border.
figure.update_layout(legend={'itemsizing': 'constant'})
figure.update_layout(
    legend={
        'orientation': "v",
        'x': 1,
        'xanchor': "right",
        'y': 0.96,
        'yanchor': "bottom",
        'font': {'family': "Arial", 'size': 10},
        'bordercolor': "LightSteelBlue",
        'borderwidth': 2,
    },
    legend_title={'font': {'family': "Arial", 'size': 10}},
)

# X axis: black outside ticks with Arial labels and a placeholder title.
figure.update_xaxes(
    title_text='X Axis',
    title_font={'size': 13, 'family': 'Arial'},
    ticks="outside",
    ticklen=15,
    tickwidth=1,
    tickcolor='black',
    tickfont={'family': 'Arial', 'color': 'black', 'size': 13},
)

# Y axis: same tick styling as the x axis.
figure.update_yaxes(
    title_text='Y-Axis',
    title_font={'size': 13, 'family': 'Arial'},
    ticks="outside",
    ticklen=15,
    tickwidth=1,
    tickcolor='black',
    tickfont={'family': 'Arial', 'color': 'black', 'size': 13},
)

# Overall canvas: 1000 x 1000, with a plot background colour whose alpha
# channel is 0.
# NOTE(review): 'i' is passed through as the x-axis tick format string;
# confirm it is a format plotly accepts.
figure.update_layout(
    width=1000,
    height=1000,
    plot_bgcolor='rgba(0,0,0,0)',
    bargap=0.2,  # gap between bars of adjacent location coordinates
    xaxis_tickformat='i',
)
Lyq322 commented 1 month ago

When I try installing STOUT-pypi I get this error:

ERROR: Cannot install stout-pypi==2.0.0, stout-pypi==2.0.1, stout-pypi==2.0.2, stout-pypi==2.0.3, stout-pypi==2.0.4 and stout-pypi==2.0.5 because these package versions have conflicting dependencies.

The conflict is caused by:
    stout-pypi 2.0.5 depends on tensorflow==2.10.1
    stout-pypi 2.0.4 depends on tensorflow==2.10.1
    stout-pypi 2.0.3 depends on tensorflow==2.10.1
    stout-pypi 2.0.2 depends on tensorflow==2.10.0
    stout-pypi 2.0.1 depends on rdkit-pypi
    stout-pypi 2.0.0 depends on rdkit-pypi

I tried installing tensorflow 2.10.1 but I get this:

ERROR: Could not find a version that satisfies the requirement tensorflow==2.10.1 (from versions: 2.16.0rc0, 2.16.1, 2.16.2, 2.17.0rc0, 2.17.0rc1, 2.17.0)
ERROR: No matching distribution found for tensorflow==2.10.1

I thought it was because my python version (3.12) is too new, so I tried doing it using 3.8 but it still doesn't allow me to install 2.10.1

Also one of the updated charts:

Screenshot 2024-07-25 at 9 16 31 PM

For Ghose, I found online that molar refractivity indicates polarizability. I'm not sure why it's included, the only idea I have is higher molar refractivity means stronger LDFs between the drug and the target...

Lyq322 commented 1 month ago

conda worked! I finished checking the IUPAC names:

{
    "benzene": "C1=CC=CC=C1",
    "pyridine": "C1=CC=CN=C1",
    "piperidine": "N1CCCCC1",
    "piperazine": "N1CCNCC1",
    "cyclohexane": "C1CCCCC1",
    "oxane": "O1CCCCC1",
    "imidazole": "C1=NC=CN1",
    "pyrrolidine": "C1CCNC1",
    "(R)-5-thia-1-azabicyclo[4.2.0]oct-2-en-8-one": "O=C1C[C@@H]2N1C=CCS2",
    "cyclopropane": "C1CC1",
    "tetrahydrofuran" : "C1CCOC1",
    "thiazole": "C1=NC=CS1",
    "indole": "C12=CC=CC=C1C=CN2",
    "pyrimidine": "C1=NC=CC=N1",
    "(R)-4-thia-1-azabicyclo[3.2.0]heptan-7-one": "O=C1N2CCS[C@@H]2C1",
    "(8S,14R)-6,7,8,9,10,11,12,13,14,15,16,17-dodecahydro-3H-cyclopenta[a]phenanthren-3-one": "O=C1C=C2C(C3[C@@H](CC2)[C@H](CCC4)C4CC3)C=C1",
    "tetrazole": "N1=NN=CN1",
    "cyclopentane": "C1CCCC1",
    "thiophene" : "C1=CC=CS1",
    "naphthalene": "C12=CC=CC=C1C=CC=C2",
    "benzimidazole" : "C12=CC=CC=C1N=CN2",
    "quinoline": "C12=CC=CC=C1C=CC=N2",
    "1H-purine": "C12=CNC=NC1=NC=N2",
    "(8S,9S,14R)-1,2,6,7,8,9,10,11,12,13,14,15,16,17-tetradecahydro-3H-cyclopenta[a]phenanthren-3-one": "O=C1C=C2C([C@@H]3[C@@H](CC2)[C@H](CCC4)C4CC3)CC1",
    "furan" : "C1=CC=CO1",
    "1H-1,2,4-triazole": "N1=CN=CN1",
    "10H-phenothiazine" : "C12=CC=CC=C1NC3=C(C=CC=C3)S2",
    "quinazoline" : "C12=CC=CC=C1C=NC=N2",
    "morpholine": "C1CNCCO1",
    "pyrimidin-2(1H)-one" : "O=C1N=CC=CN1",
    "quinolin-4(1H)-one": "O=C1C2=C(C=CC=C2)NC=C1",
    "(8S,9S,14R)-6,7,8,9,10,11,12,13,14,15,16,17-dodecahydro-3H-cyclopenta[a]phenanthren-3-one": "O=C1C=C2C([C@@H]3[C@@H](CC2)[C@H](CCC4)C4CC3)C=C1",
    "isoxazole": "C1=CC=NO1",
    "2-imidazoline": "C1=NCCN1",
    "1,4-dihydropyridine": "C1=CCC=CN1",
    "pyrimidine-2,4(1H,3H)-dione": "O=C(N1)NC=CC1=O",
    "1,3-dihydro-2H-1,4-benzodiazepin-2-one": "O=C1N=C2C=CC=CC2=CNC1",
    "cyclohexene": "C1=CCCCC1",
    "pyrrolidin-2-one": "O=C1NCCC1",
    "imidazolidine-2,4-dione": "O=C(CN1)NC1=O",
    "1,2,3,4-tetrahydroisoquinoline": "C1(C=CC=C2)=C2CCNC1",
    "3,4-dihydro-2H-benzo[e][1,2,4]thiadiazine 1,1-dioxide": "O=S1(NCNC2=C1C=CC=C2)=O",
    "(8S,9S,14R)-7,8,9,11,12,13,14,15,16,17-decahydro-6H-cyclopenta[a]phenanthrene": "C1=CC2=C([C@@H]3[C@@H](CC2)[C@H](CCC4)C4CC3)C=C1",
    "1H-pyrazole": "N1=CC=CN1",
    "quinuclidine": "C1(CC2)CCN2CC1",
    "epoxide": "C1CO1",
    "pyrazine": "C1=CN=CC=N1",
    "oxazolidinone": "O=C1OCCN1",
    "tetralin":"C1(C=CC=C2)=C2CCCC1",
    "adamantane": "C1(CC(C2)C3)CC2CC3C1",
    "1,8-naphthyridin-4(1H)-one":"O=C(C=CN1)C2=C1N=CC=C2",
    "3,7-dihydro-1H-purine-2,6-dione": "O=C(C(NC=N1)=C1N2)NC2=O",
    "(5R,8S,9R,14R)-2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17-hexadecahydro-1H-cyclopenta[a]phenanthrene": "C1C[C@H]2C([C@@H]3[C@@H](CC2)[C@H](CCC4)C4CC3)CC1",
    "7,8,9,10-tetrahydrotetracene-5,12-dione": "O=C(C(C=C(CCCC1)C1=C2)=C2C3=O)C4=C3C=CC=C4",
    "cyclobutane": "C1CCC1",
    "1,2-dihydro-3H-1,2,4-triazol-3-one": "O=C1NNC=N1",
    "1,3,4-thiadiazole": "C1=NN=CS1",
    "azepane": "C1NCCCCC1",
    "8-azabicyclo[3.2.1]octane": "C12CCCC(CC2)N1",
    "piperidine-2,6-dione":"O=C(N1)CCCC1=O",
    "2,3-dihydro-1H-indene":"C1=CC=CC2=C1CCC2",
    "benzo[d]isoxazole":"C12=CC=CC=C1C=NO2",
    "1,9-dihydro-6H-purin-6-one":"O=C1C2=C(NC=N2)N=CN1",
    "9H-fluorene":"C12=CC=CC=C1C3=C(C=CC=C3)C2",
    "10,11-dihydro-5H-dibenzo[b,f]azepine":"C12=CC=CC=C1CCC3=C(C=CC=C3)N2",
    "(6aR,10aR)-4,6,6a,7,8,9,10,10a-octahydroindolo[4,3-fg]quinoline":"C12=CC=CC3=C1C(C[C@@H]4[C@@H]2CCCN4)=CN3",
    "(8S,9S,10R,14R)-1,2,6,7,8,9,10,11,12,13,14,15,16,17-tetradecahydrocyclopenta[a]phenanthren-3-one": "O=C1C=C2[C@@H]([C@@H]3[C@@H](CC2)[C@H](CCC4)C4CC3)CC1",
    "(1R,2S,4R)-5,7-dioxapentacyclo[10.8.0.02,9.04,8.013,18]icosa-14,17-dien-16-one": "O=C1C=C2C(C3[C@@H](CC2)[C@H](C[C@@H]4C5(OCO4))C5CC3)C=C1",
    "1H-pyrrole": "C1=CC=CN1",
    "1,3-dioxolane":"O1CCOC1",
    "(1R,5S)-3-azabicyclo[3.1.0]hexane": "[C@H]1(C2)[C@@H]2CNC1",
    "cyclopentanone": "O=C1CCCC1",
    "pyrrolidine-2,5-dione":"O=C(N1)CCC1=O",
    "pyrazolidine-3,5-dione":"O=C(NN1)CC1=O",
    "(R)-1-azabicyclo[3.2.0]hept-2-en-7-one":"O=C1N2C=CC[C@@H]2C1",
    "2,3,3a,5,6,6a-hexahydrofuro[3,2-b]furan": "C1CC2OCCC2O1",
    "thiazolidine-2,4-dione":"O=C(CS1)NC1=O",
    "benzofuran":"C12=CC=CC=C1C=CO2",
    "1H-indazole":"C12=CC=CC=C1C=NN2",
    "1,3-diazinane-2,4,6-trione": "O=C1CC(=O)NC(=O)N1",
    "oxindole":"O=C1NC2=CC=CC=C2C1",
    "benzo[b]thiophene":"C12=CC=CC=C1C=CS2",
    "(R)-1,2,3,7,8,8a-hexahydronaphthalene":"C12=CCCC[C@@H]1CCC=C2",
    "4,5,6,7-tetrahydrothieno[3,2-c]pyridine":"C1(C=CS2)=C2CCNC1",
    "4H-chromen-4-one":"O=C(C=CO1)C2=C1C=CC=C2",
    "3,4-dihydroquino-2(1H)-one":"O=C(CC1)NC2=C1C=CC=C2",
    "naphthalene-1,4-dione": "O=C(C=CC1=O)C2=C1C=CC=C2",
    "2H-benzo[e][1,2,4]thiadiazine 1,1-dioxide":"O=S(C1=C2C=CC=C1)(NC=N2)=O",
    "4H-benzo[f][1,2,4]triazolo[4,3-a][1,4]diazepine":"C1(N2C(CN=C3)=NN=C2)=C3C=CC=C1",
    "9H-thioxanthene":"C12=CC=CC=C1CC3=C(C=CC=C3)S2",
    "(8S,9S,10R,14R)-2,3,6,7,8,9,10,11,12,13,14,15,16,17-tetradecahydro-1H-cyclopenta[a]phenanthrene": "C1C=C2[C@@H]([C@@H]3[C@@H](CC2)[C@H](CCC4)C4CC3)CC1",
    "(5aR,8aR)-5a,8,8a,9-tetrahydro-5H-[2]benzofuro[5,6-f][1,3]benzodioxol-6-one":"O=C(OC1)[C@H]2[C@H]1CC3=CC4=C(C=C3C2)OCO4",
    "(8S,14R)-1,2,6,7,8,11,12,13,14,15,16,17-dodecahydrocyclopenta[a]phenanthren-3-one": "O=C1C=C2C(=C3[C@@H](CC2)[C@H](CCC4)C4CC3)CC1",
    "(1R,9S,19S)-8,16-diazapentacyclo[10.6.1.01,9.02,7.016,19]nonadeca-2,4,6,13-tetraene":"C12=CC=CC=C1[C@@]34[C@H](CCC5[C@@H]3N(CC=C5)CC4)N2",
    "(4aR,5aR)-4,4a,5,5a,6,12a-hexahydrotetracene-1,11-dione":"O=C1C2=CC3[C@H](CC=CC3=O)C[C@@H]2CC4=CC=CC=C41",
    "2-piperidin-4-ylidene-4-azatricyclo[9.4.0.03,8]pentadeca-1(15),3(8),4,6,11,13-hexaene": "C1CNCCC1=C2C3=CC=CC=C3CCC4=CC=CN=C24",
    "(1R,2S,4R,12S)-5,7-dioxapentacyclo[10.8.0.02,9.04,8.013,18]icosa-14,17-dien-16-one": "O=C1C=C2C([C@@H]3[C@@H](CC2)[C@H](C[C@@H]4C5(OCO4))C5CC3)C=C1",
    "1H-1,2,3-triazole": "N1=NC=CN1",
    "azetidin-2-one":"O=C1NCC1",
    "oxetan-2-one":"O=C1OCC1"
}

I took a look at the rings with logP of ~4; there were 8 with 3.5 < logP < 4.5. Out of the 8, 5 were steroid rings and the other 3 also look pretty similar:

image

Some of the steroid rings are exactly the same except for an additional chiral center, so I think that contributed to the abnormally large percentage of rings having log P ~4.

Sulstice commented 1 month ago

Read this paper:

https://pubs.acs.org/doi/10.1021/c160017a018

Conda install: https://anaconda.org/conda-forge/aria2 Browse the Zinc Database: https://zinc.docking.org/tranches/home/