devalab / DeepPocket

Ligand Binding Site detection using Deep Learning
MIT License
89 stars 26 forks source link

Could not open 'gninamap' #4

Closed mangalp closed 2 years ago

mangalp commented 2 years ago

Hi, thank you for making the code public. However, I am getting an error in the data preprocessing stage: when I try to convert a .pdb file to gninatypes, I get the error "Could not open gninamap". I separated the data preprocessing stage (using your code without any modifications) into a self-contained example that reproduces the error.

from Bio.PDB import PDBParser, PDBIO, Select
import Bio
import os
import sys
import molgrid
import struct
import numpy as np
import os
import sys

class NonHetSelect(Select):
    """Residue filter that accepts only standard amino-acid residues."""

    def accept_residue(self, residue):
        """Return 1 if ``residue`` is a standard amino acid, otherwise 0."""
        if Bio.PDB.Polypeptide.is_aa(residue, standard=True):
            return 1
        return 0

def clean_pdb(input_file,output_file):
    """Write a filtered copy of the structure in ``input_file`` to ``output_file``.

    The structure is parsed under the id "protein" and saved through a
    ``NonHetSelect`` filter, so only the residues that filter accepts are
    written out.
    """
    structure = PDBParser().get_structure("protein", input_file)
    writer = PDBIO()
    writer.set_structure(structure)
    residue_filter = NonHetSelect()
    writer.save(output_file, residue_filter)

def gninatype(file, atom_map_file='gninamap'):
    """Create the binary ``.gninatypes`` file used as model input.

    A one-line scratch ``.types`` file naming ``file`` is written next to it
    and loaded through a molgrid ``ExampleProvider``. Every atom of the first
    coordinate set of the first example is then serialised as
    ``struct.pack('fffi', x, y, z, type_index)``.

    Args:
        file: Path of the input structure (normally ending in ``.pdb``).
        atom_map_file: Path passed to ``molgrid.FileMappedGninaTyper``. The
            default ``'gninamap'`` is a relative path, so it is only found
            when the script runs from the directory containing that file;
            pass an explicit path otherwise.

    Returns:
        Path of the written ``.gninatypes`` file (``file`` with its extension
        replaced).

    Raises:
        ValueError: Propagated from molgrid when ``atom_map_file`` cannot be
            opened ("Could not open gninamap").
    """
    # Swap only the trailing extension. A blanket str.replace('.pdb', ...)
    # would also rewrite '.pdb' inside directory names, and for a non-.pdb
    # input it would make the scratch file alias (and then delete) the input.
    base, _ = os.path.splitext(file)
    train_types = base + '.types'
    out_path = base + '.gninatypes'

    with open(train_types, 'w') as types_file:
        types_file.write(file)

    try:
        atom_map = molgrid.FileMappedGninaTyper(atom_map_file)
        dataloader = molgrid.ExampleProvider(atom_map, shuffle=False, default_batch_size=1)
        dataloader.populate(train_types)
        example = dataloader.next()
        coords = example.coord_sets[0].coords.tonumpy()
        types = np.int_(example.coord_sets[0].type_index.tonumpy())
        print(coords)
        with open(out_path, 'wb') as fout:
            for i, xyz in enumerate(coords):
                # Pack once and reuse the bytes for both the file and the echo.
                record = struct.pack('fffi', xyz[0], xyz[1], xyz[2], types[i])
                fout.write(record)
                print(record)
    finally:
        # The scratch .types file is removed even when molgrid raises.
        os.remove(train_types)
    return out_path

def create_types(file,protein):
    """Create the ``.types`` file used for model predictions.

    Each line of ``file`` is whitespace-normalised (fields joined by single
    spaces) and written out with ``protein`` appended as a final field.

    Args:
        file: Path of the input text file (normally ending in ``.txt``).
        protein: String appended to every output line.

    Returns:
        Path of the written ``.types`` file (``file`` with its extension
        replaced).
    """
    # Swap only the trailing extension so the output can never alias the
    # input (str.replace left the path unchanged for non-.txt inputs, which
    # truncated the input file before it was read).
    base, _ = os.path.splitext(file)
    types_path = base + '.types'
    # Open the input first so a missing input does not leave an empty output
    # behind; both handles are closed (and flushed) before returning.
    with open(file, 'r') as fin, open(types_path, 'w') as fout:
        for line in fin:
            fout.write(' '.join(line.split()) + ' ' + protein + '\n')
    return types_path

# Example driver: filter the input PDB down to the residues NonHetSelect
# accepts, then convert the cleaned structure to a .gninatypes file.
protein_file="/home/ubuntu/Data/1a8o.pdb"
# The cleaned copy is written next to the input with a "_nowat" suffix.
protein_nowat_file=protein_file.replace('.pdb','_nowat.pdb')
clean_pdb(protein_file,protein_nowat_file)
# NOTE: gninatype() opens the atom map via the relative path 'gninamap', so
# this call presumably only succeeds when run from a directory containing it.
protein_gninatype=gninatype(protein_nowat_file)

The code ends with the error

ValueError                                Traceback (most recent call last)
/tmp/ipykernel_13436/2408537986.py in <module>
      2 protein_nowat_file=protein_file.replace('.pdb','_nowat.pdb')
      3 clean_pdb(protein_file,protein_nowat_file)
----> 4 protein_gninatype=gninatype(protein_nowat_file)

/tmp/ipykernel_13436/3305498276.py in gninatype(file)
      4     f.write(file)
      5     f.close()
----> 6     atom_map=molgrid.FileMappedGninaTyper('gninamap')
      7     dataloader=molgrid.ExampleProvider(atom_map,shuffle=False,default_batch_size=1)
      8     train_types=file.replace('.pdb','.types')

ValueError: Could not open gninamap

Can you please help me with this issue? Thank you.

RishalAggarwal commented 2 years ago

Hi, the repository has a file called "gninamap"

atom_map=molgrid.FileMappedGninaTyper('gninamap')

Just make sure you pass the path to that file to this function. (As written, this line of code assumes the file is in the same directory that you are running the script from.)