openforcefield / protein-ligand-benchmark

Protein-Ligand Benchmark Dataset for Free Energy Calculations
MIT License
142 stars 15 forks source link

Choosing production dataset (not sample) via `set_data_dir` gives error when running example notebook #13

Open dotsdl opened 3 years ago

dotsdl commented 3 years ago

When running the example notebook with the production data directory selected via `targets.set_data_dir` (instead of the bundled sample data), producing an HTML table of the target set raises an error:

from plbenchmark import targets
from IPython.core.display import HTML

targets.set_data_dir('../protein-ligand-benchmark/data/')

# it is initialized from the `plbenchmark/sample_data/targets.yml` file
target_set = targets.TargetSet()
# to see which targets are available, one can get a list of names
target_set.get_names()

HTML(target_set.get_html(columns=['name', 'fullname', 'pdb', 'references', 'numLigands', 'minDG', 'maxDG', 'associated_sets']))

we get:

---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
<ipython-input-5-9831143c195f> in <module>
----> 1 HTML(target_set.get_html(columns=['name', 'fullname', 'pdb', 'references', 'numLigands', 'minDG', 'maxDG', 'associated_sets']))

~/Projects/openff/protein-ligand/protein-ligand-benchmark/plbenchmark/targets.py in get_html(self, columns)
    348         :return: HTML string
    349         """
--> 350         df = self.get_dataframe(columns=columns)
    351         html_string = df.to_html()
    352         html_string = html_string.replace("REP1", '<a target="_blank" href="')

~/Projects/openff/protein-ligand/protein-ligand-benchmark/plbenchmark/targets.py in get_dataframe(self, columns)
    324             dfs = []
    325             for key in self.keys():
--> 326                 self[key].add_ligand_data()
    327                 self[key].find_links()
    328                 dfs.append(self[key].get_dataframe())

~/Projects/openff/protein-ligand/protein-ligand-benchmark/plbenchmark/targets.py in add_ligand_data(self)
    111         :return: None
    112         """
--> 113         lgs = self.get_ligand_set()
    114         self.ligand_data = pd.Series({"numLigands": len(lgs)})
    115         affinities = []

~/Projects/openff/protein-ligand/protein-ligand-benchmark/plbenchmark/targets.py in get_ligand_set(self)
    102         """
    103         if self._ligands is None:
--> 104             self._ligands = ligands.LigandSet(self._name)
    105         return self._ligands
    106 

~/Projects/openff/protein-ligand/protein-ligand-benchmark/plbenchmark/ligands.py in __init__(self, target, *arg, **kw)
    254         data = yaml.full_load(file)
    255         for name, d in data.items():
--> 256             lig = Ligand(d, target)
    257             lig.derive_observables(derived_type="dg")
    258             # l.find_links()

~/Projects/openff/protein-ligand/protein-ligand-benchmark/plbenchmark/ligands.py in __init__(self, d, target)
     57             unit = utils.unit_registry(self._data[("measurement", "unit")])
     58             self._data[("measurement", "error")] = (
---> 59                 self._data[("measurement", "error")] * unit
     60             )
     61             self._data[("measurement", "value")] = (

TypeError: unsupported operand type(s) for *: 'NoneType' and 'Quantity'
dotsdl commented 3 years ago

Perhaps we could let `TargetSet` optionally take a data directory path as an argument, which would avoid relying on global state in the module?