laminlabs / bionty

Registries for biological entities, coupled to public ontologies.
Apache License 2.0
11 stars 2 forks source link

Pronto cannot read EFO 3.67.0 #65

Open Zethson opened 3 months ago

Zethson commented 3 months ago

Loading EFO 3.67.0 through Pronto fails with the `KeyError` shown below; EFO 3.66.0 still works.

{
    "name": "KeyError",
    "message": "'{http://www.w3.org/1999/02/22-rdf-syntax-ns#}resource'",
    "stack": "---------------------------------------------------------------------------
KeyError                                  Traceback (most recent call last)
Cell In[2], line 1
----> 1 ont = bt.base.ExperimentalFactor(source=\"efo\", version=\"3.67.0\")

File ~/PycharmProjects/bionty/bionty/base/entities/_experimentalfactor.py:32, in ExperimentalFactor.__init__(self, organism, source, version, **kwargs)
     25 def __init__(
     26     self,
     27     organism: Optional[Literal[\"all\"]] = None,
   (...)
     30     **kwargs,
     31 ) -> None:
---> 32     super().__init__(
     33         organism=organism,
     34         source=source,
     35         version=version,
     36         include_id_prefixes={\"efo\": [\"EFO\", \"http://www.ebi.ac.uk/efo/\"]},
     37         **kwargs,
     38     )

File ~/PycharmProjects/bionty/bionty/base/_public_ontology.py:99, in PublicOntology.__init__(self, source, version, organism, include_id_prefixes, **kwargs)
     96 self.include_id_prefixes = include_id_prefixes
     98 # df is only read into memory at the init to improve performance
---> 99 df = self._load_df()
    100 # self._df has no index
    101 if df.index.name is not None:

File ~/PycharmProjects/bionty/bionty/base/_public_ontology.py:304, in PublicOntology._load_df(self)
    302 if not self._url.endswith(\"parquet\"):
    303     if not self._local_parquet_path.exists():
--> 304         df = self.to_pronto().to_df(
    305             source=self.source, include_id_prefixes=self.include_id_prefixes
    306         )
    307         df.to_parquet(self._local_parquet_path)
    309 # Loading the parquet file resets the index

File ~/PycharmProjects/bionty/bionty/base/entities/_experimentalfactor.py:50, in ExperimentalFactor.to_pronto(self)
     41 \"\"\"The Pronto Ontology object.
     42 
     43 See: https://pronto.readthedocs.io/en/stable/api/pronto.Ontology.html
     44 \"\"\"
     45 self._download_ontology_file(
     46     localpath=self._local_ontology_path,  # type:ignore
     47     url=self._url,  # type:ignore
     48     md5=self._md5,  # type:ignore
     49 )
---> 50 onto = Ontology(
     51     handle=self._local_ontology_path,  # type:ignore
     52     prefix=\"http://www.ebi.ac.uk/efo/\",
     53 )
     54 # TODO: fix
     55 onto.__setattr__(\"efo_to_df\", efo_to_df)

File ~/PycharmProjects/bionty/bionty/base/_ontology.py:33, in Ontology.__init__(self, handle, import_depth, timeout, threads, prefix)
     31 self._prefix = prefix
     32 warnings.filterwarnings(\"ignore\", category=pronto.warnings.ProntoWarning)
---> 33 super().__init__(
     34     handle=handle, import_depth=import_depth, timeout=timeout, threads=threads
     35 )

File ~/miniconda3/envs/lamindb/lib/python3.11/site-packages/pronto/ontology.py:283, in Ontology.__init__(self, handle, import_depth, timeout, threads)
    281 for cls in BaseParser.__subclasses__():
    282     if cls.can_parse(typing.cast(str, self.path), buffer):
--> 283         cls(self).parse_from(_handle)  # type: ignore
    284         break
    285 else:

File ~/miniconda3/envs/lamindb/lib/python3.11/site-packages/pronto/parsers/rdfxml.py:117, in RdfXMLParser.parse_from(self, handle, threads)
    115     self._extract_annotation_property(prop, curies)
    116 for class_ in tree.iterfind(_NS[\"owl\"][\"Class\"]):
--> 117     self._extract_term(class_, curies)
    118 for axiom in tree.iterfind(_NS[\"owl\"][\"Axiom\"]):
    119     self._process_axiom(axiom, curies)

File ~/miniconda3/envs/lamindb/lib/python3.11/site-packages/pronto/parsers/rdfxml.py:399, in RdfXMLParser._extract_term(self, elem, curies)
    397         termdata.xrefs.add(Xref(text))
    398     else:
--> 399         termdata.xrefs.add(Xref(attrib[_NS[\"rdf\"][\"resource\"]]))
    400 except ValueError:
    401     pass

KeyError: '{http://www.w3.org/1999/02/22-rdf-syntax-ns#}resource'"
}

Reported upstream to the EFO project: https://github.com/EBISPOT/efo/issues/2264