laminlabs / bionty-base

Access public biological ontologies.
https://bionty-base.lamin.ai
Apache License 2.0
16 stars 2 forks source link

Implement inspect #354

Closed Zethson closed 1 year ago

Zethson commented 1 year ago
def inspect(
    iterable: Iterable,
    reference_id: "BiontyField",
    return_df=False,
) -> "InspectResult | pd.DataFrame":
    """Inspect whether identifiers are mappable to the entity reference.

    Args:
        iterable: Identifiers to check. It is consumed exactly once, and the
            input order is preserved in every result.
        reference_id: Field to check against. The table returned by
            ``reference_id.parent.df()`` is read, and its column named
            ``reference_id.name`` supplies the set of known values.
        return_df: If True, return a DataFrame indexed by the identifiers
            (index named after the field) with a boolean ``__mapped__``
            column instead of the default mapping.

    Returns:
        By default, a mapping with ``"mapped"`` and ``"not_mapped"`` lists.
        With ``return_df=True``, the DataFrame described above.
    """
    # Annotations are quoted because BiontyField is defined further down the
    # module and InspectResult is not defined in this sketch at all; unquoted
    # they would raise NameError when the function is defined.
    # NOTE(review): a plain dict is returned in place of InspectResult, which
    # is presumably a dict-like type - confirm once it is defined.
    # NOTE(review): the "terms are mapped" summary lines from the user story
    # are not emitted here; the logging mechanism is not specified.

    # Materialize once so generators work and the order can be reused below.
    identifiers = list(iterable)

    # Build the lookup set a single time; membership tests are then O(1).
    known = set(reference_id.parent.df()[reference_id.name])
    is_mapped = [identifier in known for identifier in identifiers]

    if return_df:
        # Data must be passed before `index=`: a positional argument after a
        # keyword argument is a SyntaxError.
        return pd.DataFrame(
            {"__mapped__": is_mapped},
            index=pd.Index(identifiers, name=reference_id.name),
        )

    return {
        "mapped": [i for i, hit in zip(identifiers, is_mapped) if hit],
        "not_mapped": [i for i, hit in zip(identifiers, is_mapped) if not hit],
    }

def map_synonyms(
    iterable: Iterable,
    reference_id: "BiontyField",
    synonyms_key="synonyms",
    return_mapper=False,
) -> "list | dict":
    """Convert synonyms to standardized ids.

    Args:
        iterable: Values to convert.
        reference_id: Field of the reference to standardize against. This is
            an annotation, not a default: the original ``=BiontyField`` made
            the class itself the default value.
        synonyms_key: Presumably the name of the reference column holding the
            synonyms - confirm once the body is implemented.
        return_mapper: Presumably selects the dict form of the result (a
            synonym-to-id mapper) over the list form - confirm.

    Returns:
        A list or a dict per the annotation. The body is still elided in this
        sketch, so the function currently returns None.
    """
    # The return annotation is quoted and uses `|`: the original `list or dict`
    # is a boolean expression that evaluates to just `list`.
    ...

class BiontyField:
    """A named field (column) that belongs to a parent entity.

    Attributes are set in ``__init__``:
        parent: The entity that owns this field.
        name: The field's name; also used as the instance's ``repr``.
    """

    # "Bionty" is a quoted forward reference: the class is defined later in
    # the module, and the sketch renames the former ``Entity`` to ``Bionty``.
    def __init__(self, parent: "Bionty", name: str):
        self.parent = parent
        self.name = name

    def __repr__(self):
        # The bare field name, so the field prints like the column it names.
        return self.name

# rename Entity to Bionty
class Bionty:
    """Entity whose reference-table columns are exposed as field attributes."""

    def __init__(self):
        # Create one attribute per column of self.df(), each wrapping the
        # column name in a BiontyField that points back at this instance.
        # NOTE(review): df() is not defined in this sketch; it is presumably
        # supplied by the concrete entity class - confirm.
        for col_name in self.df().columns:
            setattr(self, col_name, BiontyField(self, col_name))

With the following user story:

import bionty as bt
# Entity object for genes; its fields (e.g. ensembl_gene_id) are passed to inspect below.
# NOTE(review): presumably a Bionty subclass whose columns become field attributes - confirm.
gene_bionty = bt.Gene()

# Example identifiers to inspect; the last one is the entry reported as not mapped below.
ids = ["ENSG00000148584", "ENSG00000121410", "ENSG00000188389", "ENSG0000corrupted"]

>>> bt.inspect(ids, gene_bionty.ensembl_gene_id)
✅ 3 terms (75.0%) are mapped.
🔶 1 terms (25.0%) are not mapped.
{
    "mapped": ["ENSG00000148584", "ENSG00000121410", "ENSG00000188389"],
    "not_mapped": ["ENSG0000corrupted"],
}

# preserve the original order of the input list
>>> bt.inspect(ids, "ensembl_gene_id", return_df=True)
ensembl_gene_id         __mapped__
ENSG00000148584           True
ENSG00000121410           True
ENSG00000188389           True
ENSG0000corrupted       False