Status: Closed (closed by turbomam 1 year ago)
{'ClassDefinition ChemicalEntity': {'id_prefixes': ['CHEBI',
'CHEMBL.COMPOUND',
'DRUGBANK',
'HMDB',
'KEGG.COMPOUND',
'MESH',
'PUBCHEM.COMPOUND',
'cas']},
'ClassDefinition GeneProduct': {'id_prefixes': ['PR', 'UniProtKB', 'gtpo']},
'ClassDefinition OrthologyGroup': {'id_prefixes': ['CATH',
'EGGNOG',
'KEGG.ORTHOLOGY',
'PANTHER.FAMILY',
'PFAM',
'SUPFAM',
'TIGRFAM']},
'ClassDefinition Pathway': {'id_prefixes': ['COG', 'KEGG_PATHWAY']},
'ClassDefinition Reaction': {'id_prefixes': ['EC',
'GO',
'KEGG.REACTION',
'MetaCyc',
'MetaNetX',
'RHEA',
'RetroRules',
'SEED']},
'SlotDefinition alternative_identifiers': {'pattern': '^[a-zA-Z0-9][a-zA-Z0-9_\\.]+:[a-zA-Z0-9_][a-zA-Z0-9_\\-\\/\\.,]*$'},
'SlotDefinition dna_cont_well': {'pattern': '^(?!A1|A12|H1|H12)(([A-H][1-9])|([A-H]1[0-2]))$'},
'SlotDefinition dois': {'pattern': '^doi:10.\\d{2,9}/.*$'},
'SlotDefinition external_database_identifiers': {'pattern': '^[a-zA-Z0-9][a-zA-Z0-9_\\.]+:[a-zA-Z0-9_][a-zA-Z0-9_\\-\\/\\.,]*$'},
'SlotDefinition gnps_task_identifiers': {'pattern': '^gnps\\.task:[a-f0-9]+$'},
'SlotDefinition gold_analysis_project_identifiers': {'pattern': '^gold:Ga[0-9]+$'},
'SlotDefinition gold_biosample_identifiers': {'pattern': '^gold:Gb[0-9]+$'},
'SlotDefinition gold_sequencing_project_identifiers': {'pattern': '^gold:Gp[0-9]+$'},
'SlotDefinition gold_study_identifiers': {'pattern': '^gold:Gs[0-9]+$'},
'SlotDefinition id': {'pattern': '^[a-zA-Z0-9][a-zA-Z0-9_\\.]+:[a-zA-Z0-9_][a-zA-Z0-9_\\-\\/\\.,]*$'},
'SlotDefinition img_identifiers': {'pattern': '^img\\.taxon:[a-zA-Z0-9_][a-zA-Z0-9_\\/\\.]*$'},
'SlotDefinition insdc_analysis_identifiers': {'pattern': '^insdc.sra:(E|D|S)RR[0-9]{6,}$'},
'SlotDefinition insdc_assembly_identifiers': {'pattern': '^insdc.sra:[A-Z]+[0-9]+(\\.[0-9]+)?$'},
'SlotDefinition insdc_bioproject_identifiers': {'pattern': '^bioproject:PRJ[DEN][A-Z][0-9]+$'},
'SlotDefinition insdc_biosample_identifiers': {'pattern': '^biosample:SAM[NED]([A-Z])?[0-9]+$'},
'SlotDefinition insdc_experiment_identifiers': {'pattern': '^insdc.sra:(E|D|S)RX[0-9]{6,}$'},
'SlotDefinition insdc_secondary_sample_identifiers': {'pattern': '^biosample:(E|D|S)RS[0-9]{6,}$'},
'SlotDefinition insdc_sra_ena_study_identifiers': {'pattern': '^insdc.sra:(E|D|S)RP[0-9]{6,}$'},
'SlotDefinition jgi_portal_study_identifiers': {'id_prefixes': ['jgi.proposal'],
'pattern': '^jgi.proposal:\\d+$'},
'SlotDefinition massive_study_identifiers': {'pattern': '^MASSIVE:'},
'SlotDefinition mgnify_project_identifiers': {'pattern': '^mgnify.proj:[A-Z]+[0-9]+$'},
'SlotDefinition pres_animal_insect': {'pattern': '^(cat|dog|rodent|snake|other);\\d+$'},
'SlotDefinition rna_cont_well': {'pattern': '^(?!A1|A12|H1|H12)(([A-H][1-9])|([A-H]1[0-2]))$'},
'TypeDefinition external_identifier': {'pattern': '^[a-zA-Z0-9][a-zA-Z0-9_\\.]+:[a-zA-Z0-9_][a-zA-Z0-9_\\-\\/\\.,]*$'},
'fire used in Biosample': {'pattern': '^[12]\\d{3}(?:(?:-(?:0[1-9]|1[0-2]))(?:-(?:0[1-9]|[12]\\d|3[01]))?)?(\\s+to\\s+[12]\\d{3}(?:(?:-(?:0[1-9]|1[0-2]))(?:-(?:0[1-9]|[12]\\d|3[01]))?)?)?$'},
'has_function used in FunctionalAnnotation': {'pattern': '^(KEGG_PATHWAY:\\w{2,4}\\d{5}|KEGG.REACTION:R\\d+|RHEA:\\d{5}|MetaCyc:[A-Za-z0-9+_.%-:]+|EC:\\d{1,2}(\\.\\d{0,3}){0,3}|GO:\\d{7}|MetaNetX:(MNXR\\d+|EMPTY)|SEED:\\w+|KEGG\\.ORTHOLOGY:K\\d+|EGGNOG:\\w+|PFAM:PF\\d{5}|TIGRFAM:TIGR\\d+|SUPFAM:\\w+|CATH:[1-6]\\.[0-9]+\\.[0-9]+\\.[0-9]+|PANTHER.FAMILY:PTHR\\d{5}(\\:SF\\d{1,3})?)$'},
'id used in OntologyClass': {'pattern': '^[a-zA-Z0-9][a-zA-Z0-9_\\.]+:[a-zA-Z0-9_][a-zA-Z0-9_\\-\\/\\.,]*$'}}
See `nmdc_schema/list_id_prefixes_and_patterns.py`.