Open nathandunn opened 4 years ago
JSON feature in Apollo 2. This is two mRNAs (same gene) with attached data, which includes GO data, but not gene products or provenance:
{
"features": [
{
"owner": "nathandunn@lbl.gov",
"parent_name": "Merlin_58_mRNA",
"dbxrefs": [
{
"accession": "2131",
"db": {
"name": "asdf"
}
},
{
"accession": "1212",
"db": {
"name": "aasd"
}
}
],
"uniquename": "efec3854-efd8-47c0-b41f-0d8ad5e3c17e",
"synonyms": "[alias2]",
"go_annotations": [
{
"reference": "asdf:12312",
"geneRelationship": "BFO:0000050",
"goTerm": "GO:0048183",
"notes": "[\"asdf\"]",
"feature": "efec3854-efd8-47c0-b41f-0d8ad5e3c17e",
"evidenceCodeLabel": "NAS (ECO:0000303): non-traceable author statement",
"negate": false,
"aspect": "CC",
"goTermLabel": "activin AB complex (GO:0048183) ",
"evidenceCode": "ECO:0000303",
"id": 315503,
"withOrFrom": "[\"xxx:1221\",\"asdf:123123\"]"
},
{
"reference": "asdf:123",
"geneRelationship": "RO:0002327",
"goTerm": "GO:0004796",
"notes": "[\"asdf\"]",
"feature": "efec3854-efd8-47c0-b41f-0d8ad5e3c17e",
"evidenceCodeLabel": "NAS (ECO:0000303): non-traceable author statement",
"negate": false,
"aspect": "MF",
"goTermLabel": "thromboxane-A synthase activity (GO:0004796) ",
"evidenceCode": "ECO:0000303",
"id": 315501,
"withOrFrom": "[\"asdf:123\"]"
}
],
"description": "describe2",
"type": {
"cv": {
"name": "sequence"
},
"name": "mRNA"
},
"parent_type": {
"cv": {
"name": "sequence"
},
"name": "gene"
},
"date_creation": 1597706984613,
"sequence": "Merlin",
"children": [
{
"owner": "None",
"parent_name": "Merlin_58_mRNA-00001",
"uniquename": "efec3854-efd8-47c0-b41f-0d8ad5e3c17e-non_canonical_five_prime_splice_site-14363",
"type": {
"cv": {
"name": "sequence"
},
"name": "non_canonical_five_prime_splice_site"
},
"parent_type": {
"cv": {
"name": "sequence"
},
"name": "mRNA"
},
"date_creation": 1597706985177,
"sequence": "Merlin",
"parent_id": "efec3854-efd8-47c0-b41f-0d8ad5e3c17e",
"name": "efec3854-efd8-47c0-b41f-0d8ad5e3c17e-non_canonical_five_prime_splice_site-14363",
"location": {
"strand": -1,
"id": 315430,
"fmin": 14363,
"fmax": 14363
},
"id": 315429,
"properties": [],
"date_last_modified": 1597706985236
},
{
"owner": "None",
"parent_name": "Merlin_58_mRNA-00001",
"uniquename": "481aebac-24c1-4816-ac9a-ddd697902161",
"type": {
"cv": {
"name": "sequence"
},
"name": "exon"
},
"parent_type": {
"cv": {
"name": "sequence"
},
"name": "mRNA"
},
"date_creation": 1597706984547,
"sequence": "Merlin",
"parent_id": "efec3854-efd8-47c0-b41f-0d8ad5e3c17e",
"name": "481aebac-24c1-4816-ac9a-ddd697902161",
"location": {
"strand": -1,
"id": 315420,
"fmin": 13065,
"fmax": 14296
},
"id": 315419,
"properties": [],
"date_last_modified": 1597706984596
},
{
"owner": "None",
"parent_name": "Merlin_58_mRNA-00001",
"uniquename": "efec3854-efd8-47c0-b41f-0d8ad5e3c17e-CDS",
"type": {
"cv": {
"name": "sequence"
},
"name": "CDS"
},
"parent_type": {
"cv": {
"name": "sequence"
},
"name": "mRNA"
},
"date_creation": 1597706984870,
"sequence": "Merlin",
"parent_id": "efec3854-efd8-47c0-b41f-0d8ad5e3c17e",
"name": "efec3854-efd8-47c0-b41f-0d8ad5e3c17e-CDS",
"location": {
"strand": -1,
"id": 315427,
"fmin": 13095,
"fmax": 13230
},
"id": 315426,
"properties": [],
"date_last_modified": 1597706984914
},
{
"owner": "None",
"parent_name": "Merlin_58_mRNA-00001",
"uniquename": "2abc490c-d813-4295-b86f-f74c012dad5d",
"type": {
"cv": {
"name": "sequence"
},
"name": "exon"
},
"parent_type": {
"cv": {
"name": "sequence"
},
"name": "mRNA"
},
"date_creation": 1597706984607,
"sequence": "Merlin",
"parent_id": "efec3854-efd8-47c0-b41f-0d8ad5e3c17e",
"name": "2abc490c-d813-4295-b86f-f74c012dad5d",
"location": {
"strand": -1,
"id": 315423,
"fmin": 14365,
"fmax": 14796
},
"id": 315422,
"properties": [],
"date_last_modified": 1597706984884
}
],
"parent_id": "095526e1-e79f-4db0-bb52-630f4ec126fa",
"name": "Merlin_58_mRNA-00001",
"location": {
"strand": -1,
"id": 315418,
"fmin": 13065,
"fmax": 14796
},
"id": 315417,
"properties": [
{
"name": "asd",
"type": {
"cv": {
"name": "feature_property"
}
},
"value": "1212"
},
{
"name": "comment",
"type": {
"cv": {
"name": "feature_property"
}
},
"value": "asdfasdf"
},
{
"name": "aba",
"type": {
"cv": {
"name": "feature_property"
}
},
"value": "1212"
},
{
"name": "comment",
"type": {
"cv": {
"name": "feature_property"
}
},
"value": "asdf"
}
],
"date_last_modified": 1597966272157
},
{
"owner": "nathandunn@lbl.gov",
"parent_name": "Merlin_58_mRNA",
"uniquename": "c84ed483-a2d9-41ed-96f9-96675ab8a477",
"type": {
"cv": {
"name": "sequence"
},
"name": "mRNA"
},
"parent_type": {
"cv": {
"name": "sequence"
},
"name": "gene"
},
"date_creation": 1597706985424,
"sequence": "Merlin",
"children": [
{
"owner": "None",
"parent_name": "Merlin_58_mRNA-00002",
"uniquename": "c84ed483-a2d9-41ed-96f9-96675ab8a477-non_canonical_five_prime_splice_site-14463",
"type": {
"cv": {
"name": "sequence"
},
"name": "non_canonical_five_prime_splice_site"
},
"parent_type": {
"cv": {
"name": "sequence"
},
"name": "mRNA"
},
"date_creation": 1597706985545,
"sequence": "Merlin",
"parent_id": "c84ed483-a2d9-41ed-96f9-96675ab8a477",
"name": "c84ed483-a2d9-41ed-96f9-96675ab8a477-non_canonical_five_prime_splice_site-14463",
"location": {
"strand": -1,
"id": 315446,
"fmin": 14463,
"fmax": 14463
},
"id": 315445,
"properties": [],
"date_last_modified": 1597706985595
},
{
"owner": "None",
"parent_name": "Merlin_58_mRNA-00002",
"uniquename": "c84ed483-a2d9-41ed-96f9-96675ab8a477-CDS",
"type": {
"cv": {
"name": "sequence"
},
"name": "CDS"
},
"parent_type": {
"cv": {
"name": "sequence"
},
"name": "mRNA"
},
"date_creation": 1597706985433,
"sequence": "Merlin",
"parent_id": "c84ed483-a2d9-41ed-96f9-96675ab8a477",
"name": "c84ed483-a2d9-41ed-96f9-96675ab8a477-CDS",
"location": {
"strand": -1,
"id": 315442,
"fmin": 13095,
"fmax": 13230
},
"id": 315441,
"properties": [],
"date_last_modified": 1597706985466
},
{
"owner": "None",
"parent_name": "Merlin_58_mRNA-00002",
"uniquename": "75fa77c0-6e3b-4843-916e-bfac9bf466b0",
"type": {
"cv": {
"name": "sequence"
},
"name": "exon"
},
"parent_type": {
"cv": {
"name": "sequence"
},
"name": "mRNA"
},
"date_creation": 1597706985414,
"sequence": "Merlin",
"parent_id": "c84ed483-a2d9-41ed-96f9-96675ab8a477",
"name": "75fa77c0-6e3b-4843-916e-bfac9bf466b0",
"location": {
"strand": -1,
"id": 315436,
"fmin": 13065,
"fmax": 14096
},
"id": 315435,
"properties": [],
"date_last_modified": 1597706985419
},
{
"owner": "None",
"parent_name": "Merlin_58_mRNA-00002",
"uniquename": "c7a8bac5-37c6-4d97-9102-d66b56d23b93",
"type": {
"cv": {
"name": "sequence"
},
"name": "exon"
},
"parent_type": {
"cv": {
"name": "sequence"
},
"name": "mRNA"
},
"date_creation": 1597706985422,
"sequence": "Merlin",
"parent_id": "c84ed483-a2d9-41ed-96f9-96675ab8a477",
"name": "c7a8bac5-37c6-4d97-9102-d66b56d23b93",
"location": {
"strand": -1,
"id": 315439,
"fmin": 14465,
"fmax": 14796
},
"id": 315438,
"properties": [],
"date_last_modified": 1597706985440
},
{
"owner": "None",
"parent_name": "Merlin_58_mRNA-00002",
"uniquename": "c84ed483-a2d9-41ed-96f9-96675ab8a477-non_canonical_three_prime_splice_site-14096",
"type": {
"cv": {
"name": "sequence"
},
"name": "non_canonical_three_prime_splice_site"
},
"parent_type": {
"cv": {
"name": "sequence"
},
"name": "mRNA"
},
"date_creation": 1597706985557,
"sequence": "Merlin",
"parent_id": "c84ed483-a2d9-41ed-96f9-96675ab8a477",
"name": "c84ed483-a2d9-41ed-96f9-96675ab8a477-non_canonical_three_prime_splice_site-14096",
"location": {
"strand": -1,
"id": 315449,
"fmin": 14096,
"fmax": 14096
},
"id": 315448,
"properties": [],
"date_last_modified": 1597706985596
}
],
"parent_id": "095526e1-e79f-4db0-bb52-630f4ec126fa",
"name": "Merlin_58_mRNA-00002",
"location": {
"strand": -1,
"id": 315434,
"fmin": 13065,
"fmax": 14796
},
"id": 315433,
"properties": [],
"date_last_modified": 1597706985595
}
],
"clientToken": "12741115651957302207",
"track": "Merlin",
"operation": "get_features"
}
Similarly, this is the equivalent GFF:
##gff-version 3
##sequence-region Merlin 1 172788
Merlin . gene 13066 14796 . - . owner=nathandunn@lbl.gov;symbol=asymbol;go_annotations=rank%3D1%3Baspect%3DMF%3Bterm%3DGO:0008143%3Bdb_xref%3Dasdf:123%3Bevidence%3DECO:0000270%3Bgene_product_relationship%3DRO:0002327%3Bnegate%3Dfalse%3Bnote%3D["asdf"]%3Bbased_on%3D["aasdf:213"]%3Blast_updated%3D2020-08-20 23:24:39.175%3Bdate_created%3D2020-08-20 23:24:39.175%2Crank%3D2%3Baspect%3DBP%3Bterm%3DGO:0033304%3Bdb_xref%3DPMID:1231%3Bevidence%3DECO:0007003%3Bgene_product_relationship%3DRO:0002331%3Bnegate%3Dfalse%3Bnote%3D["asfd"]%3Bbased_on%3D["Uniprot:someonumber"]%3Blast_updated%3D2020-08-20 23:24:23.73%3Bdate_created%3D2020-08-20 23:24:23.73;description=asdfasdfasdf;Name=Merlin_58_mRNA;date_creation=2020-08-17;provenance=rank%3D1%3Bfield%3DSYNONYM%3Bdb_xref%3Dasdf:123%3Bevidence%3DECO:0000318%3Bnote%3D["asdf"]%3Bbased_on%3D["asdf:1212"]%3Blast_updated%3D2020-08-20 23:27:29.367%3Bdate_created%3D2020-08-20 23:27:29.367%2Crank%3D2%3Bfield%3DTYPE%3Bdb_xref%3Dasdf:123%3Bevidence%3DECO:0000318%3Bnote%3D["asdfasdf"]%3Bbased_on%3D["asdf:123"]%3Blast_updated%3D2020-08-20 23:27:37.012%3Bdate_created%3D2020-08-20 23:27:37.012;Alias=alias1,alias2,alias3;Note=asdf,zxcvzxcv;asdf=123,123;gene_product=rank%3D1%3Bterm%3Dasdf%3Bdb_xref%3Dasdf:123%3Bevidence%3DECO:0000305%3Balternate%3Dtrue%3Bnote%3D[]%3Bbased_on%3D["asdf:12"]%3Blast_updated%3D2020-08-20 23:25:53.554%3Bdate_created%3D2020-08-20 23:25:53.554%2Crank%3D2%3Bterm%3Dabdc%3Bdb_xref%3Dasdf:123%3Bevidence%3DECO:0007007%3Balternate%3Dfalse%3Bnote%3D["adsf"]%3Bbased_on%3D["asdf:123"]%3Blast_updated%3D2020-08-20 23:24:50.251%3Bdate_created%3D2020-08-20 23:24:50.251%2Crank%3D3%3Bterm%3Dhair%3Bdb_xref%3Dasasdf:123123%3Bevidence%3DECO:0000266%3Balternate%3Dtrue%3Bnote%3D["asfasdfasdf"]%3Bbased_on%3D["asda:1212"]%3Blast_updated%3D2020-08-20 23:25:36.124%3Bdate_created%3D2020-08-20 23:25:36.124;ID=095526e1-e79f-4db0-bb52-630f4ec126fa;date_last_modified=2020-08-20;Dbxref=baasdf:123,asdf:1231
Merlin . mRNA 13066 14796 . - . owner=nathandunn@lbl.gov;aba=1212;Parent=095526e1-e79f-4db0-bb52-630f4ec126fa;go_annotations=rank%3D1%3Baspect%3DMF%3Bterm%3DGO:0004796%3Bdb_xref%3Dasdf:123%3Bevidence%3DECO:0000303%3Bgene_product_relationship%3DRO:0002327%3Bnegate%3Dfalse%3Bnote%3D["asdf"]%3Bbased_on%3D["asdf:123"]%3Blast_updated%3D2020-08-20 23:28:35.898%3Bdate_created%3D2020-08-20 23:28:35.898%2Crank%3D2%3Baspect%3DCC%3Bterm%3DGO:0048183%3Bdb_xref%3Dasdf:12312%3Bevidence%3DECO:0000303%3Bgene_product_relationship%3DBFO:0000050%3Bnegate%3Dfalse%3Bnote%3D["asdf"]%3Bbased_on%3D["xxx:1221"%2C"asdf:123123"]%3Blast_updated%3D2020-08-20 23:29:00.387%3Bdate_created%3D2020-08-20 23:29:00.387;description=describe2;Name=Merlin_58_mRNA-00001;date_creation=2020-08-17;provenance=rank%3D1%3Bfield%3DNAME%3Bdb_xref%3Dasdf:123%3Bevidence%3DECO:0007005%3Bnote%3D["asdf"]%3Bbased_on%3D["asdf:123"]%3Blast_updated%3D2020-08-20 23:30:47.757%3Bdate_created%3D2020-08-20 23:30:47.757;asd=1212;Alias=alias2;Note=asdf,asdfasdf;gene_product=rank%3D1%3Bterm%3Db%3Bdb_xref%3Dasdf:123%3Bevidence%3DECO:0000315%3Balternate%3Dfalse%3Bnote%3D["asdf"]%3Bbased_on%3D["asdf:123"]%3Blast_updated%3D2020-08-20 23:30:38.138%3Bdate_created%3D2020-08-20 23:30:38.138%2Crank%3D2%3Bterm%3Dasdf%3Bdb_xref%3Dasdf:123%3Bevidence%3DECO:0000321%3Balternate%3Dfalse%3Bnote%3D["asdf"]%3Bbased_on%3D["asdf:12312"]%3Blast_updated%3D2020-08-20 23:30:16.566%3Bdate_created%3D2020-08-20 23:30:16.566;ID=efec3854-efd8-47c0-b41f-0d8ad5e3c17e;date_last_modified=2020-08-20;Dbxref=asdf:2131,aasd:1212
Merlin . non_canonical_five_prime_splice_site 14364 14364 . - . Parent=efec3854-efd8-47c0-b41f-0d8ad5e3c17e;ID=efec3854-efd8-47c0-b41f-0d8ad5e3c17e-non_canonical_five_prime_splice_site-14363;Name=efec3854-efd8-47c0-b41f-0d8ad5e3c17e-non_canonical_five_prime_splice_site-14363
Merlin . exon 13066 14296 . - . Parent=efec3854-efd8-47c0-b41f-0d8ad5e3c17e;ID=481aebac-24c1-4816-ac9a-ddd697902161;Name=481aebac-24c1-4816-ac9a-ddd697902161
Merlin . CDS 13096 13230 . - 0 Parent=efec3854-efd8-47c0-b41f-0d8ad5e3c17e;ID=efec3854-efd8-47c0-b41f-0d8ad5e3c17e-CDS;Name=efec3854-efd8-47c0-b41f-0d8ad5e3c17e-CDS
Merlin . exon 14366 14796 . - . Parent=efec3854-efd8-47c0-b41f-0d8ad5e3c17e;ID=2abc490c-d813-4295-b86f-f74c012dad5d;Name=2abc490c-d813-4295-b86f-f74c012dad5d
Merlin . mRNA 13066 14796 . - . owner=nathandunn@lbl.gov;Parent=095526e1-e79f-4db0-bb52-630f4ec126fa;ID=c84ed483-a2d9-41ed-96f9-96675ab8a477;date_last_modified=2020-08-17;Name=Merlin_58_mRNA-00002;date_creation=2020-08-17
Merlin . non_canonical_five_prime_splice_site 14464 14464 . - . Parent=c84ed483-a2d9-41ed-96f9-96675ab8a477;ID=c84ed483-a2d9-41ed-96f9-96675ab8a477-non_canonical_five_prime_splice_site-14463;Name=c84ed483-a2d9-41ed-96f9-96675ab8a477-non_canonical_five_prime_splice_site-14463
Merlin . CDS 13096 13230 . - 0 Parent=c84ed483-a2d9-41ed-96f9-96675ab8a477;ID=c84ed483-a2d9-41ed-96f9-96675ab8a477-CDS;Name=c84ed483-a2d9-41ed-96f9-96675ab8a477-CDS
Merlin . exon 13066 14096 . - . Parent=c84ed483-a2d9-41ed-96f9-96675ab8a477;ID=75fa77c0-6e3b-4843-916e-bfac9bf466b0;Name=75fa77c0-6e3b-4843-916e-bfac9bf466b0
Merlin . exon 14466 14796 . - . Parent=c84ed483-a2d9-41ed-96f9-96675ab8a477;ID=c7a8bac5-37c6-4d97-9102-d66b56d23b93;Name=c7a8bac5-37c6-4d97-9102-d66b56d23b93
Merlin . non_canonical_three_prime_splice_site 14097 14097 . - . Parent=c84ed483-a2d9-41ed-96f9-96675ab8a477;ID=c84ed483-a2d9-41ed-96f9-96675ab8a477-non_canonical_three_prime_splice_site-14096;Name=c84ed483-a2d9-41ed-96f9-96675ab8a477-non_canonical_three_prime_splice_site-14096
###
Things to change before we do this:
"go_annotations": [
{
"reference": "asdf:12312",
"geneRelationship": "BFO:0000050",
"goTerm": "GO:0048183",
"notes": "[\"asdf\"]",
"feature": "efec3854-efd8-47c0-b41f-0d8ad5e3c17e",
"evidenceCodeLabel": "NAS (ECO:0000303): non-traceable author statement",
"negate": false,
"aspect": "CC",
"goTermLabel": "activin AB complex (GO:0048183) ",
"evidenceCode": "ECO:0000303",
"id": 315503,
"withOrFrom": "[\"xxx:1221\",\"asdf:123123\"]"
},
Should use more of the allowed values within GO via the GPAD spec. The fields that currently hold a JSON array serialized as a string (e.g. "notes" and "withOrFrom" above) should be real JSON arrays.
Should be more gene-centric, allowing for more functional annotations on the genes.
Need to remove internal long IDs (e.g. the numeric "id" fields such as "id": 315503).
Note: the "parent" class (the gene) with its functional annotations is entirely missing from this JSON, because this output is for JBrowse rendering, which is mRNA-specific for visualization.
Sync against this model: https://github.com/biolink/biolink-model/blob/master/biolink-model.yaml
GraphQL references:
https://github.com/biolink/biolink-model/blob/master/biolink-model.graphql#L687
https://biolink.github.io/biolink-model/docs/Gene.html
[x] generate JSON Feature to reflect the Apollo model
[x] describe tree
[ ] upload biolink schema
[ ] do comparison