GMOD / Apollo3Server

Apollo 3 Server: Grails 4 + Neo4j 3.5 + JB2
BSD 3-Clause "New" or "Revised" License
0 stars 0 forks source link

sync with biolink model #28

Open nathandunn opened 4 years ago

nathandunn commented 4 years ago

Sync against this model: https://github.com/biolink/biolink-model/blob/master/biolink-model.yaml

GraphQL references:

nathandunn commented 4 years ago

JSON feature in Apollo 2. This is two mRNAs (same gene) with attached data, which includes GO data but not gene products or provenance:

{
  "features": [
    {
      "owner": "nathandunn@lbl.gov",
      "parent_name": "Merlin_58_mRNA",
      "dbxrefs": [
        {
          "accession": "2131",
          "db": {
            "name": "asdf"
          }
        },
        {
          "accession": "1212",
          "db": {
            "name": "aasd"
          }
        }
      ],
      "uniquename": "efec3854-efd8-47c0-b41f-0d8ad5e3c17e",
      "synonyms": "[alias2]",
      "go_annotations": [
        {
          "reference": "asdf:12312",
          "geneRelationship": "BFO:0000050",
          "goTerm": "GO:0048183",
          "notes": "[\"asdf\"]",
          "feature": "efec3854-efd8-47c0-b41f-0d8ad5e3c17e",
          "evidenceCodeLabel": "NAS (ECO:0000303): non-traceable author statement",
          "negate": false,
          "aspect": "CC",
          "goTermLabel": "activin AB complex (GO:0048183) ",
          "evidenceCode": "ECO:0000303",
          "id": 315503,
          "withOrFrom": "[\"xxx:1221\",\"asdf:123123\"]"
        },
        {
          "reference": "asdf:123",
          "geneRelationship": "RO:0002327",
          "goTerm": "GO:0004796",
          "notes": "[\"asdf\"]",
          "feature": "efec3854-efd8-47c0-b41f-0d8ad5e3c17e",
          "evidenceCodeLabel": "NAS (ECO:0000303): non-traceable author statement",
          "negate": false,
          "aspect": "MF",
          "goTermLabel": "thromboxane-A synthase activity (GO:0004796) ",
          "evidenceCode": "ECO:0000303",
          "id": 315501,
          "withOrFrom": "[\"asdf:123\"]"
        }
      ],
      "description": "describe2",
      "type": {
        "cv": {
          "name": "sequence"
        },
        "name": "mRNA"
      },
      "parent_type": {
        "cv": {
          "name": "sequence"
        },
        "name": "gene"
      },
      "date_creation": 1597706984613,
      "sequence": "Merlin",
      "children": [
        {
          "owner": "None",
          "parent_name": "Merlin_58_mRNA-00001",
          "uniquename": "efec3854-efd8-47c0-b41f-0d8ad5e3c17e-non_canonical_five_prime_splice_site-14363",
          "type": {
            "cv": {
              "name": "sequence"
            },
            "name": "non_canonical_five_prime_splice_site"
          },
          "parent_type": {
            "cv": {
              "name": "sequence"
            },
            "name": "mRNA"
          },
          "date_creation": 1597706985177,
          "sequence": "Merlin",
          "parent_id": "efec3854-efd8-47c0-b41f-0d8ad5e3c17e",
          "name": "efec3854-efd8-47c0-b41f-0d8ad5e3c17e-non_canonical_five_prime_splice_site-14363",
          "location": {
            "strand": -1,
            "id": 315430,
            "fmin": 14363,
            "fmax": 14363
          },
          "id": 315429,
          "properties": [],
          "date_last_modified": 1597706985236
        },
        {
          "owner": "None",
          "parent_name": "Merlin_58_mRNA-00001",
          "uniquename": "481aebac-24c1-4816-ac9a-ddd697902161",
          "type": {
            "cv": {
              "name": "sequence"
            },
            "name": "exon"
          },
          "parent_type": {
            "cv": {
              "name": "sequence"
            },
            "name": "mRNA"
          },
          "date_creation": 1597706984547,
          "sequence": "Merlin",
          "parent_id": "efec3854-efd8-47c0-b41f-0d8ad5e3c17e",
          "name": "481aebac-24c1-4816-ac9a-ddd697902161",
          "location": {
            "strand": -1,
            "id": 315420,
            "fmin": 13065,
            "fmax": 14296
          },
          "id": 315419,
          "properties": [],
          "date_last_modified": 1597706984596
        },
        {
          "owner": "None",
          "parent_name": "Merlin_58_mRNA-00001",
          "uniquename": "efec3854-efd8-47c0-b41f-0d8ad5e3c17e-CDS",
          "type": {
            "cv": {
              "name": "sequence"
            },
            "name": "CDS"
          },
          "parent_type": {
            "cv": {
              "name": "sequence"
            },
            "name": "mRNA"
          },
          "date_creation": 1597706984870,
          "sequence": "Merlin",
          "parent_id": "efec3854-efd8-47c0-b41f-0d8ad5e3c17e",
          "name": "efec3854-efd8-47c0-b41f-0d8ad5e3c17e-CDS",
          "location": {
            "strand": -1,
            "id": 315427,
            "fmin": 13095,
            "fmax": 13230
          },
          "id": 315426,
          "properties": [],
          "date_last_modified": 1597706984914
        },
        {
          "owner": "None",
          "parent_name": "Merlin_58_mRNA-00001",
          "uniquename": "2abc490c-d813-4295-b86f-f74c012dad5d",
          "type": {
            "cv": {
              "name": "sequence"
            },
            "name": "exon"
          },
          "parent_type": {
            "cv": {
              "name": "sequence"
            },
            "name": "mRNA"
          },
          "date_creation": 1597706984607,
          "sequence": "Merlin",
          "parent_id": "efec3854-efd8-47c0-b41f-0d8ad5e3c17e",
          "name": "2abc490c-d813-4295-b86f-f74c012dad5d",
          "location": {
            "strand": -1,
            "id": 315423,
            "fmin": 14365,
            "fmax": 14796
          },
          "id": 315422,
          "properties": [],
          "date_last_modified": 1597706984884
        }
      ],
      "parent_id": "095526e1-e79f-4db0-bb52-630f4ec126fa",
      "name": "Merlin_58_mRNA-00001",
      "location": {
        "strand": -1,
        "id": 315418,
        "fmin": 13065,
        "fmax": 14796
      },
      "id": 315417,
      "properties": [
        {
          "name": "asd",
          "type": {
            "cv": {
              "name": "feature_property"
            }
          },
          "value": "1212"
        },
        {
          "name": "comment",
          "type": {
            "cv": {
              "name": "feature_property"
            }
          },
          "value": "asdfasdf"
        },
        {
          "name": "aba",
          "type": {
            "cv": {
              "name": "feature_property"
            }
          },
          "value": "1212"
        },
        {
          "name": "comment",
          "type": {
            "cv": {
              "name": "feature_property"
            }
          },
          "value": "asdf"
        }
      ],
      "date_last_modified": 1597966272157
    },
    {
      "owner": "nathandunn@lbl.gov",
      "parent_name": "Merlin_58_mRNA",
      "uniquename": "c84ed483-a2d9-41ed-96f9-96675ab8a477",
      "type": {
        "cv": {
          "name": "sequence"
        },
        "name": "mRNA"
      },
      "parent_type": {
        "cv": {
          "name": "sequence"
        },
        "name": "gene"
      },
      "date_creation": 1597706985424,
      "sequence": "Merlin",
      "children": [
        {
          "owner": "None",
          "parent_name": "Merlin_58_mRNA-00002",
          "uniquename": "c84ed483-a2d9-41ed-96f9-96675ab8a477-non_canonical_five_prime_splice_site-14463",
          "type": {
            "cv": {
              "name": "sequence"
            },
            "name": "non_canonical_five_prime_splice_site"
          },
          "parent_type": {
            "cv": {
              "name": "sequence"
            },
            "name": "mRNA"
          },
          "date_creation": 1597706985545,
          "sequence": "Merlin",
          "parent_id": "c84ed483-a2d9-41ed-96f9-96675ab8a477",
          "name": "c84ed483-a2d9-41ed-96f9-96675ab8a477-non_canonical_five_prime_splice_site-14463",
          "location": {
            "strand": -1,
            "id": 315446,
            "fmin": 14463,
            "fmax": 14463
          },
          "id": 315445,
          "properties": [],
          "date_last_modified": 1597706985595
        },
        {
          "owner": "None",
          "parent_name": "Merlin_58_mRNA-00002",
          "uniquename": "c84ed483-a2d9-41ed-96f9-96675ab8a477-CDS",
          "type": {
            "cv": {
              "name": "sequence"
            },
            "name": "CDS"
          },
          "parent_type": {
            "cv": {
              "name": "sequence"
            },
            "name": "mRNA"
          },
          "date_creation": 1597706985433,
          "sequence": "Merlin",
          "parent_id": "c84ed483-a2d9-41ed-96f9-96675ab8a477",
          "name": "c84ed483-a2d9-41ed-96f9-96675ab8a477-CDS",
          "location": {
            "strand": -1,
            "id": 315442,
            "fmin": 13095,
            "fmax": 13230
          },
          "id": 315441,
          "properties": [],
          "date_last_modified": 1597706985466
        },
        {
          "owner": "None",
          "parent_name": "Merlin_58_mRNA-00002",
          "uniquename": "75fa77c0-6e3b-4843-916e-bfac9bf466b0",
          "type": {
            "cv": {
              "name": "sequence"
            },
            "name": "exon"
          },
          "parent_type": {
            "cv": {
              "name": "sequence"
            },
            "name": "mRNA"
          },
          "date_creation": 1597706985414,
          "sequence": "Merlin",
          "parent_id": "c84ed483-a2d9-41ed-96f9-96675ab8a477",
          "name": "75fa77c0-6e3b-4843-916e-bfac9bf466b0",
          "location": {
            "strand": -1,
            "id": 315436,
            "fmin": 13065,
            "fmax": 14096
          },
          "id": 315435,
          "properties": [],
          "date_last_modified": 1597706985419
        },
        {
          "owner": "None",
          "parent_name": "Merlin_58_mRNA-00002",
          "uniquename": "c7a8bac5-37c6-4d97-9102-d66b56d23b93",
          "type": {
            "cv": {
              "name": "sequence"
            },
            "name": "exon"
          },
          "parent_type": {
            "cv": {
              "name": "sequence"
            },
            "name": "mRNA"
          },
          "date_creation": 1597706985422,
          "sequence": "Merlin",
          "parent_id": "c84ed483-a2d9-41ed-96f9-96675ab8a477",
          "name": "c7a8bac5-37c6-4d97-9102-d66b56d23b93",
          "location": {
            "strand": -1,
            "id": 315439,
            "fmin": 14465,
            "fmax": 14796
          },
          "id": 315438,
          "properties": [],
          "date_last_modified": 1597706985440
        },
        {
          "owner": "None",
          "parent_name": "Merlin_58_mRNA-00002",
          "uniquename": "c84ed483-a2d9-41ed-96f9-96675ab8a477-non_canonical_three_prime_splice_site-14096",
          "type": {
            "cv": {
              "name": "sequence"
            },
            "name": "non_canonical_three_prime_splice_site"
          },
          "parent_type": {
            "cv": {
              "name": "sequence"
            },
            "name": "mRNA"
          },
          "date_creation": 1597706985557,
          "sequence": "Merlin",
          "parent_id": "c84ed483-a2d9-41ed-96f9-96675ab8a477",
          "name": "c84ed483-a2d9-41ed-96f9-96675ab8a477-non_canonical_three_prime_splice_site-14096",
          "location": {
            "strand": -1,
            "id": 315449,
            "fmin": 14096,
            "fmax": 14096
          },
          "id": 315448,
          "properties": [],
          "date_last_modified": 1597706985596
        }
      ],
      "parent_id": "095526e1-e79f-4db0-bb52-630f4ec126fa",
      "name": "Merlin_58_mRNA-00002",
      "location": {
        "strand": -1,
        "id": 315434,
        "fmin": 13065,
        "fmax": 14796
      },
      "id": 315433,
      "properties": [],
      "date_last_modified": 1597706985595
    }
  ],
  "clientToken": "12741115651957302207",
  "track": "Merlin",
  "operation": "get_features"
}

Similarly, this is the equivalent GFF:

##gff-version 3
##sequence-region Merlin 1 172788
Merlin  .   gene    13066   14796   .   -   .   owner=nathandunn@lbl.gov;symbol=asymbol;go_annotations=rank%3D1%3Baspect%3DMF%3Bterm%3DGO:0008143%3Bdb_xref%3Dasdf:123%3Bevidence%3DECO:0000270%3Bgene_product_relationship%3DRO:0002327%3Bnegate%3Dfalse%3Bnote%3D["asdf"]%3Bbased_on%3D["aasdf:213"]%3Blast_updated%3D2020-08-20 23:24:39.175%3Bdate_created%3D2020-08-20 23:24:39.175%2Crank%3D2%3Baspect%3DBP%3Bterm%3DGO:0033304%3Bdb_xref%3DPMID:1231%3Bevidence%3DECO:0007003%3Bgene_product_relationship%3DRO:0002331%3Bnegate%3Dfalse%3Bnote%3D["asfd"]%3Bbased_on%3D["Uniprot:someonumber"]%3Blast_updated%3D2020-08-20 23:24:23.73%3Bdate_created%3D2020-08-20 23:24:23.73;description=asdfasdfasdf;Name=Merlin_58_mRNA;date_creation=2020-08-17;provenance=rank%3D1%3Bfield%3DSYNONYM%3Bdb_xref%3Dasdf:123%3Bevidence%3DECO:0000318%3Bnote%3D["asdf"]%3Bbased_on%3D["asdf:1212"]%3Blast_updated%3D2020-08-20 23:27:29.367%3Bdate_created%3D2020-08-20 23:27:29.367%2Crank%3D2%3Bfield%3DTYPE%3Bdb_xref%3Dasdf:123%3Bevidence%3DECO:0000318%3Bnote%3D["asdfasdf"]%3Bbased_on%3D["asdf:123"]%3Blast_updated%3D2020-08-20 23:27:37.012%3Bdate_created%3D2020-08-20 23:27:37.012;Alias=alias1,alias2,alias3;Note=asdf,zxcvzxcv;asdf=123,123;gene_product=rank%3D1%3Bterm%3Dasdf%3Bdb_xref%3Dasdf:123%3Bevidence%3DECO:0000305%3Balternate%3Dtrue%3Bnote%3D[]%3Bbased_on%3D["asdf:12"]%3Blast_updated%3D2020-08-20 23:25:53.554%3Bdate_created%3D2020-08-20 23:25:53.554%2Crank%3D2%3Bterm%3Dabdc%3Bdb_xref%3Dasdf:123%3Bevidence%3DECO:0007007%3Balternate%3Dfalse%3Bnote%3D["adsf"]%3Bbased_on%3D["asdf:123"]%3Blast_updated%3D2020-08-20 23:24:50.251%3Bdate_created%3D2020-08-20 23:24:50.251%2Crank%3D3%3Bterm%3Dhair%3Bdb_xref%3Dasasdf:123123%3Bevidence%3DECO:0000266%3Balternate%3Dtrue%3Bnote%3D["asfasdfasdf"]%3Bbased_on%3D["asda:1212"]%3Blast_updated%3D2020-08-20 23:25:36.124%3Bdate_created%3D2020-08-20 23:25:36.124;ID=095526e1-e79f-4db0-bb52-630f4ec126fa;date_last_modified=2020-08-20;Dbxref=baasdf:123,asdf:1231
Merlin  .   mRNA    13066   14796   .   -   .   owner=nathandunn@lbl.gov;aba=1212;Parent=095526e1-e79f-4db0-bb52-630f4ec126fa;go_annotations=rank%3D1%3Baspect%3DMF%3Bterm%3DGO:0004796%3Bdb_xref%3Dasdf:123%3Bevidence%3DECO:0000303%3Bgene_product_relationship%3DRO:0002327%3Bnegate%3Dfalse%3Bnote%3D["asdf"]%3Bbased_on%3D["asdf:123"]%3Blast_updated%3D2020-08-20 23:28:35.898%3Bdate_created%3D2020-08-20 23:28:35.898%2Crank%3D2%3Baspect%3DCC%3Bterm%3DGO:0048183%3Bdb_xref%3Dasdf:12312%3Bevidence%3DECO:0000303%3Bgene_product_relationship%3DBFO:0000050%3Bnegate%3Dfalse%3Bnote%3D["asdf"]%3Bbased_on%3D["xxx:1221"%2C"asdf:123123"]%3Blast_updated%3D2020-08-20 23:29:00.387%3Bdate_created%3D2020-08-20 23:29:00.387;description=describe2;Name=Merlin_58_mRNA-00001;date_creation=2020-08-17;provenance=rank%3D1%3Bfield%3DNAME%3Bdb_xref%3Dasdf:123%3Bevidence%3DECO:0007005%3Bnote%3D["asdf"]%3Bbased_on%3D["asdf:123"]%3Blast_updated%3D2020-08-20 23:30:47.757%3Bdate_created%3D2020-08-20 23:30:47.757;asd=1212;Alias=alias2;Note=asdf,asdfasdf;gene_product=rank%3D1%3Bterm%3Db%3Bdb_xref%3Dasdf:123%3Bevidence%3DECO:0000315%3Balternate%3Dfalse%3Bnote%3D["asdf"]%3Bbased_on%3D["asdf:123"]%3Blast_updated%3D2020-08-20 23:30:38.138%3Bdate_created%3D2020-08-20 23:30:38.138%2Crank%3D2%3Bterm%3Dasdf%3Bdb_xref%3Dasdf:123%3Bevidence%3DECO:0000321%3Balternate%3Dfalse%3Bnote%3D["asdf"]%3Bbased_on%3D["asdf:12312"]%3Blast_updated%3D2020-08-20 23:30:16.566%3Bdate_created%3D2020-08-20 23:30:16.566;ID=efec3854-efd8-47c0-b41f-0d8ad5e3c17e;date_last_modified=2020-08-20;Dbxref=asdf:2131,aasd:1212
Merlin  .   non_canonical_five_prime_splice_site    14364   14364   .   -   .   Parent=efec3854-efd8-47c0-b41f-0d8ad5e3c17e;ID=efec3854-efd8-47c0-b41f-0d8ad5e3c17e-non_canonical_five_prime_splice_site-14363;Name=efec3854-efd8-47c0-b41f-0d8ad5e3c17e-non_canonical_five_prime_splice_site-14363
Merlin  .   exon    13066   14296   .   -   .   Parent=efec3854-efd8-47c0-b41f-0d8ad5e3c17e;ID=481aebac-24c1-4816-ac9a-ddd697902161;Name=481aebac-24c1-4816-ac9a-ddd697902161
Merlin  .   CDS 13096   13230   .   -   0   Parent=efec3854-efd8-47c0-b41f-0d8ad5e3c17e;ID=efec3854-efd8-47c0-b41f-0d8ad5e3c17e-CDS;Name=efec3854-efd8-47c0-b41f-0d8ad5e3c17e-CDS
Merlin  .   exon    14366   14796   .   -   .   Parent=efec3854-efd8-47c0-b41f-0d8ad5e3c17e;ID=2abc490c-d813-4295-b86f-f74c012dad5d;Name=2abc490c-d813-4295-b86f-f74c012dad5d
Merlin  .   mRNA    13066   14796   .   -   .   owner=nathandunn@lbl.gov;Parent=095526e1-e79f-4db0-bb52-630f4ec126fa;ID=c84ed483-a2d9-41ed-96f9-96675ab8a477;date_last_modified=2020-08-17;Name=Merlin_58_mRNA-00002;date_creation=2020-08-17
Merlin  .   non_canonical_five_prime_splice_site    14464   14464   .   -   .   Parent=c84ed483-a2d9-41ed-96f9-96675ab8a477;ID=c84ed483-a2d9-41ed-96f9-96675ab8a477-non_canonical_five_prime_splice_site-14463;Name=c84ed483-a2d9-41ed-96f9-96675ab8a477-non_canonical_five_prime_splice_site-14463
Merlin  .   CDS 13096   13230   .   -   0   Parent=c84ed483-a2d9-41ed-96f9-96675ab8a477;ID=c84ed483-a2d9-41ed-96f9-96675ab8a477-CDS;Name=c84ed483-a2d9-41ed-96f9-96675ab8a477-CDS
Merlin  .   exon    13066   14096   .   -   .   Parent=c84ed483-a2d9-41ed-96f9-96675ab8a477;ID=75fa77c0-6e3b-4843-916e-bfac9bf466b0;Name=75fa77c0-6e3b-4843-916e-bfac9bf466b0
Merlin  .   exon    14466   14796   .   -   .   Parent=c84ed483-a2d9-41ed-96f9-96675ab8a477;ID=c7a8bac5-37c6-4d97-9102-d66b56d23b93;Name=c7a8bac5-37c6-4d97-9102-d66b56d23b93
Merlin  .   non_canonical_three_prime_splice_site   14097   14097   .   -   .   Parent=c84ed483-a2d9-41ed-96f9-96675ab8a477;ID=c84ed483-a2d9-41ed-96f9-96675ab8a477-non_canonical_three_prime_splice_site-14096;Name=c84ed483-a2d9-41ed-96f9-96675ab8a477-non_canonical_three_prime_splice_site-14096
###
nathandunn commented 3 years ago

Things to change before we do this:

     "go_annotations": [
        {
          "reference": "asdf:12312",
          "geneRelationship": "BFO:0000050",
          "goTerm": "GO:0048183",
          "notes": "[\"asdf\"]",
          "feature": "efec3854-efd8-47c0-b41f-0d8ad5e3c17e",
          "evidenceCodeLabel": "NAS (ECO:0000303): non-traceable author statement",
          "negate": false,
          "aspect": "CC",
          "goTermLabel": "activin AB complex (GO:0048183) ",
          "evidenceCode": "ECO:0000303",
          "id": 315503,
          "withOrFrom": "[\"xxx:1221\",\"asdf:123123\"]"
        },

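The GO annotations should use more of the allowed values defined by the GPAD spec. Fields that currently hold a JSON array serialized as a string (e.g. `notes` and `withOrFrom` above) should be real JSON arrays.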

The model should be more gene-centric, allowing more functional annotations on the genes themselves.

Need to remove the internal long (numeric) ids, e.g. `"id": 315503`.

nathandunn commented 3 years ago

Note: the "parent" class with functional annotations is entirely missing from this output, because the output is intended for JBrowse rendering, which is mRNA-specific for visualization.