Open anngvu opened 1 month ago
Random thoughts: 1) We can pull out the schema.org metadata (particularly from the Zenodo datasets) and base our structure on theirs. 2) We could potentially use the embedded metadata as a mechanism for importing details.
Example 1:
<script type='application/ld+json'>{
"@context": "http://schema.org",
"@id": "https://doi.org/10.5281/zenodo.5883313",
"@type": "https://schema.org/Dataset",
"author": [
{
"@id": "https://orcid.org/0000-0003-0664-9700",
"@type": "Person",
"affiliation": [
{
"@type": "Organization",
"name": "Fondazione IRCCS Istituto Neurologico Carlo Besta"
}
],
"familyName": "Vetrano",
"givenName": "Ignazio Gaspare",
"name": "Vetrano, Ignazio Gaspare"
},
{
"@id": "https://orcid.org/0000-0001-7359-8558",
"@type": "Person",
"affiliation": [
{
"@type": "Organization",
"name": "Universit\u00e0 degli Studi di Milano"
}
],
"familyName": "Dei Cas",
"givenName": "Michele",
"name": "Dei Cas, Michele"
},
{
"@id": "https://orcid.org/0000-0003-4744-9377",
"@type": "Person",
"affiliation": [
{
"@type": "Organization",
"name": "Fondazione IRCCS Istituto Neurologico Carlo Besta"
}
],
"familyName": "Eoli",
"givenName": "Marica",
"name": "Eoli, Marica"
},
{
"@id": "https://orcid.org/0000-0001-6811-6112",
"@type": "Person",
"affiliation": [
{
"@type": "Organization",
"name": "Fondazione IRCCS Istituto Neurologico Carlo Besta"
}
],
"familyName": "Potenza",
"givenName": "Antonella",
"name": "Potenza, Antonella"
},
{
"@id": "https://orcid.org/0000-0001-5752-1374",
"@type": "Person",
"affiliation": [
{
"@type": "Organization",
"name": "Fondazione IRCCS Istituto Neurologico Carlo Besta"
}
],
"familyName": "Carrozzini",
"givenName": "Tatiana",
"name": "Carrozzini, Tatiana"
},
{
"@id": "https://orcid.org/0000-0001-8483-3509",
"@type": "Person",
"affiliation": [
{
"@type": "Organization",
"name": "Fondazione IRCCS Istituto Neurologico Carlo Besta"
}
],
"familyName": "Gorla",
"givenName": "Gemma",
"name": "Gorla, Gemma"
},
{
"@id": "https://orcid.org/0000-0002-3186-8860",
"@type": "Person",
"affiliation": [
{
"@type": "Organization",
"name": "Universit\u00e0 degli Studi di Milano"
}
],
"familyName": "Paroni",
"givenName": "Rita",
"name": "Paroni, Rita"
},
{
"@id": "https://orcid.org/0000-0002-6300-5867",
"@type": "Person",
"affiliation": [
{
"@type": "Organization",
"name": "Universit\u00e0 degli Studi di Milano"
}
],
"familyName": "Ghidoni",
"givenName": "Riccardo",
"name": "Ghidoni, Riccardo"
},
{
"@id": "https://orcid.org/0000-0001-6751-5031",
"@type": "Person",
"affiliation": [
{
"@type": "Organization",
"name": "Fondazione IRCCS Istituto Neurologico Carlo Besta"
}
],
"familyName": "Gatti",
"givenName": "Laura",
"name": "Gatti, Laura"
}
],
"creator": [
{
"@id": "https://orcid.org/0000-0003-0664-9700",
"@type": "Person",
"affiliation": [
{
"@type": "Organization",
"name": "Fondazione IRCCS Istituto Neurologico Carlo Besta"
}
],
"familyName": "Vetrano",
"givenName": "Ignazio Gaspare",
"name": "Vetrano, Ignazio Gaspare"
},
{
"@id": "https://orcid.org/0000-0001-7359-8558",
"@type": "Person",
"affiliation": [
{
"@type": "Organization",
"name": "Universit\u00e0 degli Studi di Milano"
}
],
"familyName": "Dei Cas",
"givenName": "Michele",
"name": "Dei Cas, Michele"
},
{
"@id": "https://orcid.org/0000-0003-4744-9377",
"@type": "Person",
"affiliation": [
{
"@type": "Organization",
"name": "Fondazione IRCCS Istituto Neurologico Carlo Besta"
}
],
"familyName": "Eoli",
"givenName": "Marica",
"name": "Eoli, Marica"
},
{
"@id": "https://orcid.org/0000-0001-6811-6112",
"@type": "Person",
"affiliation": [
{
"@type": "Organization",
"name": "Fondazione IRCCS Istituto Neurologico Carlo Besta"
}
],
"familyName": "Potenza",
"givenName": "Antonella",
"name": "Potenza, Antonella"
},
{
"@id": "https://orcid.org/0000-0001-5752-1374",
"@type": "Person",
"affiliation": [
{
"@type": "Organization",
"name": "Fondazione IRCCS Istituto Neurologico Carlo Besta"
}
],
"familyName": "Carrozzini",
"givenName": "Tatiana",
"name": "Carrozzini, Tatiana"
},
{
"@id": "https://orcid.org/0000-0001-8483-3509",
"@type": "Person",
"affiliation": [
{
"@type": "Organization",
"name": "Fondazione IRCCS Istituto Neurologico Carlo Besta"
}
],
"familyName": "Gorla",
"givenName": "Gemma",
"name": "Gorla, Gemma"
},
{
"@id": "https://orcid.org/0000-0002-3186-8860",
"@type": "Person",
"affiliation": [
{
"@type": "Organization",
"name": "Universit\u00e0 degli Studi di Milano"
}
],
"familyName": "Paroni",
"givenName": "Rita",
"name": "Paroni, Rita"
},
{
"@id": "https://orcid.org/0000-0002-6300-5867",
"@type": "Person",
"affiliation": [
{
"@type": "Organization",
"name": "Universit\u00e0 degli Studi di Milano"
}
],
"familyName": "Ghidoni",
"givenName": "Riccardo",
"name": "Ghidoni, Riccardo"
},
{
"@id": "https://orcid.org/0000-0001-6751-5031",
"@type": "Person",
"affiliation": [
{
"@type": "Organization",
"name": "Fondazione IRCCS Istituto Neurologico Carlo Besta"
}
],
"familyName": "Gatti",
"givenName": "Laura",
"name": "Gatti, Laura"
}
],
"dateCreated": "2022-01-20T12:45:56.958153+00:00",
"dateModified": "2022-01-20T13:48:56.704381+00:00",
"datePublished": "2022-01-20",
"description": "\u003cp\u003e\u003cstrong\u003eLipid species derived from analyses, anonymized sample series\u003c/strong\u003e\u003c/p\u003e",
"identifier": "https://doi.org/10.5281/zenodo.5883313",
"keywords": "ceramide, phospholipids, lipidomic, neurofibroma, plexiform neurofibroma, PNST, schwannoma, sphingolipids",
"name": "dataset related to article \"The Lipid Asset Is Unbalanced in Peripheral Nerve Sheath Tumors\"",
"publisher": {
"@type": "Organization",
"name": "Zenodo"
},
"url": "https://zenodo.org/records/5883313"
}</script>
Example 2:
<script type='application/ld+json'>{
"@context": "http://schema.org",
"@id": "https://doi.org/10.5281/zenodo.56735",
"@type": "https://schema.org/Dataset",
"author": [
{
"@type": "Person",
"affiliation": [
{
"@type": "Organization",
"name": "University of Pennsylvania"
}
],
"familyName": "Way",
"givenName": "Gregory",
"name": "Way, Gregory"
}
],
"contentSize": "531.82 MB",
"creator": [
{
"@type": "Person",
"affiliation": [
{
"@type": "Organization",
"name": "University of Pennsylvania"
}
],
"familyName": "Way",
"givenName": "Gregory",
"name": "Way, Gregory"
}
],
"dateCreated": "2016-06-30T16:38:24+00:00",
"dateModified": "2020-01-24T19:24:03.056777+00:00",
"datePublished": "2016-06-30",
"description": "\u003cp\u003eAll data is publicly available and downloaded from UCSC Xena\u003cbr /\u003e\nhttps://genome-cancer.ucsc.edu/proj/site/xena/datapages/?cohort=TCGA%20Pan-Cancer\u003c/p\u003e\n\n\u003cp\u003eBecause the database is continously updated and to ensure reproducibility,\u0026nbsp;access data from this cached download.\u003c/p\u003e\n\n\u003cp\u003eRNAseq and Clincal data were downloaded on 8 March 2016\u003cbr /\u003e\nMutation data was downloaded on 12 June 2015\u003c/p\u003e",
"distribution": [
{
"@type": "DataDownload",
"contentUrl": "https://zenodo.org/api/records/56735/files/gbm_classifier_data.tar.gz/content",
"encodingFormat": "application/gzip"
}
],
"identifier": "https://doi.org/10.5281/zenodo.56735",
"license": "https://creativecommons.org/publicdomain/zero/1.0/legalcode",
"name": "Data used for training glioblastoma NF1 classifier",
"publisher": {
"@type": "Organization",
"name": "Zenodo"
},
"size": "531.82 MB",
"url": "https://zenodo.org/records/56735"
}</script>
We have two arms of dataset curation. One is to use existing Synapse resources such as manifest.csv, Wikis, and/or tables to create dataset metadata; let's call this the internal arm. The other is to curate external datasets (see Robert's idea that we should enable thangs.com-like discovery). Here are some identified, actually interesting datasets for the external arm.
Note: These would have to go into a separate view from the current dataset collections (which require Synapse Datasets), so ultimately we would use another table to union the two.