Closed MeLonJ10 closed 5 years ago
@MeLonJ10 As far as I know, the xml file contains the annotations for Aperio's selected ROIs. To run the code, you need the svs and .json files. Do you know where to download the .json file for sorting images? I cannot find it. Thanks a lot.
Once you add the images to your cart, you can download both the manifest and the json file. The xml files are indeed Aperio's selected ROIs.
@ncoudray I am trying to run the DeepPATH code, but my input json file is not correct. Could you please share two or three example .svs files and the corresponding .json files? I would also like to know whether the gene mutation information is in the json file, or whether I need to download other files for the mutation data.
Please help me check that I am choosing the right data. query
Thanks a lot in advance.
I think the website (https://portal.gdc.cancer.gov/) has changed a lot since you ran your experiments, so the format and content of the metadata files may no longer be the same as yours.
@hongyiyu about the mutation data, we used the mutect2 "masked somatic mutations". I adjusted your query here
Let me know if you need further help!
@hongyiyu As for the json "metadata" file that you download at the same time as the svs images, it looks like this for one image:
{
"center": {
"code": "36",
"name": "Nationwide Children's Hospital BCR",
"short_name": "NCH",
"center_id": "a6b3bcf1-9ca6-56e9-8f04-0e3a63e60a6a",
"namespace": "nationwidechildrens.org",
"center_type": "BCR"
},
"data_type": "Tissue slide image",
"tags": [
"image"
],
"file_name": "TCGA-NJ-A4YF-01A-01-TSA.609E6AD3-35AD-4348-9376-A779576E7DCF.svs",
"md5sum": "c639fb74ac74a6d85f9bb35c84ffbb5b",
"data_format": "SVS",
"submitter_id": null,
"access": "open",
"platform": "Clinical",
"state": "live",
"file_size": 146714961,
"file_id": "4a600861-f669-4e2e-840f-c8edb5bc82a7",
"data_category": "Clinical",
"associated_entities": [
{
"entity_id": "609e6ad3-35ad-4348-9376-a779576e7dcf",
"case_id": "595fc3ad-f603-421b-b130-52f1f617050b",
"entity_submitter_id": "TCGA-NJ-A4YF-01A-01-TSA",
"entity_type": "slide"
}
],
"cases": [
{
"diagnoses": [
{
"classification_of_tumor": "not reported",
"last_known_disease_status": "not reported",
"updated_datetime": "2016-09-02T19:13:43.251687-05:00",
"primary_diagnosis": "c34.1",
"submitter_id": "TCGA-NJ-A4YF_diagnosis",
"tumor_stage": "stage ia",
"age_at_diagnosis": 18584.0,
"vital_status": "alive",
"morphology": "8255/3",
"days_to_death": null,
"days_to_last_known_disease_status": null,
"days_to_last_follow_up": 2161.0,
"state": null,
"days_to_recurrence": null,
"diagnosis_id": "1813a5e5-353c-5dae-a0fc-f7963c763002",
"tumor_grade": "not reported",
"treatments": [
{
"days_to_treatment": null,
"updated_datetime": "2016-09-02T19:13:43.251687-05:00",
"created_datetime": null,
"submitter_id": "TCGA-NJ-A4YF_treatment",
"treatment_id": "fdf867ed-b6e9-522d-8415-93cf87b2febd",
"state": null,
"therapeutic_agents": null,
"treatment_intent_type": null,
"treatment_or_therapy": null
}
],
"tissue_or_organ_of_origin": "c34.1",
"days_to_birth": -18584.0,
"progression_or_recurrence": "not reported",
"prior_malignancy": "not reported",
"site_of_resection_or_biopsy": "c34.1",
"created_datetime": null
}
],
"updated_datetime": "2016-09-08T13:17:57.351988-05:00",
"created_datetime": null,
"demographic": {
"updated_datetime": "2016-09-02T19:13:43.251687-05:00",
"created_datetime": null,
"gender": "female",
"state": null,
"submitter_id": "TCGA-NJ-A4YF_demographic",
"year_of_birth": 1957,
"race": "black or african american",
"demographic_id": "bc7068cd-16c2-57ca-8c86-e7f8a1541d10",
"ethnicity": "not hispanic or latino",
"year_of_death": null
},
"submitter_id": "TCGA-NJ-A4YF",
"project": {
"project_id": "TCGA-LUAD"
},
"state": null,
"case_id": "595fc3ad-f603-421b-b130-52f1f617050b",
"samples": [
{
"sample_type_id": "01",
"updated_datetime": "2016-09-08T13:17:57.351988-05:00",
"time_between_excision_and_freezing": null,
"oct_embedded": "true",
"tumor_code_id": null,
"submitter_id": "TCGA-NJ-A4YF-01A",
"intermediate_dimension": null,
"sample_id": "ecbaf334-d86b-4316-ae93-84feff5da296",
"is_ffpe": false,
"pathology_report_uuid": "1C272E41-4BE4-4E8A-BD33-A5EC5C33149C",
"created_datetime": null,
"tumor_descriptor": null,
"sample_type": "Primary Tumor",
"state": null,
"current_weight": null,
"composition": null,
"time_between_clamping_and_freezing": null,
"shortest_dimension": null,
"tumor_code": null,
"tissue_type": null,
"days_to_sample_procurement": null,
"freezing_method": null,
"portions": [
{
"creation_datetime": 1352764800,
"portion_number": "12",
"weight": 20.0,
"updated_datetime": "2016-09-08T13:17:57.351988-05:00",
"created_datetime": null,
"analytes": [],
"submitter_id": "TCGA-NJ-A4YF-01A-12",
"slides": [
{
"percent_tumor_nuclei": 80.0,
"percent_monocyte_infiltration": 0.0,
"percent_normal_cells": 0.0,
"percent_stromal_cells": 20.0,
"updated_datetime": "2016-09-08T13:17:57.351988-05:00",
"created_datetime": null,
"percent_eosinophil_infiltration": null,
"percent_lymphocyte_infiltration": 0.0,
"percent_neutrophil_infiltration": 0.0,
"state": null,
"section_location": "TOP",
"percent_granulocyte_infiltration": null,
"percent_necrosis": 0.0,
"slide_id": "609e6ad3-35ad-4348-9376-a779576e7dcf",
"percent_inflam_infiltration": null,
"submitter_id": "TCGA-NJ-A4YF-01A-01-TSA",
"number_proliferating_cells": null,
"percent_tumor_cells": 80.0
}
],
"state": null,
"portion_id": "132df299-4e74-4713-a6e8-40c056742d79",
"is_ffpe": false
}
],
"preservation_method": null,
"days_to_collection": 1819,
"initial_weight": 90.0,
"longest_dimension": null
}
],
"exposures": [
{
"cigarettes_per_day": 1.643835616438356,
"weight": null,
"updated_datetime": "2016-09-02T19:13:43.251687-05:00",
"alcohol_history": null,
"alcohol_intensity": null,
"bmi": null,
"years_smoked": null,
"height": null,
"created_datetime": null,
"state": null,
"exposure_id": "9115f597-b265-5167-8a1b-654eda2cf481",
"submitter_id": "TCGA-NJ-A4YF_exposure"
}
]
}
],
"archive": {
"archive_id": "947d1348-9a6e-41c7-a810-e064127679f5",
"data_type": "TCGA DCC Archive",
"updated_datetime": "2016-09-29T21:29:28.480965-05:00",
"file_name": "nationwidechildrens.org_LUAD.tissue_images.Level_1.278.0.0.tar.gz",
"md5sum": "9f0d09b9cd4e125562dd7db2300f7571",
"data_format": "TARGZ",
"submitter_id": "nationwidechildrens.org_LUAD.tissue_images.Level_1.278",
"state": "live",
"data_category": "Archive",
"file_size": 3999211490,
"revision": 0
}
},
@hongyiyu As for the json "metadata" file that you download at the same time as the svs images, it looks like this for one image:
{ "center": { "code": "36", "name": "Nationwide Children's Hospital BCR", "short_name": "NCH", "center_id": "a6b3bcf1-9ca6-56e9-8f04-0e3a63e60a6a", "namespace": "nationwidechildrens.org", "center_type": "BCR" }, "data_type": "Tissue slide image", "tags": [ "image" ], "file_name": "TCGA-NJ-A4YF-01A-01-TSA.609E6AD3-35AD-4348-9376-A779576E7DCF.svs", "md5sum": "c639fb74ac74a6d85f9bb35c84ffbb5b", "data_format": "SVS", "submitter_id": null, "access": "open", "platform": "Clinical", "state": "live", "file_size": 146714961, "file_id": "4a600861-f669-4e2e-840f-c8edb5bc82a7", "data_category": "Clinical", "associated_entities": [ { "entity_id": "609e6ad3-35ad-4348-9376-a779576e7dcf", "case_id": "595fc3ad-f603-421b-b130-52f1f617050b", "entity_submitter_id": "TCGA-NJ-A4YF-01A-01-TSA", "entity_type": "slide" } ], "cases": [ { "diagnoses": [ { "classification_of_tumor": "not reported", "last_known_disease_status": "not reported", "updated_datetime": "2016-09-02T19:13:43.251687-05:00", "primary_diagnosis": "c34.1", "submitter_id": "TCGA-NJ-A4YF_diagnosis", "tumor_stage": "stage ia", "age_at_diagnosis": 18584.0, "vital_status": "alive", "morphology": "8255/3", "days_to_death": null, "days_to_last_known_disease_status": null, "days_to_last_follow_up": 2161.0, "state": null, "days_to_recurrence": null, "diagnosis_id": "1813a5e5-353c-5dae-a0fc-f7963c763002", "tumor_grade": "not reported", "treatments": [ { "days_to_treatment": null, "updated_datetime": "2016-09-02T19:13:43.251687-05:00", "created_datetime": null, "submitter_id": "TCGA-NJ-A4YF_treatment", "treatment_id": "fdf867ed-b6e9-522d-8415-93cf87b2febd", "state": null, "therapeutic_agents": null, "treatment_intent_type": null, "treatment_or_therapy": null } ], "tissue_or_organ_of_origin": "c34.1", "days_to_birth": -18584.0, "progression_or_recurrence": "not reported", "prior_malignancy": "not reported", "site_of_resection_or_biopsy": "c34.1", "created_datetime": null } ], "updated_datetime": "2016-09-08T13:17:57.351988-05:00", 
"created_datetime": null, "demographic": { "updated_datetime": "2016-09-02T19:13:43.251687-05:00", "created_datetime": null, "gender": "female", "state": null, "submitter_id": "TCGA-NJ-A4YF_demographic", "year_of_birth": 1957, "race": "black or african american", "demographic_id": "bc7068cd-16c2-57ca-8c86-e7f8a1541d10", "ethnicity": "not hispanic or latino", "year_of_death": null }, "submitter_id": "TCGA-NJ-A4YF", "project": { "project_id": "TCGA-LUAD" }, "state": null, "case_id": "595fc3ad-f603-421b-b130-52f1f617050b", "samples": [ { "sample_type_id": "01", "updated_datetime": "2016-09-08T13:17:57.351988-05:00", "time_between_excision_and_freezing": null, "oct_embedded": "true", "tumor_code_id": null, "submitter_id": "TCGA-NJ-A4YF-01A", "intermediate_dimension": null, "sample_id": "ecbaf334-d86b-4316-ae93-84feff5da296", "is_ffpe": false, "pathology_report_uuid": "1C272E41-4BE4-4E8A-BD33-A5EC5C33149C", "created_datetime": null, "tumor_descriptor": null, "sample_type": "Primary Tumor", "state": null, "current_weight": null, "composition": null, "time_between_clamping_and_freezing": null, "shortest_dimension": null, "tumor_code": null, "tissue_type": null, "days_to_sample_procurement": null, "freezing_method": null, "portions": [ { "creation_datetime": 1352764800, "portion_number": "12", "weight": 20.0, "updated_datetime": "2016-09-08T13:17:57.351988-05:00", "created_datetime": null, "analytes": [], "submitter_id": "TCGA-NJ-A4YF-01A-12", "slides": [ { "percent_tumor_nuclei": 80.0, "percent_monocyte_infiltration": 0.0, "percent_normal_cells": 0.0, "percent_stromal_cells": 20.0, "updated_datetime": "2016-09-08T13:17:57.351988-05:00", "created_datetime": null, "percent_eosinophil_infiltration": null, "percent_lymphocyte_infiltration": 0.0, "percent_neutrophil_infiltration": 0.0, "state": null, "section_location": "TOP", "percent_granulocyte_infiltration": null, "percent_necrosis": 0.0, "slide_id": "609e6ad3-35ad-4348-9376-a779576e7dcf", "percent_inflam_infiltration": 
null, "submitter_id": "TCGA-NJ-A4YF-01A-01-TSA", "number_proliferating_cells": null, "percent_tumor_cells": 80.0 } ], "state": null, "portion_id": "132df299-4e74-4713-a6e8-40c056742d79", "is_ffpe": false } ], "preservation_method": null, "days_to_collection": 1819, "initial_weight": 90.0, "longest_dimension": null } ], "exposures": [ { "cigarettes_per_day": 1.643835616438356, "weight": null, "updated_datetime": "2016-09-02T19:13:43.251687-05:00", "alcohol_history": null, "alcohol_intensity": null, "bmi": null, "years_smoked": null, "height": null, "created_datetime": null, "state": null, "exposure_id": "9115f597-b265-5167-8a1b-654eda2cf481", "submitter_id": "TCGA-NJ-A4YF_exposure" } ] } ], "archive": { "archive_id": "947d1348-9a6e-41c7-a810-e064127679f5", "data_type": "TCGA DCC Archive", "updated_datetime": "2016-09-29T21:29:28.480965-05:00", "file_name": "nationwidechildrens.org_LUAD.tissue_images.Level_1.278.0.0.tar.gz", "md5sum": "9f0d09b9cd4e125562dd7db2300f7571", "data_format": "TARGZ", "submitter_id": "nationwidechildrens.org_LUAD.tissue_images.Level_1.278", "state": "live", "data_category": "Archive", "file_size": 3999211490, "revision": 0 } },
I have tried all the combinations of the options, but I still cannot get a JSON file like the one above. Could you describe in detail the process you used to get the JSON file? Thank you very much!
@Docurdt Have you managed to get the json file? You need to go to the legacy-archive portal, add all the svs slides to your cart, then go to your cart and download the metadata.
@Docurdt Have you managed to get the json file? You need to go to the legacy-archive portal, add all the svs slides to your cart, then go to your cart and download the metadata.
Yes, it works for me now, thank you very much!
I downloaded the data from this website, https://portal.gdc.cancer.gov/repository, but only the svs files are available. Where can I get the xml file, and what is the xml file mainly used for? Thanks a lot for your kindness!