HumanCellAtlas / ingest-central

Ingest Central is the hub repository for the ingest service
Apache License 2.0
0 stars 1 forks source link

Populate the provenance fields before validation #504

Open aaclan-ebi opened 5 years ago

aaclan-ebi commented 5 years ago

In the current code, it's possible to export invalid JSON to the DSS, because the provenance fields are only populated during exporting, by which point validation has already happened.

These fields should be populated prior to submission to the ingest-api so that they can be validated.

{
    "describedBy": "https://schema.integration.data.humancellatlas.org/type/biomaterial/10.2.1/specimen_from_organism",
    "schema_type": "biomaterial",
    "biomaterial_core": {
        "biomaterial_id": "updated_donor_id_2019-08-14T072800.415232Z",
        "biomaterial_name": "This is a dummy specimen",
        "biomaterial_description": "This is a dummy donor specimen",
        "ncbi_taxon_id": [
            9606
        ],
        "genotype": "DRB1 0401 protective allele",
        "supplementary_files": [
            "metadata_dog.png"
        ],
        "biosamples_accession": "SAMN00000000",
        "insdc_sample_accession": "SRS0000000"
    },
    "genus_species": [{
        "text": "Homo sapiens",
        "ontology": "NCBITaxon:9606",
        "ontology_label": "Homo sapiens"
    }],
    "organ": {
        "text": "kidney",
        "ontology": "UBERON:0002113",
        "ontology_label": "kidney"
    },
    "organ_parts": [{
        "text": "tongue taste bud",
        "ontology": "UBERON:0014451",
        "ontology_label": "tongue taste bud"
    }],
    "diseases": [{
        "text": "H syndrome",
        "ontology": "MONDO:0011273",
        "ontology_label": "H syndrome"
    }],
    "state_of_specimen": {
        "autolysis_score": "moderate",
        "gross_description": "normal",
        "gross_images": [
            "my_gross_image_file.jpg"
        ],
        "ischemic_temperature": "warm",
        "ischemic_time": 7200,
        "microscopic_description": "Mixture of different cell sizes apparent.",
        "microscopic_images": [
            "my_microscopic_image_file.jpg"
        ],
        "postmortem_interval": 2400
    },
    "preservation_storage": {
        "storage_method": "ambient temperature",
        "storage_time": 5,
        "storage_time_unit": {
            "text": "days",
            "ontology": "UO:0000033",
            "ontology_label": "day"
        },
        "preservation_method": "cryopreservation in liquid nitrogen (dead tissue)"
    },
    "collection_time": "2017-03-19T07:22:00Z",
    "provenance": {
        "document_id": "922bd46e-281f-4dc4-b7b2-08f8195b8224",
        "submission_date": "2019-08-14T07:28:00.844Z",
        "update_date": "2019-08-14T07:28:06.997Z",
        "schema_major_version": 10,
        "schema_minor_version": 2
    }
}

This DSS file JSON has extra fields in the provenance property, but the project schema for this JSON still contains the old provenance fields.

https://dss.integration.data.humancellatlas.org/v1/files/922bd46e-281f-4dc4-b7b2-08f8195b8224/?replica=aws

aaclan-ebi commented 5 years ago

This would mean that analysis would have to populate it too then. -@rolando-ebi

justincc commented 5 years ago

We are postponing this pending further agreement on where this validation should happen. If we generate an error because of a validation failure, it might end up being reported to a user who can do nothing about it.