cpfaff / ease

EASE (Essential Annotation Schema for Ecology)
0 stars 2 forks source link

Organism (Make decision about reuse or argue) #17

Closed cpfaff closed 8 years ago

cpfaff commented 8 years ago

In Organisms we do:

<!-- EASE "organism" section: a mandatory organismicCoverage followed by the
     optional lifeFormCoverage and organismSizeCoverage.
     Occurrence attributes equal to the XSD default (minOccurs="1",
     maxOccurs="1") are omitted throughout. -->
<xs:element name="organism">
    <xs:complexType>
        <xs:sequence>

            <!-- Taxonomy: one organismRange holding one or more organismItem entries. -->
            <xs:element name="organismicCoverage">
                <xs:complexType>
                    <xs:sequence>
                        <xs:element name="organismRange">
                            <xs:complexType>
                                <xs:sequence>
                                    <xs:element name="organismItem" maxOccurs="unbounded">
                                        <xs:complexType>
                                            <!-- Fixed list of ranks in a fixed order; every rank is optional. -->
                                            <xs:sequence>
                                                <xs:element name="domain" type="xs:string" minOccurs="0"/>
                                                <xs:element name="kingdom" type="xs:string" minOccurs="0"/>
                                                <!-- phylum and division are alternatives: at most one of them may appear. -->
                                                <xs:choice minOccurs="0">
                                                    <xs:element name="phylum" type="xs:string"/>
                                                    <xs:element name="division" type="xs:string"/>
                                                </xs:choice>
                                                <xs:element name="class" type="xs:string" minOccurs="0"/>
                                                <xs:element name="order" type="xs:string" minOccurs="0"/>
                                                <xs:element name="family" type="xs:string" minOccurs="0"/>
                                                <xs:element name="genus" type="xs:string" minOccurs="0"/>
                                                <xs:element name="species" type="xs:string" minOccurs="0"/>
                                            </xs:sequence>
                                        </xs:complexType>
                                    </xs:element>
                                </xs:sequence>
                            </xs:complexType>
                        </xs:element>
                    </xs:sequence>
                </xs:complexType>
            </xs:element>

            <!-- Life form: optional; each lifeFormItem requires both a class and a type. -->
            <xs:element name="lifeFormCoverage" minOccurs="0">
                <xs:complexType>
                    <xs:sequence>
                        <xs:element name="lifeFormRange">
                            <xs:complexType>
                                <xs:sequence>
                                    <xs:element name="lifeFormItem" maxOccurs="unbounded">
                                        <xs:complexType>
                                            <xs:sequence>
                                                <xs:element name="lifeFormClass" type="lifeFormClassType"/>
                                                <xs:element name="lifeFormType" type="lifeFormTypeType"/>
                                            </xs:sequence>
                                        </xs:complexType>
                                    </xs:element>
                                </xs:sequence>
                            </xs:complexType>
                        </xs:element>
                    </xs:sequence>
                </xs:complexType>
            </xs:element>

            <!-- Organism size: optional; minimum and maximum each carry their own
                 unit element, and the size class is optional. -->
            <xs:element name="organismSizeCoverage" minOccurs="0">
                <xs:complexType>
                    <xs:sequence>
                        <xs:element name="organismSizeRange">
                            <xs:complexType>
                                <xs:sequence>
                                    <xs:element name="organismSizeItem" maxOccurs="unbounded">
                                        <xs:complexType>
                                            <xs:sequence>
                                                <xs:element name="organismSizeMinimum" type="nonNegativeDecimal"/>
                                                <xs:element name="organismSizeMinimumUnit" type="organismSizeUnitType"/>
                                                <xs:element name="organismSizeMaximum" type="nonNegativeDecimal"/>
                                                <xs:element name="organismSizeMaximumUnit" type="organismSizeUnitType"/>
                                                <xs:element name="organismSizeClass" type="organismSizeClassType" minOccurs="0"/>
                                            </xs:sequence>
                                        </xs:complexType>
                                    </xs:element>
                                </xs:sequence>
                            </xs:complexType>
                        </xs:element>
                    </xs:sequence>
                </xs:complexType>
            </xs:element>

        </xs:sequence>
    </xs:complexType>
</xs:element>

ABCD does:

<!-- ABCD: optional atomised name. The content is a choice among the four
     code-specific name types; each alternative is itself optional. -->
<xs:element name="NameAtomised" minOccurs="0">
    <xs:complexType>
        <xs:choice>
            <xs:element name="Bacterial" type="NameBacterial" minOccurs="0"/>
            <xs:element name="Botanical" type="NameBotanical" minOccurs="0"/>
            <xs:element name="Zoological" type="NameZoological" minOccurs="0"/>
            <xs:element name="Viral" type="NameViral" minOccurs="0"/>
        </xs:choice>
    </xs:complexType>
</xs:element>

<!-- ABCD: atomised bacterial name. All parts are optional and must appear
     in the order listed. -->
<xs:complexType name="NameBacterial">
    <xs:sequence>
        <xs:element name="GenusOrMonomial" type="Monomial" minOccurs="0"/>
        <xs:element name="Subgenus" type="Monomial" minOccurs="0"/>
        <xs:element name="SubgenusAuthorAndYear" type="String" minOccurs="0"/>
        <xs:element name="SpeciesEpithet" type="Epithet" minOccurs="0"/>
        <xs:element name="SubspeciesEpithet" type="Epithet" minOccurs="0"/>
        <xs:element name="ParentheticalAuthorTeamAndYear" type="String" minOccurs="0"/>
        <xs:element name="AuthorTeamAndYear" type="String" minOccurs="0"/>
        <xs:element name="NameApprobation" type="String" minOccurs="0"/>
    </xs:sequence>
</xs:complexType>

<!-- ABCD: atomised botanical name. All parts are optional and must appear
     in the order listed. -->
<xs:complexType name="NameBotanical">
    <xs:sequence>
        <xs:element name="GenusOrMonomial" type="Monomial" minOccurs="0"/>
        <xs:element name="FirstEpithet" type="Epithet" minOccurs="0"/>
        <xs:element name="InfraspecificEpithet" type="Epithet" minOccurs="0"/>
        <xs:element name="Rank" type="RankAbbreviation" minOccurs="0"/>
        <!-- The HybridFlag type extended with an integer "insertionpoint" attribute. -->
        <xs:element name="HybridFlag" minOccurs="0">
            <xs:complexType>
                <xs:simpleContent>
                    <xs:extension base="HybridFlag">
                        <xs:attribute name="insertionpoint" type="xs:int"/>
                    </xs:extension>
                </xs:simpleContent>
            </xs:complexType>
        </xs:element>
        <xs:element name="AuthorTeamParenthesis" type="String" minOccurs="0"/>
        <xs:element name="AuthorTeam" type="String" minOccurs="0"/>
        <xs:element name="CultivarGroupName" type="String" minOccurs="0"/>
        <xs:element name="CultivarName" type="String" minOccurs="0"/>
        <!-- Wrapper around zero or more TradeDesignationName entries. -->
        <xs:element name="TradeDesignationNames" minOccurs="0">
            <xs:complexType>
                <xs:sequence>
                    <xs:element name="TradeDesignationName" type="String" minOccurs="0" maxOccurs="unbounded"/>
                </xs:sequence>
            </xs:complexType>
        </xs:element>
    </xs:sequence>
</xs:complexType>

<!-- ABCD: atomised viral name. All parts are optional and must appear in
     the order listed. -->
<xs:complexType name="NameViral">
    <xs:sequence>
        <xs:element name="GenusOrMonomial" type="Monomial" minOccurs="0"/>
        <xs:element name="ViralSpeciesDesignation" type="String" minOccurs="0"/>
        <!-- No type is declared for Acronym in this excerpt. -->
        <xs:element name="Acronym" minOccurs="0"/>
    </xs:sequence>
</xs:complexType>

<!-- ABCD: atomised zoological name. All parts are optional and must appear
     in the order listed. -->
<xs:complexType name="NameZoological">
    <xs:sequence>
        <xs:element name="GenusOrMonomial" type="Monomial" minOccurs="0"/>
        <xs:element name="Subgenus" type="Monomial" minOccurs="0"/>
        <xs:element name="SpeciesEpithet" type="Epithet" minOccurs="0"/>
        <xs:element name="SubspeciesEpithet" type="Epithet" minOccurs="0"/>
        <xs:element name="AuthorTeamOriginalAndYear" type="String" minOccurs="0"/>
        <xs:element name="AuthorTeamParenthesisAndYear" type="String" minOccurs="0"/>
        <xs:element name="CombinationAuthorTeamAndYear" type="String" minOccurs="0"/>
        <xs:element name="Breed" type="String" minOccurs="0"/>
        <xs:element name="NamedIndividual" type="String" minOccurs="0"/>
    </xs:sequence>
</xs:complexType>

EML does:

<!-- EML: the taxonomicCoverage element is the TaxonomicCoverage type
     extended with two optional attributes, "system" and "scope". -->
<xs:element name="taxonomicCoverage">
    <xs:complexType>
        <xs:complexContent>
            <xs:extension base="TaxonomicCoverage">
                <!-- use="optional" is the XSD default and is left implicit. -->
                <xs:attribute name="system" type="res:SystemType"/>
                <!-- scope falls back to "document" when the attribute is absent. -->
                <xs:attribute name="scope" type="res:ScopeType" default="document"/>
            </xs:extension>
        </xs:complexContent>
    </xs:complexType>
</xs:element>

<!-- EML: TaxonomicCoverage is either
       (a) a sequence of an optional taxonomicSystem, an optional
           generalTaxonomicCoverage and one or more taxonomicClassification
           entries, or
       (b) the res:ReferencesGroup group.
     An optional "id" attribute is allowed in both cases. -->
<xs:complexType name="TaxonomicCoverage">
    <xs:choice>
        <xs:sequence>
            <!-- Documentation of how the taxa were classified and identified. -->
            <xs:element name="taxonomicSystem" minOccurs="0">
                <xs:complexType>
                    <xs:sequence>
                        <xs:element name="classificationSystem" maxOccurs="unbounded">
                            <xs:complexType>
                                <xs:sequence>
                                    <xs:element name="classificationSystemCitation" type="cit:CitationType"/>
                                    <xs:element name="classificationSystemModifications" type="res:NonEmptyStringType" minOccurs="0"/>
                                </xs:sequence>
                            </xs:complexType>
                        </xs:element>
                        <xs:element name="identificationReference" type="cit:CitationType" minOccurs="0" maxOccurs="unbounded"/>
                        <xs:element name="identifierName" type="rp:ResponsibleParty" maxOccurs="unbounded"/>
                        <xs:element name="taxonomicProcedures" type="res:NonEmptyStringType"/>
                        <xs:element name="taxonomicCompleteness" type="res:NonEmptyStringType" minOccurs="0"/>
                        <!-- Each voucher names a specimen and the repository holding it. -->
                        <xs:element name="vouchers" minOccurs="0" maxOccurs="unbounded">
                            <xs:complexType>
                                <xs:sequence>
                                    <xs:element name="specimen" type="res:NonEmptyStringType"/>
                                    <xs:element name="repository">
                                        <xs:complexType>
                                            <xs:sequence>
                                                <xs:element name="originator" type="rp:ResponsibleParty" maxOccurs="unbounded"/>
                                            </xs:sequence>
                                        </xs:complexType>
                                    </xs:element>
                                </xs:sequence>
                            </xs:complexType>
                        </xs:element>
                    </xs:sequence>
                </xs:complexType>
            </xs:element>
            <xs:element name="generalTaxonomicCoverage" type="res:NonEmptyStringType" minOccurs="0"/>
            <!-- The type TaxonomicClassificationType is not part of this excerpt. -->
            <xs:element name="taxonomicClassification" type="TaxonomicClassificationType" maxOccurs="unbounded"/>
        </xs:sequence>
        <xs:group ref="res:ReferencesGroup"/>
    </xs:choice>
    <xs:attribute name="id" type="res:IDType" use="optional"/>
</xs:complexType>

DwC does:

<!-- Taxon terms: a flat list of references to globally declared elements.
     Every reference is optional (minOccurs="0"); there is no nesting, so
     ranks such as kingdom, phylum, class, order, family and genus sit side
     by side with the identifier and name-usage terms. -->
<xs:element ref="dwc:taxonID" minOccurs="0"/>
<xs:element ref="dwc:scientificNameID" minOccurs="0"/>
<xs:element ref="dwc:acceptedNameUsageID" minOccurs="0"/>
<xs:element ref="dwc:parentNameUsageID" minOccurs="0"/>
<xs:element ref="dwc:originalNameUsageID" minOccurs="0"/>
<xs:element ref="dwc:nameAccordingToID" minOccurs="0"/>
<xs:element ref="dwc:namePublishedInID" minOccurs="0"/>
<xs:element ref="dwc:taxonConceptID" minOccurs="0"/>
<xs:element ref="dwc:scientificName" minOccurs="0"/>
<xs:element ref="dwc:acceptedNameUsage" minOccurs="0"/>
<xs:element ref="dwc:parentNameUsage" minOccurs="0"/>
<xs:element ref="dwc:originalNameUsage" minOccurs="0"/>
<xs:element ref="dwc:nameAccordingTo" minOccurs="0"/>
<xs:element ref="dwc:namePublishedIn" minOccurs="0"/>
<xs:element ref="dwc:namePublishedInYear" minOccurs="0"/>
<xs:element ref="dwc:higherClassification" minOccurs="0"/>
<xs:element ref="dwc:kingdom" minOccurs="0"/>
<xs:element ref="dwc:phylum" minOccurs="0"/>
<xs:element ref="dwc:class" minOccurs="0"/>
<xs:element ref="dwc:order" minOccurs="0"/>
<xs:element ref="dwc:family" minOccurs="0"/>
<xs:element ref="dwc:genus" minOccurs="0"/>
<xs:element ref="dwc:subgenus" minOccurs="0"/>
<xs:element ref="dwc:specificEpithet" minOccurs="0"/>
<xs:element ref="dwc:infraspecificEpithet" minOccurs="0"/>
<xs:element ref="dwc:taxonRank" minOccurs="0"/>
<xs:element ref="dwc:verbatimTaxonRank" minOccurs="0"/>
<xs:element ref="dwc:scientificNameAuthorship" minOccurs="0"/>
<xs:element ref="dwc:vernacularName" minOccurs="0"/>
<xs:element ref="dwc:nomenclaturalCode" minOccurs="0"/>
<xs:element ref="dwc:taxonomicStatus" minOccurs="0"/>
<xs:element ref="dwc:nomenclaturalStatus" minOccurs="0"/>
<xs:element ref="dwc:taxonRemarks" minOccurs="0"/>
<!-- The last reference uses the dwr: prefix, unlike the dwc: terms above. -->
<xs:element ref="dwr:SimpleDarwinExtensions" minOccurs="0" />
cpfaff commented 8 years ago

Well, that is quite a bunch of information packed into the first post. The first thing to be implemented is what we already discussed: _a more generic structure for organism taxonomy_. EML can serve as an inspiration for that. We also need to _implement a recursive structure for the generic taxonomic naming with ranks and values_.

cpfaff commented 8 years ago

The new generic and recursive structure now allows for things like

<!-- Example instance of the generic, recursive structure: every
     taxonomicClassification carries one rank name/value pair and nests the
     classification of the next lower rank. -->
<organism>
    <organismicCoverage>
        <organismItem>
            <taxonomicClassification>
                <taxonRankName>class</taxonRankName>
                <taxonRankValue>coniferopsida</taxonRankValue>
                <taxonomicClassification>
                    <taxonRankName>order</taxonRankName>
                    <taxonRankValue>coniferales</taxonRankValue>
                    <taxonomicClassification>
                        <!-- Family and subfamily are different taxa: Pinaceae vs. Piceoideae. -->
                        <taxonRankName>family</taxonRankName>
                        <taxonRankValue>pinaceae</taxonRankValue>
                        <taxonomicClassification>
                            <taxonRankName>subfamily</taxonRankName>
                            <taxonRankValue>piceoideae</taxonRankValue>
                            <taxonomicClassification>
                                <taxonRankName>genus</taxonRankName>
                                <taxonRankValue>picea</taxonRankValue>
                                <taxonomicClassification>
                                    <taxonRankName>species</taxonRankName>
                                    <taxonRankValue>Picea abies</taxonRankValue>
                                </taxonomicClassification>
                            </taxonomicClassification>
                        </taxonomicClassification>
                    </taxonomicClassification>
                </taxonomicClassification>
            </taxonomicClassification>
        </organismItem>
    </organismicCoverage>
</organism>
cpfaff commented 8 years ago

The separation of the naming into botanical, zoological, viral and bacterial is interesting. We may benefit from that separation and granularity as well. It would be best implemented within the generic structure we have now.

cpfaff commented 8 years ago

While the split into atomic fields for the species names is interesting and useful in general, I now ask myself whether we really need it. It would extend the deepest level of our taxonomic classification just to be able to store separate parts of a string (the species name) in separate fields. I am not sure about the benefit for the search. Just adding the elements of ABCD would also mean duplication, at least to a certain degree. For example, ABCD has fields for e.g. epithet and genus. We already allow for this in the generic structure with taxonRankName = genus ..., taxonRankName = species. OK, the full species name and the genus would also mean a slight duplication, as the species name can contain the genus plus the epithet. Maybe we just skip this. @EichenbergBEF What do you think?

cpfaff commented 8 years ago

Another possible way to go, if we decide on more detail, would be to copy ABCD and then somehow prevent anybody from also providing the species and the genus in the taxonomic classification, to avoid redundant information.

EichenbergBEF commented 8 years ago

Not sure if I fully understand. You think that it does not really make sense to split the taxonomic classification into separate fields. However, as we discussed, this would allow higher flexibility in the search in terms of abstraction to e.g. higher taxonomic levels. Example already given: I have data on Picea abies and now also want to find all datasets concerning the same genus (i.e. Picea). But maybe there are also other ways you have already thought of to allow this.

Am 18.12.2015 um 12:45 schrieb Claas-Thido Pfaff:

While the split into atomic fields for the species names is interesting and useful in general, I now ask myself whether we really need it. It would extend the deepest level of our taxonomic classification just to be able to store separate parts of a string (the species name) in separate fields. I am not sure about the benefit for the search. Just adding the elements of ABCD would also mean duplication, at least to a certain degree. For example, ABCD has fields for e.g. epithet and genus. We already allow for this in the generic structure with taxonRankName = genus ..., taxonRankName = species. Maybe we just skip this. @EichenbergBEF https://github.com/EichenbergBEF What do you think?

— Reply to this email directly or view it on GitHub https://github.com/cpfaff/cas/issues/17#issuecomment-165759158.

Dr. rer. nat. David Eichenberg BEF-China research consortium Data manager (BEF China Dataportal) Tel: 0049-341-9738587 Department of Systematic Botany and Functional Biodiversity University of Leipzig Room 120 Johannisallee 21 04103 Leipzig GERMANY

cpfaff commented 8 years ago

Now we can do the following for example:

<!-- Example instance combining the recursive higher classification (down to
     subfamily) with an atomised botanical name for the species itself. -->
<organism>
    <organismicCoverage>
        <organismItem>

            <!-- Higher taxonomy: one rank per nesting level. -->
            <taxonomicClassification>
                <taxonRankName>class</taxonRankName>
                <taxonRankValue>coniferopsida</taxonRankValue>
                <taxonomicClassification>
                    <taxonRankName>order</taxonRankName>
                    <taxonRankValue>coniferales</taxonRankValue>
                    <taxonomicClassification>
                        <!-- Family and subfamily are different taxa: Pinaceae vs. Piceoideae. -->
                        <taxonRankName>family</taxonRankName>
                        <taxonRankValue>pinaceae</taxonRankValue>
                        <taxonomicClassification>
                            <taxonRankName>subfamily</taxonRankName>
                            <taxonRankValue>piceoideae</taxonRankValue>
                        </taxonomicClassification>
                    </taxonomicClassification>
                </taxonomicClassification>
            </taxonomicClassification>

            <!-- Atomised name. Leaf values stay on one line so that indentation
                 whitespace does not become part of the stored value. -->
            <organismName>
                <botanicalName>
                    <genusOrMonomial>picea</genusOrMonomial>
                    <firstEpithet>abies</firstEpithet>
                    <authorTeamParenthesis>(L.)</authorTeamParenthesis>
                    <authorTeam>H.Karst</authorTeam>
                </botanicalName>
            </organismName>

        </organismItem>
    </organismicCoverage>
</organism>