Beblia / Holy-Bible-XML-Format

Holy Bible in 190+ Languages and 900+ versions in XML Format
https://beblia.com
54 stars 22 forks source link

XSD Schema for bible format #1

Closed schierlm closed 7 months ago

schierlm commented 8 months ago

Hello,

I'm trying to create an XSD schema from the bible files (so far I have only covered letters A to H), and this is what I came up with:


<?xml version="1.0" encoding="UTF-8"?>
<xs:schema version="1.0" xmlns:xs="http://www.w3.org/2001/XMLSchema" elementFormDefault="qualified">
    <!-- Draft schema for the bible XML files. No targetNamespace is declared,
         so the elements declared here belong to no namespace. -->

    <!-- Non-empty text made of whitespace-free tokens separated by exactly one
         space character; leading, trailing and repeated whitespace are rejected. -->
    <xs:simpleType name="NormalizedStringType">
        <xs:restriction base="xs:string">
            <xs:pattern value="\S+( \S+)*"/>
        </xs:restriction>
    </xs:simpleType>

    <!-- An xs:int restricted to values of 1 or greater. -->
    <xs:simpleType name="positiveIntType">
        <xs:restriction base="xs:int">
            <xs:minInclusive value="1"/>
        </xs:restriction>
    </xs:simpleType>

    <!-- Accepted testament labels. "Old" and "New" are the primary spellings;
         the other values are alternative spellings that are accepted as well. -->
    <xs:simpleType name="TestamentType">
        <xs:restriction base="xs:string">
            <!-- "Old" and its alternative -->
            <xs:enumeration value="Old"/>
            <xs:enumeration value="Old Testament"/>
            <!-- "New" and its alternatives -->
            <xs:enumeration value="New"/>
            <xs:enumeration value="new"/>
            <xs:enumeration value="New Testament"/>
        </xs:restriction>
    </xs:simpleType>

    <!-- A numbered book: one or more numbered chapters, each holding zero or
         more numbered verses whose content is plain text. -->
    <xs:complexType name="BookType">
        <xs:sequence>
            <!-- minOccurs is left at its default of 1: at least one chapter is required. -->
            <xs:element name="chapter" maxOccurs="unbounded">
                <xs:complexType>
                    <xs:sequence>
                        <!-- A chapter may contain no verses at all. -->
                        <xs:element name="verse" minOccurs="0" maxOccurs="unbounded">
                            <xs:complexType>
                                <xs:simpleContent>
                                    <!-- Verse text plus its required number. -->
                                    <xs:extension base="xs:string">
                                        <xs:attribute name="number" type="positiveIntType" use="required"/>
                                    </xs:extension>
                                </xs:simpleContent>
                            </xs:complexType>
                        </xs:element>
                    </xs:sequence>
                    <xs:attribute name="number" type="positiveIntType" use="required"/>
                </xs:complexType>
            </xs:element>
        </xs:sequence>
        <xs:attribute name="number" type="positiveIntType" use="required"/>
    </xs:complexType>

    <!-- Content and attributes of a bible. Children appear in a fixed order and
         each of them is optional: at most one <translation>, up to two
         <testament> elements, then any number of <book> elements.
         Every attribute below relies on the XSD default use="optional". -->
    <xs:complexType name="BibleType">
        <xs:sequence>
            <!-- alternative for testament -->
            <xs:element name="translation" minOccurs="0">
                <xs:complexType>
                    <xs:sequence>
                        <xs:element name="book" type="BookType" maxOccurs="unbounded"/>
                    </xs:sequence>
                    <xs:attribute name="name" type="TestamentType"/>
                    <!-- alternative for name -->
                    <xs:attribute name="id" type="TestamentType"/>
                </xs:complexType>
            </xs:element>
            <xs:element name="testament" minOccurs="0" maxOccurs="2">
                <xs:complexType>
                    <xs:sequence>
                        <xs:element name="book" type="BookType" maxOccurs="unbounded"/>
                    </xs:sequence>
                    <xs:attribute name="name" type="TestamentType"/>
                    <!-- alternatives for name -->
                    <xs:attribute name="Name" type="TestamentType"/>
                    <xs:attribute name="translation" type="TestamentType"/>
                    <xs:attribute name="translatio" type="TestamentType"/>
                    <xs:attribute name="translationumber" type="TestamentType"/>
                    <xs:attribute name="id" type="TestamentType"/>
                </xs:complexType>
            </xs:element>
            <!-- Books placed directly under the bible, outside any testament. -->
            <xs:element name="book" type="BookType" minOccurs="0" maxOccurs="unbounded"/>
        </xs:sequence>

        <xs:attribute name="name" type="NormalizedStringType"/>
        <!-- alternatives for name -->
        <xs:attribute name="id" type="NormalizedStringType"/>
        <xs:attribute name="translation" type="NormalizedStringType"/>
        <xs:attribute name="translationumber" type="NormalizedStringType"/>
        <xs:attribute name="translationss" type="NormalizedStringType"/>
        <xs:attribute name="language" type="NormalizedStringType"/>

        <xs:attribute name="status" type="NormalizedStringType"/>
        <!-- alternatives for status -->
        <xs:attribute name="Status" type="NormalizedStringType"/>
        <xs:attribute name="staus" type="NormalizedStringType"/>
        <xs:attribute name="satus" type="NormalizedStringType"/>

        <xs:attribute name="info" type="NormalizedStringType"/>
        <xs:attribute name="link" type="NormalizedStringType"/>
        <xs:attribute name="site" type="NormalizedStringType"/>
        <xs:attribute name="Copyright" type="NormalizedStringType"/>
    </xs:complexType>

    <!-- The only global element declaration in this schema: <bible>, typed by BibleType. -->
    <xs:element name="bible" type="BibleType" />
</xs:schema>

As you can see, there are many alternative attribute and element names, and while some of them are obvious typos, I am unsure which of them are supposed to be correct (i.e. which are picked up by your software).

And even with this very lax schema, 8 of the 345 tested bibles have schema validation errors, because they include unique tags found nowhere else.

I would like to support your format as an input format in my BibleMultiConverter project (see schierlm/BibleMultiConverter#85), but in its current state this is hard.

Are you interested in contributions that add an XSD schema and pull requests that fix bibles to conform to it? It would only make sense if you are willing to validate bibles added in the future against the schema and fix them.