artefactual-labs / mets-reader-writer

Library to parse and create METS files, especially for Archivematica.
https://mets-reader-writer.readthedocs.io
GNU Affero General Public License v3.0
20 stars 13 forks source link

Encode XLINK HREF values using URL Encoding #48

Closed ross-spencer closed 5 years ago

ross-spencer commented 5 years ago

This fix resolves archivematica/issues#187 whereby an anyURI type was not validating when it contained values that needed to be url encoded.

Additionally this commit specifies a UTF-8 encoding for the .py files in the code-base which just seems like good practice to follow across the Archivematica repositories.

Connected to archivematica/issues#187


Tests have been modified to include two characters, [ and ], that need to be URL-encoded. Those tests now pass as expected.

Further, this script was modified to run as main() (NB. Maybe not the best way, but the most expedient one):

#!/usr/bin/env python2
# -*- coding: utf-8 -*-

"""Given a transfer type Dataverse access the metadata submission object
```dataset.json``` to generate a transfer METS.xml file.

The METS.xml will reflect various properties of the ```dataset.json``` file. An
example of a specific feature of Dataverse is the existence of Bundle objects
for Tabular data. Bundles contain derivatives of a tabular data file that are
created by Dataverse to enable the data to be interacted with using as wide a
range of tools as possible. These Derivatives are transcribed to the METS.xml.

More information about Dataverse in Archivematica can be found here:
https://wiki.archivematica.org/Dataverse
"""
from __future__ import print_function
import argparse
import json
import logging
import os
import uuid

from lxml import etree

import metsrw

# Record and timestamp formats handed to logging.basicConfig() in main().
LOGFORMAT = '%(asctime)-15s %(levelname)s: %(message)s'
DATEFORMAT = '%m/%d/%Y %H:%M:%S'
# NOTE(review): not referenced anywhere in the visible part of this script.
# Presumably the schema-validation message seen when the xlink schema cannot
# be resolved offline -- confirm before relying on it.
OFFLINE_ERR = (
    "The QName value '{http://www.w3.org/1999/xlink}simpleLink' does not "
    "resolve to a(n) attribute group definition.")

class ConvertDataverseError(Exception):
    """Raised when dataset.json cannot be converted to a transfer METS."""

# Mapping from originalFormatLabel in dataset.json to file extension. The
# values here are associated with Dataverse Bundles, created when tabular data
# is ingested, see:
# http://guides.dataverse.org/en/latest/user/dataset-management.html?highlight=bundle
# The formats supported for tabular data ingests are listed here:
# http://guides.dataverse.org/en/latest/user/tabulardataingest/supportedformats.html
# Labels without an entry here fall back to "UNKNOWN" in create_bundle().
EXTENSION_MAPPING = {
    "Comma Separated Values": ".csv",
    "MS Excel (XLSX)": ".xlsx",
    "R Data": ".RData",
    "SPSS Portable": ".por",
    "SPSS SAV": ".sav",
    "Stata Binary": ".dta",
    "Stata 13 Binary": ".dta",
    "UNKNOWN": "UNKNOWN",
}

def get_ddi_title_author(dataset_md_latest):
    """Retrieve the title and the first author of the dataset for the DDI XML
    snippet to be included in the METS file.

    :param dataset_md_latest: dict of dataset version metadata; the values
        are read from the ``fields`` list of its ``metadataBlocks.citation``
        entry.
    :returns: ``(title, author)`` tuple with surrounding whitespace stripped.
    :raises ConvertDataverseError: if the citation block or its fields are
        missing, or if they contain no title or no author.
    """
    # Tolerate a missing or null metadataBlocks/citation entry: these would
    # otherwise surface as an AttributeError that no caller handles.
    metadata_blocks = dataset_md_latest.get("metadataBlocks") or {}
    citation = metadata_blocks.get("citation") or {}
    fields = citation.get("fields")
    if not fields:
        raise ConvertDataverseError(
            "Unable to retrieve MD fields from dataset.json")

    title_text = author_text = None
    for field in fields:
        type_name = field.get("typeName")
        if type_name == "title":
            title_text = field.get("value")
        elif type_name == "author":
            # Only the first listed author is carried over to the DDI.
            author_text = field.get("value")[0].get("authorName")\
                .get("value")

    if title_text is None or author_text is None:
        raise ConvertDataverseError(
            "Unable to retrieve title and author from dataset.json")
    return title_text.strip(), author_text.strip()

def create_ddi(json_metadata, dataset_md_latest):
    """Create the DDI dmdSec from the JSON metadata.

    :param json_metadata: top-level dict loaded from dataset.json; supplies
        the ``protocol``, ``persistentUrl`` and ``publisher`` values.
    :param dataset_md_latest: dict of dataset version metadata; supplies the
        citation (title/author), release time, version and terms of use.
    :returns: the ``codebook`` root element of the DDI snippet, or None when
        the citation data cannot be gathered (the reason is logged).
    """
    ddi_elems = {}

    try:
        ddi_elems["Title"], \
            ddi_elems["Author"] = get_ddi_title_author(dataset_md_latest)
    except TypeError as err:
        logging.error(
            "Unable to gather citation data from dataset.json: %s", err)
        return None
    except ConvertDataverseError as err:
        logging.error(err)
        return None

    ddi_elems["PID Type"] = json_metadata.get("protocol", "")
    ddi_elems["IDNO"] = json_metadata.get("persistentUrl", "")
    ddi_elems["Version Date"] = dataset_md_latest.get("releaseTime", "")
    ddi_elems["Version Type"] = dataset_md_latest.get("versionState", "")
    ddi_elems["Version Number"] = "{}.{}".format(
        dataset_md_latest.get("versionNumber", ""),
        dataset_md_latest.get("versionMinorNumber", "")
    )
    ddi_elems["Restriction Text"] = dataset_md_latest.get("termsOfUse", "")
    ddi_elems["Distributor Text"] = json_metadata.get("publisher", "")

    draft = False
    print("Fields retrieved from Dataverse:")
    # items() rather than iteritems(): the latter only exists on Python 2,
    # while items() iterates the same pairs on both Python 2 and 3.
    for ddi_k, ddi_v in ddi_elems.items():
        if ddi_k == "Version Type" and ddi_v == "DRAFT":
            draft = True
        print("{}: {}".format(ddi_k, ddi_v))

    if draft:
        # Reported both on stdout and in the log; processing continues.
        print(
            "Dataset is in a DRAFT state and may not transfer correctly")
        logging.error(
            "Dataset is in a DRAFT state and may not transfer correctly")

    # Create XML.
    nsmap = {"ddi": "http://www.icpsr.umich.edu/DDI"}
    ddins = "{" + nsmap["ddi"] + "}"
    ddi_root = etree.Element(ddins + "codebook", nsmap=nsmap)
    ddi_root.attrib["version"] = "2.5"

    root_ns = "{http://www.w3.org/2001/XMLSchema-instance}schemaLocation"
    dv_ns = (
        "http://www.ddialliance.org/Specification/DDI-Codebook/2.5/"
        "XMLSchema/codebook.xsd"
    )
    ddi_root.attrib[root_ns] = dv_ns

    stdydscr = etree.SubElement(ddi_root, ddins + "stdyDscr", nsmap=nsmap)
    citation = etree.SubElement(stdydscr, ddins + "citation", nsmap=nsmap)

    # Title statement: dataset title plus its persistent identifier.
    titlstmt = etree.SubElement(citation, ddins + "titlStmt", nsmap=nsmap)
    etree.SubElement(titlstmt, ddins + "titl", nsmap=nsmap).text \
        = ddi_elems["Title"]

    etree.SubElement(
        titlstmt, ddins + "IDNo", agency=ddi_elems["PID Type"]).text \
        = ddi_elems["IDNO"]

    # Responsibility statement: the author.
    rspstmt = etree.SubElement(citation, ddins + "rspStmt")
    etree.SubElement(rspstmt, ddins + "AuthEnty").text \
        = ddi_elems["Author"]

    # Distribution statement: the publisher.
    diststmt = etree.SubElement(citation, ddins + "distStmt")
    etree.SubElement(diststmt, ddins + "distrbtr").text \
        = ddi_elems["Distributor Text"]

    # Version statement: "<major>.<minor>" with date and state attributes.
    verstmt = etree.SubElement(citation, ddins + "verStmt")
    etree.SubElement(
        verstmt, ddins + "version", date=ddi_elems["Version Date"],
        type=ddi_elems["Version Type"]
    ).text = ddi_elems["Version Number"]

    # Data access: the terms of use become the restriction text.
    dataaccs = etree.SubElement(stdydscr, ddins + "dataAccs")
    usestmt = etree.SubElement(dataaccs, ddins + "useStmt")
    etree.SubElement(usestmt, ddins + "restrctn").text \
        = ddi_elems["Restriction Text"]

    return ddi_root

def display_checksum_for_user(checksum_value, checksum_type="MD5"):
    """Print the checksum read from dataset.json so that the user can follow
    what this script is doing in the Dataverse workflow.

    :param checksum_value: checksum string to display.
    :param checksum_type: name of the checksum algorithm, "MD5" by default.
    """
    message = "Checksum retrieved from dataset.json: {} ({})".format(
        checksum_value, checksum_type)
    print(message)

def create_bundle(tabfile_json):
    """Create the FSEntry objects for the various files in a Dataverse bundle
    identified initially by a ```.tab``` file being requested from the
    Dataverse API.

    A bundle is a collection of multiple representations of a tabular data
    file. Bundles are created by Dataverse to allow interaction with as wide a
    range of tools as possible.

    Documentation on Bundles can be found on the Dataverse pages:

       * http://guides.dataverse.org/en/latest/user/dataset-management.html?
       highlight=bundle

    :param tabfile_json: dict describing one entry of the dataset's ``files``
        list; its ``label`` and ``dataFile`` values are used.
    :returns: a Directory FSEntry holding the original file, its derivatives
        and its metadata files, or None when the label, the ``dataFile``
        entry or the MD5 checksum is missing.
    """
    tabfile_name = tabfile_json.get("label")
    if tabfile_name is None:
        return None

    # Else, continue processing.
    print("Creating entries for tabfile bundle {}".format(tabfile_name))

    # Without a dataFile entry there is nothing to describe; report failure
    # the same way as the other missing-data cases instead of crashing.
    tabfile_datafile = tabfile_json.get("dataFile")
    if not tabfile_datafile:
        logging.error(
            "No dataFile entry found for tabfile bundle %s", tabfile_name)
        return None

    # Base name is the label with its .tab suffix stripped. Only strip when
    # the suffix is really there so other labels are not truncated.
    base_name = tabfile_name
    if tabfile_name.lower().endswith(".tab"):
        base_name = tabfile_name[:-4]
    bundle = metsrw.FSEntry(path=base_name, type="Directory")

    # Find the original file and add it to the METS FS Entries.
    ext = EXTENSION_MAPPING.get(
        tabfile_datafile.get("originalFormatLabel", ""), "UNKNOWN")
    logging.info("Retrieved extension mapping value: %s", ext)
    logging.info(
        "Original file format listed as %s",
        tabfile_datafile.get("originalFileFormat", "None"))
    fname = None
    if ext == "UNKNOWN":
        fname = tabfile_datafile.get("filename")
        logging.info(
            "Original Format Label is UNKNOWN, using filename: %s",
            fname)
    if fname is None:
        fname = "{}{}".format(base_name, ext)
    checksum_value = tabfile_datafile.get("md5")
    if checksum_value is None:
        return None
    display_checksum_for_user(checksum_value)
    original_file = metsrw.FSEntry(
        path="{}/{}".format(base_name, fname),
        use="original",
        file_uuid=str(uuid.uuid4()),
        checksumtype="MD5",
        checksum=checksum_value,
    )
    bundle.add_child(original_file)
    if tabfile_datafile.get("originalFormatLabel") != "R Data":
        # RData derivative; skipped when the original already is R Data.
        bundle.add_child(metsrw.FSEntry(
            path="{}/{}.RData".format(base_name, base_name),
            use="derivative",
            derived_from=original_file,
            file_uuid=str(uuid.uuid4()),
        ))

    # Add expected bundle contents
    # FIXME what is the actual path for the files?
    # Tabfile, described by the bundle's -ddi.xml file.
    tabfile_entry = metsrw.FSEntry(
        path="{}/{}".format(base_name, tabfile_datafile.get("filename")),
        use="derivative",
        derived_from=original_file,
        file_uuid=str(uuid.uuid4()),
    )
    tabfile_entry.add_dmdsec(
        md="{}/{}-ddi.xml".format(base_name, base_name),
        mdtype="DDI",
        mode="mdref",
        label="{}-ddi.xml".format(base_name),
        loctype="OTHER",
        otherloctype="SYSTEM",
    )
    bundle.add_child(tabfile_entry)

    # Metadata files of the bundle: DDI, EndNote citation and RIS citation.
    for suffix in ("-ddi.xml", "citation-endnote.xml", "citation-ris.ris"):
        bundle.add_child(metsrw.FSEntry(
            path="{}/{}{}".format(base_name, base_name, suffix),
            use="metadata",
            derived_from=original_file,
            file_uuid=str(uuid.uuid4()),
        ))
    return bundle

def retrieve_terms_of_access(dataset_md_latest):
    """Look up the terms of access recorded for the dataset, which can be
    used to direct users to information about a restricted dataset.

    :param dataset_md_latest: dict of dataset version metadata.
    :returns: the value stored under ``termsOfAccess``, or None if absent.
    """
    terms_of_access = dataset_md_latest.get("termsOfAccess", None)
    return terms_of_access

def test_if_zip_in_name(fname):
    """Check if a file-path ends in a .zip extension. If so, return true. This
    helps us to log some information about the characteristics of the package
    as we go.
    """
    ext_ = os.path.splitext(fname)[1]
    if ext_.lower() == '.zip':
        return True
    return False

def add_ddi_xml(sip, json_metadata, dataset_md_latest):
    """Build the DDI XML block and attach it to the METS as a dmdSec.

    :param sip: entry that receives the DDI through ``add_dmdsec``.
    :param json_metadata: top-level dict loaded from dataset.json.
    :param dataset_md_latest: dict of dataset version metadata.
    :returns: ``sip`` once the DDI is attached, or None if create_ddi()
        could not produce one.
    """
    ddi_block = create_ddi(json_metadata, dataset_md_latest)
    if ddi_block is not None:
        sip.add_dmdsec(md=ddi_block, mdtype="DDI")
        return sip
    return None

def add_metadata_ref(sip, md_name, md_loc):
    """Attach one mdref-style dmdSec to the METS entry.

    :param sip: entry that receives the reference through ``add_dmdsec``.
    :param md_name: label given to the metadata reference.
    :param md_loc: location of the referenced metadata file.
    :returns: the same ``sip`` object.
    """
    mdref_options = {
        "md": md_loc,
        "mdtype": "OTHER",
        "mode": "mdref",
        "label": md_name,
        "loctype": "OTHER",
        "otherloctype": "SYSTEM",
    }
    sip.add_dmdsec(**mdref_options)
    return sip

def add_md_dir_to_structmap(sip):
    """Attach a ``metadata`` directory containing dataset.json to the
    structmap.

    :param sip: entry that receives the directory through ``add_child``.
    :returns: the same ``sip`` object.
    """
    metadata_dir = metsrw.FSEntry(
        path="metadata", use=None, type="Directory")
    sip.add_child(metadata_dir)
    # dataset.json is recorded as a metadata file inside that directory.
    dataset_json_entry = metsrw.FSEntry(
        path="metadata/dataset.json",
        use="metadata",
        file_uuid=str(uuid.uuid4()),
    )
    metadata_dir.add_child(dataset_json_entry)
    return sip

def add_dataset_files_to_md(sip, dataset_md_latest, contact_information):
    """Add the files listed in the dataset metadata to the METS structure.

    Files whose name ends in ``.tab`` are expanded into a bundle directory
    by create_bundle(); every other file is added as an "original" entry
    carrying its MD5 checksum.

    :param sip: entry that receives the file entries through ``add_child``.
    :param dataset_md_latest: dict of dataset version metadata; its ``files``
        list is processed.
    :param contact_information: value logged alongside the warning raised
        for files flagged as restricted.
    :returns: ``sip`` on success; None when there are no files, when a
        bundle cannot be created, or when a file lacks a filename or an MD5
        checksum.
    """
    # Add original files to the METS document.
    files = dataset_md_latest.get('files')
    if not files:
        # Signal to users that this transfer might consist of metadata only.
        # An empty list is still reported to the caller as None, exactly
        # like a missing "files" key.
        if isinstance(files, list):
            logging.info(
                "Metadata only transfer? There are no file entries in this "
                "transfer's metadata.")
        return None

    # Signal to users the existence of zip files in this transfer.
    zipped_file = False

    for file_json in files:
        is_restricted = file_json.get("restricted")
        if is_restricted is True and contact_information:
            logging.error(
                "Restricted dataset files may not have transferred "
                "correctly: %s", contact_information)

        # A null dataFile is treated like a missing one.
        data_file = file_json.get("dataFile") or {}
        if data_file.get("filename", "").endswith(".tab"):
            # A Tabular Data File from Dataverse will consist of an original
            # tabular format submitted by the researcher plus multiple
            # different representations. We need to map that here.
            bundle = create_bundle(file_json)
            if not bundle:
                logging.error(
                    "Create Dataverse transfer METS failed. "
                    "Bundle returned: %s", bundle)
                return None
            sip.add_child(bundle)
            continue

        path_ = data_file.get("filename") if data_file else None
        if not path_:
            logging.error(
                "Problem retrieving filename from metadata, returned "
                "datafile: %s, path: %s", data_file, path_)
            return None

        if test_if_zip_in_name(path_) and not zipped_file:
            # Provide some additional logging around the contents of the
            # dataset we're processing; logged once per dataset.
            zipped_file = True
            logging.info("Non-bundle .zip file found in the dataset.")

        checksum_value = data_file.get("md5")
        if checksum_value is None:
            return None
        display_checksum_for_user(checksum_value)
        sip.add_child(metsrw.FSEntry(
            path=path_,
            use="original",
            file_uuid=str(uuid.uuid4()),
            checksumtype="MD5",
            checksum=checksum_value,
        ))
    return sip

def write_mets_to_file(sip, output_md_name):
    """Serialize the METS structure rooted at ``sip`` to a UTF-8 XML file.

    :param sip: root entry appended to a new ``metsrw.METSDocument``.
    :param output_md_name: path of the file to write; "METS_.xml" is used
        when None.
    """
    metadata_name = output_md_name
    if metadata_name is None:
        metadata_name = "METS_.xml"
    # Write the data structure out to a file and ensure that the encoding is
    # purposely set to UTF-8. This pattern is used in ```create_mets_v2.py```.
    # Given the opportunity we should add an encoding feature to the metsrw
    # package.
    mets_f = metsrw.METSDocument()
    mets_f.append_file(sip)
    serialized = etree.tostring(
        mets_f.serialize(), pretty_print=True, encoding="utf-8",
        xml_declaration=True)
    # tostring() with an explicit encoding yields encoded bytes, so the file
    # has to be opened in binary mode; text mode fails on Python 3.
    with open(metadata_name, 'wb') as xml_file:
        xml_file.write(serialized)

def load_md_and_return_json(dataset_md_name):
    """Load the dataset metadata file and return its parsed JSON content.

    :param dataset_md_name: path to the dataset.json file.
    :returns: the decoded JSON document, or None when the file cannot be
        opened or does not contain valid JSON (the reason is logged).
    """
    logging.info(
        "Metadata directory exists %s", os.path.exists(dataset_md_name))
    try:
        with open(dataset_md_name, "r") as f:
            return json.load(f)
    except IOError as e:
        logging.error("Error opening dataset metadata: %s", e)
    except ValueError as e:
        # json reports malformed content with ValueError (or a subclass).
        logging.error("Error parsing dataset metadata: %s", e)
    return None

def convert_dataverse_to_mets(
        dataset_md_name="dataset.json", output_md_path=None,
        output_md_name=None):
    """Create a transfer METS file from a Dataverse's dataset.json file

    :param dataset_md_name: path to the dataset.json file to convert.
    :param output_md_path: accepted for callers that pass it; not used by
        this function.
    :param output_md_name: name of the METS file to write, handed to
        write_mets_to_file().
    :returns: 1 if the dataset metadata cannot be loaded, otherwise None.
    :raises ConvertDataverseError: if the dataset version metadata, the
        citation data, the DDI or the dataset files cannot be processed.
    """
    json_metadata = load_md_and_return_json(dataset_md_name)
    if json_metadata is None:
        return 1
    dataset_md_latest = get_latest_version_metadata(json_metadata)
    if dataset_md_latest is None:
        raise ConvertDataverseError(
            "Unable to find the dataset metadata section from dataset.json")

    # If a dataset is restricted we may not have access to all the files. We
    # may also want to flag this dataset to the users of this service. We
    # can do this here and below. We do not yet know whether this microservice
    # should fail because we don't know how all datasets behave when some
    # restrictions are placed on them.
    contact_information = retrieve_terms_of_access(dataset_md_latest)

    # Create METS. A ConvertDataverseError raised while reading the title
    # propagates to the caller unchanged.
    try:
        sip = metsrw.FSEntry(
            path="None", label=get_ddi_title_author(dataset_md_latest)[0],
            use=None, type="Directory"
        )
    except TypeError as err:
        # Build the message as a single string so that the log record and
        # the raised exception both carry readable text.
        citation_msg = (
            "Unable to gather citation data from dataset.json: {}"
            .format(err))
        logging.error(citation_msg)
        raise ConvertDataverseError(citation_msg)

    sip = add_ddi_xml(sip, json_metadata, dataset_md_latest)
    if sip is None:
        raise ConvertDataverseError("Error creating SIP from Dataverse DDI")

    sip = add_metadata_ref(
        sip, dataset_md_name, "metadata/{}".format(dataset_md_name))

    sip = add_dataset_files_to_md(sip, dataset_md_latest, contact_information)
    if sip is None:
        raise ConvertDataverseError("Error adding Dataset files to METS")

    # On success of the following two functions, the module will return None
    # to JobContext which expects non-zero as a failure code only.
    sip = add_md_dir_to_structmap(sip)
    write_mets_to_file(sip, output_md_name)

def get_latest_version_metadata(json_metadata):
    """If the dataset has been downloaded from the Dataverse web UI then there
    is a slightly different structure. While the structure is different, the
    majority of fields should remain the same and work with Archivematica. Just
    in case, we log the version here and inform the user of potential
    compatibility issues.

    Ref: https://github.com/IQSS/dataverse/issues/4715

    :param json_metadata: top-level dict loaded from dataset.json.
    :returns: the ``datasetVersion`` value when present and non-empty,
        otherwise the ``latestVersion`` value (None if that is absent too).
    """
    dataset_version = json_metadata.get("datasetVersion")
    if dataset_version:
        # The sentences are joined by implicit string concatenation, so the
        # separating space has to be part of the first literal.
        logging.info(
            "Dataset seems to have been downloaded from the Dataverse Web UI. "
            "Some features of this method may be incompatible with "
            "Archivematica at present.")
        return dataset_version
    return json_metadata.get("latestVersion")

def main():
    """Command-line entry point: convert a dataset.json file to a METS file
    named CONVERTED_METS.xml.

    The positional argument is optional and falls back to "dataset.json".
    ``--logging`` selects the log level; any value other than INFO, DEBUG,
    WARNING or ERROR falls back to DEBUG.
    """
    parser = argparse.ArgumentParser(
        description='convert_dv')
    # nargs="?" makes the positional optional so that the default is really
    # used; with nargs=1 argparse insists on a value and ignores the default.
    parser.add_argument(
        'mets', metavar='M', type=str, nargs="?", default="dataset.json",
        help='path to the Dataverse dataset.json file to convert')
    parser.add_argument(
        '--logging', type=str, nargs="?", default="DEBUG",
        help='logging level, INFO, DEBUG, WARNING, ERROR')

    args = parser.parse_args()

    log_level = args.logging
    if log_level not in ["INFO", "DEBUG", "WARNING", "ERROR"]:
        log_level = "DEBUG"
    logging.basicConfig(
        format=LOGFORMAT, datefmt=DATEFORMAT, level=log_level)

    convert_dataverse_to_mets(
        dataset_md_name=args.mets, output_md_path="",
        output_md_name="CONVERTED_METS.xml")

# Run the conversion only when the file is executed as a script, not when it
# is imported as a module.
if __name__ == "__main__":
    main()

Was used against this dataset file from the Dataverse project:

{
    "authority": "10.5072/FK2",
    "id": 880,
    "identifier": "QYH45Z",
    "latestVersion": {
        "UNF": "UNF:6:dLu0AaOeqoOlDbGTwUrWjA==",
        "createTime": "2018-05-16T15:28:41Z",
        "distributionDate": "2013-03-27",
        "files": [
            {
                "categories": [
                    "2011",
                    "Publications"
                ],
                "dataFile": {
                    "checksum": {
                        "type": "MD5",
                        "value": "f759178d0481e04c5f8da7cab5392826"
                    },
                    "contentType": "application/pdf",
                    "description": "Canadian Relocation Cases: Heading Towards Guidelines",
                    "filename": "30_CFLQ_271_13-3-13_1524[1].pdf",
                    "filesize": 152948,
                    "id": 1013,
                    "md5": "f759178d0481e04c5f8da7cab5392826",
                    "originalFormatLabel": "UNKNOWN",
                    "rootDataFileId": -1,
                    "storageIdentifier": "1636a128de1-b2d9731a80e4"
                },
                "datasetVersionId": 268,
                "description": "Canadian Relocation Cases: Heading Towards Guidelines",
                "label": "30_CFLQ_271_13-3-13_1524[1].pdf",
                "restricted": false,
                "version": 1
            },
            {
                "categories": [
                    "2011",
                    "Data"
                ],
                "dataFile": {
                    "checksum": {
                        "type": "MD5",
                        "value": "249fe0b2a4f60446543cfa07a33187df"
                    },
                    "contentType": "text/x-fixed-field",
                    "description": "ASCII file",
                    "filename": "relocation2011.dat",
                    "filesize": 137268,
                    "id": 1014,
                    "md5": "249fe0b2a4f60446543cfa07a33187df",
                    "originalFormatLabel": "UNKNOWN",
                    "rootDataFileId": -1,
                    "storageIdentifier": "1636a128df0-9772f28ac48e"
                },
                "datasetVersionId": 268,
                "description": "ASCII file",
                "label": "relocation2011.dat",
                "restricted": false,
                "version": 1
            },
            {
                "categories": [
                    "2011",
                    "Documentation"
                ],
                "dataFile": {
                    "checksum": {
                        "type": "MD5",
                        "value": "fbfd3e8d1122106e3c0f8ee09afcd6fc"
                    },
                    "contentType": "application/x-spss-syntax",
                    "description": "SPSS syntax file",
                    "filename": "relocation2011.sps",
                    "filesize": 10258,
                    "id": 1012,
                    "md5": "fbfd3e8d1122106e3c0f8ee09afcd6fc",
                    "originalFormatLabel": "UNKNOWN",
                    "rootDataFileId": -1,
                    "storageIdentifier": "1636a128df4-9a22bfb324e0"
                },
                "datasetVersionId": 268,
                "description": "SPSS syntax file",
                "label": "relocation2011.sps",
                "restricted": false,
                "version": 1
            },
            {
                "categories": [
                    "2011",
                    "Data"
                ],
                "dataFile": {
                    "checksum": {
                        "type": "MD5",
                        "value": "187999778dfe955ca8856276e7fd64a0"
                    },
                    "contentType": "text/tab-separated-values",
                    "description": "SPSS file",
                    "filename": "relocation2011.tab",
                    "filesize": 113109,
                    "id": 1010,
                    "md5": "187999778dfe955ca8856276e7fd64a0",
                    "originalFormatLabel": "UNKNOWN",
                    "rootDataFileId": -1,
                    "storageIdentifier": "1636a128df9-f11572473b59"
                },
                "datasetVersionId": 268,
                "description": "SPSS file",
                "label": "relocation2011.tab",
                "restricted": false,
                "version": 1
            },
            {
                "categories": [
                    "2011",
                    "Data"
                ],
                "dataFile": {
                    "UNF": "UNF:6:dLu0AaOeqoOlDbGTwUrWjA==",
                    "checksum": {
                        "type": "MD5",
                        "value": "235c918a8eaff0f65a8044e81a5c1ca8"
                    },
                    "contentType": "text/tab-separated-values",
                    "description": "CSV file",
                    "filename": "RELOCATION_FINAL_CANADA_738TOTAL.tab",
                    "filesize": 306311,
                    "id": 1011,
                    "md5": "235c918a8eaff0f65a8044e81a5c1ca8",
                    "originalFileFormat": "text/csv",
                    "originalFormatLabel": "Comma Separated Values",
                    "rootDataFileId": -1,
                    "storageIdentifier": "1636a128dff-7517f048c790"
                },
                "datasetVersionId": 268,
                "description": "CSV file",
                "label": "RELOCATION_FINAL_CANADA_738TOTAL.tab",
                "restricted": false,
                "version": 2
            },
            {
                "categories": [
                    "2011",
                    "Data"
                ],
                "dataFile": {
                    "checksum": {
                        "type": "MD5",
                        "value": "0b62956fe7244a59c4f2358494b71da2"
                    },
                    "contentType": "application/vnd.ms-excel",
                    "description": "Excel file",
                    "filename": "RELOCATION_FINAL_CANADA_738TOTAL.xls",
                    "filesize": 453120,
                    "id": 1009,
                    "md5": "0b62956fe7244a59c4f2358494b71da2",
                    "originalFormatLabel": "UNKNOWN",
                    "rootDataFileId": -1,
                    "storageIdentifier": "1636a128e07-8ca0dbaec898"
                },
                "datasetVersionId": 268,
                "description": "Excel file",
                "label": "RELOCATION_FINAL_CANADA_738TOTAL.xls",
                "restricted": false,
                "version": 1
            },
            {
                "categories": [
                    "2011",
                    "Documentation"
                ],
                "dataFile": {
                    "checksum": {
                        "type": "MD5",
                        "value": "ed3bc550febecd2cbd90e056223c92c3"
                    },
                    "contentType": "application/pdf",
                    "description": "User Guide",
                    "filename": "relocation-user-guide.pdf",
                    "filesize": 517694,
                    "id": 1015,
                    "md5": "ed3bc550febecd2cbd90e056223c92c3",
                    "originalFormatLabel": "UNKNOWN",
                    "rootDataFileId": -1,
                    "storageIdentifier": "1636a128dea-966f534add97"
                },
                "datasetVersionId": 268,
                "description": "User Guide",
                "label": "relocation-user-guide.pdf",
                "restricted": false,
                "version": 1
            }
        ],
        "id": 268,
        "lastUpdateTime": "2018-05-16T17:53:38Z",
        "license": "CC0",
        "metadataBlocks": {
            "citation": {
                "displayName": "Citation Metadata",
                "fields": [
                    {
                        "multiple": false,
                        "typeClass": "primitive",
                        "typeName": "title",
                        "value": "Canadian Relocation Cases: Heading Towards Guidelines, 2011 [test]"
                    },
                    {
                        "multiple": true,
                        "typeClass": "compound",
                        "typeName": "author",
                        "value": [
                            {
                                "authorAffiliation": {
                                    "multiple": false,
                                    "typeClass": "primitive",
                                    "typeName": "authorAffiliation",
                                    "value": "Queen's University"
                                },
                                "authorName": {
                                    "multiple": false,
                                    "typeClass": "primitive",
                                    "typeName": "authorName",
                                    "value": "Nicholas Bala"
                                }
                            },
                            {
                                "authorAffiliation": {
                                    "multiple": false,
                                    "typeClass": "primitive",
                                    "typeName": "authorAffiliation",
                                    "value": "Queen's University"
                                },
                                "authorName": {
                                    "multiple": false,
                                    "typeClass": "primitive",
                                    "typeName": "authorName",
                                    "value": "Andrea Wheeler"
                                }
                            },
                            {
                                "authorAffiliation": {
                                    "multiple": false,
                                    "typeClass": "primitive",
                                    "typeName": "authorAffiliation",
                                    "value": "Canadian Research Institute for Law and Family"
                                },
                                "authorName": {
                                    "multiple": false,
                                    "typeClass": "primitive",
                                    "typeName": "authorName",
                                    "value": "Joanne Paetsch"
                                }
                            },
                            {
                                "authorAffiliation": {
                                    "multiple": false,
                                    "typeClass": "primitive",
                                    "typeName": "authorAffiliation",
                                    "value": "Canadian Research Institute for Law and Family"
                                },
                                "authorName": {
                                    "multiple": false,
                                    "typeClass": "primitive",
                                    "typeName": "authorName",
                                    "value": "Lorne Bertrand"
                                }
                            }
                        ]
                    },
                    {
                        "multiple": true,
                        "typeClass": "compound",
                        "typeName": "datasetContact",
                        "value": [
                            {
                                "datasetContactAffiliation": {
                                    "multiple": false,
                                    "typeClass": "primitive",
                                    "typeName": "datasetContactAffiliation",
                                    "value": "Queen's University"
                                },
                                "datasetContactEmail": {
                                    "multiple": false,
                                    "typeClass": "primitive",
                                    "typeName": "datasetContactEmail",
                                    "value": "meghan.goodchild@queensu.ca"
                                },
                                "datasetContactName": {
                                    "multiple": false,
                                    "typeClass": "primitive",
                                    "typeName": "datasetContactName",
                                    "value": "Data and Government Information Centre"
                                }
                            }
                        ]
                    },
                    {
                        "multiple": true,
                        "typeClass": "compound",
                        "typeName": "dsDescription",
                        "value": [
                            {
                                "dsDescriptionDate": {
                                    "multiple": false,
                                    "typeClass": "primitive",
                                    "typeName": "dsDescriptionDate",
                                    "value": "2012"
                                },
                                "dsDescriptionValue": {
                                    "multiple": false,
                                    "typeClass": "primitive",
                                    "typeName": "dsDescriptionValue",
                                    "value": "Regardless of outcome, cases where a parent wants to relocate with a child following separation have a profound impact on all concerned, and are among the most difficult cases in the family justice system. While the new British Columbia Family Law Act (not yet in force) has provisions that address relocation, no other Canadian statutes address these problems, and the only relevant Supreme Court case, Gordon v Goertz, [1996] SCJ 52, offers only the most general guidance: relocation deci sions are to be based on the \u201cbest interests of the child\u201d. Having clearer guidance for relocation cases would be of great assistance to the courts, lawyers and families, facilitating judicial resolution, promoting settlement a nd reducing costs, but over the past 15 years the Supreme Court has repeatedly refused leave in relocation cases, and outside of B.C. no government has announced plans to address the issue.\r\n\r\nUsing Westlaw and Quicklaw, Canadian relocation cases reported in English from 2001 to the beginning of 2011 (738 in total) were identified and analyzed."
                                }
                            }
                        ]
                    },
                    {
                        "multiple": true,
                        "typeClass": "controlledVocabulary",
                        "typeName": "subject",
                        "value": [
                            "Social Sciences"
                        ]
                    },
                    {
                        "multiple": true,
                        "typeClass": "compound",
                        "typeName": "keyword",
                        "value": [
                            {
                                "keywordValue": {
                                    "multiple": false,
                                    "typeClass": "primitive",
                                    "typeName": "keywordValue",
                                    "value": "Relocation"
                                }
                            },
                            {
                                "keywordValue": {
                                    "multiple": false,
                                    "typeClass": "primitive",
                                    "typeName": "keywordValue",
                                    "value": "Separation"
                                }
                            },
                            {
                                "keywordValue": {
                                    "multiple": false,
                                    "typeClass": "primitive",
                                    "typeName": "keywordValue",
                                    "value": "Family law"
                                }
                            },
                            {
                                "keywordValue": {
                                    "multiple": false,
                                    "typeClass": "primitive",
                                    "typeName": "keywordValue",
                                    "value": "Family court"
                                }
                            },
                            {
                                "keywordValue": {
                                    "multiple": false,
                                    "typeClass": "primitive",
                                    "typeName": "keywordValue",
                                    "value": "Divorce"
                                }
                            }
                        ]
                    },
                    {
                        "multiple": true,
                        "typeClass": "compound",
                        "typeName": "publication",
                        "value": [
                            {
                                "publicationCitation": {
                                    "multiple": false,
                                    "typeClass": "primitive",
                                    "typeName": "publicationCitation",
                                    "value": "Bala, N., and Wheeler, A. (2012). Canadian Relocation Cases: Heading Towards Guidelines. Canadian Family Law Quarterly, 30(3), 271-320"
                                }
                            },
                            {
                                "publicationCitation": {
                                    "multiple": false,
                                    "typeClass": "primitive",
                                    "typeName": "publicationCitation",
                                    "value": "Bala, N., and Wheeler, A. (2012). More Clarity Needed for Relocation Cases. The Lawyers Weekly, February 27, 2012."
                                }
                            }
                        ]
                    },
                    {
                        "multiple": true,
                        "typeClass": "controlledVocabulary",
                        "typeName": "language",
                        "value": [
                            "English"
                        ]
                    },
                    {
                        "multiple": true,
                        "typeClass": "compound",
                        "typeName": "producer",
                        "value": [
                            {
                                "producerAffiliation": {
                                    "multiple": false,
                                    "typeClass": "primitive",
                                    "typeName": "producerAffiliation",
                                    "value": "Queens' University Faculty of Law"
                                },
                                "producerName": {
                                    "multiple": false,
                                    "typeClass": "primitive",
                                    "typeName": "producerName",
                                    "value": "Nicholas Bala"
                                },
                                "producerURL": {
                                    "multiple": false,
                                    "typeClass": "primitive",
                                    "typeName": "producerURL",
                                    "value": "http://law.queensu.ca/ "
                                }
                            }
                        ]
                    },
                    {
                        "multiple": false,
                        "typeClass": "primitive",
                        "typeName": "productionDate",
                        "value": "2012"
                    },
                    {
                        "multiple": true,
                        "typeClass": "compound",
                        "typeName": "grantNumber",
                        "value": [
                            {
                                "grantNumberAgency": {
                                    "multiple": false,
                                    "typeClass": "primitive",
                                    "typeName": "grantNumberAgency",
                                    "value": "Social Sciences and Humanities Research Council (SSHRC) "
                                }
                            },
                            {
                                "grantNumberAgency": {
                                    "multiple": false,
                                    "typeClass": "primitive",
                                    "typeName": "grantNumberAgency",
                                    "value": "Canadian Research Institute for Law and Family "
                                }
                            }
                        ]
                    },
                    {
                        "multiple": true,
                        "typeClass": "compound",
                        "typeName": "distributor",
                        "value": [
                            {
                                "distributorAbbreviation": {
                                    "multiple": false,
                                    "typeClass": "primitive",
                                    "typeName": "distributorAbbreviation",
                                    "value": "DGIC"
                                },
                                "distributorAffiliation": {
                                    "multiple": false,
                                    "typeClass": "primitive",
                                    "typeName": "distributorAffiliation",
                                    "value": "Queen's University"
                                },
                                "distributorName": {
                                    "multiple": false,
                                    "typeClass": "primitive",
                                    "typeName": "distributorName",
                                    "value": "Data and Government Information Centre"
                                },
                                "distributorURL": {
                                    "multiple": false,
                                    "typeClass": "primitive",
                                    "typeName": "distributorURL",
                                    "value": "http://library.queensu.ca/webdoc "
                                }
                            }
                        ]
                    },
                    {
                        "multiple": false,
                        "typeClass": "primitive",
                        "typeName": "distributionDate",
                        "value": "2013-03-27"
                    },
                    {
                        "multiple": false,
                        "typeClass": "primitive",
                        "typeName": "depositor",
                        "value": "Goodchild, Meghan"
                    },
                    {
                        "multiple": false,
                        "typeClass": "primitive",
                        "typeName": "dateOfDeposit",
                        "value": "2018-05-16"
                    },
                    {
                        "multiple": true,
                        "typeClass": "compound",
                        "typeName": "timePeriodCovered",
                        "value": [
                            {
                                "timePeriodCoveredEnd": {
                                    "multiple": false,
                                    "typeClass": "primitive",
                                    "typeName": "timePeriodCoveredEnd",
                                    "value": "2011-04-30"
                                },
                                "timePeriodCoveredStart": {
                                    "multiple": false,
                                    "typeClass": "primitive",
                                    "typeName": "timePeriodCoveredStart",
                                    "value": "2001-01-01"
                                }
                            }
                        ]
                    }
                ]
            },
            "geospatial": {
                "displayName": "Geospatial Metadata",
                "fields": [
                    {
                        "multiple": true,
                        "typeClass": "compound",
                        "typeName": "geographicCoverage",
                        "value": [
                            {
                                "country": {
                                    "multiple": false,
                                    "typeClass": "controlledVocabulary",
                                    "typeName": "country",
                                    "value": "Canada"
                                }
                            }
                        ]
                    }
                ]
            },
            "journal": {
                "displayName": "Journal Metadata",
                "fields": []
            }
        },
        "productionDate": "Production Date",
        "releaseTime": "2018-05-16T17:53:38Z",
        "termsOfUse": "CC0 Waiver",
        "versionMinorNumber": 0,
        "versionNumber": 1,
        "versionState": "RELEASED"
    },
    "persistentUrl": "https://doi.org/10.5072/FK2/QYH45Z",
    "protocol": "doi",
    "publicationDate": "2018-05-16",
    "publisher": "Root Dataverse"
}

And the result, before and after installing this new metsrw package, is as follows:

ross-spencer@artefactual:~/Desktop/Artefactual/python-scratch/mets-valid$ python validate-mets.py CONVERTED_METS.xml 
Validation result. False
Reason: <string>:0:0:ERROR:SCHEMASV:SCHEMAV_CVC_DATATYPE_VALID_1_2_1: Element '{http://www.loc.gov/METS/}FLocat', attribute '{http://www.w3.org/1999/xlink}href': '30_CFLQ_271_13-3-13_1524[1].pdf' is not a valid value of the atomic type 'xs:anyURI'.

After running `python setup.py install`:

ross-spencer@artefactual:~/Desktop/Artefactual/python-scratch/mets-valid$ python validate-mets.py CONVERTED_METS.xml 
Schema validation via XSD is valid.