========================================================================= test session starts =========================================================================
platform linux2 -- Python 2.7.17, pytest-4.6.11, py-1.10.0, pluggy-0.13.1
rootdir: /home/dan/git/IDIGBIO/idb-backend, inifile: setup.cfg
plugins: cov-2.11.1, mock-1.13.0, celery-4.2.2, flask-0.15.1
collected 225 items
tests/test_data_exists.py .......
...
tests/idigbio_ingestion/lib/test_eml.py FFFFFFFF.FFFF....FF [ 89%]
...
============================================================================== FAILURES ===============================================================================
_____________________________________ test_intellectual_rights[formatted.56e711e6-c847-4f99-915a-6894bb5c5dea_NHM_London.xml-CC0] _____________________________________
eml_filename = 'formatted.56e711e6-c847-4f99-915a-6894bb5c5dea_NHM_London.xml', expected_license = 'CC0'
emlpathdir = local('/home/dan/git/IDIGBIO/idb-backend/tests/data/eml')
@pytest.mark.usefixtures("emlpathdir")
@pytest.mark.parametrize("eml_filename,expected_license", [
("formatted.56e711e6-c847-4f99-915a-6894bb5c5dea_NHM_London.xml", "CC0"), # intellectualRights / para
("AEC-TTD-TCN_DwC-A20160308.eml", "CC4 BY"), # intellectualRights / para
("dr130.xml", "CC3 BY"), # intellectualRights / section | section / title | para
("dr367.xml", "CC4 BY-SA"),
("dr90.xml", "CC3 BY"),
("eml-bg_vascular-v4.66.xml", "CC4 BY"), # intellectualRights / para / ulink / citetitle
("formatted.Bohart-Tardigrada_DwC-A.eml", "CC3 BY-NC"),\
# intellectualRights / para / ulink / broken citetitle, url is available in second intellectualRights
("invertnet_osu.eml.xml", "No license, assume Public Domain"), # no intellectualRights section
("MNHN_Paris_el.xml", "No license, assume Public Domain"), # no intellectualRights section
("MNHN_Paris_RA.xml", "CC4 BY"), # intellectualRights / para / <ulink> and <citetitle>
("museu_paraense_emilio_goeldi_ornithology_collection.xml", "Unknown License, assume Public Domain"), # Open Data Commons
("nmnh_extant_dwc-a.xml", "CC0"), # intellectualRights / para / <ulink> and <citetitle>
("tropicosspecimens.xml", "CC4 BY"), # intellectualRights / para / ulink / citetitle
("UWZM-F_DwC-A.eml", "CC0"), # intellectualRights / para / ulink / citetitle
("formatted.neherbaria.VT_DwC-A.eml", "CC3 BY-NC"), # broken citetitle, url is available in second intellectualRights
("formatted.mycoportal.VT_DwC-A.eml", "CC0"), # bare url in intellectualRights
("VT_DwC-A.eml", "CC3 BY-NC"), # intellectualRights / para / ulink / broken citetitle ?
("rom_birdsnonpass.xml", "CC4 BY-NC"),
("vertnet_sui_verts.xml", "CC0"), # cc zero and vertnet norms
#("usgs_pwrc_northamerican_bees", "No license, assume Public Domain"), # this is an html file that should not parse, currently raising an untrapped Exception
])
def test_intellectual_rights(eml_filename, expected_license, emlpathdir):
emlfile = emlpathdir.join(eml_filename).open()
parsed_eml = parseEml('id_placeholder_test_suite', emlfile.read())
> assert parsed_eml['data_rights'] == expected_license
E AssertionError: assert 'No license, ...Public Domain' == 'CC0'
E - No license, assume Public Domain
E + CC0
tests/idigbio_ingestion/lib/test_eml.py:34: AssertionError
------------------------------------------------------------------------ Captured stderr call -------------------------------------------------------------------------
2021-03-10 07:59:01.932 DEBUG idb.eml჻ No data license text found in intellectualRights, using 'No license, assume Public Domain' for id_placeholder_test_suite
-------------------------------------------------------------------------- Captured log call --------------------------------------------------------------------------
DEBUG idb.eml:eml.py:102 No data license text found in intellectualRights, using 'No license, assume Public Domain' for id_placeholder_test_suite