iDigBio / idb-backend

iDigBio server and backend code for data ingestion, media processing, record indexing, and data API.
GNU General Public License v3.0
7 stars 0 forks source link

eml tests have started failing #131

Closed danstoner closed 3 years ago

danstoner commented 3 years ago
========================================================================= test session starts =========================================================================
platform linux2 -- Python 2.7.17, pytest-4.6.11, py-1.10.0, pluggy-0.13.1
rootdir: /home/dan/git/IDIGBIO/idb-backend, inifile: setup.cfg
plugins: cov-2.11.1, mock-1.13.0, celery-4.2.2, flask-0.15.1
collected 225 items                                                                                                                                                   

tests/test_data_exists.py .......     
...
tests/idigbio_ingestion/lib/test_eml.py FFFFFFFF.FFFF....FF                                                                                                     [ 89%]
...
============================================================================== FAILURES ===============================================================================
_____________________________________ test_intellectual_rights[formatted.56e711e6-c847-4f99-915a-6894bb5c5dea_NHM_London.xml-CC0] _____________________________________

eml_filename = 'formatted.56e711e6-c847-4f99-915a-6894bb5c5dea_NHM_London.xml', expected_license = 'CC0'
emlpathdir = local('/home/dan/git/IDIGBIO/idb-backend/tests/data/eml')

    @pytest.mark.usefixtures("emlpathdir")
    @pytest.mark.parametrize("eml_filename,expected_license", [
            ("formatted.56e711e6-c847-4f99-915a-6894bb5c5dea_NHM_London.xml", "CC0"), # intellectualRights / para
            ("AEC-TTD-TCN_DwC-A20160308.eml", "CC4 BY"), # intellectualRights / para
            ("dr130.xml", "CC3 BY"), # intellectualRights / section | section / title | para
            ("dr367.xml", "CC4 BY-SA"),
            ("dr90.xml", "CC3 BY"),
            ("eml-bg_vascular-v4.66.xml", "CC4 BY"), # intellectualRights / para / ulink / citetitle
            ("formatted.Bohart-Tardigrada_DwC-A.eml", "CC3 BY-NC"),\
                # intellectualRights / para / ulink / broken citetitle, url is available in second intellectualRights
            ("invertnet_osu.eml.xml", "No license, assume Public Domain"), # no intellectualRights section
            ("MNHN_Paris_el.xml", "No license, assume Public Domain"), # no intellectualRights section
            ("MNHN_Paris_RA.xml", "CC4 BY"), # intellectualRights / para / <ulink> and <citetitle>
            ("museu_paraense_emilio_goeldi_ornithology_collection.xml", "Unknown License, assume Public Domain"), # Open Data Commons
            ("nmnh_extant_dwc-a.xml", "CC0"), # intellectualRights / para / <ulink> and <citetitle>
            ("tropicosspecimens.xml", "CC4 BY"), # intellectualRights / para / ulink / citetitle
            ("UWZM-F_DwC-A.eml", "CC0"), # intellectualRights / para / ulink / citetitle
            ("formatted.neherbaria.VT_DwC-A.eml", "CC3 BY-NC"), # broken citetitle, url is available in second intellectualRights
            ("formatted.mycoportal.VT_DwC-A.eml", "CC0"), # bare url in intellectualRights
            ("VT_DwC-A.eml", "CC3 BY-NC"), # intellectualRights / para / ulink / broken citetitle  ?
            ("rom_birdsnonpass.xml", "CC4 BY-NC"),
            ("vertnet_sui_verts.xml", "CC0"), # cc zero and vertnet norms
            #("usgs_pwrc_northamerican_bees", "No license, assume Public Domain"), # this is an html file that should not parse, currently raising an untrapped Exception
    ])

    def test_intellectual_rights(eml_filename, expected_license, emlpathdir):
        emlfile = emlpathdir.join(eml_filename).open()
        parsed_eml = parseEml('id_placeholder_test_suite', emlfile.read())
>       assert parsed_eml['data_rights'] == expected_license
E       AssertionError: assert 'No license, ...Public Domain' == 'CC0'
E         - No license, assume Public Domain
E         + CC0

tests/idigbio_ingestion/lib/test_eml.py:34: AssertionError
------------------------------------------------------------------------ Captured stderr call -------------------------------------------------------------------------
2021-03-10 07:59:01.932 DEBUG idb.eml჻ No data license text found in intellectualRights, using 'No license, assume Public Domain' for id_placeholder_test_suite
-------------------------------------------------------------------------- Captured log call --------------------------------------------------------------------------
DEBUG    idb.eml:eml.py:102 No data license text found in intellectualRights, using 'No license, assume Public Domain' for id_placeholder_test_suite
danstoner commented 3 years ago

The failures were caused by pip installing pyquery 1.2 instead of 1.2.17.

danstoner commented 3 years ago

Fixed