datalad / datalad-dataverse

A DataLad (www.datalad.org) extension to work with Dataverse
Other
9 stars 15 forks source link

Three test failures against demo.dataverse #335

Open adswa opened 1 month ago

adswa commented 1 month ago

The Appveyor history shows that these tests started to fail 24 days ago. That coincides with the release of Dataverse 6.4 on September 30th :/ pyDataverse hasn't seen changes since.


FAILURES ===================================================================================================================
______________________________________________________________________________________________________________ test_file_handling ______________________________________________________________________________________________________________
tmp_path = PosixPath('/home/appveyor/DLTMP/pytest-of-appveyor/pytest-0/test_file_handling0'), dataverse_admin_api = <pyDataverse.api.NativeApi object at 0x7f73ec8382e0>
dataverse_dataaccess_api = <pyDataverse.api.DataAccessApi object at 0x7f73ec838d90>, dataverse_dataset = 'doi:10.70122/FK2/2TAKCR'
    def test_file_handling(
            tmp_path,
            dataverse_admin_api,
            dataverse_dataaccess_api,
            dataverse_dataset,
    ):
        # the starting point of `dataverse_dataset` is a freshly
        # created, non-published dataset in draft mode, with no prior
        # version
        odd = ODD(dataverse_admin_api, dataverse_dataset)

        fcontent = 'some_content'
        fpath = tmp_path / 'dummy.txt'
        fpath.write_text(fcontent)
        src_md5 = md5sum(fpath)

        fileid = check_upload(odd, fcontent, fpath, src_md5)

        check_download(odd, fileid, tmp_path / 'downloaded.txt', src_md5)

        check_file_metadata_update(dataverse_admin_api, dataverse_dataset, odd,
                                   fileid, fpath)

>       fileid = check_replace_file(odd, fileid, tmp_path)
../../../dlvenv/lib/python3.8/site-packages/datalad_dataverse/tests/test_dataset.py:39: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
../../../dlvenv/lib/python3.8/site-packages/datalad_dataverse/tests/test_dataset.py:64: in check_replace_file
    new_fileid = odd.upload_file(fpath, remote_path, fileid)
../../../dlvenv/lib/python3.8/site-packages/datalad_dataverse/dataset.py:230: in upload_file
    response.raise_for_status()
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
self = <Response [400 Bad Request]>
    def raise_for_status(self) -> Response:
        """
        Raise the `HTTPStatusError` if one occurred.
        """
        request = self._request
        if request is None:
            raise RuntimeError(
                "Cannot call `raise_for_status` as the request "
                "instance has not been set on this response."
            )

        if self.is_success:
            return self

        if self.has_redirect_location:
            message = (
                "{error_type} '{0.status_code} {0.reason_phrase}' for url '{0.url}'\n"
                "Redirect location: '{0.headers[location]}'\n"
                "For more information check: https://developer.mozilla.org/en-US/docs/Web/HTTP/Status/{0.status_code}"
            )
        else:
            message = (
                "{error_type} '{0.status_code} {0.reason_phrase}' for url '{0.url}'\n"
                "For more information check: https://developer.mozilla.org/en-US/docs/Web/HTTP/Status/{0.status_code}"
            )

        status_class = self.status_code // 100
        error_types = {
            1: "Informational response",
            3: "Redirect response",
            4: "Client error",
            5: "Server error",
        }
        error_type = error_types.get(status_class, "Invalid status code")
        message = message.format(self, error_type=error_type)
>       raise HTTPStatusError(message, request=request, response=self)
E       httpx.HTTPStatusError: Client error '400 Bad Request' for url 'https://demo.dataverse.org/api/v1/files/2421955/replace'
E       For more information check: https://developer.mozilla.org/en-US/docs/Web/HTTP/Status/400
../../../dlvenv/lib/python3.8/site-packages/httpx/_models.py:763: HTTPStatusError
______________________________________________________________________________________________________________ test_name_mangling ______________________________________________________________________________________________________________
tmp_path = PosixPath('/home/appveyor/DLTMP/pytest-of-appveyor/pytest-0/test_name_mangling0'), dataverse_admin_api = <pyDataverse.api.NativeApi object at 0x7f73ec8382e0>
dataverse_dataaccess_api = <pyDataverse.api.DataAccessApi object at 0x7f73ec838d90>, dataverse_dataset = 'doi:10.70122/FK2/V1IPV2'
    def test_name_mangling(
            tmp_path,
            dataverse_admin_api,
            dataverse_dataaccess_api,
            dataverse_dataset,
    ):
        odd = ODD(dataverse_admin_api, dataverse_dataset)

        paths = (
            tmp_path / ".dot-in-front" 'c1.txt',
            tmp_path / " space-in-front" 'c2.txt',
            tmp_path / "-minus-in-front" 'c3.txt',
            tmp_path / "Ö-in-front" 'c4.txt',
            tmp_path / ".Ö-dot-Ö-in-front" 'c5.txt',
            tmp_path / " Ö-space-Ö-in-front" 'c6.txt',
        )

        path_info = dict()
        for path in paths:
            if path.parent != tmp_path:
                path.parent.mkdir()
            fcontent = path.name
            path.write_text(path.name)
            src_md5 = md5sum(path)
            fileid = check_upload(odd, fcontent, path, src_md5)
            path_info[path] = (src_md5, fileid)

        for path, (src_md5, fileid) in path_info.items():
            check_download(odd, fileid, tmp_path / 'downloaded.txt', src_md5)

            check_file_metadata_update(
                dataverse_admin_api,
                dataverse_dataset,
                odd,
                fileid,
                path)

>           fileid = check_replace_file(odd, fileid, tmp_path)
../../../dlvenv/lib/python3.8/site-packages/datalad_dataverse/tests/test_dataset.py:240: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
../../../dlvenv/lib/python3.8/site-packages/datalad_dataverse/tests/test_dataset.py:64: in check_replace_file
    new_fileid = odd.upload_file(fpath, remote_path, fileid)
../../../dlvenv/lib/python3.8/site-packages/datalad_dataverse/dataset.py:230: in upload_file
    response.raise_for_status()
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
self = <Response [400 Bad Request]>
    def raise_for_status(self) -> Response:
        """
        Raise the `HTTPStatusError` if one occurred.
        """
        request = self._request
        if request is None:
            raise RuntimeError(
                "Cannot call `raise_for_status` as the request "
                "instance has not been set on this response."
            )

        if self.is_success:
            return self

        if self.has_redirect_location:
            message = (
                "{error_type} '{0.status_code} {0.reason_phrase}' for url '{0.url}'\n"
                "Redirect location: '{0.headers[location]}'\n"
                "For more information check: https://developer.mozilla.org/en-US/docs/Web/HTTP/Status/{0.status_code}"
            )
        else:
            message = (
                "{error_type} '{0.status_code} {0.reason_phrase}' for url '{0.url}'\n"
                "For more information check: https://developer.mozilla.org/en-US/docs/Web/HTTP/Status/{0.status_code}"
            )

        status_class = self.status_code // 100
        error_types = {
            1: "Informational response",
            3: "Redirect response",
            4: "Client error",
            5: "Server error",
        }
        error_type = error_types.get(status_class, "Invalid status code")
        message = message.format(self, error_type=error_type)
>       raise HTTPStatusError(message, request=request, response=self)
E       httpx.HTTPStatusError: Client error '400 Bad Request' for url 'https://demo.dataverse.org/api/v1/files/2421958/replace'
E       For more information check: https://developer.mozilla.org/en-US/docs/Web/HTTP/Status/400
../../../dlvenv/lib/python3.8/site-packages/httpx/_models.py:763: HTTPStatusError
______________________________________________________________________________________________________________ test_file_handling ______________________________________________________________________________________________________________
tmp_path = PosixPath('/home/appveyor/DLTMP/pytest-of-appveyor/pytest-0/test_file_handling1'), dataverse_admin_api = <pyDataverse.api.NativeApi object at 0x7f73ec8382e0>
dataverse_dataaccess_api = <pyDataverse.api.DataAccessApi object at 0x7f73ec838d90>, dataverse_dataset = 'doi:10.70122/FK2/BMJ0SI', dataverse_instance_url = 'https://demo.dataverse.org'
    def test_file_handling(
            tmp_path,
            dataverse_admin_api,
            dataverse_dataaccess_api,
            dataverse_dataset,
            dataverse_instance_url,
    ):
        # the starting point of `dataverse_dataset` is a freshly
        # created, non-published dataset in draft mode, with no prior
        # version
        fcontent = 'some_content'
        fpath = tmp_path / 'dummy.txt'
        fpath.write_text(fcontent)
        src_md5 = md5sum(fpath)

        check_duplicate_file_deposition(
            dataverse_admin_api,
            dataverse_dataset,
            tmp_path)

>       fileid = check_upload(
            dataverse_admin_api,
            dataverse_dataset, fcontent, fpath, src_md5, dataverse_instance_url)
../../../dlvenv/lib/python3.8/site-packages/datalad_dataverse/tests/test_pydataverse.py:43: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
api = <pyDataverse.api.NativeApi object at 0x7f73ec8382e0>, dsid = 'doi:10.70122/FK2/BMJ0SI', fcontent = 'some_content', fpath = PosixPath('/home/appveyor/DLTMP/pytest-of-appveyor/pytest-0/test_file_handling1/dummy.txt')
src_md5 = 'baab6c16d9143523b7865d46896e4596', dv_url = 'https://demo.dataverse.org'
    def check_upload(api, dsid, fcontent, fpath, src_md5, dv_url):
        # the simplest possible upload, just a source file name
        response = api.upload_datafile(
            identifier=dsid,
            filename=fpath,
        )
        # worked
        assert response.status_code == 200
        # verify structure of response
        rj = response.json()
        assert rj['status'] == 'OK'
        rfiles = rj['data']['files']
        # one file uploaded, one report
        assert len(rfiles) == 1
        rfile = rfiles[0]
        # for a fresh upload a bunch of things should be true
        assert rfile['description'] == ''
        assert rfile['label'] == fpath.name
        assert rfile['restricted'] is False
        assert rfile['version'] == 1
        assert rfile['datasetVersionId']  # we are not testing for identity
        # most info is in a 'dataFile' dict
        df = rfile['dataFile']
        assert df['contentType'] == 'text/plain'
        assert df['creationDate'] == datetime.datetime.today().strftime('%Y-%m-%d')
        # unclear if this is always a copy of the prop above
        assert df['description'] == rfile['description']
        assert df['filename'] == fpath.name
        assert df['filesize'] == len(fcontent)
        assert df['id']
        assert df['checksum']['type'] == 'MD5'
        assert df['md5'] == df['checksum']['value'] == src_md5
>       assert df['persistentId'] == ''
E       AssertionError: assert 'doi:10.70122...BMJ0SI/FN49XY' == ''
E         
E         + doi:10.70122/FK2/BMJ0SI/FN49XY
../../../dlvenv/lib/python3.8/site-packages/datalad_dataverse/tests/test_pydataverse.py:140: AssertionError
=========================================
adswa commented 1 month ago

The third failure looks like the API call started returning more information than the test expected (the uploaded file now comes back with a non-empty `persistentId`), so the test can simply be adjusted. The two failing tests in test_dataset look like an API change on Dataverse's side, but neither the changelog, the User Guide, nor the release milestone immediately pointed to a cause. I've asked in their Zulip channel: https://dataverse.zulipchat.com/#narrow/channel/377090-python/topic/Changes.20in.20the.20files.20API.20endpoint.20for.20replacing.20files.3F

Follow up in dataverse's issue tracker: https://github.com/IQSS/dataverse/issues/10975