iterative / dvc-hdfs

HDFS/WebHDFS plugin for dvc
https://dvc.org/doc/user-guide/data-management/remote-storage/hdfs
Apache License 2.0
0 stars 1 forks source link

tests: fix test #21

Closed skshetry closed 1 year ago

skshetry commented 1 year ago

Add a leading slash to the path in the test to make it consistent with odb.path. path_to_oid was raising an error because self.path is in the format /baea92ba-aed5-4473-bfb8-318ca73b0278/files/md5, which when split turns into a 4-tuple:

("/", "baea92ba-aed5-4473-bfb8-318ca73b0278", "files", "md5")

whereas the actual paths did not have leading slashes, for example baea92ba-aed5-4473-bfb8-318ca73b0278/files/md5/00/14ffd92a6cbf5f2f657067df0d5881a6, which turns into a 5-tuple,

("baea92ba-aed5-4473-bfb8-318ca73b0278", "files", "md5", "00", "14ffd92a6cbf5f2f657067df0d5881a6")

and the relative parts will just be the last value (the first four components are dropped as if they were the odb prefix), which is not a valid oid, so the test would fail.

See https://github.com/iterative/dvc-hdfs/actions/runs/5950643602/job/16139617183#step:6:136

Traceback ```python =================================== FAILURES =================================== ________________________ TestRemote.test_pull_00_prefix ________________________ [gw0] linux -- Python 3.8.17 /opt/hostedtoolcache/Python/3.8.17/x64/bin/python self = tmp_dir = PosixTmpDir('/tmp/pytest-of-runner/pytest-0/popen-gw0/test_pull_00_prefix0') dvc = Repo: '/tmp/pytest-of-runner/pytest-0/popen-gw0/test_pull_00_prefix0' remote = HDFS: 'hdfs://example.com:12345/fcc883be-f362-45f5-a199-55bc4dc98af4' monkeypatch = <_pytest.monkeypatch.MonkeyPatch object at 0x7fb5ec7e7880> @pytest.mark.xfail(raises=NotImplementedError, strict=False) def test_pull_00_prefix(self, tmp_dir, dvc, remote, monkeypatch): # Related: https://github.com/iterative/dvc/issues/6089 fs_type = type(dvc.cloud.get_remote_odb("upstream").fs) monkeypatch.setattr(fs_type, "_ALWAYS_TRAVERSE", True, raising=False) monkeypatch.setattr(fs_type, "LIST_OBJECT_PAGE_SIZE", 256, raising=False) # foo's md5 checksum is 00411460f7c92d2124a67ea0f4cb5f85 # bar's md5 checksum is 0000000018e6137ac2caab16074784a6 foo_out = tmp_dir.dvc_gen("foo", "363")[0].outs[0] bar_out = tmp_dir.dvc_gen("bar", "jk8ssl")[0].outs[0] expected_hashes = {foo_out.hash_info, bar_out.hash_info} dvc.push() status = dvc.cloud.status(expected_hashes) > _check_status(status, ok=expected_hashes) /opt/hostedtoolcache/Python/3.8.17/x64/lib/python3.8/site-packages/dvc/testing/remote_tests.py:134: _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ status = CompareStatusResult(ok=set(), missing=set(), new={HashInfo(name='md5', value='00411460f7c92d2124a67ea0f4cb5f85', obj_name=None), HashInfo(name='md5', value='0000000018e6137ac2caab16074784a6', obj_name=None)}, deleted=set()) kwargs = {'ok': {HashInfo(name='md5', value='00411460f7c92d2124a67ea0f4cb5f85', obj_name=None), HashInfo(name='md5', value='0000000018e6137ac2caab16074784a6', obj_name=None)}} key = 'ok' expected = {HashInfo(name='md5', 
value='00411460f7c92d2124a67ea0f4cb5f85', obj_name=None), HashInfo(name='md5', value='0000000018e6137ac2caab16074784a6', obj_name=None)} @py_assert6 = set(), @py_assert8 = set(), @py_assert1 = False @py_format10 = "{HashInfo(nam...bj_name=None)} == set()\n~Extra items in the left set:\n~HashInfo(name='md5', value='00411460f7c92d21...16074784a6', obj_name=None),\n~+ HashInfo(name='md5', value='00411460f7c92d2124a67ea0f4cb5f85', obj_name=None),\n~+ }" @py_format12 = "assert {HashInfo(nam...bj_name=None)} == set()\n~Extra items in the left set:\n~HashInfo(name='md5', value='00411460f...16074784a6', obj_name=None),\n~+ HashInfo(name='md5', value='00411460f7c92d2124a67ea0f4cb5f85', obj_name=None),\n~+ }" def _check_status(status, **kwargs): for key in ("ok", "missing", "new", "deleted"): expected = kwargs.get(key, set()) > assert expected == set(getattr(status, key)) E AssertionError: assert {HashInfo(nam...bj_name=None)} == set() E Extra items in the left set: E HashInfo(name='md5', value='00411460f7c92d2124a67ea0f4cb5f85', obj_name=None) E HashInfo(name='md5', value='0000000018e6137ac2caab16074784a6', obj_name=None) E Full diff: E - set(, E - ) E + { E + HashInfo(name='md5', value='0000000018e6137ac2caab16074784a6', obj_name=None), E + HashInfo(name='md5', value='00411460f7c92d2124a67ea0f4cb5f85', obj_name=None), E + } /opt/hostedtoolcache/Python/3.8.17/x64/lib/python3.8/site-packages/dvc/testing/remote_tests.py:16: AssertionError ______________________ TestRemote.test_pull_no_00_prefix _______________________ [gw0] linux -- Python 3.8.17 /opt/hostedtoolcache/Python/3.8.17/x64/bin/python self = tmp_dir = PosixTmpDir('/tmp/pytest-of-runner/pytest-0/popen-gw0/test_pull_no_00_prefix0') dvc = Repo: '/tmp/pytest-of-runner/pytest-0/popen-gw0/test_pull_no_00_prefix0' remote = HDFS: 'hdfs://example.com:12345/69334124-6deb-4777-b123-51eda6f06e79' monkeypatch = <_pytest.monkeypatch.MonkeyPatch object at 0x7fb5ec8449a0> @pytest.mark.xfail(raises=NotImplementedError, 
strict=False) def test_pull_no_00_prefix(self, tmp_dir, dvc, remote, monkeypatch): # Related: https://github.com/iterative/dvc/issues/6244 fs_type = type(dvc.cloud.get_remote_odb("upstream").fs) monkeypatch.setattr(fs_type, "_ALWAYS_TRAVERSE", True, raising=False) monkeypatch.setattr(fs_type, "LIST_OBJECT_PAGE_SIZE", 256, raising=False) # foo's md5 checksum is 14ffd92a6cbf5f2f657067df0d5881a6 # bar's md5 checksum is 64020400f00960c0ef04052547b134b3 foo_out = tmp_dir.dvc_gen("foo", "dvc")[0].outs[0] bar_out = tmp_dir.dvc_gen("bar", "cml")[0].outs[0] expected_hashes = {foo_out.hash_info, bar_out.hash_info} dvc.push() status = dvc.cloud.status(expected_hashes) > _check_status(status, ok=expected_hashes) /opt/hostedtoolcache/Python/3.8.17/x64/lib/python3.8/site-packages/dvc/testing/remote_tests.py:160: _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ status = CompareStatusResult(ok=set(), missing=set(), new={HashInfo(name='md5', value='14ffd92a6cbf5f2f657067df0d5881a6', obj_name=None), HashInfo(name='md5', value='64020400f00960c0ef04052547b134b3', obj_name=None)}, deleted=set()) kwargs = {'ok': {HashInfo(name='md5', value='14ffd92a6cbf5f2f657067df0d5881a6', obj_name=None), HashInfo(name='md5', value='64020400f00960c0ef04052547b134b3', obj_name=None)}} key = 'ok' expected = {HashInfo(name='md5', value='14ffd92a6cbf5f2f657067df0d5881a6', obj_name=None), HashInfo(name='md5', value='64020400f00960c0ef04052547b134b3', obj_name=None)} @py_assert6 = set(), @py_assert8 = set(), @py_assert1 = False @py_format10 = "{HashInfo(nam...bj_name=None)} == set()\n~Extra items in the left set:\n~HashInfo(name='md5', value='14ffd92a6cbf5f2f...2547b134b3', obj_name=None),\n~+ HashInfo(name='md5', value='14ffd92a6cbf5f2f657067df0d5881a6', obj_name=None),\n~+ }" @py_format12 = "assert {HashInfo(nam...bj_name=None)} == set()\n~Extra items in the left set:\n~HashInfo(name='md5', value='14ffd92a6...2547b134b3', obj_name=None),\n~+ HashInfo(name='md5', 
value='14ffd92a6cbf5f2f657067df0d5881a6', obj_name=None),\n~+ }" def _check_status(status, **kwargs): for key in ("ok", "missing", "new", "deleted"): expected = kwargs.get(key, set()) > assert expected == set(getattr(status, key)) E AssertionError: assert {HashInfo(nam...bj_name=None)} == set() E Extra items in the left set: E HashInfo(name='md5', value='14ffd92a6cbf5f2f657067df0d5881a6', obj_name=None) E HashInfo(name='md5', value='64020400f00960c0ef04052547b134b3', obj_name=None) E Full diff: E - set(, E - ) E + { E + HashInfo(name='md5', value='64020400f00960c0ef04052547b134b3', obj_name=None), E + HashInfo(name='md5', value='14ffd92a6cbf5f2f657067df0d5881a6', obj_name=None), E + } /opt/hostedtoolcache/Python/3.8.17/x64/lib/python3.8/site-packages/dvc/testing/remote_tests.py:16: AssertionError =========================== short test summary info ============================ SKIPPED [1] dvc_hdfs/tests/test_dvc.py:34: https://github.com/iterative/dvc-hdfs/issues/2 XPASS dvc_hdfs/tests/test_dvc.py::TestRemote::test_stage_cache_push_pull FAILED dvc_hdfs/tests/test_dvc.py::TestRemote::test_pull_00_prefix - Assertio... FAILED dvc_hdfs/tests/test_dvc.py::TestRemote::test_pull_no_00_prefix - Asser... ============== 2 failed, 34 passed, 1 skipped, 1 xpassed in 4.29s ============== ```
skshetry commented 1 year ago

cc @efiop, do you think this is something that should be fixed in dvc-objects?

efiop commented 1 year ago

@skshetry I guess, need to take a closer look.