Closed miguelgfierro closed 3 weeks ago
tests/data_validation/recommenders/datasets/test_mind.py ...FFFFFF.F.FFF [100%]
============================================================================================= FAILURES ==============================================================================================
________________________________________ test_mind_url[https://recodatasets.z20.web.core.windows.net/newsrec/MINDsmall_train.zip-52952752-0x8D834F2EB31BDEC] ________________________________________
url = 'https://recodatasets.z20.web.core.windows.net/newsrec/MINDsmall_train.zip', content_length = '52952752', etag = '0x8D834F2EB31BDEC'
@pytest.mark.parametrize(
"url, content_length, etag",
[
(
"https://recodatasets.z20.web.core.windows.net/newsrec/MINDdemo_train.zip",
"17372879",
'"0x8D8B8AD5B233930"',
), # NOTE: the z20 blob returns the etag with ""
(
"https://recodatasets.z20.web.core.windows.net/newsrec/MINDdemo_dev.zip",
"10080022",
'"0x8D8B8AD5B188839"',
),
(
"https://recodatasets.z20.web.core.windows.net/newsrec/MINDdemo_utils.zip",
"97292694",
'"0x8D8B8AD5B126C3B"',
),
(
"https://recodatasets.z20.web.core.windows.net/newsrec/MINDsmall_train.zip",
"52952752",
"0x8D834F2EB31BDEC",
),
(
"https://recodatasets.z20.web.core.windows.net/newsrec/MINDsmall_dev.zip",
"30945572",
"0x8D834F2EBA8D865",
),
(
"https://recodatasets.z20.web.core.windows.net/newsrec/MINDsmall_utils.zip",
"155178106",
"0x8D87F67F4AEB960",
),
(
"https://recodatasets.z20.web.core.windows.net/newsrec/MINDlarge_train.zip",
"530196631",
"0x8D8244E90C15C07",
),
(
"https://recodatasets.z20.web.core.windows.net/newsrec/MINDlarge_dev.zip",
"103456245",
"0x8D8244E92005849",
),
(
"https://recodatasets.z20.web.core.windows.net/newsrec/MINDlarge_utils.zip",
"150359301",
"0x8D87F67E6CA4364",
),
],
)
def test_mind_url(url, content_length, etag):
url_headers = requests.head(url).headers
> assert url_headers["Content-Length"] == content_length
E AssertionError: assert '52953372' == '52952752'
E
E - 52952752
E + 52953372
tests/data_validation/recommenders/datasets/test_mind.py:63: AssertionError
_________________________________________ test_mind_url[https://recodatasets.z20.web.core.windows.net/newsrec/MINDsmall_dev.zip-30945572-0x8D834F2EBA8D865] _________________________________________
url = 'https://recodatasets.z20.web.core.windows.net/newsrec/MINDsmall_dev.zip', content_length = '30945572', etag = '0x8D834F2EBA8D865'
@pytest.mark.parametrize(
"url, content_length, etag",
[
(
"https://recodatasets.z20.web.core.windows.net/newsrec/MINDdemo_train.zip",
"17372879",
'"0x8D8B8AD5B233930"',
), # NOTE: the z20 blob returns the etag with ""
(
"https://recodatasets.z20.web.core.windows.net/newsrec/MINDdemo_dev.zip",
"10080022",
'"0x8D8B8AD5B188839"',
),
(
"https://recodatasets.z20.web.core.windows.net/newsrec/MINDdemo_utils.zip",
"97292694",
'"0x8D8B8AD5B126C3B"',
),
(
"https://recodatasets.z20.web.core.windows.net/newsrec/MINDsmall_train.zip",
"52952752",
"0x8D834F2EB31BDEC",
),
(
"https://recodatasets.z20.web.core.windows.net/newsrec/MINDsmall_dev.zip",
"30945572",
"0x8D834F2EBA8D865",
),
(
"https://recodatasets.z20.web.core.windows.net/newsrec/MINDsmall_utils.zip",
"155178106",
"0x8D87F67F4AEB960",
),
(
"https://recodatasets.z20.web.core.windows.net/newsrec/MINDlarge_train.zip",
"530196631",
"0x8D8244E90C15C07",
),
(
"https://recodatasets.z20.web.core.windows.net/newsrec/MINDlarge_dev.zip",
"103456245",
"0x8D8244E92005849",
),
(
"https://recodatasets.z20.web.core.windows.net/newsrec/MINDlarge_utils.zip",
"150359301",
"0x8D87F67E6CA4364",
),
],
)
def test_mind_url(url, content_length, etag):
url_headers = requests.head(url).headers
> assert url_headers["Content-Length"] == content_length
E AssertionError: assert '30946172' == '30945572'
E
E - 30945572
E ? ^^
E + 30946172
E ? ^^
tests/data_validation/recommenders/datasets/test_mind.py:63: AssertionError
_______________________________________ test_mind_url[https://recodatasets.z20.web.core.windows.net/newsrec/MINDsmall_utils.zip-155178106-0x8D87F67F4AEB960] ________________________________________
url = 'https://recodatasets.z20.web.core.windows.net/newsrec/MINDsmall_utils.zip', content_length = '155178106', etag = '0x8D87F67F4AEB960'
@pytest.mark.parametrize(
"url, content_length, etag",
[
(
"https://recodatasets.z20.web.core.windows.net/newsrec/MINDdemo_train.zip",
"17372879",
'"0x8D8B8AD5B233930"',
), # NOTE: the z20 blob returns the etag with ""
(
"https://recodatasets.z20.web.core.windows.net/newsrec/MINDdemo_dev.zip",
"10080022",
'"0x8D8B8AD5B188839"',
),
(
"https://recodatasets.z20.web.core.windows.net/newsrec/MINDdemo_utils.zip",
"97292694",
'"0x8D8B8AD5B126C3B"',
),
(
"https://recodatasets.z20.web.core.windows.net/newsrec/MINDsmall_train.zip",
"52952752",
"0x8D834F2EB31BDEC",
),
(
"https://recodatasets.z20.web.core.windows.net/newsrec/MINDsmall_dev.zip",
"30945572",
"0x8D834F2EBA8D865",
),
(
"https://recodatasets.z20.web.core.windows.net/newsrec/MINDsmall_utils.zip",
"155178106",
"0x8D87F67F4AEB960",
),
(
"https://recodatasets.z20.web.core.windows.net/newsrec/MINDlarge_train.zip",
"530196631",
"0x8D8244E90C15C07",
),
(
"https://recodatasets.z20.web.core.windows.net/newsrec/MINDlarge_dev.zip",
"103456245",
"0x8D8244E92005849",
),
(
"https://recodatasets.z20.web.core.windows.net/newsrec/MINDlarge_utils.zip",
"150359301",
"0x8D87F67E6CA4364",
),
],
)
def test_mind_url(url, content_length, etag):
url_headers = requests.head(url).headers
assert url_headers["Content-Length"] == content_length
> assert url_headers["ETag"] == etag
E assert '"0x8D8B8AD5B3677C6"' == '0x8D87F67F4AEB960'
E
E - 0x8D87F67F4AEB960
E + "0x8D8B8AD5B3677C6"
tests/data_validation/recommenders/datasets/test_mind.py:64: AssertionError
_______________________________________ test_mind_url[https://recodatasets.z20.web.core.windows.net/newsrec/MINDlarge_train.zip-530196631-0x8D8244E90C15C07] ________________________________________
url = 'https://recodatasets.z20.web.core.windows.net/newsrec/MINDlarge_train.zip', content_length = '530196631', etag = '0x8D8244E90C15C07'
@pytest.mark.parametrize(
"url, content_length, etag",
[
(
"https://recodatasets.z20.web.core.windows.net/newsrec/MINDdemo_train.zip",
"17372879",
'"0x8D8B8AD5B233930"',
), # NOTE: the z20 blob returns the etag with ""
(
"https://recodatasets.z20.web.core.windows.net/newsrec/MINDdemo_dev.zip",
"10080022",
'"0x8D8B8AD5B188839"',
),
(
"https://recodatasets.z20.web.core.windows.net/newsrec/MINDdemo_utils.zip",
"97292694",
'"0x8D8B8AD5B126C3B"',
),
(
"https://recodatasets.z20.web.core.windows.net/newsrec/MINDsmall_train.zip",
"52952752",
"0x8D834F2EB31BDEC",
),
(
"https://recodatasets.z20.web.core.windows.net/newsrec/MINDsmall_dev.zip",
"30945572",
"0x8D834F2EBA8D865",
),
(
"https://recodatasets.z20.web.core.windows.net/newsrec/MINDsmall_utils.zip",
"155178106",
"0x8D87F67F4AEB960",
),
(
"https://recodatasets.z20.web.core.windows.net/newsrec/MINDlarge_train.zip",
"530196631",
"0x8D8244E90C15C07",
),
(
"https://recodatasets.z20.web.core.windows.net/newsrec/MINDlarge_dev.zip",
"103456245",
"0x8D8244E92005849",
),
(
"https://recodatasets.z20.web.core.windows.net/newsrec/MINDlarge_utils.zip",
"150359301",
"0x8D87F67E6CA4364",
),
],
)
def test_mind_url(url, content_length, etag):
url_headers = requests.head(url).headers
> assert url_headers["Content-Length"] == content_length
E AssertionError: assert '531361237' == '530196631'
E
E - 530196631
E + 531361237
tests/data_validation/recommenders/datasets/test_mind.py:63: AssertionError
________________________________________ test_mind_url[https://recodatasets.z20.web.core.windows.net/newsrec/MINDlarge_dev.zip-103456245-0x8D8244E92005849] _________________________________________
url = 'https://recodatasets.z20.web.core.windows.net/newsrec/MINDlarge_dev.zip', content_length = '103456245', etag = '0x8D8244E92005849'
@pytest.mark.parametrize(
"url, content_length, etag",
[
(
"https://recodatasets.z20.web.core.windows.net/newsrec/MINDdemo_train.zip",
"17372879",
'"0x8D8B8AD5B233930"',
), # NOTE: the z20 blob returns the etag with ""
(
"https://recodatasets.z20.web.core.windows.net/newsrec/MINDdemo_dev.zip",
"10080022",
'"0x8D8B8AD5B188839"',
),
(
"https://recodatasets.z20.web.core.windows.net/newsrec/MINDdemo_utils.zip",
"97292694",
'"0x8D8B8AD5B126C3B"',
),
(
"https://recodatasets.z20.web.core.windows.net/newsrec/MINDsmall_train.zip",
"52952752",
"0x8D834F2EB31BDEC",
),
(
"https://recodatasets.z20.web.core.windows.net/newsrec/MINDsmall_dev.zip",
"30945572",
"0x8D834F2EBA8D865",
),
(
"https://recodatasets.z20.web.core.windows.net/newsrec/MINDsmall_utils.zip",
"155178106",
"0x8D87F67F4AEB960",
),
(
"https://recodatasets.z20.web.core.windows.net/newsrec/MINDlarge_train.zip",
"530196631",
"0x8D8244E90C15C07",
),
(
"https://recodatasets.z20.web.core.windows.net/newsrec/MINDlarge_dev.zip",
"103456245",
"0x8D8244E92005849",
),
(
"https://recodatasets.z20.web.core.windows.net/newsrec/MINDlarge_utils.zip",
"150359301",
"0x8D87F67E6CA4364",
),
],
)
def test_mind_url(url, content_length, etag):
url_headers = requests.head(url).headers
> assert url_headers["Content-Length"] == content_length
E AssertionError: assert '103593383' == '103456245'
E
E - 103456245
E + 103593383
tests/data_validation/recommenders/datasets/test_mind.py:63: AssertionError
_______________________________________ test_mind_url[https://recodatasets.z20.web.core.windows.net/newsrec/MINDlarge_utils.zip-150359301-0x8D87F67E6CA4364] ________________________________________
url = 'https://recodatasets.z20.web.core.windows.net/newsrec/MINDlarge_utils.zip', content_length = '150359301', etag = '0x8D87F67E6CA4364'
@pytest.mark.parametrize(
"url, content_length, etag",
[
(
"https://recodatasets.z20.web.core.windows.net/newsrec/MINDdemo_train.zip",
"17372879",
'"0x8D8B8AD5B233930"',
), # NOTE: the z20 blob returns the etag with ""
(
"https://recodatasets.z20.web.core.windows.net/newsrec/MINDdemo_dev.zip",
"10080022",
'"0x8D8B8AD5B188839"',
),
(
"https://recodatasets.z20.web.core.windows.net/newsrec/MINDdemo_utils.zip",
"97292694",
'"0x8D8B8AD5B126C3B"',
),
(
"https://recodatasets.z20.web.core.windows.net/newsrec/MINDsmall_train.zip",
"52952752",
"0x8D834F2EB31BDEC",
),
(
"https://recodatasets.z20.web.core.windows.net/newsrec/MINDsmall_dev.zip",
"30945572",
"0x8D834F2EBA8D865",
),
(
"https://recodatasets.z20.web.core.windows.net/newsrec/MINDsmall_utils.zip",
"155178106",
"0x8D87F67F4AEB960",
),
(
"https://recodatasets.z20.web.core.windows.net/newsrec/MINDlarge_train.zip",
"530196631",
"0x8D8244E90C15C07",
),
(
"https://recodatasets.z20.web.core.windows.net/newsrec/MINDlarge_dev.zip",
"103456245",
"0x8D8244E92005849",
),
(
"https://recodatasets.z20.web.core.windows.net/newsrec/MINDlarge_utils.zip",
"150359301",
"0x8D87F67E6CA4364",
),
],
)
def test_mind_url(url, content_length, etag):
url_headers = requests.head(url).headers
assert url_headers["Content-Length"] == content_length
> assert url_headers["ETag"] == etag
E assert '"0x8D8B8AD5B2ED4C9"' == '0x8D87F67E6CA4364'
E
E - 0x8D87F67E6CA4364
E + "0x8D8B8AD5B2ED4C9"
tests/data_validation/recommenders/datasets/test_mind.py:64: AssertionError
_____________________________________________________________________________________ test_download_mind_small ______________________________________________________________________________________
tmp = '/tmp/pytest-of-u/pytest-23/tmp77ep29qd'
def test_download_mind_small(tmp):
train_path, valid_path = download_mind(size="small", dest_path=tmp)
statinfo = os.stat(train_path)
> assert statinfo.st_size == 52952752
E assert 52953372 == 52952752
E + where 52953372 = os.stat_result(st_mode=33188, st_ino=225215, st_dev=2080, st_nlink=1, st_uid=1000, st_gid=1000, st_size=52953372, st_atime=1723823550, st_mtime=1723823618, st_ctime=1723823618).st_size
tests/data_validation/recommenders/datasets/test_mind.py:78: AssertionError
--------------------------------------------------------------------------------------- Captured stderr call ----------------------------------------------------------------------------------------
100%|██████████| 51.7k/51.7k [01:08<00:00, 760KB/s]
100%|██████████| 30.2k/30.2k [00:33<00:00, 892KB/s]
______________________________________________________________________________________ test_extract_mind_small ______________________________________________________________________________________
tmp = '/tmp/pytest-of-u/pytest-23/tmptnfvzyfb'
def test_extract_mind_small(tmp):
train_zip, valid_zip = download_mind(size="small", dest_path=tmp)
train_path, valid_path = extract_mind(train_zip, valid_zip, clean_zip_file=False)
> statinfo = os.stat(os.path.join(train_path, "behaviors.tsv"))
E FileNotFoundError: [Errno 2] No such file or directory: 'MINDsmall_train.zip/train/behaviors.tsv'
tests/data_validation/recommenders/datasets/test_mind.py:109: FileNotFoundError
--------------------------------------------------------------------------------------- Captured stderr call ----------------------------------------------------------------------------------------
100%|██████████| 51.7k/51.7k [01:09<00:00, 749KB/s]
100%|██████████| 30.2k/30.2k [00:30<00:00, 977KB/s]
_____________________________________________________________________________________ test_download_mind_large ______________________________________________________________________________________
tmp_path = PosixPath('/tmp/pytest-of-u/pytest-23/test_download_mind_large0')
def test_download_mind_large(tmp_path):
train_path, valid_path = download_mind(size="large", dest_path=tmp_path)
statinfo = os.stat(train_path)
> assert statinfo.st_size == 530196631
E assert 531361237 == 530196631
E + where 531361237 = os.stat_result(st_mode=33188, st_ino=225216, st_dev=2080, st_nlink=1, st_uid=1000, st_gid=1000, st_size=531361237, st_atime=1723823807, st_mtime=1723824456, st_ctime=1723824456).st_size
tests/data_validation/recommenders/datasets/test_mind.py:130: AssertionError
--------------------------------------------------------------------------------------- Captured stderr call ----------------------------------------------------------------------------------------
100%|██████████| 519k/519k [10:49<00:00, 799KB/s]
100%|██████████| 101k/101k [02:11<00:00, 769KB/s]
______________________________________________________________________________________ test_extract_mind_large ______________________________________________________________________________________
tmp = '/tmp/pytest-of-u/pytest-23/tmppfvp4z4c'
def test_extract_mind_large(tmp):
train_zip, valid_zip = download_mind(size="large", dest_path=tmp)
train_path, valid_path = extract_mind(train_zip, valid_zip)
> statinfo = os.stat(os.path.join(train_path, "behaviors.tsv"))
E FileNotFoundError: [Errno 2] No such file or directory: 'MINDlarge_train.zip/train/behaviors.tsv'
tests/data_validation/recommenders/datasets/test_mind.py:139: FileNotFoundError
--------------------------------------------------------------------------------------- Captured stderr call ----------------------------------------------------------------------------------------
100%|██████████| 519k/519k [12:01<00:00, 719KB/s]
100%|██████████| 101k/101k [02:08<00:00, 786KB/s]
====================================================================================== short test summary info ======================================================================================
FAILED tests/data_validation/recommenders/datasets/test_mind.py::test_mind_url[https://recodatasets.z20.web.core.windows.net/newsrec/MINDsmall_train.zip-52952752-0x8D834F2EB31BDEC] - AssertionError: assert '52953372' == '52952752'
FAILED tests/data_validation/recommenders/datasets/test_mind.py::test_mind_url[https://recodatasets.z20.web.core.windows.net/newsrec/MINDsmall_dev.zip-30945572-0x8D834F2EBA8D865] - AssertionError: assert '30946172' == '30945572'
FAILED tests/data_validation/recommenders/datasets/test_mind.py::test_mind_url[https://recodatasets.z20.web.core.windows.net/newsrec/MINDsmall_utils.zip-155178106-0x8D87F67F4AEB960] - assert '"0x8D8B8AD5B3677C6"' == '0x8D87F67F4AEB960'
FAILED tests/data_validation/recommenders/datasets/test_mind.py::test_mind_url[https://recodatasets.z20.web.core.windows.net/newsrec/MINDlarge_train.zip-530196631-0x8D8244E90C15C07] - AssertionError: assert '531361237' == '530196631'
FAILED tests/data_validation/recommenders/datasets/test_mind.py::test_mind_url[https://recodatasets.z20.web.core.windows.net/newsrec/MINDlarge_dev.zip-103456245-0x8D8244E92005849] - AssertionError: assert '103593383' == '103456245'
FAILED tests/data_validation/recommenders/datasets/test_mind.py::test_mind_url[https://recodatasets.z20.web.core.windows.net/newsrec/MINDlarge_utils.zip-150359301-0x8D87F67E6CA4364] - assert '"0x8D8B8AD5B2ED4C9"' == '0x8D87F67E6CA4364'
FAILED tests/data_validation/recommenders/datasets/test_mind.py::test_download_mind_small - assert 52953372 == 52952752
FAILED tests/data_validation/recommenders/datasets/test_mind.py::test_extract_mind_small - FileNotFoundError: [Errno 2] No such file or directory: 'MINDsmall_train.zip/train/behaviors.tsv'
FAILED tests/data_validation/recommenders/datasets/test_mind.py::test_download_mind_large - assert 531361237 == 530196631
FAILED tests/data_validation/recommenders/datasets/test_mind.py::test_extract_mind_large - FileNotFoundError: [Errno 2] No such file or directory: 'MINDlarge_train.zip/train/behaviors.tsv'
============================================================================= 10 failed, 5 passed in 2020.59s (0:33:40) ======================================================================
Description
Small uploaded, large still not found.
This will also fix the tests.
Related Issues
2133
References
Checklist:
- Commits are signed, e.g. `git commit -s -m "your commit message"`.
- This PR is being made to the `staging` branch AND NOT TO the `main` branch.