I am downloading a dataset with the following code:
```python
from datasets import load_dataset, DatasetDict
dataset = load_dataset("nyu-visionx/Cambrian-Alignment",cache_dir="~\bucket-llm-1\datasets", download_mode='force_redownload')
```
The error message is:
Downloading data: 91%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▋ | 26.3G/28.8G [21:12<10:59:50, 61.9kB/s]
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
File "/home/anguoyuan111/.local/lib/python3.10/site-packages/datasets/load.py", line 2628, in load_dataset
builder_instance.download_and_prepare(
File "/home/anguoyuan111/.local/lib/python3.10/site-packages/datasets/builder.py", line 1029, in download_and_prepare
self._download_and_prepare(
File "/home/anguoyuan111/.local/lib/python3.10/site-packages/datasets/builder.py", line 1791, in _download_and_prepare
super()._download_and_prepare(
File "/home/anguoyuan111/.local/lib/python3.10/site-packages/datasets/builder.py", line 1102, in _download_and_prepare
split_generators = self._split_generators(dl_manager, **split_generators_kwargs)
File "/home/anguoyuan111/.local/lib/python3.10/site-packages/datasets/packaged_modules/webdataset/webdataset.py", line 61, in _split_generators
data_files = dl_manager.download(self.config.data_files)
File "/home/anguoyuan111/.local/lib/python3.10/site-packages/datasets/download/download_manager.py", line 257, in download
downloaded_path_or_paths = map_nested(
File "/home/anguoyuan111/.local/lib/python3.10/site-packages/datasets/utils/py_utils.py", line 494, in map_nested
mapped = [
File "/home/anguoyuan111/.local/lib/python3.10/site-packages/datasets/utils/py_utils.py", line 495, in <listcomp>
map_nested(
File "/home/anguoyuan111/.local/lib/python3.10/site-packages/datasets/utils/py_utils.py", line 511, in map_nested
mapped = [
File "/home/anguoyuan111/.local/lib/python3.10/site-packages/datasets/utils/py_utils.py", line 512, in <listcomp>
_single_map_nested((function, obj, batched, batch_size, types, None, True, None))
File "/home/anguoyuan111/.local/lib/python3.10/site-packages/datasets/utils/py_utils.py", line 380, in _single_map_nested
return [mapped_item for batch in iter_batched(data_struct, batch_size) for mapped_item in function(batch)]
File "/home/anguoyuan111/.local/lib/python3.10/site-packages/datasets/utils/py_utils.py", line 380, in <listcomp>
return [mapped_item for batch in iter_batched(data_struct, batch_size) for mapped_item in function(batch)]
File "/home/anguoyuan111/.local/lib/python3.10/site-packages/datasets/download/download_manager.py", line 313, in _download_batched
return [
File "/home/anguoyuan111/.local/lib/python3.10/site-packages/datasets/download/download_manager.py", line 314, in <listcomp>
self._download_single(url_or_filename, download_config=download_config)
File "/home/anguoyuan111/.local/lib/python3.10/site-packages/datasets/download/download_manager.py", line 323, in _download_single
out = cached_path(url_or_filename, download_config=download_config)
File "/home/anguoyuan111/.local/lib/python3.10/site-packages/datasets/utils/file_utils.py", line 211, in cached_path
output_path = get_from_cache(
File "/home/anguoyuan111/.local/lib/python3.10/site-packages/datasets/utils/file_utils.py", line 689, in get_from_cache
fsspec_get(
File "/home/anguoyuan111/.local/lib/python3.10/site-packages/datasets/utils/file_utils.py", line 395, in fsspec_get
fs.get_file(path, temp_file.name, callback=callback)
File "/home/anguoyuan111/.local/lib/python3.10/site-packages/huggingface_hub/hf_file_system.py", line 648, in get_file
http_get(
File "/home/anguoyuan111/.local/lib/python3.10/site-packages/huggingface_hub/file_download.py", line 578, in http_get
raise EnvironmentError(
OSError: Consistency check failed: file should be of size 28754810880 but has size 26303841200 ((…)be6153f2e0126fa516d57cc791b3/sbu558k.tar).
We are sorry for the inconvenience. Please retry with force_download=True.
If the issue persists, please let us know by opening an issue on https://github.com/huggingface/huggingface_hub.
Describe the bug
I am downloading a dataset through:
from datasets import load_dataset, DatasetDict
dataset = load_dataset("nyu-visionx/Cambrian-Alignment",cache_dir="~\bucket-llm-1\datasets", download_mode='force_redownload')
The error message is:
Downloading data: 91%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▋ | 26.3G/28.8G [21:12<10:59:50, 61.9kB/s]
Traceback (most recent call last):
File "", line 1, in
File "/home/anguoyuan111/.local/lib/python3.10/site-packages/datasets/load.py", line 2628, in load_dataset
builder_instance.download_and_prepare(
File "/home/anguoyuan111/.local/lib/python3.10/site-packages/datasets/builder.py", line 1029, in download_and_prepare
self._download_and_prepare(
File "/home/anguoyuan111/.local/lib/python3.10/site-packages/datasets/builder.py", line 1791, in _download_and_prepare
super()._download_and_prepare(
File "/home/anguoyuan111/.local/lib/python3.10/site-packages/datasets/builder.py", line 1102, in _download_and_prepare
split_generators = self._split_generators(dl_manager, **split_generators_kwargs)
File "/home/anguoyuan111/.local/lib/python3.10/site-packages/datasets/packaged_modules/webdataset/webdataset.py", line 61, in _split_generators
data_files = dl_manager.download(self.config.data_files)
File "/home/anguoyuan111/.local/lib/python3.10/site-packages/datasets/download/download_manager.py", line 257, in download
downloaded_path_or_paths = map_nested(
File "/home/anguoyuan111/.local/lib/python3.10/site-packages/datasets/utils/py_utils.py", line 494, in map_nested
mapped = [
File "/home/anguoyuan111/.local/lib/python3.10/site-packages/datasets/utils/py_utils.py", line 495, in <listcomp>
map_nested(
File "/home/anguoyuan111/.local/lib/python3.10/site-packages/datasets/utils/py_utils.py", line 511, in map_nested
mapped = [
File "/home/anguoyuan111/.local/lib/python3.10/site-packages/datasets/utils/py_utils.py", line 512, in <listcomp>
_single_map_nested((function, obj, batched, batch_size, types, None, True, None))
File "/home/anguoyuan111/.local/lib/python3.10/site-packages/datasets/utils/py_utils.py", line 380, in _single_map_nested
return [mapped_item for batch in iter_batched(data_struct, batch_size) for mapped_item in function(batch)]
File "/home/anguoyuan111/.local/lib/python3.10/site-packages/datasets/utils/py_utils.py", line 380, in <listcomp>
return [mapped_item for batch in iter_batched(data_struct, batch_size) for mapped_item in function(batch)]
File "/home/anguoyuan111/.local/lib/python3.10/site-packages/datasets/download/download_manager.py", line 313, in _download_batched
return [
File "/home/anguoyuan111/.local/lib/python3.10/site-packages/datasets/download/download_manager.py", line 314, in <listcomp>
self._download_single(url_or_filename, download_config=download_config)
File "/home/anguoyuan111/.local/lib/python3.10/site-packages/datasets/download/download_manager.py", line 323, in _download_single
out = cached_path(url_or_filename, download_config=download_config)
File "/home/anguoyuan111/.local/lib/python3.10/site-packages/datasets/utils/file_utils.py", line 211, in cached_path
output_path = get_from_cache(
File "/home/anguoyuan111/.local/lib/python3.10/site-packages/datasets/utils/file_utils.py", line 689, in get_from_cache
fsspec_get(
File "/home/anguoyuan111/.local/lib/python3.10/site-packages/datasets/utils/file_utils.py", line 395, in fsspec_get
fs.get_file(path, temp_file.name, callback=callback)
File "/home/anguoyuan111/.local/lib/python3.10/site-packages/huggingface_hub/hf_file_system.py", line 648, in get_file
http_get(
File "/home/anguoyuan111/.local/lib/python3.10/site-packages/huggingface_hub/file_download.py", line 578, in http_get
raise EnvironmentError(
OSError: Consistency check failed: file should be of size 28754810880 but has size 26303841200 ((…)be6153f2e0126fa516d57cc791b3/sbu558k.tar).
We are sorry for the inconvenience. Please retry with `force_download=True`.
If the issue persists, please let us know by opening an issue on https://github.com/huggingface/huggingface_hub.
Reproduction
No response
Logs
No response
System info