Traceback (most recent call last):
File "/home/cc/ffcv-imagenet/write_imagenet_web.py", line 67, in <module>
main()
File "/home/cc/miniconda3/envs/ffcv/lib/python3.9/site-packages/fastargs/decorators.py", line 41, in __call__
raise e
File "/home/cc/miniconda3/envs/ffcv/lib/python3.9/site-packages/fastargs/decorators.py", line 35, in __call__
return self.func(*args, **filled_args)
File "/home/cc/ffcv-imagenet/write_imagenet_web.py", line 58, in main
writer.from_webdataset(my_shards, pipeline=pipeline)
File "/home/cc/miniconda3/envs/ffcv/lib/python3.9/site-packages/ffcv/writer.py", line 311, in from_webdataset
lengths = thread_map(counter, shards, max_workers=self.num_workers)
File "/home/cc/miniconda3/envs/ffcv/lib/python3.9/site-packages/tqdm/contrib/concurrent.py", line 94, in thread_map
return _executor_map(ThreadPoolExecutor, fn, *iterables, **tqdm_kwargs)
File "/home/cc/miniconda3/envs/ffcv/lib/python3.9/site-packages/tqdm/contrib/concurrent.py", line 76, in _executor_map
return list(tqdm_class(ex.map(fn, *iterables, **map_args), **kwargs))
File "/home/cc/miniconda3/envs/ffcv/lib/python3.9/site-packages/tqdm/std.py", line 1195, in __iter__
for obj in iterable:
File "/home/cc/miniconda3/envs/ffcv/lib/python3.9/concurrent/futures/_base.py", line 609, in result_iterator
yield fs.pop().result()
File "/home/cc/miniconda3/envs/ffcv/lib/python3.9/concurrent/futures/_base.py", line 446, in result
return self.__get_result()
File "/home/cc/miniconda3/envs/ffcv/lib/python3.9/concurrent/futures/_base.py", line 391, in __get_result
raise self._exception
File "/home/cc/miniconda3/envs/ffcv/lib/python3.9/concurrent/futures/thread.py", line 58, in run
result = self.fn(*self.args, **self.kwargs)
File "/home/cc/miniconda3/envs/ffcv/lib/python3.9/site-packages/ffcv/writer.py", line 36, in count_samples_in_shard
for _ in from_shard(shard, pipeline):
TypeError: 'NoneType' object is not iterable
It looks like an error with either the WebDataset or the pipeline (from writer.py):
def from_shard(shard, pipeline):
    """Open one WebDataset shard and run it through the user pipeline.

    Args:
        shard: Shard location, passed unchanged to ``WebDataset``.
        pipeline: Callable that receives the ``WebDataset`` and returns the
            dataset to iterate over.

    Returns:
        Whatever ``pipeline`` returns.

    Raises:
        TypeError: If ``pipeline`` returns ``None`` (typically a pipeline
            function that is missing its ``return`` statement).
    """
    # We import webdataset here so that it doesn't crash if it's not required
    # (WebDataset is an optional dependency)
    from webdataset import WebDataset

    dataset = pipeline(WebDataset(shard))
    if dataset is None:
        # Callers iterate over the result; failing here gives an actionable
        # message instead of a bare "'NoneType' object is not iterable"
        # raised later from wherever the iteration happens.
        raise TypeError(
            f"pipeline returned None for shard {shard!r}; the pipeline "
            "callable must return the dataset it builds "
            "(is a `return` statement missing?)"
        )
    return dataset
def count_samples_in_shard(shard, pipeline):
    """Return the number of samples produced by one shard.

    The samples are counted by iterating over the dataset, because
    ``__len__`` might not be implemented for it.
    """
    print(shard)
    return sum(1 for _ in from_shard(shard, pipeline))
What could cause `from_shard` to return None? I double-checked that the glob is correct and returns the right paths to the tar files when called. I'm kind of at a loss here.
Hi @codestar12 ! Sorry for the late response here---is this resolved? If not, can you double check that you're able to iterate over the WebDataset itself (i.e., completely independently of FFCV)?
Hello, I am trying to convert an ImageNet dataset sharded as WebDataset tar files into an FFCV dataset.
I downloaded the ffcv-imagenet repo (https://github.com/libffcv/ffcv-imagenet)
and modified it as best I could, following the WebDataset tutorial.
I am getting an error
it looks like an error with either the WebDataset or the pipeline (from writer.py)
What could cause `from_shard` to return None? I double-checked that the glob is correct and returns the right paths to the tar files when called. I'm kind of at a loss here.