When I trained the model on the CMU datasets with multiple GPUs, the dataloader encountered the following problem, but it worked with a single GPU.
Traceback (most recent call last):
File "run/train_3d.py", line 163, in <module>
main()
File "run/train_3d.py", line 136, in main
train_3d(config, model, optimizer, train_loader, epoch, final_output_dir, writer_dict)
File "/home/gw/Project/voxelpose/lib/core/function.py", line 37, in train_3d
for i, (inputs, targets_2d, weights_2d, targets_3d, meta, input_heatmap) in enumerate(loader):
File "/home/gw/anaconda3/envs/VIBE/lib/python3.7/site-packages/torch/utils/data/dataloader.py", line 435, in __next__
data = self._next_data()
File "/home/gw/anaconda3/envs/VIBE/lib/python3.7/site-packages/torch/utils/data/dataloader.py", line 1065, in _next_data
return self._process_data(data)
File "/home/gw/anaconda3/envs/VIBE/lib/python3.7/site-packages/torch/utils/data/dataloader.py", line 1111, in _process_data
data.reraise()
File "/home/gw/anaconda3/envs/VIBE/lib/python3.7/site-packages/torch/_utils.py", line 428, in reraise
raise self.exc_type(msg)
RuntimeError: Caught RuntimeError in DataLoader worker process 3.
Original Traceback (most recent call last):
File "/home/gw/anaconda3/envs/VIBE/lib/python3.7/site-packages/torch/utils/data/_utils/worker.py", line 198, in _worker_loop
data = fetcher.fetch(index)
File "/home/gw/anaconda3/envs/VIBE/lib/python3.7/site-packages/torch/utils/data/_utils/fetch.py", line 47, in fetch
return self.collate_fn(data)
File "/home/gw/anaconda3/envs/VIBE/lib/python3.7/site-packages/torch/utils/data/_utils/collate.py", line 83, in default_collate
return [default_collate(samples) for samples in transposed]
File "/home/gw/anaconda3/envs/VIBE/lib/python3.7/site-packages/torch/utils/data/_utils/collate.py", line 83, in <listcomp>
return [default_collate(samples) for samples in transposed]
File "/home/gw/anaconda3/envs/VIBE/lib/python3.7/site-packages/torch/utils/data/_utils/collate.py", line 81, in default_collate
raise RuntimeError('each element in list of batch should be of equal size')
RuntimeError: each element in list of batch should be of equal size
When I trained the model on the CMU datasets with multiple GPUs, the dataloader encountered the following problem, but it worked with a single GPU.
Traceback (most recent call last):
File "run/train_3d.py", line 163, in <module>
main()
File "run/train_3d.py", line 136, in main
train_3d(config, model, optimizer, train_loader, epoch, final_output_dir, writer_dict)
File "/home/gw/Project/voxelpose/lib/core/function.py", line 37, in train_3d
for i, (inputs, targets_2d, weights_2d, targets_3d, meta, input_heatmap) in enumerate(loader):
File "/home/gw/anaconda3/envs/VIBE/lib/python3.7/site-packages/torch/utils/data/dataloader.py", line 435, in __next__
data = self._next_data()
File "/home/gw/anaconda3/envs/VIBE/lib/python3.7/site-packages/torch/utils/data/dataloader.py", line 1065, in _next_data
return self._process_data(data)
File "/home/gw/anaconda3/envs/VIBE/lib/python3.7/site-packages/torch/utils/data/dataloader.py", line 1111, in _process_data
data.reraise()
File "/home/gw/anaconda3/envs/VIBE/lib/python3.7/site-packages/torch/_utils.py", line 428, in reraise
raise self.exc_type(msg)
RuntimeError: Caught RuntimeError in DataLoader worker process 3.
Original Traceback (most recent call last):
File "/home/gw/anaconda3/envs/VIBE/lib/python3.7/site-packages/torch/utils/data/_utils/worker.py", line 198, in _worker_loop
data = fetcher.fetch(index)
File "/home/gw/anaconda3/envs/VIBE/lib/python3.7/site-packages/torch/utils/data/_utils/fetch.py", line 47, in fetch
return self.collate_fn(data)
File "/home/gw/anaconda3/envs/VIBE/lib/python3.7/site-packages/torch/utils/data/_utils/collate.py", line 83, in default_collate
return [default_collate(samples) for samples in transposed]
File "/home/gw/anaconda3/envs/VIBE/lib/python3.7/site-packages/torch/utils/data/_utils/collate.py", line 83, in <listcomp>
return [default_collate(samples) for samples in transposed]
File "/home/gw/anaconda3/envs/VIBE/lib/python3.7/site-packages/torch/utils/data/_utils/collate.py", line 81, in default_collate
raise RuntimeError('each element in list of batch should be of equal size')
RuntimeError: each element in list of batch should be of equal size