```
model = DistributedDataParallel(
  File "/opt/conda/envs/pytorch/lib/python3.8/site-packages/torch/nn/parallel/distributed.py", line 648, in __init__
    _sync_module_states(
  File "/opt/conda/envs/pytorch/lib/python3.8/site-packages/torch/distributed/utils.py", line 113, in _sync_module_states
    _sync_params_and_buffers(
  File "/opt/conda/envs/pytorch/lib/python3.8/site-packages/torch/distributed/utils.py", line 131, in _sync_params_and_buffers
    dist._broadcast_coalesced(
RuntimeError: The size of tensor a (513) must match the size of tensor b (0) at non-singleton dimension 1
```
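For context: `_broadcast_coalesced` fails here because a parameter or buffer does not have the same shape on every rank, so one process built a tensor with 513 entries at dimension 1 while another built an empty one. Below is a minimal diagnostic sketch (assuming a standard `torchrun`-style launch; `build_model_and_load_weights` is a hypothetical stand-in for however train_net.py builds the model and loads MODEL.WEIGHTS) that flags zero-sized tensors on each rank before the DDP wrap:

```python
import os

import torch
import torch.distributed as dist


def build_model_and_load_weights():
    # Hypothetical stand-in: replace with the repo's own model builder
    # plus whatever code loads MODEL.WEIGHTS on each rank.
    raise NotImplementedError


def report_empty_tensors(model):
    """Print any parameter/buffer with a zero-sized dimension on this rank;
    these are exactly what _broadcast_coalesced trips over when
    DistributedDataParallel syncs module states at construction time."""
    rank = dist.get_rank()
    for name, t in model.state_dict().items():
        if 0 in tuple(t.shape):
            print(f"rank {rank}: {name} has shape {tuple(t.shape)}", flush=True)


if __name__ == "__main__":
    dist.init_process_group(backend="nccl")
    torch.cuda.set_device(int(os.environ["LOCAL_RANK"]))
    model = build_model_and_load_weights().cuda()
    report_empty_tensors(model)  # compare output across ranks
    dist.barrier()
```

Running this with `torchrun --nproc_per_node 8 ...` and comparing the output across ranks should reveal which tensor is empty on some ranks but not on rank 0.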
Command:

```
python train_net.py --num-gpus 8 --config-file configs/test/infer_coco.yaml MODEL.WEIGHTS my/path/res50_fpn_soco_star_400.pkl
```

Loading the .pth weights also triggers this bug.
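It may also be worth checking whether the empty tensor is already present in the checkpoint file itself. A short sketch for inspecting either format (assumption: the .pkl follows detectron2's usual layout, a pickled dict whose "model" key maps parameter names to numpy arrays, and the .pth is a plain `torch.save` state dict; the path is the one from the command above):

```python
import pickle

import torch


def load_checkpoint_state(path):
    """Load a detectron2-style .pkl or a plain torch .pth checkpoint
    and return the flat name -> tensor/array mapping."""
    if path.endswith(".pkl"):
        with open(path, "rb") as f:
            data = pickle.load(f, encoding="latin1")
    else:
        data = torch.load(path, map_location="cpu")
    # Both formats sometimes nest the weights under a "model" key.
    return data.get("model", data) if isinstance(data, dict) else data


state = load_checkpoint_state("my/path/res50_fpn_soco_star_400.pkl")
for name, tensor in state.items():
    shape = tuple(getattr(tensor, "shape", ()))
    if 0 in shape or 513 in shape:  # hunt for the empty / 513-wide tensors
        print(name, shape)
```

If nothing in the file has a zero-sized dimension, the mismatch is being introduced at model-build or weight-load time on the non-zero ranks rather than by the checkpoint.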
Some requests: could you please share `lvis_v1_train_norare.json` and the other .pkl files?