[NeurIPS 2020] This project provides a strong single-stage baseline for Long-Tailed Classification, Detection, and Instance Segmentation (LVIS). It is also a PyTorch implementation of the NeurIPS 2020 paper 'Long-Tailed Classification by Keeping the Good and Removing the Bad Momentum Causal Effect'.
GNU General Public License v3.0
560
stars
68
forks
source link
LVIS training bug: TypeError: can't pickle _thread.RLock objects #23
**Error traceback**
If applicable, paste the error traceback here.
2020-11-14 20:28:12,249 - mmdet - INFO - workflow: [('train', 1)], max: 12 epochs
Traceback (most recent call last):
File "./tools/train.py", line 177, in <module>
main()
File "./tools/train.py", line 173, in main
meta=meta)
File "/data/cdp_algo_ceph_ssd/users/georgeni/causallvis/mmdet/apis/train.py", line 143, in train_detector
runner.run(data_loaders, cfg.workflow, cfg.total_epochs)
File "/data/anaconda3/envs/zxcheng/lib/python3.6/site-packages/mmcv/runner/epoch_based_runner.py", line 122, in run
epoch_runner(data_loaders[i], **kwargs)
File "/data/anaconda3/envs/zxcheng/lib/python3.6/site-packages/mmcv/runner/epoch_based_runner.py", line 27, in train
for i, data_batch in enumerate(self.data_loader):
File "/data/anaconda3/envs/zxcheng/lib/python3.6/site-packages/torch/utils/data/dataloader.py", line 279, in __iter__
return _MultiProcessingDataLoaderIter(self)
File "/data/anaconda3/envs/zxcheng/lib/python3.6/site-packages/torch/utils/data/dataloader.py", line 719, in __init__
w.start()
File "/data/anaconda3/envs/zxcheng/lib/python3.6/multiprocessing/process.py", line 105, in start
self._popen = self._Popen(self)
File "/data/anaconda3/envs/zxcheng/lib/python3.6/multiprocessing/context.py", line 223, in _Popen
return _default_context.get_context().Process._Popen(process_obj)
File "/data/anaconda3/envs/zxcheng/lib/python3.6/multiprocessing/context.py", line 284, in _Popen
return Popen(process_obj)
File "/data/anaconda3/envs/zxcheng/lib/python3.6/multiprocessing/popen_spawn_posix.py", line 32, in __init__
super().__init__(process_obj)
File "/data/anaconda3/envs/zxcheng/lib/python3.6/multiprocessing/popen_fork.py", line 19, in __init__
self._launch(process_obj)
File "/data/anaconda3/envs/zxcheng/lib/python3.6/multiprocessing/popen_spawn_posix.py", line 47, in _launch
reduction.dump(process_obj, fp)
File "/data/anaconda3/envs/zxcheng/lib/python3.6/multiprocessing/reduction.py", line 60, in dump
ForkingPickler(file, protocol).dump(obj)
TypeError: can't pickle _thread.RLock objects
Traceback (most recent call last):
File "./tools/train.py", line 177, in <module>
main()
File "./tools/train.py", line 173, in main
meta=meta)
File "/data/cdp_algo_ceph_ssd/users/georgeni/causallvis/mmdet/apis/train.py", line 143, in train_detector
runner.run(data_loaders, cfg.workflow, cfg.total_epochs)
File "/data/anaconda3/envs/zxcheng/lib/python3.6/site-packages/mmcv/runner/epoch_based_runner.py", line 122, in run
epoch_runner(data_loaders[i], **kwargs)
File "/data/anaconda3/envs/zxcheng/lib/python3.6/site-packages/mmcv/runner/epoch_based_runner.py", line 27, in train
for i, data_batch in enumerate(self.data_loader):
File "/data/anaconda3/envs/zxcheng/lib/python3.6/site-packages/torch/utils/data/dataloader.py", line 279, in __iter__
return _MultiProcessingDataLoaderIter(self)
File "/data/anaconda3/envs/zxcheng/lib/python3.6/site-packages/torch/utils/data/dataloader.py", line 719, in __init__
w.start()
File "/data/anaconda3/envs/zxcheng/lib/python3.6/multiprocessing/process.py", line 105, in start
self._popen = self._Popen(self)
File "/data/anaconda3/envs/zxcheng/lib/python3.6/multiprocessing/context.py", line 223, in _Popen
return _default_context.get_context().Process._Popen(process_obj)
File "/data/anaconda3/envs/zxcheng/lib/python3.6/multiprocessing/context.py", line 284, in _Popen
return Popen(process_obj)
File "/data/anaconda3/envs/zxcheng/lib/python3.6/multiprocessing/popen_spawn_posix.py", line 32, in __init__
super().__init__(process_obj)
File "/data/anaconda3/envs/zxcheng/lib/python3.6/multiprocessing/popen_fork.py", line 19, in __init__
self._launch(process_obj)
File "/data/anaconda3/envs/zxcheng/lib/python3.6/multiprocessing/popen_spawn_posix.py", line 47, in _launch
reduction.dump(process_obj, fp)
File "/data/anaconda3/envs/zxcheng/lib/python3.6/multiprocessing/reduction.py", line 60, in dump
ForkingPickler(file, protocol).dump(obj)
TypeError: can't pickle _thread.RLock objects
^C^C^C^C^C^C^C^C^C^C^C^C^C
Traceback (most recent call last):
File "/data/anaconda3/envs/zxcheng/lib/python3.6/runpy.py", line 193, in _run_module_as_main
"__main__", mod_spec)
File "/data/anaconda3/envs/zxcheng/lib/python3.6/runpy.py", line 85, in _run_code
exec(code, run_globals)
File "/data/anaconda3/envs/zxcheng/lib/python3.6/site-packages/torch/distributed/launch.py", line 263, in <module>
main()
File "/data/anaconda3/envs/zxcheng/lib/python3.6/site-packages/torch/distributed/launch.py", line 259, in main
cmd=cmd)
subprocess.CalledProcessError: Command '['/data/anaconda3/envs/zxcheng/bin/python', '-u', './tools/train.py', '--local_rank=1', 'configs/lvis/htcnosemlvis.py', '--launcher', 'pytorch', '--work-dir', 'work_bendilvis/lvis/htcnosemlvis', '--no-validate']' returned non-zero exit status 1.
**Describe the bug**
Training on the COCO dataset works fine, but when I train on LVIS I run into this bug.
Environment
Please run `python mmdet/utils/collect_env.py` to collect necessary environment information and paste it here.
TorchVision: 0.5.0 OpenCV: 4.4.0 MMCV: 1.1.2 MMDetection: 2.4.0+ MMDetection Compiler: GCC 7.3 MMDetection CUDA Compiler: 10.1
2020-11-14 20:28:12,249 - mmdet - INFO - workflow: [('train', 1)], max: 12 epochs
Traceback (most recent call last):
File "./tools/train.py", line 177, in <module>
main()
File "./tools/train.py", line 173, in main
meta=meta)
File "/data/cdp_algo_ceph_ssd/users/georgeni/causallvis/mmdet/apis/train.py", line 143, in train_detector
runner.run(data_loaders, cfg.workflow, cfg.total_epochs)
File "/data/anaconda3/envs/zxcheng/lib/python3.6/site-packages/mmcv/runner/epoch_based_runner.py", line 122, in run
epoch_runner(data_loaders[i], kwargs)
File "/data/anaconda3/envs/zxcheng/lib/python3.6/site-packages/mmcv/runner/epoch_based_runner.py", line 27, in train
for i, data_batch in enumerate(self.data_loader):
File "/data/anaconda3/envs/zxcheng/lib/python3.6/site-packages/torch/utils/data/dataloader.py", line 279, in iter
return _MultiProcessingDataLoaderIter(self)
File "/data/anaconda3/envs/zxcheng/lib/python3.6/site-packages/torch/utils/data/dataloader.py", line 719, in init
w.start()
File "/data/anaconda3/envs/zxcheng/lib/python3.6/multiprocessing/process.py", line 105, in start
self._popen = self._Popen(self)
File "/data/anaconda3/envs/zxcheng/lib/python3.6/multiprocessing/context.py", line 223, in _Popen
return _default_context.get_context().Process._Popen(process_obj)
File "/data/anaconda3/envs/zxcheng/lib/python3.6/multiprocessing/context.py", line 284, in _Popen
return Popen(process_obj)
File "/data/anaconda3/envs/zxcheng/lib/python3.6/multiprocessing/popen_spawn_posix.py", line 32, in init
super().init(process_obj)
File "/data/anaconda3/envs/zxcheng/lib/python3.6/multiprocessing/popen_fork.py", line 19, in init
self._launch(process_obj)
File "/data/anaconda3/envs/zxcheng/lib/python3.6/multiprocessing/popen_spawn_posix.py", line 47, in _launch
reduction.dump(process_obj, fp)
File "/data/anaconda3/envs/zxcheng/lib/python3.6/multiprocessing/reduction.py", line 60, in dump
ForkingPickler(file, protocol).dump(obj)
TypeError: can't pickle _thread.RLock objects
Traceback (most recent call last):
File "./tools/train.py", line 177, in
main()
File "./tools/train.py", line 173, in main
meta=meta)
File "/data/cdp_algo_ceph_ssd/users/georgeni/causallvis/mmdet/apis/train.py", line 143, in train_detector
runner.run(data_loaders, cfg.workflow, cfg.total_epochs)
File "/data/anaconda3/envs/zxcheng/lib/python3.6/site-packages/mmcv/runner/epoch_based_runner.py", line 122, in run
epoch_runner(data_loaders[i], kwargs)
File "/data/anaconda3/envs/zxcheng/lib/python3.6/site-packages/mmcv/runner/epoch_based_runner.py", line 27, in train
for i, data_batch in enumerate(self.data_loader):
File "/data/anaconda3/envs/zxcheng/lib/python3.6/site-packages/torch/utils/data/dataloader.py", line 279, in iter
return _MultiProcessingDataLoaderIter(self)
File "/data/anaconda3/envs/zxcheng/lib/python3.6/site-packages/torch/utils/data/dataloader.py", line 719, in init
w.start()
File "/data/anaconda3/envs/zxcheng/lib/python3.6/multiprocessing/process.py", line 105, in start
self._popen = self._Popen(self)
File "/data/anaconda3/envs/zxcheng/lib/python3.6/multiprocessing/context.py", line 223, in _Popen
return _default_context.get_context().Process._Popen(process_obj)
File "/data/anaconda3/envs/zxcheng/lib/python3.6/multiprocessing/context.py", line 284, in _Popen
return Popen(process_obj)
File "/data/anaconda3/envs/zxcheng/lib/python3.6/multiprocessing/popen_spawn_posix.py", line 32, in init
super().init(process_obj)
File "/data/anaconda3/envs/zxcheng/lib/python3.6/multiprocessing/popen_fork.py", line 19, in init
self._launch(process_obj)
File "/data/anaconda3/envs/zxcheng/lib/python3.6/multiprocessing/popen_spawn_posix.py", line 47, in _launch
reduction.dump(process_obj, fp)
File "/data/anaconda3/envs/zxcheng/lib/python3.6/multiprocessing/reduction.py", line 60, in dump
ForkingPickler(file, protocol).dump(obj)
TypeError: can't pickle _thread.RLock objects
^C^C^C^C^C^C^C^C^C^C^C^C^CTraceback (most recent call last):
File "/data/anaconda3/envs/zxcheng/lib/python3.6/runpy.py", line 193, in _run_module_as_main
"main", mod_spec)
File "/data/anaconda3/envs/zxcheng/lib/python3.6/runpy.py", line 85, in _run_code
exec(code, run_globals)
File "/data/anaconda3/envs/zxcheng/lib/python3.6/site-packages/torch/distributed/launch.py", line 263, in
main()
File "/data/anaconda3/envs/zxcheng/lib/python3.6/site-packages/torch/distributed/launch.py", line 259, in main
cmd=cmd)
subprocess.CalledProcessError: Command '['/data/anaconda3/envs/zxcheng/bin/python', '-u', './tools/train.py', '--local_rank=1', 'configs/lvis/htcnosemlvis.py', '--launcher', 'pytorch', '--work-dir', 'work_bendilvis/lvis/htcnosemlvis', '--no-validate']' returned non-zero exit status 1.