Setting OMP_NUM_THREADS environment variable for each process to be 1 in default, to avoid your system being overloaded, please further tune the variable for optimal performance in your application as needed.
Traceback (most recent call last):
File "tools/train.py", line 168, in
main()
File "tools/train.py", line 122, in main
env_info_dict = collect_env()
File "/dataset/wh/wh_code/HRFormer-main/pose/mmpose/utils/collect_env.py", line 8, in collect_env
env_info = collect_basic_env()
File "/home/celia/anaconda3/envs/open-mmlab/lib/python3.7/site-packages/mmcv/utils/env.py", line 85, in collect_env
from mmcv.ops import get_compiler_version, get_compiling_cuda_version
File "/home/celia/anaconda3/envs/open-mmlab/lib/python3.7/site-packages/mmcv/ops/init.py", line 1, in
from .bbox import bbox_overlaps
File "/home/celia/anaconda3/envs/open-mmlab/lib/python3.7/site-packages/mmcv/ops/bbox.py", line 3, in
ext_module = ext_loader.load_ext('_ext', ['bbox_overlaps'])
File "/home/celia/anaconda3/envs/open-mmlab/lib/python3.7/site-packages/mmcv/utils/ext_loader.py", line 12, in load_ext
ext = importlib.import_module('mmcv.' + name)
File "/home/celia/anaconda3/envs/open-mmlab/lib/python3.7/importlib/init.py", line 127, in import_module
return _bootstrap._gcd_import(name[level:], package, level)
ImportError: /home/celia/anaconda3/envs/open-mmlab/lib/python3.7/site-packages/mmcv/_ext.cpython-37m-x86_64-linux-gnu.so: undefined symbol: _Z13THCudaCheck9cudaErrorPKci
ERROR:torch.distributed.elastic.multiprocessing.api:failed (exitcode: 1) local_rank: 0 (pid: 42674) of binary: /home/celia/anaconda3/envs/open-mmlab/bin/python
Traceback (most recent call last):
File "/home/celia/anaconda3/envs/open-mmlab/lib/python3.7/runpy.py", line 193, in _run_module_as_main
"main", mod_spec)
File "/home/celia/anaconda3/envs/open-mmlab/lib/python3.7/runpy.py", line 85, in _run_code
exec(code, run_globals)
File "/home/celia/anaconda3/envs/open-mmlab/lib/python3.7/site-packages/torch/distributed/launch.py", line 193, in
main()
File "/home/celia/anaconda3/envs/open-mmlab/lib/python3.7/site-packages/torch/distributed/launch.py", line 189, in main
launch(args)
File "/home/celia/anaconda3/envs/open-mmlab/lib/python3.7/site-packages/torch/distributed/launch.py", line 174, in launch
run(args)
File "/home/celia/anaconda3/envs/open-mmlab/lib/python3.7/site-packages/torch/distributed/run.py", line 718, in run
)(*cmd_args)
File "/home/celia/anaconda3/envs/open-mmlab/lib/python3.7/site-packages/torch/distributed/launcher/api.py", line 131, in call__
return launch_agent(self._config, self._entrypoint, list(args))
File "/home/celia/anaconda3/envs/open-mmlab/lib/python3.7/site-packages/torch/distributed/launcher/api.py", line 247, in launch_agent
failures=result.failures,
torch.distributed.elastic.multiprocessing.errors.ChildFailedError:
tools/train.py FAILED
Failures:
[1]:
  time      : 2022-10-24_10:03:43
  host      : omnisky
  rank      : 1 (local_rank: 1)
  exitcode  : 1 (pid: 42675)
  error_file: <N/A>
  traceback : To enable traceback see: https://pytorch.org/docs/stable/elastic/errors.html
[2]:
  time      : 2022-10-24_10:03:43
  host      : omnisky
  rank      : 2 (local_rank: 2)
  exitcode  : 1 (pid: 42676)
  error_file: <N/A>
  traceback : To enable traceback see: https://pytorch.org/docs/stable/elastic/errors.html
[3]:
  time      : 2022-10-24_10:03:43
  host      : omnisky
  rank      : 3 (local_rank: 3)
  exitcode  : 1 (pid: 42677)
  error_file: <N/A>
  traceback : To enable traceback see: https://pytorch.org/docs/stable/elastic/errors.html
Root Cause (first observed failure):
[0]:
  time      : 2022-10-24_10:03:43
  host      : omnisky
  rank      : 0 (local_rank: 0)
  exitcode  : 1 (pid: 42674)
  error_file: <N/A>
  traceback : To enable traceback see: https://pytorch.org/docs/stable/elastic/errors.html
============================================================
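The `ImportError: ... mmcv/_ext.cpython-37m-x86_64-linux-gnu.so: undefined symbol: _Z13THCudaCheck9cudaErrorPKci` on every rank usually means the compiled `mmcv-full` extension (`mmcv._ext`) was built against a different PyTorch/CUDA combination than the one installed in the `open-mmlab` environment, since the `THCudaCheck` symbol changed across PyTorch releases. A minimal diagnostic sketch (assuming the same conda environment is active; the printed values below are only examples, not from this log):

import torch
import mmcv

# Versions of the currently installed packages.
print("torch      :", torch.__version__)   # e.g. "1.10.0"
print("torch CUDA :", torch.version.cuda)  # CUDA toolkit torch was built with
print("mmcv       :", mmcv.__version__)

try:
    # Same symbols the failing collect_env() call tries to import above.
    from mmcv.ops import get_compiling_cuda_version, get_compiler_version
    print("mmcv built with CUDA:", get_compiling_cuda_version())
    print("mmcv compiler       :", get_compiler_version())
except ImportError as err:
    # Reaching this branch reproduces the undefined-symbol failure: the _ext
    # binary does not match the installed torch build.
    print("mmcv._ext / torch mismatch:", err)

If the versions disagree (or the `mmcv.ops` import fails as in the traceback), reinstalling `mmcv-full` with a prebuilt wheel that matches the exact `torch.__version__` and `torch.version.cuda` reported above, as described in the MMCV installation docs, typically clears this error; this is an assumption based on the symptom, not something confirmed by the log itself.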