Traceback (most recent call last):
File "./tools/train.py", line 166, in <module>
main()
File "./tools/train.py", line 155, in main
train_model(
File "/mnt/DATA/AI/project/Lite-HRNet-hrnet/venv/lib/python3.8/site-packages/mmpose/apis/train.py", line 200, in train_model
runner.run(data_loaders, cfg.workflow, cfg.total_epochs)
File "/mnt/DATA/AI/project/Lite-HRNet-hrnet/venv/lib/python3.8/site-packages/mmcv/runner/epoch_based_runner.py", line 127, in run
epoch_runner(data_loaders[i], **kwargs)
File "/mnt/DATA/AI/project/Lite-HRNet-hrnet/venv/lib/python3.8/site-packages/mmcv/runner/epoch_based_runner.py", line 50, in train
self.run_iter(data_batch, train_mode=True, **kwargs)
File "/mnt/DATA/AI/project/Lite-HRNet-hrnet/venv/lib/python3.8/site-packages/mmcv/runner/epoch_based_runner.py", line 29, in run_iter
outputs = self.model.train_step(data_batch, self.optimizer,
File "/mnt/DATA/AI/project/Lite-HRNet-hrnet/venv/lib/python3.8/site-packages/mmcv/parallel/distributed.py", line 48, in train_step
self._sync_params()
File "/mnt/DATA/AI/project/Lite-HRNet-hrnet/venv/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1185, in __getattr__
raise AttributeError("'{}' object has no attribute '{}'".format(
AttributeError: 'MMDistributedDataParallel' object has no attribute '_sync_params'
ERROR:torch.distributed.elastic.multiprocessing.api:failed (exitcode: 1) local_rank: 0 (pid: 19468) of binary: /mnt/DATA/AI/project/Lite-HRNet-hrnet/venv/bin/python
Traceback (most recent call last):
File "/usr/lib/python3.8/runpy.py", line 194, in _run_module_as_main
return _run_code(code, main_globals, None,
File "/usr/lib/python3.8/runpy.py", line 87, in _run_code
exec(code, run_globals)
File "/mnt/DATA/AI/project/Lite-HRNet-hrnet/venv/lib/python3.8/site-packages/torch/distributed/launch.py", line 193, in <module>
main()
File "/mnt/DATA/AI/project/Lite-HRNet-hrnet/venv/lib/python3.8/site-packages/torch/distributed/launch.py", line 189, in main
launch(args)
File "/mnt/DATA/AI/project/Lite-HRNet-hrnet/venv/lib/python3.8/site-packages/torch/distributed/launch.py", line 174, in launch
run(args)
File "/mnt/DATA/AI/project/Lite-HRNet-hrnet/venv/lib/python3.8/site-packages/torch/distributed/run.py", line 715, in run
elastic_launch(
File "/mnt/DATA/AI/project/Lite-HRNet-hrnet/venv/lib/python3.8/site-packages/torch/distributed/launcher/api.py", line 131, in __call__
return launch_agent(self._config, self._entrypoint, list(args))
File "/mnt/DATA/AI/project/Lite-HRNet-hrnet/venv/lib/python3.8/site-packages/torch/distributed/launcher/api.py", line 245, in launch_agent
raise ChildFailedError(
torch.distributed.elastic.multiprocessing.errors.ChildFailedError:
Traceback (most recent call last):
File "./tools/train.py", line 166, in <module>
main()
File "./tools/train.py", line 155, in main
train_model(
File "/mnt/DATA/AI/project/Lite-HRNet-hrnet/venv/lib/python3.8/site-packages/mmpose/apis/train.py", line 200, in train_model
runner.run(data_loaders, cfg.workflow, cfg.total_epochs)
File "/mnt/DATA/AI/project/Lite-HRNet-hrnet/venv/lib/python3.8/site-packages/mmcv/runner/epoch_based_runner.py", line 127, in run
epoch_runner(data_loaders[i], **kwargs)
File "/mnt/DATA/AI/project/Lite-HRNet-hrnet/venv/lib/python3.8/site-packages/mmcv/runner/epoch_based_runner.py", line 50, in train
self.run_iter(data_batch, train_mode=True, **kwargs)
File "/mnt/DATA/AI/project/Lite-HRNet-hrnet/venv/lib/python3.8/site-packages/mmcv/runner/epoch_based_runner.py", line 29, in run_iter
outputs = self.model.train_step(data_batch, self.optimizer,
File "/mnt/DATA/AI/project/Lite-HRNet-hrnet/venv/lib/python3.8/site-packages/mmcv/parallel/distributed.py", line 48, in train_step
self._sync_params()
File "/mnt/DATA/AI/project/Lite-HRNet-hrnet/venv/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1185, in __getattr__
raise AttributeError("'{}' object has no attribute '{}'".format(
AttributeError: 'MMDistributedDataParallel' object has no attribute '_sync_params'
ERROR:torch.distributed.elastic.multiprocessing.api:failed (exitcode: 1) local_rank: 0 (pid: 19468) of binary: /mnt/DATA/AI/project/Lite-HRNet-hrnet/venv/bin/python
Traceback (most recent call last):
File "/usr/lib/python3.8/runpy.py", line 194, in _run_module_as_main
return _run_code(code, main_globals, None,
File "/usr/lib/python3.8/runpy.py", line 87, in _run_code
exec(code, run_globals)
File "/mnt/DATA/AI/project/Lite-HRNet-hrnet/venv/lib/python3.8/site-packages/torch/distributed/launch.py", line 193, in <module>
main()
File "/mnt/DATA/AI/project/Lite-HRNet-hrnet/venv/lib/python3.8/site-packages/torch/distributed/launch.py", line 189, in main
launch(args)
File "/mnt/DATA/AI/project/Lite-HRNet-hrnet/venv/lib/python3.8/site-packages/torch/distributed/launch.py", line 174, in launch
run(args)
File "/mnt/DATA/AI/project/Lite-HRNet-hrnet/venv/lib/python3.8/site-packages/torch/distributed/run.py", line 715, in run
elastic_launch(
File "/mnt/DATA/AI/project/Lite-HRNet-hrnet/venv/lib/python3.8/site-packages/torch/distributed/launcher/api.py", line 131, in __call__
return launch_agent(self._config, self._entrypoint, list(args))
File "/mnt/DATA/AI/project/Lite-HRNet-hrnet/venv/lib/python3.8/site-packages/torch/distributed/launcher/api.py", line 245, in launch_agent
raise ChildFailedError(
torch.distributed.elastic.multiprocessing.errors.ChildFailedError:
./tools/train.py FAILED
How can I fix this `AttributeError: 'MMDistributedDataParallel' object has no attribute '_sync_params'` error, raised from mmcv's `train_step` when launching distributed training of Lite-HRNet with `torch.distributed.launch`? (It looks like an mmcv/PyTorch version mismatch: mmcv's `MMDistributedDataParallel.train_step` calls `self._sync_params()`, which newer PyTorch releases renamed to `_sync_buffers` — so presumably either mmcv must be upgraded to a version compatible with the installed PyTorch, or PyTorch downgraded; please confirm which versions are compatible.)