08/24 14:46:31 - mmengine - INFO - paramwise_options -- neck.decoder_pred.bias:weight_decay=0.0
08/24 14:46:31 - mmengine - INFO - paramwise_options -- neck.decoder_pred.bias:decay_mult=0.0
Traceback (most recent call last):
File "./tools/train.py", line 159, in <module>
main()
File "./tools/train.py", line 155, in main
runner.train()
File "/opt/conda/envs/bossvision/lib/python3.8/site-packages/mmengine/runner/runner.py", line 1708, in train
self.optim_wrapper = self.build_optim_wrapper(self.optim_wrapper)
File "/opt/conda/envs/bossvision/lib/python3.8/site-packages/mmengine/runner/runner.py", line 1097, in build_optim_wrapper
return build_optim_wrapper(self.model, optim_wrapper)
File "/opt/conda/envs/bossvision/lib/python3.8/site-packages/mmengine/optim/optimizer/builder.py", line 201, in build_optim_wrapper
optim_wrapper = optim_wrapper_constructor(model)
File "/opt/conda/envs/bossvision/lib/python3.8/site-packages/mmengine/optim/optimizer/default_constructor.py", line 306, in __call__
optim_wrapper = OPTIM_WRAPPERS.build(
File "/opt/conda/envs/bossvision/lib/python3.8/site-packages/mmengine/registry/registry.py", line 570, in build
return self.build_func(cfg, *args, **kwargs, registry=self)
File "/opt/conda/envs/bossvision/lib/python3.8/site-packages/mmengine/registry/build_functions.py", line 121, in build_from_cfg
obj = obj_cls(**args) # type: ignore
TypeError: __init__() got an unexpected keyword argument 'loss_scale'
Traceback (most recent call last):
File "./tools/train.py", line 159, in <module>
main()
File "./tools/train.py", line 155, in main
runner.train()
File "/opt/conda/envs/bossvision/lib/python3.8/site-packages/mmengine/runner/runner.py", line 1708, in train
self.optim_wrapper = self.build_optim_wrapper(self.optim_wrapper)
File "/opt/conda/envs/bossvision/lib/python3.8/site-packages/mmengine/runner/runner.py", line 1097, in build_optim_wrapper
return build_optim_wrapper(self.model, optim_wrapper)
File "/opt/conda/envs/bossvision/lib/python3.8/site-packages/mmengine/optim/optimizer/builder.py", line 201, in build_optim_wrapper
optim_wrapper = optim_wrapper_constructor(model)
File "/opt/conda/envs/bossvision/lib/python3.8/site-packages/mmengine/optim/optimizer/default_constructor.py", line 306, in __call__
optim_wrapper = OPTIM_WRAPPERS.build(
File "/opt/conda/envs/bossvision/lib/python3.8/site-packages/mmengine/registry/registry.py", line 570, in build
return self.build_func(cfg, *args, **kwargs, registry=self)
File "/opt/conda/envs/bossvision/lib/python3.8/site-packages/mmengine/registry/build_functions.py", line 121, in build_from_cfg
obj = obj_cls(**args) # type: ignore
TypeError: __init__() got an unexpected keyword argument 'loss_scale'
ERROR:torch.distributed.elastic.multiprocessing.api:failed (exitcode: 1) local_rank: 0 (pid: 33379) of binary: /opt/conda/envs/bossvision/bin/python
Traceback (most recent call last):
File "/opt/conda/envs/bossvision/lib/python3.8/runpy.py", line 194, in _run_module_as_main
return _run_code(code, main_globals, None,
File "/opt/conda/envs/bossvision/lib/python3.8/runpy.py", line 87, in _run_code
exec(code, run_globals)
File "/opt/conda/envs/bossvision/lib/python3.8/site-packages/torch/distributed/launch.py", line 195, in <module>
main()
File "/opt/conda/envs/bossvision/lib/python3.8/site-packages/torch/distributed/launch.py", line 191, in main
launch(args)
File "/opt/conda/envs/bossvision/lib/python3.8/site-packages/torch/distributed/launch.py", line 176, in launch
run(args)
File "/opt/conda/envs/bossvision/lib/python3.8/site-packages/torch/distributed/run.py", line 753, in run
elastic_launch(
File "/opt/conda/envs/bossvision/lib/python3.8/site-packages/torch/distributed/launcher/api.py", line 132, in __call__
return launch_agent(self._config, self._entrypoint, list(args))
File "/opt/conda/envs/bossvision/lib/python3.8/site-packages/torch/distributed/launcher/api.py", line 246, in launch_agent
raise ChildFailedError(
torch.distributed.elastic.multiprocessing.errors.ChildFailedError:
============================================================
./tools/train.py FAILED
------------------------------------------------------------
分支
main 分支 (mmpretrain 版本)
描述该错误
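使用配置文件 vit-huge-p14_8xb128-ds-coslr-50e_in1k.py 通过 ./tools/train.py 启动训练时,在构建 optim_wrapper 阶段报错:TypeError: __init__() got an unexpected keyword argument 'loss_scale'(完整日志见上)。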
环境信息
{'sys.platform': 'linux', 'Python': '3.8.8 (default, Feb 24 2021, 21:46:12) [GCC 7.3.0]', 'CUDA available': True, 'numpy_random_seed': 2147483648, 'GPU 0,1': 'NVIDIA GeForce RTX 3090', 'CUDA_HOME': '/usr/local/cuda', 'NVCC': 'Cuda compilation tools, release 11.7, V11.7.99', 'GCC': 'gcc (Ubuntu 9.4.0-1ubuntu1~20.04.1) 9.4.0', 'PyTorch': '1.13.1+cu117', 'TorchVision': '0.14.1+cu117', 'OpenCV': '4.2.0', 'MMEngine': '0.8.4', 'MMCV': '2.0.1', 'MMPreTrain': '1.0.2+HEAD'}
DeepSpeed 0.10.1
其他信息
No response