File "train.py", line 30, in <module>
trainer_defaults={"plugins": DDPPlugin(find_unused_parameters=True)},
File "/root/miniconda3/envs/HMER/lib/python3.7/site-packages/pytorch_lightning/utilities/cli.py", line 289, in __init__
self.fit()
File "/root/miniconda3/envs/HMER/lib/python3.7/site-packages/pytorch_lightning/utilities/cli.py", line 432, in fit
self.trainer.fit(**self.fit_kwargs)
File "/root/miniconda3/envs/HMER/lib/python3.7/site-packages/pytorch_lightning/trainer/trainer.py", line 552, in fit
self._run(model)
File "/root/miniconda3/envs/HMER/lib/python3.7/site-packages/pytorch_lightning/trainer/trainer.py", line 922, in _run
self._dispatch()
File "/root/miniconda3/envs/HMER/lib/python3.7/site-packages/pytorch_lightning/trainer/trainer.py", line 990, in _dispatch
self.accelerator.start_training(self)
File "/root/miniconda3/envs/HMER/lib/python3.7/site-packages/pytorch_lightning/accelerators/accelerator.py", line 92, in start_training
self.training_type_plugin.start_training(trainer)
File "/root/miniconda3/envs/HMER/lib/python3.7/site-packages/pytorch_lightning/plugins/training_type/training_type_plugin.py", line 161, in start_training
self._results = trainer.run_stage()
File "/root/miniconda3/envs/HMER/lib/python3.7/site-packages/pytorch_lightning/trainer/trainer.py", line 1000, in run_stage
return self._run_train()
File "/root/miniconda3/envs/HMER/lib/python3.7/site-packages/pytorch_lightning/trainer/trainer.py", line 1049, in _run_train
self.fit_loop.run()
File "/root/miniconda3/envs/HMER/lib/python3.7/site-packages/pytorch_lightning/loops/base.py", line 111, in run
self.advance(*args, **kwargs)
File "/root/miniconda3/envs/HMER/lib/python3.7/site-packages/pytorch_lightning/loops/fit_loop.py", line 200, in advance
epoch_output = self.epoch_loop.run(train_dataloader)
File "/root/miniconda3/envs/HMER/lib/python3.7/site-packages/pytorch_lightning/loops/base.py", line 111, in run
self.advance(*args, **kwargs)
File "/root/miniconda3/envs/HMER/lib/python3.7/site-packages/pytorch_lightning/loops/epoch/training_epoch_loop.py", line 130, in advance
batch_output = self.batch_loop.run(batch, self.iteration_count, self._dataloader_idx)
File "/root/miniconda3/envs/HMER/lib/python3.7/site-packages/pytorch_lightning/loops/batch/training_batch_loop.py", line 100, in run
super().run(batch, batch_idx, dataloader_idx)
File "/root/miniconda3/envs/HMER/lib/python3.7/site-packages/pytorch_lightning/loops/base.py", line 111, in run
self.advance(*args, **kwargs)
File "/root/miniconda3/envs/HMER/lib/python3.7/site-packages/pytorch_lightning/loops/batch/training_batch_loop.py", line 147, in advance
result = self._run_optimization(batch_idx, split_batch, opt_idx, optimizer)
File "/root/miniconda3/envs/HMER/lib/python3.7/site-packages/pytorch_lightning/loops/batch/training_batch_loop.py", line 201, in _run_optimization
self._optimizer_step(optimizer, opt_idx, batch_idx, closure)
File "/root/miniconda3/envs/HMER/lib/python3.7/site-packages/pytorch_lightning/loops/batch/training_batch_loop.py", line 403, in _optimizer_step
using_lbfgs=is_lbfgs,
File "/root/miniconda3/envs/HMER/lib/python3.7/site-packages/pytorch_lightning/core/lightning.py", line 1616, in optimizer_step
optimizer.step(closure=optimizer_closure)
File "/root/miniconda3/envs/HMER/lib/python3.7/site-packages/pytorch_lightning/core/optimizer.py", line 206, in step
self.__optimizer_step(closure=closure, profiler_name=profiler_name, **kwargs)
File "/root/miniconda3/envs/HMER/lib/python3.7/site-packages/pytorch_lightning/core/optimizer.py", line 128, in __optimizer_step
trainer.accelerator.optimizer_step(self._optimizer, self._optimizer_idx, lambda_closure=closure, **kwargs)
File "/root/miniconda3/envs/HMER/lib/python3.7/site-packages/pytorch_lightning/accelerators/accelerator.py", line 296, in optimizer_step
self.run_optimizer_step(optimizer, opt_idx, lambda_closure, **kwargs)
File "/root/miniconda3/envs/HMER/lib/python3.7/site-packages/pytorch_lightning/accelerators/accelerator.py", line 303, in run_optimizer_step
self.training_type_plugin.optimizer_step(optimizer, lambda_closure=lambda_closure, **kwargs)
File "/root/miniconda3/envs/HMER/lib/python3.7/site-packages/pytorch_lightning/plugins/training_type/training_type_plugin.py", line 226, in optimizer_step
optimizer.step(closure=lambda_closure, **kwargs)
File "/root/miniconda3/envs/HMER/lib/python3.7/site-packages/torch/optim/optimizer.py", line 89, in wrapper
return func(*args, **kwargs)
File "/root/miniconda3/envs/HMER/lib/python3.7/site-packages/torch/autograd/grad_mode.py", line 27, in decorate_context
return func(*args, **kwargs)
File "/root/miniconda3/envs/HMER/lib/python3.7/site-packages/torch/optim/sgd.py", line 87, in step
loss = closure()
File "/root/miniconda3/envs/HMER/lib/python3.7/site-packages/pytorch_lightning/loops/batch/training_batch_loop.py", line 235, in _training_step_and_backward_closure
result = self.training_step_and_backward(split_batch, batch_idx, opt_idx, optimizer, hiddens)
File "/root/miniconda3/envs/HMER/lib/python3.7/site-packages/pytorch_lightning/loops/batch/training_batch_loop.py", line 548, in training_step_and_backward
self.backward(result, optimizer, opt_idx)
File "/root/miniconda3/envs/HMER/lib/python3.7/site-packages/pytorch_lightning/loops/batch/training_batch_loop.py", line 589, in backward
result.closure_loss = self.trainer.accelerator.backward(result.closure_loss, optimizer, *args, **kwargs)
File "/root/miniconda3/envs/HMER/lib/python3.7/site-packages/pytorch_lightning/accelerators/accelerator.py", line 276, in backward
self.precision_plugin.backward(self.lightning_module, closure_loss, optimizer, *args, **kwargs)
File "/root/miniconda3/envs/HMER/lib/python3.7/site-packages/pytorch_lightning/plugins/precision/precision_plugin.py", line 78, in backward
model.backward(closure_loss, optimizer, *args, **kwargs)
File "/root/miniconda3/envs/HMER/lib/python3.7/site-packages/pytorch_lightning/core/lightning.py", line 1479, in backward
loss.backward(*args, **kwargs)
File "/root/miniconda3/envs/HMER/lib/python3.7/site-packages/torch/tensor.py", line 245, in backward
torch.autograd.backward(self, gradient, retain_graph, create_graph, inputs=inputs)
File "/root/miniconda3/envs/HMER/lib/python3.7/site-packages/torch/autograd/__init__.py", line 147, in backward
allow_unreachable=True, accumulate_grad=True) # allow_unreachable flag
RuntimeError: CUDA error: an illegal memory access was encountered
File "train.py", line 30, in <module>
trainer_defaults={"plugins": DDPPlugin(find_unused_parameters=True)},
File "/root/miniconda3/envs/HMER/lib/python3.7/site-packages/pytorch_lightning/utilities/cli.py", line 289, in __init__
self.fit()
File "/root/miniconda3/envs/HMER/lib/python3.7/site-packages/pytorch_lightning/utilities/cli.py", line 432, in fit
self.trainer.fit(**self.fit_kwargs)
File "/root/miniconda3/envs/HMER/lib/python3.7/site-packages/pytorch_lightning/trainer/trainer.py", line 552, in fit
self._run(model)
File "/root/miniconda3/envs/HMER/lib/python3.7/site-packages/pytorch_lightning/trainer/trainer.py", line 922, in _run
self._dispatch()
File "/root/miniconda3/envs/HMER/lib/python3.7/site-packages/pytorch_lightning/trainer/trainer.py", line 990, in _dispatch
self.accelerator.start_training(self)
File "/root/miniconda3/envs/HMER/lib/python3.7/site-packages/pytorch_lightning/accelerators/accelerator.py", line 92, in start_training
self.training_type_plugin.start_training(trainer)
File "/root/miniconda3/envs/HMER/lib/python3.7/site-packages/pytorch_lightning/plugins/training_type/training_type_plugin.py", line 161, in start_training
self._results = trainer.run_stage()
File "/root/miniconda3/envs/HMER/lib/python3.7/site-packages/pytorch_lightning/trainer/trainer.py", line 1000, in run_stage
return self._run_train()
File "/root/miniconda3/envs/HMER/lib/python3.7/site-packages/pytorch_lightning/trainer/trainer.py", line 1049, in _run_train
self.fit_loop.run()
File "/root/miniconda3/envs/HMER/lib/python3.7/site-packages/pytorch_lightning/loops/base.py", line 111, in run
self.advance(*args, **kwargs)
File "/root/miniconda3/envs/HMER/lib/python3.7/site-packages/pytorch_lightning/loops/fit_loop.py", line 200, in advance
epoch_output = self.epoch_loop.run(train_dataloader)
File "/root/miniconda3/envs/HMER/lib/python3.7/site-packages/pytorch_lightning/loops/base.py", line 111, in run
self.advance(*args, **kwargs)
File "/root/miniconda3/envs/HMER/lib/python3.7/site-packages/pytorch_lightning/loops/epoch/training_epoch_loop.py", line 130, in advance
batch_output = self.batch_loop.run(batch, self.iteration_count, self._dataloader_idx)
File "/root/miniconda3/envs/HMER/lib/python3.7/site-packages/pytorch_lightning/loops/batch/training_batch_loop.py", line 100, in run
super().run(batch, batch_idx, dataloader_idx)
File "/root/miniconda3/envs/HMER/lib/python3.7/site-packages/pytorch_lightning/loops/base.py", line 111, in run
self.advance(*args, **kwargs)
File "/root/miniconda3/envs/HMER/lib/python3.7/site-packages/pytorch_lightning/loops/batch/training_batch_loop.py", line 147, in advance
result = self._run_optimization(batch_idx, split_batch, opt_idx, optimizer)
File "/root/miniconda3/envs/HMER/lib/python3.7/site-packages/pytorch_lightning/loops/batch/training_batch_loop.py", line 201, in _run_optimization
self._optimizer_step(optimizer, opt_idx, batch_idx, closure)
File "/root/miniconda3/envs/HMER/lib/python3.7/site-packages/pytorch_lightning/loops/batch/training_batch_loop.py", line 403, in _optimizer_step
using_lbfgs=is_lbfgs,
File "/root/miniconda3/envs/HMER/lib/python3.7/site-packages/pytorch_lightning/core/lightning.py", line 1616, in optimizer_step
optimizer.step(closure=optimizer_closure)
File "/root/miniconda3/envs/HMER/lib/python3.7/site-packages/pytorch_lightning/core/optimizer.py", line 206, in step
self.__optimizer_step(closure=closure, profiler_name=profiler_name, **kwargs)
File "/root/miniconda3/envs/HMER/lib/python3.7/site-packages/pytorch_lightning/core/optimizer.py", line 128, in __optimizer_step
trainer.accelerator.optimizer_step(self._optimizer, self._optimizer_idx, lambda_closure=closure, **kwargs)
File "/root/miniconda3/envs/HMER/lib/python3.7/site-packages/pytorch_lightning/accelerators/accelerator.py", line 296, in optimizer_step
self.run_optimizer_step(optimizer, opt_idx, lambda_closure, **kwargs)
File "/root/miniconda3/envs/HMER/lib/python3.7/site-packages/pytorch_lightning/accelerators/accelerator.py", line 303, in run_optimizer_step
self.training_type_plugin.optimizer_step(optimizer, lambda_closure=lambda_closure, **kwargs)
File "/root/miniconda3/envs/HMER/lib/python3.7/site-packages/pytorch_lightning/plugins/training_type/training_type_plugin.py", line 226, in optimizer_step
optimizer.step(closure=lambda_closure, **kwargs)
File "/root/miniconda3/envs/HMER/lib/python3.7/site-packages/torch/optim/optimizer.py", line 89, in wrapper
return func(*args, **kwargs)
File "/root/miniconda3/envs/HMER/lib/python3.7/site-packages/torch/autograd/grad_mode.py", line 27, in decorate_context
return func(*args, **kwargs)
File "/root/miniconda3/envs/HMER/lib/python3.7/site-packages/torch/optim/sgd.py", line 87, in step
loss = closure()
File "/root/miniconda3/envs/HMER/lib/python3.7/site-packages/pytorch_lightning/loops/batch/training_batch_loop.py", line 235, in _training_step_and_backward_closure
result = self.training_step_and_backward(split_batch, batch_idx, opt_idx, optimizer, hiddens)
File "/root/miniconda3/envs/HMER/lib/python3.7/site-packages/pytorch_lightning/loops/batch/training_batch_loop.py", line 548, in training_step_and_backward
self.backward(result, optimizer, opt_idx)
File "/root/miniconda3/envs/HMER/lib/python3.7/site-packages/pytorch_lightning/loops/batch/training_batch_loop.py", line 589, in backward
result.closure_loss = self.trainer.accelerator.backward(result.closure_loss, optimizer, *args, **kwargs)
File "/root/miniconda3/envs/HMER/lib/python3.7/site-packages/pytorch_lightning/accelerators/accelerator.py", line 276, in backward
self.precision_plugin.backward(self.lightning_module, closure_loss, optimizer, *args, **kwargs)
File "/root/miniconda3/envs/HMER/lib/python3.7/site-packages/pytorch_lightning/plugins/precision/precision_plugin.py", line 78, in backward
model.backward(closure_loss, optimizer, *args, **kwargs)
File "/root/miniconda3/envs/HMER/lib/python3.7/site-packages/pytorch_lightning/core/lightning.py", line 1479, in backward
loss.backward(*args, **kwargs)
File "/root/miniconda3/envs/HMER/lib/python3.7/site-packages/torch/tensor.py", line 245, in backward
torch.autograd.backward(self, gradient, retain_graph, create_graph, inputs=inputs)
File "/root/miniconda3/envs/HMER/lib/python3.7/site-packages/torch/autograd/__init__.py", line 147, in backward
allow_unreachable=True, accumulate_grad=True) # allow_unreachable flag
RuntimeError: CUDA error: an illegal memory access was encountered
报错内容是这样的，请问是什么原因呢？