pesser / stable-diffusion

MIT License
1.03k stars 395 forks source link

When I fine-tune the SD model and set trainer(precision=16), an error occurs #25

Open zyx1213271098 opened 1 year ago

zyx1213271098 commented 1 year ago

Traceback (most recent call last):
  File "main.py", line 851, in <module>
    trainer.fit(model, data)
  File "/opt/conda/envs/ldm/lib/python3.8/site-packages/pytorch_lightning/trainer/trainer.py", line 553, in fit
    self._run(model)
  File "/opt/conda/envs/ldm/lib/python3.8/site-packages/pytorch_lightning/trainer/trainer.py", line 918, in _run
    self._dispatch()
  File "/opt/conda/envs/ldm/lib/python3.8/site-packages/pytorch_lightning/trainer/trainer.py", line 986, in _dispatch
    self.accelerator.start_training(self)
  File "/opt/conda/envs/ldm/lib/python3.8/site-packages/pytorch_lightning/accelerators/accelerator.py", line 92, in start_training
    self.training_type_plugin.start_training(trainer)
  File "/opt/conda/envs/ldm/lib/python3.8/site-packages/pytorch_lightning/plugins/training_type/training_type_plugin.py", line 161, in start_training
    self._results = trainer.run_stage()
  File "/opt/conda/envs/ldm/lib/python3.8/site-packages/pytorch_lightning/trainer/trainer.py", line 996, in run_stage
    return self._run_train()
  File "/opt/conda/envs/ldm/lib/python3.8/site-packages/pytorch_lightning/trainer/trainer.py", line 1045, in _run_train
    self.fit_loop.run()
  File "/opt/conda/envs/ldm/lib/python3.8/site-packages/pytorch_lightning/loops/base.py", line 111, in run
    self.advance(*args, **kwargs)
  File "/opt/conda/envs/ldm/lib/python3.8/site-packages/pytorch_lightning/loops/fit_loop.py", line 200, in advance
    epoch_output = self.epoch_loop.run(train_dataloader)
  File "/opt/conda/envs/ldm/lib/python3.8/site-packages/pytorch_lightning/loops/base.py", line 111, in run
    self.advance(*args, **kwargs)
  File "/opt/conda/envs/ldm/lib/python3.8/site-packages/pytorch_lightning/loops/epoch/training_epoch_loop.py", line 130, in advance
    batch_output = self.batch_loop.run(batch, self.iteration_count, self._dataloader_idx)
  File "/opt/conda/envs/ldm/lib/python3.8/site-packages/pytorch_lightning/loops/batch/training_batch_loop.py", line 101, in run
    super().run(batch, batch_idx, dataloader_idx)
  File "/opt/conda/envs/ldm/lib/python3.8/site-packages/pytorch_lightning/loops/base.py", line 111, in run
    self.advance(*args, **kwargs)
  File "/opt/conda/envs/ldm/lib/python3.8/site-packages/pytorch_lightning/loops/batch/training_batch_loop.py", line 148, in advance
    result = self._run_optimization(batch_idx, split_batch, opt_idx, optimizer)
  File "/opt/conda/envs/ldm/lib/python3.8/site-packages/pytorch_lightning/loops/batch/training_batch_loop.py", line 194, in _run_optimization
    closure()
  File "/opt/conda/envs/ldm/lib/python3.8/site-packages/pytorch_lightning/loops/batch/training_batch_loop.py", line 236, in _training_step_and_backward_closure
    result = self.training_step_and_backward(split_batch, batch_idx, opt_idx, optimizer, hiddens)
  File "/opt/conda/envs/ldm/lib/python3.8/site-packages/pytorch_lightning/loops/batch/training_batch_loop.py", line 549, in training_step_and_backward
    self.backward(result, optimizer, opt_idx)
  File "/opt/conda/envs/ldm/lib/python3.8/site-packages/pytorch_lightning/loops/batch/training_batch_loop.py", line 590, in backward
    result.closure_loss = self.trainer.accelerator.backward(result.closure_loss, optimizer, *args, **kwargs)
  File "/opt/conda/envs/ldm/lib/python3.8/site-packages/pytorch_lightning/accelerators/accelerator.py", line 276, in backward
    self.precision_plugin.backward(self.lightning_module, closure_loss, *args, **kwargs)
  File "/opt/conda/envs/ldm/lib/python3.8/site-packages/pytorch_lightning/plugins/precision/precision_plugin.py", line 78, in backward
    model.backward(closure_loss, optimizer, *args, **kwargs)
  File "/opt/conda/envs/ldm/lib/python3.8/site-packages/pytorch_lightning/core/lightning.py", line 1481, in backward
    loss.backward(*args, **kwargs)
  File "/opt/conda/envs/ldm/lib/python3.8/site-packages/torch/_tensor.py", line 363, in backward
    torch.autograd.backward(self, gradient, retain_graph, create_graph, inputs=inputs)
  File "/opt/conda/envs/ldm/lib/python3.8/site-packages/torch/autograd/__init__.py", line 173, in backward
    Variable._execution_engine.run_backward(  # Calls into the C++ engine to run the backward pass
  File "/opt/conda/envs/ldm/lib/python3.8/site-packages/torch/autograd/function.py", line 253, in apply
    return user_fn(self, *args)
  File "/root/data/juicefs_hz_cv_v3/11120102/project/generative-model/pesser-stable-diffusion/ldm/modules/diffusionmodules/util.py", line 138, in backward
    output_tensors = ctx.run_function(*shallow_copies)
  File "/root/data/juicefs_hz_cv_v3/11120102/project/generative-model/pesser-stable-diffusion/ldm/modules/attention.py", line 215, in _forward
    x = self.attn1(self.norm1(x), context=context if self.disable_self_attn else None) + x
  File "/opt/conda/envs/ldm/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1110, in _call_impl
    return forward_call(*input, **kwargs)
  File "/opt/conda/envs/ldm/lib/python3.8/site-packages/torch/nn/modules/normalization.py", line 189, in forward
    return F.layer_norm(
  File "/opt/conda/envs/ldm/lib/python3.8/site-packages/torch/nn/functional.py", line 2486, in layer_norm
    return torch.layer_norm(input, normalized_shape, weight, bias, eps, torch.backends.cudnn.enabled)
RuntimeError: expected scalar type Half but found Float