Closed: gianmarcoaversanoenx closed this issue 1 year ago.
@gianmarcoaversanoenx Where did you find this example? If there is a mistake, we can update it. Also please provide the full error message if possible. Thanks!
@awaelchli Thanks for your reply! This example was manually crafted by me, but can be run by anyone.
The error message is:
File ~/.pyenv/versions/3.8.13/envs/brainiac-2/lib/python3.8/site-packages/pytorch_lightning/trainer/trainer.py:696, in Trainer.fit(self, model, train_dataloaders, val_dataloaders, datamodule, ckpt_path)
677 r"""
678 Runs the full optimization routine.
679
(...)
693 datamodule: An instance of :class:`~pytorch_lightning.core.datamodule.LightningDataModule`.
694 """
695 self.strategy.model = model
--> 696 self._call_and_handle_interrupt(
697 self._fit_impl, model, train_dataloaders, val_dataloaders, datamodule, ckpt_path
698 )
File ~/.pyenv/versions/3.8.13/envs/brainiac-2/lib/python3.8/site-packages/pytorch_lightning/trainer/trainer.py:650, in Trainer._call_and_handle_interrupt(self, trainer_fn, *args, **kwargs)
648 return self.strategy.launcher.launch(trainer_fn, *args, trainer=self, **kwargs)
649 else:
--> 650 return trainer_fn(*args, **kwargs)
651 # TODO(awaelchli): Unify both exceptions below, where `KeyboardError` doesn't re-raise
652 except KeyboardInterrupt as exception:
File ~/.pyenv/versions/3.8.13/envs/brainiac-2/lib/python3.8/site-packages/pytorch_lightning/trainer/trainer.py:735, in Trainer._fit_impl(self, model, train_dataloaders, val_dataloaders, datamodule, ckpt_path)
731 ckpt_path = ckpt_path or self.resume_from_checkpoint
732 self._ckpt_path = self.__set_ckpt_path(
733 ckpt_path, model_provided=True, model_connected=self.lightning_module is not None
734 )
--> 735 results = self._run(model, ckpt_path=self.ckpt_path)
737 assert self.state.stopped
738 self.training = False
File ~/.pyenv/versions/3.8.13/envs/brainiac-2/lib/python3.8/site-packages/pytorch_lightning/trainer/trainer.py:1166, in Trainer._run(self, model, ckpt_path)
1162 self._checkpoint_connector.restore_training_state()
1164 self._checkpoint_connector.resume_end()
-> 1166 results = self._run_stage()
1168 log.detail(f"{self.__class__.__name__}: trainer tearing down")
1169 self._teardown()
File ~/.pyenv/versions/3.8.13/envs/brainiac-2/lib/python3.8/site-packages/pytorch_lightning/trainer/trainer.py:1252, in Trainer._run_stage(self)
1250 if self.predicting:
1251 return self._run_predict()
-> 1252 return self._run_train()
File ~/.pyenv/versions/3.8.13/envs/brainiac-2/lib/python3.8/site-packages/pytorch_lightning/trainer/trainer.py:1283, in Trainer._run_train(self)
1280 self.fit_loop.trainer = self
1282 with torch.autograd.set_detect_anomaly(self._detect_anomaly):
-> 1283 self.fit_loop.run()
File ~/.pyenv/versions/3.8.13/envs/brainiac-2/lib/python3.8/site-packages/pytorch_lightning/loops/loop.py:200, in Loop.run(self, *args, **kwargs)
198 try:
199 self.on_advance_start(*args, **kwargs)
--> 200 self.advance(*args, **kwargs)
201 self.on_advance_end()
202 self._restarting = False
File ~/.pyenv/versions/3.8.13/envs/brainiac-2/lib/python3.8/site-packages/pytorch_lightning/loops/fit_loop.py:271, in FitLoop.advance(self)
267 self._data_fetcher.setup(
268 dataloader, batch_to_device=partial(self.trainer._call_strategy_hook, "batch_to_device", dataloader_idx=0)
269 )
270 with self.trainer.profiler.profile("run_training_epoch"):
--> 271 self._outputs = self.epoch_loop.run(self._data_fetcher)
File ~/.pyenv/versions/3.8.13/envs/brainiac-2/lib/python3.8/site-packages/pytorch_lightning/loops/loop.py:200, in Loop.run(self, *args, **kwargs)
198 try:
199 self.on_advance_start(*args, **kwargs)
--> 200 self.advance(*args, **kwargs)
201 self.on_advance_end()
202 self._restarting = False
File ~/.pyenv/versions/3.8.13/envs/brainiac-2/lib/python3.8/site-packages/pytorch_lightning/loops/epoch/training_epoch_loop.py:203, in TrainingEpochLoop.advance(self, data_fetcher)
200 self.batch_progress.increment_started()
202 with self.trainer.profiler.profile("run_training_batch"):
--> 203 batch_output = self.batch_loop.run(kwargs)
205 self.batch_progress.increment_processed()
207 # update non-plateau LR schedulers
208 # update epoch-interval ones only when we are at the end of training epoch
File ~/.pyenv/versions/3.8.13/envs/brainiac-2/lib/python3.8/site-packages/pytorch_lightning/loops/loop.py:200, in Loop.run(self, *args, **kwargs)
198 try:
199 self.on_advance_start(*args, **kwargs)
--> 200 self.advance(*args, **kwargs)
201 self.on_advance_end()
202 self._restarting = False
File ~/.pyenv/versions/3.8.13/envs/brainiac-2/lib/python3.8/site-packages/pytorch_lightning/loops/batch/training_batch_loop.py:87, in TrainingBatchLoop.advance(self, kwargs)
83 if self.trainer.lightning_module.automatic_optimization:
84 optimizers = _get_active_optimizers(
85 self.trainer.optimizers, self.trainer.optimizer_frequencies, kwargs.get("batch_idx", 0)
86 )
---> 87 outputs = self.optimizer_loop.run(optimizers, kwargs)
88 else:
89 outputs = self.manual_loop.run(kwargs)
File ~/.pyenv/versions/3.8.13/envs/brainiac-2/lib/python3.8/site-packages/pytorch_lightning/loops/loop.py:200, in Loop.run(self, *args, **kwargs)
198 try:
199 self.on_advance_start(*args, **kwargs)
--> 200 self.advance(*args, **kwargs)
201 self.on_advance_end()
202 self._restarting = False
File ~/.pyenv/versions/3.8.13/envs/brainiac-2/lib/python3.8/site-packages/pytorch_lightning/loops/optimization/optimizer_loop.py:201, in OptimizerLoop.advance(self, optimizers, kwargs)
198 def advance(self, optimizers: List[Tuple[int, Optimizer]], kwargs: OrderedDict) -> None: # type: ignore[override]
199 kwargs = self._build_kwargs(kwargs, self.optimizer_idx, self._hiddens)
--> 201 result = self._run_optimization(kwargs, self._optimizers[self.optim_progress.optimizer_position])
202 if result.loss is not None:
203 # automatic optimization assumes a loss needs to be returned for extras to be considered as the batch
204 # would be skipped otherwise
205 self._outputs[self.optimizer_idx] = result.asdict()
File ~/.pyenv/versions/3.8.13/envs/brainiac-2/lib/python3.8/site-packages/pytorch_lightning/loops/optimization/optimizer_loop.py:248, in OptimizerLoop._run_optimization(self, kwargs, optimizer)
240 closure()
242 # ------------------------------
243 # BACKWARD PASS
244 # ------------------------------
245 # gradient update with accumulated gradients
246 else:
247 # the `batch_idx` is optional with inter-batch parallelism
--> 248 self._optimizer_step(optimizer, opt_idx, kwargs.get("batch_idx", 0), closure)
250 result = closure.consume_result()
252 if result.loss is not None:
253 # if no result, user decided to skip optimization
254 # otherwise update running loss + reset accumulated loss
255 # TODO: find proper way to handle updating running loss
File ~/.pyenv/versions/3.8.13/envs/brainiac-2/lib/python3.8/site-packages/pytorch_lightning/loops/optimization/optimizer_loop.py:358, in OptimizerLoop._optimizer_step(self, optimizer, opt_idx, batch_idx, train_step_and_backward_closure)
355 self.optim_progress.optimizer.step.increment_ready()
357 # model hook
--> 358 self.trainer._call_lightning_module_hook(
359 "optimizer_step",
360 self.trainer.current_epoch,
361 batch_idx,
362 optimizer,
363 opt_idx,
364 train_step_and_backward_closure,
365 on_tpu=isinstance(self.trainer.accelerator, TPUAccelerator),
366 using_native_amp=(self.trainer.amp_backend == AMPType.NATIVE),
367 using_lbfgs=is_lbfgs,
368 )
370 if not should_accumulate:
371 self.optim_progress.optimizer.step.increment_completed()
File ~/.pyenv/versions/3.8.13/envs/brainiac-2/lib/python3.8/site-packages/pytorch_lightning/trainer/trainer.py:1550, in Trainer._call_lightning_module_hook(self, hook_name, pl_module, *args, **kwargs)
1547 pl_module._current_fx_name = hook_name
1549 with self.profiler.profile(f"[LightningModule]{pl_module.__class__.__name__}.{hook_name}"):
-> 1550 output = fn(*args, **kwargs)
1552 # restore current_fx when nested context
1553 pl_module._current_fx_name = prev_fx_name
File ~/.pyenv/versions/3.8.13/envs/brainiac-2/lib/python3.8/site-packages/pytorch_lightning/core/module.py:1705, in LightningModule.optimizer_step(self, epoch, batch_idx, optimizer, optimizer_idx, optimizer_closure, on_tpu, using_native_amp, using_lbfgs)
1623 def optimizer_step(
1624 self,
1625 epoch: int,
(...)
1632 using_lbfgs: bool = False,
1633 ) -> None:
1634 r"""
1635 Override this method to adjust the default way the :class:`~pytorch_lightning.trainer.trainer.Trainer` calls
1636 each optimizer.
(...)
1703
1704 """
-> 1705 optimizer.step(closure=optimizer_closure)
File ~/.pyenv/versions/3.8.13/envs/brainiac-2/lib/python3.8/site-packages/pytorch_lightning/core/optimizer.py:168, in LightningOptimizer.step(self, closure, **kwargs)
165 raise MisconfigurationException("When `optimizer.step(closure)` is called, the closure should be callable")
167 assert self._strategy is not None
--> 168 step_output = self._strategy.optimizer_step(self._optimizer, self._optimizer_idx, closure, **kwargs)
170 self._on_after_step()
172 return step_output
File ~/.pyenv/versions/3.8.13/envs/brainiac-2/lib/python3.8/site-packages/pytorch_lightning/strategies/strategy.py:216, in Strategy.optimizer_step(self, optimizer, opt_idx, closure, model, **kwargs)
206 """Performs the actual optimizer step.
207
208 Args:
(...)
213 **kwargs: Any extra arguments to ``optimizer.step``
214 """
215 model = model or self.lightning_module
--> 216 return self.precision_plugin.optimizer_step(model, optimizer, opt_idx, closure, **kwargs)
File ~/.pyenv/versions/3.8.13/envs/brainiac-2/lib/python3.8/site-packages/pytorch_lightning/plugins/precision/precision_plugin.py:153, in PrecisionPlugin.optimizer_step(self, model, optimizer, optimizer_idx, closure, **kwargs)
151 if isinstance(model, pl.LightningModule):
152 closure = partial(self._wrap_closure, model, optimizer, optimizer_idx, closure)
--> 153 return optimizer.step(closure=closure, **kwargs)
File ~/.pyenv/versions/3.8.13/envs/brainiac-2/lib/python3.8/site-packages/opacus/optimizers/optimizer.py:507, in DPOptimizer.step(self, closure)
505 if closure is not None:
506 with torch.enable_grad():
--> 507 closure()
509 if self.pre_step():
510 return self.original_optimizer.step()
File ~/.pyenv/versions/3.8.13/envs/brainiac-2/lib/python3.8/site-packages/pytorch_lightning/plugins/precision/precision_plugin.py:138, in PrecisionPlugin._wrap_closure(self, model, optimizer, optimizer_idx, closure)
125 def _wrap_closure(
126 self,
127 model: "pl.LightningModule",
(...)
130 closure: Callable[[], Any],
131 ) -> Any:
132 """This double-closure allows makes sure the ``closure`` is executed before the
133 ``on_before_optimizer_step`` hook is called.
134
135 The closure (generally) runs ``backward`` so this allows inspecting gradients in this hook. This structure is
136 consistent with the ``PrecisionPlugin`` subclasses that cannot pass ``optimizer.step(closure)`` directly.
137 """
--> 138 closure_result = closure()
139 self._after_closure(model, optimizer, optimizer_idx)
140 return closure_result
File ~/.pyenv/versions/3.8.13/envs/brainiac-2/lib/python3.8/site-packages/pytorch_lightning/loops/optimization/optimizer_loop.py:146, in Closure.__call__(self, *args, **kwargs)
145 def __call__(self, *args: Any, **kwargs: Any) -> Optional[Tensor]:
--> 146 self._result = self.closure(*args, **kwargs)
147 return self._result.loss
File ~/.pyenv/versions/3.8.13/envs/brainiac-2/lib/python3.8/site-packages/pytorch_lightning/loops/optimization/optimizer_loop.py:141, in Closure.closure(self, *args, **kwargs)
138 self._zero_grad_fn()
140 if self._backward_fn is not None and step_output.closure_loss is not None:
--> 141 self._backward_fn(step_output.closure_loss)
143 return step_output
File ~/.pyenv/versions/3.8.13/envs/brainiac-2/lib/python3.8/site-packages/pytorch_lightning/loops/optimization/optimizer_loop.py:304, in OptimizerLoop._make_backward_fn.<locals>.backward_fn(loss)
303 def backward_fn(loss: Tensor) -> None:
--> 304 self.trainer._call_strategy_hook("backward", loss, optimizer, opt_idx)
File ~/.pyenv/versions/3.8.13/envs/brainiac-2/lib/python3.8/site-packages/pytorch_lightning/trainer/trainer.py:1704, in Trainer._call_strategy_hook(self, hook_name, *args, **kwargs)
1701 return
1703 with self.profiler.profile(f"[Strategy]{self.strategy.__class__.__name__}.{hook_name}"):
-> 1704 output = fn(*args, **kwargs)
1706 # restore current_fx when nested context
1707 pl_module._current_fx_name = prev_fx_name
File ~/.pyenv/versions/3.8.13/envs/brainiac-2/lib/python3.8/site-packages/pytorch_lightning/strategies/strategy.py:191, in Strategy.backward(self, closure_loss, optimizer, optimizer_idx, *args, **kwargs)
188 assert self.lightning_module is not None
189 closure_loss = self.precision_plugin.pre_backward(self.lightning_module, closure_loss)
--> 191 self.precision_plugin.backward(self.lightning_module, closure_loss, optimizer, optimizer_idx, *args, **kwargs)
193 closure_loss = self.precision_plugin.post_backward(self.lightning_module, closure_loss)
194 self.post_backward(closure_loss)
File ~/.pyenv/versions/3.8.13/envs/brainiac-2/lib/python3.8/site-packages/pytorch_lightning/plugins/precision/precision_plugin.py:80, in PrecisionPlugin.backward(self, model, closure_loss, optimizer, optimizer_idx, *args, **kwargs)
78 # do backward pass
79 if model is not None and isinstance(model, pl.LightningModule):
---> 80 model.backward(closure_loss, optimizer, optimizer_idx, *args, **kwargs)
81 else:
82 self._run_backward(closure_loss, *args, **kwargs)
File ~/.pyenv/versions/3.8.13/envs/brainiac-2/lib/python3.8/site-packages/pytorch_lightning/core/module.py:1450, in LightningModule.backward(self, loss, optimizer, optimizer_idx, *args, **kwargs)
1433 def backward(
1434 self, loss: Tensor, optimizer: Optional[Optimizer], optimizer_idx: Optional[int], *args, **kwargs
1435 ) -> None:
1436 """Called to perform backward on the loss returned in :meth:`training_step`. Override this hook with your
1437 own implementation if you need to.
1438
(...)
1448 loss.backward()
1449 """
-> 1450 loss.backward(*args, **kwargs)
File ~/.pyenv/versions/3.8.13/envs/brainiac-2/lib/python3.8/site-packages/torch/_tensor.py:396, in Tensor.backward(self, gradient, retain_graph, create_graph, inputs)
387 if has_torch_function_unary(self):
388 return handle_torch_function(
389 Tensor.backward,
390 (self,),
(...)
394 create_graph=create_graph,
395 inputs=inputs)
--> 396 torch.autograd.backward(self, gradient, retain_graph, create_graph, inputs=inputs)
File ~/.pyenv/versions/3.8.13/envs/brainiac-2/lib/python3.8/site-packages/torch/autograd/__init__.py:173, in backward(tensors, grad_tensors, retain_graph, create_graph, grad_variables, inputs)
168 retain_graph = create_graph
170 # The reason we repeat same the comment below is that
171 # some Python versions print out the first line of a multi-line function
172 # calls in the traceback and some print out the last line
--> 173 Variable._execution_engine.run_backward( # Calls into the C++ engine to run the backward pass
174 tensors, grad_tensors_, retain_graph, create_graph, inputs,
175 allow_unreachable=True, accumulate_grad=True)
File ~/.pyenv/versions/3.8.13/envs/brainiac-2/lib/python3.8/site-packages/opacus/grad_sample/grad_sample_module.py:310, in GradSampleModule.capture_backprops_hook(self, module, _forward_input, forward_output, loss_reduction, batch_first)
307 return
309 backprops = forward_output[0].detach()
--> 310 activations, backprops = self.rearrange_grad_samples(
311 module=module,
312 backprops=backprops,
313 loss_reduction=loss_reduction,
314 batch_first=batch_first,
315 )
316 if not self.force_functorch and type(module) in self.GRAD_SAMPLERS:
317 grad_sampler_fn = self.GRAD_SAMPLERS[type(module)]
File ~/.pyenv/versions/3.8.13/envs/brainiac-2/lib/python3.8/site-packages/opacus/grad_sample/grad_sample_module.py:358, in GradSampleModule.rearrange_grad_samples(self, module, backprops, loss_reduction, batch_first)
347 """
348 Rearrange activations and grad_samples based on loss reduction and batch dim
349
(...)
355 batch_first: True is batch dimension is first
356 """
357 if not hasattr(module, "activations"):
--> 358 raise ValueError(
359 f"No activations detected for {type(module)},"
360 " run forward after add_hooks(model)"
361 )
363 batch_dim = 0 if batch_first or type(module) is RNNLinear else 1
365 activations = module.activations.pop()
ValueError: No activations detected for <class 'torch.nn.modules.linear.Linear'>, run forward after add_hooks(model)
I think you should open an issue on Opacus, as there is not much to change on our side in this codebase...
I can successfully run my training script, but only when LitSampleConvNetClassifier is defined to use manual optimization, along the lines of the first sketch below.
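Roughly, the manual-optimization variant looks like this. This is only a minimal sketch of the pattern I mean, not my actual code (that is in the link at the end of this comment); the network, hyperparameters and the `make_private` arguments are placeholders:

```python
import torch
from torch import nn
import pytorch_lightning as pl
from opacus import PrivacyEngine


class LitSampleConvNetClassifier(pl.LightningModule):
    """Manual-optimization variant: this one trains fine for me."""

    def __init__(self, train_loader):
        super().__init__()
        self.automatic_optimization = False          # manual optimization
        # Placeholder architecture, not the real one
        self.model = nn.Sequential(
            nn.Conv2d(1, 8, kernel_size=3), nn.ReLU(),
            nn.Flatten(), nn.Linear(8 * 26 * 26, 10),
        )
        self.criterion = nn.CrossEntropyLoss()
        self.train_loader = train_loader             # needed by make_private
        self.privacy_engine = PrivacyEngine()

    def forward(self, x):
        return self.model(x)

    def training_step(self, batch, batch_idx):
        x, y = batch
        opt = self.optimizers()
        loss = self.criterion(self(x), y)
        opt.zero_grad()
        self.manual_backward(loss)                   # backward happens here ...
        opt.step()                                   # ... and step is called without a closure
        return loss

    def configure_optimizers(self):
        optimizer = torch.optim.SGD(self.model.parameters(), lr=0.1)
        # Wrap module / optimizer / dataloader with Opacus.
        # noise_multiplier and max_grad_norm are placeholder values.
        self.model, optimizer, self.train_loader = self.privacy_engine.make_private(
            module=self.model,
            optimizer=optimizer,
            data_loader=self.train_loader,
            noise_multiplier=1.0,
            max_grad_norm=1.0,
        )
        return optimizer
```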
If I change the class definition to use automatic optimization instead (second sketch below), I get the error above. Why? Regardless of the specific error, which is Opacus-related, shouldn't the two pieces of code be equivalent? The documentation says they are, but apparently they are not. This should be explained better in the documentation.
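The automatic-optimization variant, which triggers the traceback above, looks roughly like this. Again a sketch with the same placeholder model and Opacus arguments; the only intended difference is that Lightning, rather than my training_step, runs backward() and optimizer.step():

```python
import torch
from torch import nn
import pytorch_lightning as pl
from opacus import PrivacyEngine


class LitSampleConvNetClassifier(pl.LightningModule):
    """Automatic-optimization variant: this one raises the Opacus ValueError above."""

    def __init__(self, train_loader):
        super().__init__()
        # Automatic optimization is the default: Lightning calls
        # optimizer.step(closure=...) and the closure runs forward + backward.
        self.model = nn.Sequential(
            nn.Conv2d(1, 8, kernel_size=3), nn.ReLU(),
            nn.Flatten(), nn.Linear(8 * 26 * 26, 10),
        )
        self.criterion = nn.CrossEntropyLoss()
        self.train_loader = train_loader
        self.privacy_engine = PrivacyEngine()

    def forward(self, x):
        return self.model(x)

    def training_step(self, batch, batch_idx):
        x, y = batch
        loss = self.criterion(self(x), y)
        return loss                                  # Lightning runs backward() and step()

    def configure_optimizers(self):
        # Identical to the manual-optimization variant.
        optimizer = torch.optim.SGD(self.model.parameters(), lr=0.1)
        self.model, optimizer, self.train_loader = self.privacy_engine.make_private(
            module=self.model,
            optimizer=optimizer,
            data_loader=self.train_loader,
            noise_multiplier=1.0,
            max_grad_norm=1.0,
        )
        return optimizer
```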
For the complete code, check here
cc @borda