Closed enryH closed 10 months ago
One more thing I noticed: your git history is ~4 GB in size, which probably means some old data is still in the history...
I'll now try a local installation. Is your PyPI package up to date?
So yes, that was indeed the problem. In shap_example.ipynb,
I now get the following error when calling trainer.fit:
---------------------------------------------------------------------------
RuntimeError Traceback (most recent call last)
c:\Users\user\Documents\repos\BINN\docs\shap_example.ipynb Cell 2 line 1
14 dataloader = torch.utils.data.DataLoader(dataset, batch_size=8, shuffle=True)
16 trainer = Trainer(max_epochs=3, log_every_n_steps=10)
---> 17 trainer.fit(binn, dataloader)
File c:\Users\user\Anaconda3\envs\binn\Lib\site-packages\lightning\pytorch\trainer\trainer.py:544, in Trainer.fit(self, model, train_dataloaders, val_dataloaders, datamodule, ckpt_path)
542 self.state.status = TrainerStatus.RUNNING
543 self.training = True
--> 544 call._call_and_handle_interrupt(
545 self, self._fit_impl, model, train_dataloaders, val_dataloaders, datamodule, ckpt_path
546 )
File c:\Users\user\Anaconda3\envs\binn\Lib\site-packages\lightning\pytorch\trainer\call.py:44, in _call_and_handle_interrupt(trainer, trainer_fn, *args, **kwargs)
42 if trainer.strategy.launcher is not None:
43 return trainer.strategy.launcher.launch(trainer_fn, *args, trainer=trainer, **kwargs)
---> 44 return trainer_fn(*args, **kwargs)
46 except _TunerExitException:
47 _call_teardown_hook(trainer)
File c:\Users\user\Anaconda3\envs\binn\Lib\site-packages\lightning\pytorch\trainer\trainer.py:580, in Trainer._fit_impl(self, model, train_dataloaders, val_dataloaders, datamodule, ckpt_path)
573 assert self.state.fn is not None
574 ckpt_path = self._checkpoint_connector._select_ckpt_path(
575 self.state.fn,
576 ckpt_path,
577 model_provided=True,
578 model_connected=self.lightning_module is not None,
579 )
--> 580 self._run(model, ckpt_path=ckpt_path)
582 assert self.state.stopped
583 self.training = False
File c:\Users\user\Anaconda3\envs\binn\Lib\site-packages\lightning\pytorch\trainer\trainer.py:989, in Trainer._run(self, model, ckpt_path)
984 self._signal_connector.register_signal_handlers()
986 # ----------------------------
987 # RUN THE TRAINER
988 # ----------------------------
--> 989 results = self._run_stage()
991 # ----------------------------
992 # POST-Training CLEAN UP
993 # ----------------------------
994 log.debug(f"{self.__class__.__name__}: trainer tearing down")
File c:\Users\user\Anaconda3\envs\binn\Lib\site-packages\lightning\pytorch\trainer\trainer.py:1035, in Trainer._run_stage(self)
1033 self._run_sanity_check()
1034 with torch.autograd.set_detect_anomaly(self._detect_anomaly):
-> 1035 self.fit_loop.run()
1036 return None
1037 raise RuntimeError(f"Unexpected state {self.state}")
File c:\Users\user\Anaconda3\envs\binn\Lib\site-packages\lightning\pytorch\loops\fit_loop.py:202, in _FitLoop.run(self)
200 try:
201 self.on_advance_start()
--> 202 self.advance()
203 self.on_advance_end()
204 self._restarting = False
File c:\Users\user\Anaconda3\envs\binn\Lib\site-packages\lightning\pytorch\loops\fit_loop.py:359, in _FitLoop.advance(self)
357 with self.trainer.profiler.profile("run_training_epoch"):
358 assert self._data_fetcher is not None
--> 359 self.epoch_loop.run(self._data_fetcher)
File c:\Users\user\Anaconda3\envs\binn\Lib\site-packages\lightning\pytorch\loops\training_epoch_loop.py:136, in _TrainingEpochLoop.run(self, data_fetcher)
134 while not self.done:
135 try:
--> 136 self.advance(data_fetcher)
137 self.on_advance_end(data_fetcher)
138 self._restarting = False
File c:\Users\user\Anaconda3\envs\binn\Lib\site-packages\lightning\pytorch\loops\training_epoch_loop.py:240, in _TrainingEpochLoop.advance(self, data_fetcher)
237 with trainer.profiler.profile("run_training_batch"):
238 if trainer.lightning_module.automatic_optimization:
239 # in automatic optimization, there can only be one optimizer
--> 240 batch_output = self.automatic_optimization.run(trainer.optimizers[0], batch_idx, kwargs)
241 else:
242 batch_output = self.manual_optimization.run(kwargs)
File c:\Users\user\Anaconda3\envs\binn\Lib\site-packages\lightning\pytorch\loops\optimization\automatic.py:187, in _AutomaticOptimization.run(self, optimizer, batch_idx, kwargs)
180 closure()
182 # ------------------------------
183 # BACKWARD PASS
184 # ------------------------------
185 # gradient update with accumulated gradients
186 else:
--> 187 self._optimizer_step(batch_idx, closure)
189 result = closure.consume_result()
190 if result.loss is None:
File c:\Users\user\Anaconda3\envs\binn\Lib\site-packages\lightning\pytorch\loops\optimization\automatic.py:265, in _AutomaticOptimization._optimizer_step(self, batch_idx, train_step_and_backward_closure)
262 self.optim_progress.optimizer.step.increment_ready()
264 # model hook
--> 265 call._call_lightning_module_hook(
266 trainer,
267 "optimizer_step",
...
3051 if size_average is not None or reduce is not None:
3052 reduction = _Reduction.legacy_get_string(size_average, reduce)
-> 3053 return torch._C._nn.cross_entropy_loss(input, target, weight, _Reduction.get_enum(reduction), ignore_index, label_smoothing)
RuntimeError: expected scalar type Long but found Short
So this was just the dtype of y: the targets were int16 (Short), but the cross-entropy loss expects int64 (Long). The fix is:
torch.tensor(y, dtype=torch.int64, device=binn.device)
One more thing I noticed: your git history is ~4 GB in size, which probably means some old data is still in the history...
I'm currently working on reducing the size of the .git history. Oddly, I can't find any large files in the git history... But it might be some old logs from PyTorch Lightning (the so-called lightning logs). I'm currently looking into whether I can remove these from the git history. Alternatively, I'll create an orphan branch and make it the main branch, but that is not really the preferred option.
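I guess you can keep your history as long as the pip package is up to date :) I cloned the repository without the full history (a shallow clone), which worked fine. Maybe you could recommend this as the default in the installation instructions (with, let's say, the last 10 commits, e.g. `git clone --depth 10 <repository-url>`).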
I left the errors in the notebooks. They are meant as documentation of the errors rather than as an update.