LarsKue / lightning-trainable

A default trainable module for pytorch lightning.
MIT License

No support for CPU-only #22

Closed: thelostscout closed this issue 10 months ago

thelostscout commented 10 months ago

An environment set up with conda install pytorch cpuonly -c pytorch is not supported by lightning-trainable:

File ~/Python/bg/modules/lightning-trainable/src/lightning_trainable/trainable/trainable.py:229, in Trainable.configure_trainer(self, logger_kwargs, trainer_kwargs)
    226 trainer_kwargs.setdefault("max_steps", self.hparams.max_steps)
    227 trainer_kwargs.setdefault("profiler", self.hparams.profiler)
--> 229 return lightning.Trainer(**trainer_kwargs)

File ~/.mambaforge/envs/lightning_bg/lib/python3.10/site-packages/lightning/pytorch/utilities/argparse.py:70, in _defaults_from_env_vars.<locals>.insert_env_defaults(self, *args, **kwargs)
     67 kwargs = dict(list(env_variables.items()) + list(kwargs.items()))
     69 # all args were already moved to kwargs
---> 70 return fn(self, **kwargs)

File ~/.mambaforge/envs/lightning_bg/lib/python3.10/site-packages/lightning/pytorch/trainer/trainer.py:401, in Trainer.__init__(self, accelerator, strategy, devices, num_nodes, precision, logger, callbacks, fast_dev_run, max_epochs, min_epochs, max_steps, min_steps, max_time, limit_train_batches, limit_val_batches, limit_test_batches, limit_predict_batches, overfit_batches, val_check_interval, check_val_every_n_epoch, num_sanity_val_steps, log_every_n_steps, enable_checkpointing, enable_progress_bar, enable_model_summary, accumulate_grad_batches, gradient_clip_val, gradient_clip_algorithm, deterministic, benchmark, inference_mode, use_distributed_sampler, profiler, detect_anomaly, barebones, plugins, sync_batchnorm, reload_dataloaders_every_n_epochs, default_root_dir)
    398 # init connectors
    399 self._data_connector = _DataConnector(self)
--> 401 self._accelerator_connector = _AcceleratorConnector(
    402     devices=devices,
    403     accelerator=accelerator,
    404     strategy=strategy,
    405     num_nodes=num_nodes,
    406     sync_batchnorm=sync_batchnorm,
    407     benchmark=benchmark,
    408     use_distributed_sampler=use_distributed_sampler,
    409     deterministic=deterministic,
    410     precision=precision,
    411     plugins=plugins,
    412 )
    413 self._logger_connector = _LoggerConnector(self)
    414 self._callback_connector = _CallbackConnector(self)

File ~/.mambaforge/envs/lightning_bg/lib/python3.10/site-packages/lightning/pytorch/trainer/connectors/accelerator_connector.py:152, in _AcceleratorConnector.__init__(self, devices, num_nodes, accelerator, strategy, plugins, precision, sync_batchnorm, benchmark, use_distributed_sampler, deterministic)
    150     self._accelerator_flag = self._choose_auto_accelerator()
    151 elif self._accelerator_flag == "gpu":
--> 152     self._accelerator_flag = self._choose_gpu_accelerator_backend()
    154 self._check_device_config_and_set_final_flags(devices=devices, num_nodes=num_nodes)
    155 self._set_parallel_devices_and_init_accelerator()

File ~/.mambaforge/envs/lightning_bg/lib/python3.10/site-packages/lightning/pytorch/trainer/connectors/accelerator_connector.py:372, in _AcceleratorConnector._choose_gpu_accelerator_backend()
    370 if CUDAAccelerator.is_available():
    371     return "cuda"
--> 372 raise MisconfigurationException("No supported gpu backend found!")

MisconfigurationException: No supported gpu backend found!
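
For reference, the traceback shows Lightning taking the accelerator == "gpu" branch, so the Trainer is being asked for a GPU backend even though only the cpuonly PyTorch build is installed. The same error can be reproduced without lightning-trainable; a minimal sketch on a CPU-only install (the assert only documents that assumption):

```python
import lightning
import torch

# With the cpuonly PyTorch build, no CUDA backend is available.
assert not torch.cuda.is_available()

# Requesting a GPU accelerator then raises the same error as above:
# MisconfigurationException: No supported gpu backend found!
trainer = lightning.Trainer(accelerator="gpu")
```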
thelostscout commented 10 months ago

Actually, this seems like a Lightning issue. But it shouldn't be one, right?

thelostscout commented 10 months ago

The issue can be fixed by adding accelerator = "cpu" to the HParams (a sketch follows below). It remains an open question whether it would be desirable to have this done automatically.
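
A minimal sketch of the workaround, assuming TrainableHParams is importable from the package root and accepts an accelerator key; the class name MyHParams and the other hyperparameter values are purely illustrative:

```python
from lightning_trainable import TrainableHParams

class MyHParams(TrainableHParams):
    # Workaround from this issue: request the CPU accelerator explicitly so that
    # Trainable.configure_trainer() does not ask Lightning for a GPU backend.
    accelerator: str = "cpu"

# Illustrative values; the accelerator override is the only part relevant here.
hparams = MyHParams(max_epochs=10, batch_size=32)
```

If this were to be handled automatically, one option (not something decided in this thread) would be to default accelerator to "auto", which lets Lightning fall back to the CPU accelerator when no GPU backend is available.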