XGBoost error in 1B_Model_Searching on M1 Mac

PatWalters commented 2 years ago

A new XGBoost error appears to have popped up when calling oce.TOP_MODELS_ADMET()

MisconfigurationException Traceback (most recent call last) Cell In [4], line 4 1 # We'll now get our list of top model architectures. 2 # Each of these models has certain situations where it outperforms the others, 3 # so we test all of them to see which model is best for this specific situation. ----> 4 models = oce.TOP_MODELS_ADMET() 6 # We'll also create a ModelManager object to keep track of our experiments 7 mm = oce.ModelManager(dataset, metrics = ["Root Mean Squared Error"], file_path="mm_1B_results.oce")

File ~/opt/anaconda3/envs/oce/lib/python3.8/site-packages/olorenchemengine/manager.py:21, in TOP_MODELS_ADMET() 13 """ 14 Returns a list of the top models from the ADMET dataset. 15 16 Returns: 17 List[BaseModel]: A list of the top models from the ADMET dataset. 18 """ 19 df = pd.read_csv(download_public_file("ModelDatabase/ModelDatabase_small.csv")) ---> 21 return [oce.create_BC(mp) for mp in df["Parameters"]]

File ~/opt/anaconda3/envs/oce/lib/python3.8/site-packages/olorenchemengine/manager.py:21, in <listcomp>(.0) 13 """ 14 Returns a list of the top models from the ADMET dataset. 15 16 Returns: 17 List[BaseModel]: A list of the top models from the ADMET dataset. 18 """ 19 df = pd.read_csv(download_public_file("ModelDatabase/ModelDatabase_small.csv")) ---> 21 return [oce.create_BC(mp) for mp in df["Parameters"]]

File ~/opt/anaconda3/envs/oce/lib/python3.8/site-packages/olorenchemengine/internal.py:841, in create_BC(d) 839 else: 840 if isinstance(arg, list): --> 841 arg = [ 842 create_BC(x) 843 if isinstance(x, dict) 844 and ("BC_class_name" in x.keys() or "REMOTE_ID" in x.keys()) 845 else x 846 for x in arg 847 ] 848 args.append(arg) 850 kwargs = {}

File ~/opt/anaconda3/envs/oce/lib/python3.8/site-packages/olorenchemengine/internal.py:842, in <listcomp>(.0) 839 else: 840 if isinstance(arg, list): 841 arg = [ --> 842 create_BC(x) 843 if isinstance(x, dict) 844 and ("BC_class_name" in x.keys() or "REMOTE_ID" in x.keys()) 845 else x 846 for x in arg 847 ] 848 args.append(arg) 850 kwargs = {}

File ~/opt/anaconda3/envs/oce/lib/python3.8/site-packages/olorenchemengine/internal.py:860, in create_BC(d) 857 else: 858 kwargs[k] = v --> 860 return BaseClass.Registry()[d["BC_class_name"]](*args, **kwargs)

File ~/opt/anaconda3/envs/oce/lib/python3.8/site-packages/olorenchemengine/internal.py:241, in log_arguments.<locals>.wrapper(self, *args, **kwargs) 239 self.kwargs = {k: v for k, v in kwargs.items() if k not in ignored_kwargs} 240 if _runtime.is_local: --> 241 return func(self, *args, **kwargs) 242 import uuid 244 REMOTE_ID = str(uuid.uuid4())

File ~/opt/anaconda3/envs/oce/lib/python3.8/site-packages/olorenchemengine/gnn.py:272, in BaseTorchGeometricModel.__init__(self, network, representation, epochs, batch_size, lr, auto_lr_find, pos_weight, preinitialized, log, **kwargs) 268 self.pos_weight = pos_weight 270 from pytorch_lightning import Trainer --> 272 self.trainer = Trainer( 273 accelerator="auto", 274 devices=-1, 275 auto_select_gpus=False, 276 max_epochs=self.epochs, 277 auto_lr_find=auto_lr_find, 278 num_sanity_val_steps=0 279 ) 281 super().__init__(log=False, **kwargs)

File ~/opt/anaconda3/envs/oce/lib/python3.8/site-packages/pytorch_lightning/utilities/argparse.py:345, in _defaults_from_env_vars.<locals>.insert_env_defaults(self, *args, **kwargs) 342 kwargs = dict(list(env_variables.items()) + list(kwargs.items())) 344 # all args were already moved to kwargs --> 345 return fn(self, **kwargs)

File ~/opt/anaconda3/envs/oce/lib/python3.8/site-packages/pytorch_lightning/trainer/trainer.py:433, in Trainer.__init__(self, logger, enable_checkpointing, callbacks, default_root_dir, gradient_clip_val, gradient_clip_algorithm, num_nodes, num_processes, devices, gpus, auto_select_gpus, tpu_cores, ipus, enable_progress_bar, overfit_batches, track_grad_norm, check_val_every_n_epoch, fast_dev_run, accumulate_grad_batches, max_epochs, min_epochs, max_steps, min_steps, max_time, limit_train_batches, limit_val_batches, limit_test_batches, limit_predict_batches, val_check_interval, log_every_n_steps, accelerator, strategy, sync_batchnorm, precision, enable_model_summary, weights_save_path, num_sanity_val_steps, resume_from_checkpoint, profiler, benchmark, deterministic, reload_dataloaders_every_n_epochs, auto_lr_find, replace_sampler_ddp, detect_anomaly, auto_scale_batch_size, plugins, amp_backend, amp_level, move_metrics_to_cpu, multiple_trainloader_mode) 430 # init connectors 431 self._data_connector = DataConnector(self, multiple_trainloader_mode) --> 433 self._accelerator_connector = AcceleratorConnector( 434 num_processes=num_processes, 435 devices=devices, 436 tpu_cores=tpu_cores, 437 ipus=ipus, 438 accelerator=accelerator, 439 strategy=strategy, 440 gpus=gpus, 441 num_nodes=num_nodes, 442 sync_batchnorm=sync_batchnorm, 443 benchmark=benchmark, 444 replace_sampler_ddp=replace_sampler_ddp, 445 deterministic=deterministic, 446 auto_select_gpus=auto_select_gpus, 447 precision=precision, 448 amp_type=amp_backend, 449 amp_level=amp_level, 450 plugins=plugins, 451 ) 452 self._logger_connector = LoggerConnector(self) 453 self._callback_connector = CallbackConnector(self)

File ~/opt/anaconda3/envs/oce/lib/python3.8/site-packages/pytorch_lightning/trainer/connectors/accelerator_connector.py:214, in AcceleratorConnector.__init__(self, devices, num_nodes, accelerator, strategy, plugins, precision, amp_type, amp_level, sync_batchnorm, benchmark, replace_sampler_ddp, deterministic, auto_select_gpus, num_processes, tpu_cores, ipus, gpus) 211 elif self._accelerator_flag == "gpu": 212 self._accelerator_flag = self._choose_gpu_accelerator_backend() --> 214 self._set_parallel_devices_and_init_accelerator() 216 # 3. Instantiate ClusterEnvironment 217 self.cluster_environment: ClusterEnvironment = self._choose_and_init_cluster_environment()

File ~/opt/anaconda3/envs/oce/lib/python3.8/site-packages/pytorch_lightning/trainer/connectors/accelerator_connector.py:545, in AcceleratorConnector._set_parallel_devices_and_init_accelerator(self) 541 self._tpu_cores = self._devices_flag if not self._tpu_cores else self._tpu_cores 543 self._set_devices_flag_if_auto_select_gpus_passed() --> 545 self._devices_flag = self.accelerator.parse_devices(self._devices_flag) 546 if not self._parallel_devices: 547 self._parallel_devices = self.accelerator.get_parallel_devices(self._devices_flag)

File ~/opt/anaconda3/envs/oce/lib/python3.8/site-packages/pytorch_lightning/accelerators/cpu.py:45, in CPUAccelerator.parse_devices(devices) 42 @staticmethod 43 def parse_devices(devices: Union[int, str, List[int]]) -> int: 44 """Accelerator device parsing logic.""" ---> 45 devices = parse_cpu_cores(devices) 46 return devices

File ~/opt/anaconda3/envs/oce/lib/python3.8/site-packages/pytorch_lightning/utilities/device_parser.py:175, in parse_cpu_cores(cpu_cores) 172 cpu_cores = int(cpu_cores) 174 if not isinstance(cpu_cores, int) or cpu_cores <= 0: --> 175 raise MisconfigurationException("devices selected with CPUAccelerator should be an int > 0.") 177 return cpu_cores

MisconfigurationException: devices selected with CPUAccelerator should be an int > 0.

davidzqhuang commented 2 years ago

Looking into it. In the meantime, you can dodge the XGBoost model by calling oce.TOP_MODELS_ADMET()[1:]

raunakdoesdev commented 2 years ago

We were able to replicate this issue on an M1 Mac with a fresh install of OCE.

The error was introduced as a byproduct of a different fix. The underlying issue was with PyTorch Geometric's handling of non-GPU devices. Here is the commit: f8a2257

If you update your install to the latest version from GitHub, the errors should subside. Let me know if you run into any other issues.

Here is the command to install from master: pip install --upgrade "olorenchemengine[full] @ git+https://github.com/Oloren-AI/olorenchemengine.git"

Oloren-AI / olorenchemengine

XGBoost error in 1B_Model_Searching on M1 Mac #56