GokuMohandas / Made-With-ML

Learn how to design, develop, deploy and iterate on production-grade ML applications.
https://madewithml.com
MIT License
37.6k stars 5.96k forks source link

Getting TypeError: RayTaskError.as_instanceof_cause.<locals>.cls.__init__() takes 2 positional arguments but 4 were given #263

Open NikhilK-crypto opened 3 months ago

NikhilK-crypto commented 3 months ago

After running the code below:

# Trainer
trainer = TorchTrainer(
    train_loop_per_worker=train_loop_per_worker,
    train_loop_config=train_loop_config,
    scaling_config=scaling_config,
    run_config=run_config,
    datasets={"train": train_ds, "val": val_ds},
    dataset_config=dataset_config,
    metadata={"class_to_index": preprocessor.class_to_index}
)

%%time
# Train
results = trainer.fit()

Setuptools version: 69.5.1. Ray version: 2.7.0 (also tried 2.7.1 and 2.7.2).

I get the error below:


TypeError Traceback (most recent call last) File <timed exec>:2

File /opt/anaconda3/envs/madewithml/lib/python3.10/site-packages/ray/train/base_trainer.py:653, in BaseTrainer.fit(self) 647 restore_msg = TrainingFailedError._RESTORE_MSG.format( 648 trainer_cls_name=self.__class__.__name__, 649 path=str(experiment_local_path), 650 ) 652 try: --> 653 result_grid = tuner.fit() 654 except TuneError as e: 655 # Catch any TuneErrors raised by the Tuner.fit call. 656 # Unwrap the TuneError if needed. 657 parent_error = e.__cause__ or e

File /opt/anaconda3/envs/madewithml/lib/python3.10/site-packages/ray/tune/tuner.py:372, in Tuner.fit(self) 370 if not self._is_ray_client: 371 try: --> 372 return self._local_tuner.fit() 373 except TuneError as e: 374 raise TuneError( 375 _TUNER_FAILED_MSG.format( 376 path=self._local_tuner.get_experiment_checkpoint_dir() 377 ) 378 ) from e

File /opt/anaconda3/envs/madewithml/lib/python3.10/site-packages/ray/tune/impl/tuner_internal.py:585, in TunerInternal.fit(self) 581 analysis = self._fit_resume(trainable, param_space) 583 self._experiment_analysis = analysis --> 585 return ResultGrid(self._experiment_analysis)

File /opt/anaconda3/envs/madewithml/lib/python3.10/site-packages/ray/tune/result_grid.py:83, in ResultGrid.__init__(self, experiment_analysis) 78 def __init__( 79 self, 80 experiment_analysis: ExperimentAnalysis, 81 ): 82 self._experiment_analysis = experiment_analysis ---> 83 self._results = [ 84 self._trial_to_result(trial) for trial in self._experiment_analysis.trials 85 ]

File /opt/anaconda3/envs/madewithml/lib/python3.10/site-packages/ray/tune/result_grid.py:84, in <listcomp>(.0) 78 def __init__( 79 self, 80 experiment_analysis: ExperimentAnalysis, 81 ): 82 self._experiment_analysis = experiment_analysis 83 self._results = [ ---> 84 self._trial_to_result(trial) for trial in self._experiment_analysis.trials 85 ]

File /opt/anaconda3/envs/madewithml/lib/python3.10/site-packages/ray/tune/result_grid.py:317, in ResultGrid._trial_to_result(self, trial) 309 else: 310 metrics_df = self._experiment_analysis.trial_dataframes.get( 311 trial.local_path 312 ) 314 result = Result( 315 checkpoint=checkpoint, 316 metrics=trial.last_result.copy(), --> 317 error=self._populate_exception(trial), 318 _local_path=trial.local_path, 319 _remote_path=trial.remote_path, 320 _storage_filesystem=( 321 self._experiment_analysis._fs 322 if isinstance(self._experiment_analysis, ExperimentAnalysis) 323 else None 324 ), 325 metrics_dataframe=metrics_df, 326 best_checkpoints=best_checkpoints, 327 ) 328 return result

File /opt/anaconda3/envs/madewithml/lib/python3.10/site-packages/ray/tune/result_grid.py:262, in ResultGrid._populate_exception(trial) 260 if trial.pickled_error_file and os.path.exists(trial.pickled_error_file): 261 with open(trial.pickled_error_file, "rb") as f: --> 262 e = cloudpickle.load(f) 263 return e 264 elif trial.error_file and os.path.exists(trial.error_file):

TypeError: RayTaskError.as_instanceof_cause.<locals>.cls.__init__() takes 2 positional arguments but 4 were given

anisharitakula commented 1 month ago

I am getting the same error too, @NikhilK-crypto. Were you able to find a fix/workaround?

CtrlMj commented 2 days ago

same here