Nixtla / neuralforecast

Scalable and user friendly neural :brain: forecasting algorithms.
https://nixtlaverse.nixtla.io/neuralforecast
Apache License 2.0

RaySystemError #1006

Closed · hberande closed this issue 1 month ago

hberande commented 1 month ago

What happened + What you expected to happen

I am trying hyperparameter selection for the NHITS model (AutoNHITS) using the Ray Tune library, but cross-validation fails with the error below.

```
RaySystemError                            Traceback (most recent call last)
Cell In[21], line 55
     47 models = [AutoNHITS(h=horizon,
     48                     config=nhits_config,
     49                     num_samples=5)]
     51 nf = NeuralForecast(
     52     models=models,
     53     freq='15min')
---> 55 Y_hat_df = nf.cross_validation(df=Y_df, val_size=val_size,
     56                                test_size=test_size, n_windows=None
     57                                )

File ~\AppData\Local\anaconda3\envs\NeuralForecast\Lib\site-packages\neuralforecast\core.py:981, in NeuralForecast.cross_validation(self, df, static_df, n_windows, step_size, val_size, test_size, sort_df, use_init_models, verbose, refit, id_col, time_col, target_col, **data_kwargs)
    979     df = df.reset_index(id_col)
    980 if not refit:
--> 981     return self._no_refit_cross_validation(
    982         df=df,
    983         static_df=static_df,
    984         n_windows=n_windows,
    985         step_size=step_size,
    986         val_size=val_size,
    987         test_size=test_size,
    988         sort_df=sort_df,
    989         verbose=verbose,
    990         id_col=id_col,
    991         time_col=time_col,
    992         target_col=target_col,
    993         **data_kwargs,
    994     )
    995 if df is None:
    996     raise ValueError("Must specify df with refit!=False.")

File ~\AppData\Local\anaconda3\envs\NeuralForecast\Lib\site-packages\neuralforecast\core.py:862, in NeuralForecast._no_refit_cross_validation(self, df, static_df, n_windows, step_size, val_size, test_size, sort_df, verbose, id_col, time_col, target_col, **data_kwargs)
    855 fcsts = np.full(
    856     (self.dataset.n_groups * self.h * n_windows, len(cols)),
    857     np.nan,
    858     dtype=np.float32,
    859 )
    861 for model in self.models:
--> 862     model.fit(dataset=self.dataset, val_size=val_size, test_size=test_size)
    863     model_fcsts = model.predict(
    864         self.dataset, step_size=step_size, **data_kwargs
    865     )
    867     # Append predictions in memory placeholder

File ~\AppData\Local\anaconda3\envs\NeuralForecast\Lib\site-packages\neuralforecast\common\_base_auto.py:398, in BaseAuto.fit(self, dataset, val_size, test_size, random_seed, distributed_config)
    394     if distributed_config is not None:
    395         raise ValueError(
    396             "distributed training is not supported for the ray backend."
    397         )
--> 398     results = self._tune_model(
    399         cls_model=self.cls_model,
    400         dataset=dataset,
    401         val_size=val_size,
    402         test_size=test_size,
    403         cpus=self.cpus,
    404         gpus=self.gpus,
    405         verbose=self.verbose,
    406         num_samples=self.num_samples,
    407         search_alg=search_alg,
    408         config=self.config,
    409     )
    410     best_config = results.get_best_result().config
    411 else:

File ~\AppData\Local\anaconda3\envs\NeuralForecast\Lib\site-packages\neuralforecast\common\_base_auto.py:230, in BaseAuto._tune_model(self, cls_model, dataset, val_size, test_size, cpus, gpus, verbose, num_samples, search_alg, config)
    217 def _tune_model(
    218     self,
    219     cls_model,
   (...)
    228     config,
    229 ):
--> 230     train_fn_with_parameters = tune.with_parameters(
    231         self._train_tune,
    232         cls_model=cls_model,
    233         dataset=dataset,
    234         val_size=val_size,
    235         test_size=test_size,
    236     )
    238     # Device
    239     if gpus > 0:

File ~\AppData\Local\anaconda3\envs\NeuralForecast\Lib\site-packages\ray\tune\trainable\util.py:293, in with_parameters(trainable, **kwargs)
    291 prefix = f"{str(trainable)}_"
    292 for k, v in kwargs.items():
--> 293     parameter_registry.put(prefix + k, v)
    295 trainable_name = getattr(trainable, "__name__", "tune_with_parameters")
    296 keys = set(kwargs.keys())

File ~\AppData\Local\anaconda3\envs\NeuralForecast\Lib\site-packages\ray\tune\registry.py:296, in _ParameterRegistry.put(self, k, v)
    294 self.to_flush[k] = v
    295 if ray.is_initialized():
--> 296     self.flush()

File ~\AppData\Local\anaconda3\envs\NeuralForecast\Lib\site-packages\ray\tune\registry.py:308, in _ParameterRegistry.flush(self)
    306         self.references[k] = v
    307     else:
--> 308         self.references[k] = ray.put(v)
    309 self.to_flush.clear()

File ~\AppData\Local\anaconda3\envs\NeuralForecast\Lib\site-packages\ray\_private\auto_init_hook.py:24, in wrap_auto_init.<locals>.auto_init_wrapper(*args, **kwargs)
     21 @wraps(fn)
     22 def auto_init_wrapper(*args, **kwargs):
     23     auto_init_ray()
---> 24     return fn(*args, **kwargs)

File ~\AppData\Local\anaconda3\envs\NeuralForecast\Lib\site-packages\ray\_private\client_mode_hook.py:103, in client_mode_hook.<locals>.wrapper(*args, **kwargs)
    101     if func.__name__ != "init" or is_client_mode_enabled_by_default:
    102         return getattr(ray, func.__name__)(*args, **kwargs)
--> 103 return func(*args, **kwargs)

File ~\AppData\Local\anaconda3\envs\NeuralForecast\Lib\site-packages\ray\_private\worker.py:2597, in put(value, _owner)
   2595 with profiling.profile("ray.put"):
   2596     try:
-> 2597         object_ref = worker.put_object(value, owner_address=serialize_owner_address)
   2598     except ObjectStoreFullError:
   2599         logger.info(
   2600             "Put failed since the value was either too large or the "
   2601             "store was full of pinned objects."
   2602         )

File ~\AppData\Local\anaconda3\envs\NeuralForecast\Lib\site-packages\ray\_private\worker.py:704, in Worker.put_object(self, value, object_ref, owner_address)
    696     raise TypeError(msg) from e
    697 # This must be the first place that we construct this python
    698 # ObjectRef because an entry with 0 local references is created when
    699 # the object is Put() in the core worker, expecting that this python
    700 # reference will be created. If another reference is created and
    701 # removed before this one, it will corrupt the state in the
    702 # reference counter.
    703 return ray.ObjectRef(
--> 704     self.core_worker.put_serialized_object_and_increment_local_ref(
    705         serialized_value, object_ref=object_ref, owner_address=owner_address
    706     ),
    707     # The initial local reference is already acquired internally.
    708     skip_adding_local_ref=True,
    709 )

File python\ray\_raylet.pyx:2939, in ray._raylet.CoreWorker.put_serialized_object_and_increment_local_ref()

File python\ray\_raylet.pyx:2831, in ray._raylet.CoreWorker._create_put_buffer()

File python\ray\_raylet.pyx:412, in ray._raylet.check_status()

RaySystemError: System error: Unknown error
```
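The failing frame is `ray.put` inside `tune.with_parameters`, before any model training starts. As a quick check that the problem is in Ray's object store itself rather than in neuralforecast, here is a minimal sketch (the array contents are arbitrary) that exercises the same call directly:

```python
# Isolation check: if ray.put alone raises RaySystemError in this environment,
# the failure is in Ray's object store, not in neuralforecast.
import numpy as np
import ray

ray.init(ignore_reinit_error=True)
ref = ray.put(np.zeros(10))   # same call that fails in the traceback above
print(ray.get(ref))           # should print the array if Ray is healthy
ray.shutdown()
```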

Versions / Dependencies

numpy = 1.26.3
pandas = 2.1.4
pydantic = 2.7.1
pydantic_core = 2.18.2
ray = 2.6.3
neuralforecast = 1.7.1
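One pairing worth ruling out from these versions: ray 2.6.3 together with pydantic 2.7.1. Older Ray releases are reported to have problems with pydantic >= 2, so the serialization failure may come from the environment rather than the model code. A hedged check (the 2.9 cutoff is an assumption about when Ray gained pydantic 2 support; confirm against Ray's release notes):

```python
# Hypothetical environment guard: flag the suspect ray/pydantic pairing.
from importlib.metadata import version
from packaging.version import Version

if Version(version("ray")) < Version("2.9") and Version(version("pydantic")) >= Version("2"):
    print("Suspect pairing: try upgrading ray, or pinning pydantic below 2.")
```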

Reproduction script

```python
import pandas as pd
from ray import tune

from neuralforecast import NeuralForecast
from neuralforecast.auto import AutoNHITS

df_scada = pd.read_excel(r"E:\1. SCADA\1. SCADA_15m\Chandragiri_Raw_SCADA_data_15min.xlsx")
df_wrf = pd.read_excel(r"E:\2. WRF\1. WRF_15min\Chandragiri_WRF_Iter_R00_15min.xlsx")
df_merged = pd.merge(df_wrf, df_scada, on="Date")
df_merged1 = df_merged[['Date', 'WRF_WS_120m', 'WRF_WD_120m', 'WRF_Temp_120m',
                        'SCADA_WS_Avg', 'SCADA_Temp_Avg', 'SCADA_Dir_Avg',
                        'SCADA_Power_Sum [kwh]']]
Y_df = df_merged1.copy()

Y_df.rename(columns={'Date': 'ds', 'SCADA_Power_Sum [kwh]': 'y'}, inplace=True)
Y_df.insert(loc=1, column='unique_id', value='Chandragiri')

n_time = len(Y_df.ds.unique())
val_size = int(0.1935 * n_time)   # int(Y_df[Y_df['ds'] == "2022-12-31 20:30:00"].index[0])
test_size = int(0.1935 * n_time)  # int(Y_df[Y_df['ds'] == "2023-12-31 20:30:00"].index[0])

horizon = 24  # 6 hrs at a 15-minute frequency

# Use your own config or AutoNHITS.default_config
nhits_config = {
    "learning_rate": tune.choice([1e-3]),                        # Initial learning rate
    "max_steps": tune.choice([1000]),                            # Number of SGD steps
    "input_size": tune.choice([4 * horizon]),                    # input_size = multiplier * horizon
    "batch_size": tune.choice([7]),                              # Number of series in windows
    "windows_batch_size": tune.choice([256]),                    # Number of windows in batch
    "n_pool_kernel_size": tune.choice([[2, 2, 2], [16, 8, 1]]),  # MaxPool's kernel size
    "n_freq_downsample": tune.choice([[168, 24, 1], [24, 12, 1], [1, 1, 1]]),  # Interpolation expressivity ratios
    "activation": tune.choice(['ReLU']),                         # Type of non-linear activation
    "n_blocks": tune.choice([[1, 1, 1]]),                        # Blocks per each of the 3 stacks
    "mlp_units": tune.choice([[[512, 512], [512, 512], [512, 512]]]),  # 2 512-unit layers per block for each stack
    "interpolation_mode": tune.choice(['linear']),               # Type of multi-step interpolation
    "val_check_steps": tune.choice([100]),                       # Compute validation every 100 steps
    "random_seed": tune.randint(1, 10),
}

models = [AutoNHITS(h=horizon, config=nhits_config, num_samples=5)]
nf = NeuralForecast(models=models, freq='15min')
Y_hat_df = nf.cross_validation(df=Y_df, val_size=val_size, test_size=test_size, n_windows=None)
```
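Since the Excel files above are local, here is a self-contained variant with synthetic data (the series name, lengths, and reduced config are arbitrary stand-ins) that walks the same AutoNHITS -> tune.with_parameters -> ray.put path and should hit the same error in an affected environment:

```python
# Self-contained repro sketch: a synthetic 15-minute series replaces the
# SCADA/WRF Excel files; the call chain matches the script above.
import numpy as np
import pandas as pd
from ray import tune
from neuralforecast import NeuralForecast
from neuralforecast.auto import AutoNHITS

ds = pd.date_range("2023-01-01", periods=2000, freq="15min")
Y_df = pd.DataFrame({
    "unique_id": "synthetic",
    "ds": ds,
    "y": np.sin(np.arange(2000) / 96) + 0.1 * np.random.rand(2000),
})

config = {"max_steps": tune.choice([10]), "input_size": tune.choice([96])}
models = [AutoNHITS(h=24, config=config, num_samples=1)]
nf = NeuralForecast(models=models, freq="15min")
# Fails at tune.with_parameters -> ray.put in an affected environment.
Y_hat_df = nf.cross_validation(df=Y_df, val_size=96, test_size=96, n_windows=None)
```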

Issue Severity

High: It blocks me from completing my task.

elephaint commented 1 month ago