Nixtla / neuralforecast

Scalable and user-friendly neural :brain: forecasting algorithms.
https://nixtlaverse.nixtla.io/neuralforecast
Apache License 2.0
2.98k stars 342 forks source link

FileNotFoundError: [Errno 2] No such file or directory #905

Closed kkckk1110 closed 7 months ago

kkckk1110 commented 7 months ago

What happened + What you expected to happen

I came across a bug:

Stacktrace ```python KeyError Traceback (most recent call last) File ~\AppData\Roaming\Python\Python39\site-packages\tensorboardX\record_writer.py:58, in open_file(path) 57 prefix = path.split(':')[0] ---> 58 factory = REGISTERED_FACTORIES[prefix] 59 return factory.open(path) KeyError: 'C' During handling of the above exception, another exception occurred: FileNotFoundError Traceback (most recent call last) Cell In[13], line 1 ----> 1 nf.fit(df=Y_df) File ~\AppData\Roaming\Python\Python39\site-packages\neuralforecast\core.py:274, in NeuralForecast.fit(self, df, static_df, val_size, sort_df, use_init_models, verbose) 271 print("WARNING: Deleting previously fitted models.") 273 for model in self.models: --> 274 model.fit(self.dataset, val_size=val_size) 276 self._fitted = True File ~\AppData\Roaming\Python\Python39\site-packages\neuralforecast\common\_base_auto.py:361, in BaseAuto.fit(self, dataset, val_size, test_size, random_seed) 359 val_size = val_size if val_size > 0 else self.h 360 if self.backend == "ray": --> 361 results = self._tune_model( 362 cls_model=self.cls_model, 363 dataset=dataset, 364 val_size=val_size, 365 test_size=test_size, 366 cpus=self.cpus, 367 gpus=self.gpus, 368 verbose=self.verbose, 369 num_samples=self.num_samples, 370 search_alg=search_alg, 371 config=self.config, 372 ) 373 best_config = results.get_best_result().config 374 else: File ~\AppData\Roaming\Python\Python39\site-packages\neuralforecast\common\_base_auto.py:259, in BaseAuto._tune_model(self, cls_model, dataset, val_size, test_size, cpus, gpus, verbose, num_samples, search_alg, config) 240 device_dict = {"cpu": cpus} 242 tuner = tune.Tuner( 243 tune.with_resources(train_fn_with_parameters, device_dict), 244 run_config=air.RunConfig( (...) 
257 param_space=config, 258 ) --> 259 results = tuner.fit() 260 return results File D:\Anaconda\envs\neural\lib\site-packages\ray\tune\tuner.py:381, in Tuner.fit(self) 379 if not self._is_ray_client: 380 try: --> 381 return self._local_tuner.fit() 382 except TuneError as e: 383 raise TuneError( 384 _TUNER_FAILED_MSG.format( 385 path=self._local_tuner.get_experiment_checkpoint_dir() 386 ) 387 ) from e File D:\Anaconda\envs\neural\lib\site-packages\ray\tune\impl\tuner_internal.py:509, in TunerInternal.fit(self) 507 param_space = copy.deepcopy(self.param_space) 508 if not self._is_restored: --> 509 analysis = self._fit_internal(trainable, param_space) 510 else: 511 analysis = self._fit_resume(trainable, param_space) File D:\Anaconda\envs\neural\lib\site-packages\ray\tune\impl\tuner_internal.py:628, in TunerInternal._fit_internal(self, trainable, param_space) 615 """Fitting for a fresh Tuner.""" 616 args = { 617 **self._get_tune_run_arguments(trainable), 618 **dict( (...) 626 **self._tuner_kwargs, 627 } --> 628 analysis = run( 629 **args, 630 ) 631 self.clear_remote_string_queue() 632 return analysis File D:\Anaconda\envs\neural\lib\site-packages\ray\tune\tune.py:1002, in run(run_or_experiment, name, metric, mode, stop, time_budget_s, config, resources_per_trial, num_samples, storage_path, storage_filesystem, search_alg, scheduler, checkpoint_config, verbose, progress_reporter, log_to_file, trial_name_creator, trial_dirname_creator, sync_config, export_formats, max_failures, fail_fast, restore, resume, reuse_actors, raise_on_failed_trial, callbacks, max_concurrent_trials, keep_checkpoints_num, checkpoint_score_attr, checkpoint_freq, checkpoint_at_end, chdir_to_trial_dir, local_dir, _remote, _remote_string_queue, _entrypoint) 1000 try: 1001 while not runner.is_finished() and not experiment_interrupted_event.is_set(): -> 1002 runner.step() 1003 if has_verbosity(Verbosity.V1_EXPERIMENT): 1004 _report_progress(runner, progress_reporter) File 
D:\Anaconda\envs\neural\lib\site-packages\ray\tune\execution\tune_controller.py:728, in TuneController.step(self) 725 self._maybe_add_actors() 727 # Handle one event --> 728 if not self._actor_manager.next(timeout=0.1): 729 # If there are no actors running, warn about potentially 730 # insufficient resources 731 if not self._actor_manager.num_live_actors: 732 self._insufficient_resources_manager.on_no_available_trials( 733 self.get_trials() 734 ) File D:\Anaconda\envs\neural\lib\site-packages\ray\air\execution\_internal\actor_manager.py:222, in RayActorManager.next(self, timeout) 219 [future] = ready 221 if future in actor_state_futures: --> 222 self._actor_state_events.resolve_future(future) 223 elif future in actor_task_futures: 224 self._actor_task_events.resolve_future(future) File D:\Anaconda\envs\neural\lib\site-packages\ray\air\execution\_internal\event_manager.py:118, in RayEventManager.resolve_future(self, future) 116 else: 117 if on_result: --> 118 on_result(result) File D:\Anaconda\envs\neural\lib\site-packages\ray\air\execution\_internal\actor_manager.py:381, in RayActorManager._try_start_actors..create_callbacks..on_actor_start(result) 380 def on_actor_start(result: Any): --> 381 self._actor_start_resolved( 382 tracked_actor=tracked_actor, future=future 383 ) File D:\Anaconda\envs\neural\lib\site-packages\ray\air\execution\_internal\actor_manager.py:243, in RayActorManager._actor_start_resolved(self, tracked_actor, future) 240 self._tracked_actors_to_state_futures[tracked_actor].remove(future) 242 if tracked_actor._on_start: --> 243 tracked_actor._on_start(tracked_actor) File D:\Anaconda\envs\neural\lib\site-packages\ray\tune\execution\tune_controller.py:1174, in TuneController._actor_started(self, tracked_actor, log) 1169 ray_actor = self._actor_manager._live_actors_to_ray_actors_resources[ 1170 tracked_actor 1171 ][0] 1172 trial.set_ray_actor(ray_actor) -> 1174 self._callbacks.on_trial_start( 1175 iteration=self._iteration, trials=self._trials, 
trial=trial 1176 ) 1178 self._set_trial_status(trial, Trial.RUNNING) 1180 self._mark_trial_to_checkpoint(trial) File D:\Anaconda\envs\neural\lib\site-packages\ray\tune\callback.py:400, in CallbackList.on_trial_start(self, **info) 398 def on_trial_start(self, **info): 399 for callback in self._callbacks: --> 400 callback.on_trial_start(**info) File D:\Anaconda\envs\neural\lib\site-packages\ray\tune\logger\logger.py:147, in LoggerCallback.on_trial_start(self, iteration, trials, trial, **info) 144 def on_trial_start( 145 self, iteration: int, trials: List["Trial"], trial: "Trial", **info 146 ): --> 147 self.log_trial_start(trial) File D:\Anaconda\envs\neural\lib\site-packages\ray\tune\logger\tensorboardx.py:187, in TBXLoggerCallback.log_trial_start(self, trial) 185 self._trial_writer[trial].close() 186 trial.init_local_path() --> 187 self._trial_writer[trial] = self._summary_writer_cls( 188 trial.local_path, flush_secs=30 189 ) 190 self._trial_result[trial] = {} File ~\AppData\Roaming\Python\Python39\site-packages\tensorboardX\writer.py:300, in SummaryWriter.__init__(self, logdir, comment, purge_step, max_queue, flush_secs, filename_suffix, write_to_disk, log_dir, comet_config, **kwargs) 297 # Initialize the file writers, but they can be cleared out on close 298 # and recreated later as needed. 
299 self.file_writer = self.all_writers = None --> 300 self._get_file_writer() 302 # Create default bins for histograms, see generate_testdata.py in tensorflow/tensorboard 303 v = 1E-12 File ~\AppData\Roaming\Python\Python39\site-packages\tensorboardX\writer.py:348, in SummaryWriter._get_file_writer(self) 345 return self.file_writer 347 if self.all_writers is None or self.file_writer is None: --> 348 self.file_writer = FileWriter(logdir=self.logdir, 349 max_queue=self._max_queue, 350 flush_secs=self._flush_secs, 351 filename_suffix=self._filename_suffix, 352 **self.kwargs) 353 if self.purge_step is not None: 354 self.file_writer.add_event( 355 Event(step=self.purge_step, file_version='brain.Event:2')) File ~\AppData\Roaming\Python\Python39\site-packages\tensorboardX\writer.py:104, in FileWriter.__init__(self, logdir, max_queue, flush_secs, filename_suffix) 99 # Sometimes PosixPath is passed in and we need to coerce it to 100 # a string in all cases 101 # TODO: See if we can remove this in the future if we are 102 # actually the ones passing in a PosixPath 103 logdir = str(logdir) --> 104 self.event_writer = EventFileWriter( 105 logdir, max_queue, flush_secs, filename_suffix) 107 def cleanup(): 108 self.event_writer.close() File ~\AppData\Roaming\Python\Python39\site-packages\tensorboardX\event_file_writer.py:106, in EventFileWriter.__init__(self, logdir, max_queue_size, flush_secs, filename_suffix) 104 directory_check(self._logdir) 105 self._event_queue = multiprocessing.Queue(max_queue_size) --> 106 self._ev_writer = EventsWriter(os.path.join( 107 self._logdir, "events"), filename_suffix) 108 self._flush_secs = flush_secs 109 self._closed = False File ~\AppData\Roaming\Python\Python39\site-packages\tensorboardX\event_file_writer.py:43, in EventsWriter.__init__(self, file_prefix, filename_suffix) 40 self._file_name = file_prefix + ".out.tfevents." + str(time.time())[:10] + "." 
+\ 41 socket.gethostname() + filename_suffix 42 self._num_outstanding_events = 0 ---> 43 self._py_recordio_writer = RecordWriter(self._file_name) 44 # Initialize an event instance. 45 self._event = event_pb2.Event() File ~\AppData\Roaming\Python\Python39\site-packages\tensorboardX\record_writer.py:182, in RecordWriter.__init__(self, path) 180 self.path = path 181 self._writer = None --> 182 self._writer = open_file(path) File ~\AppData\Roaming\Python\Python39\site-packages\tensorboardX\record_writer.py:61, in open_file(path) 59 return factory.open(path) 60 except KeyError: ---> 61 return open(path, 'wb') FileNotFoundError: [Errno 2] No such file or directory: 'C:/Users/Administrator.DESKTOP-56V32DM/ray_results/_train_tune_2024-02-28_20-30-30/_train_tune_11446_00000_0_batch_size=1,input_size=48,learning_rate=0.0012,max_steps=1000,n_freq_downsample=1_1_1_1_1,n_pool_kernel_2024-02-28_20-30-30\\events.out.tfevents.1709123434.DESKTOP-56V32DM' ```

Versions / Dependencies

ray==2.9.3, neuralforecast==1.6.4

Reproduction script

nf.fit(df=Y_df)

Issue Severity

None

jmoralez commented 7 months ago

Hey. This seems to be a duplicate of #526, I'm closing this in favor of that one.