unit8co / darts

A python library for user-friendly forecasting and anomaly detection on time series.
https://unit8co.github.io/darts/
Apache License 2.0
8.14k stars 886 forks source link

Model can't find encoding_available variable [BUG] #814

Closed Joe-TheBro closed 2 years ago

Joe-TheBro commented 2 years ago

Describe the bug: After fitting a TCN model, attempting to backtest raises an error regarding self.encoders.encoding_available.

Error

AttributeError                            Traceback (most recent call last)
/tmp/ipykernel_1955/95313221.py in <module>
      7     retrain=False,
      8     num_samples=1,
----> 9     verbose=True,
     10 )

/lib/python3.7/site-packages/darts/utils/utils.py in sanitized_method(self, *args, **kwargs)
    170 
    171                 getattr(self, sanity_check_method)(*only_args.values(), **only_kwargs)
--> 172             return method_to_sanitize(self, *only_args.values(), **only_kwargs)
    173 
    174         return sanitized_method

/lib/python3.7/site-packages/darts/models/forecasting/forecasting_model.py in historical_forecasts(self, series, past_covariates, future_covariates, num_samples, start, forecast_horizon, stride, retrain, overlap_end, last_points_only, verbose)
    421                 past_covariates=past_covariates,
    422                 future_covariates=future_covariates,
--> 423                 num_samples=num_samples,
    424             )
    425 

/lib/python3.7/site-packages/darts/models/forecasting/forecasting_model.py in _predict_wrapper(self, n, series, past_covariates, future_covariates, num_samples)
   1008             past_covariates=past_covariates,
   1009             future_covariates=future_covariates,
-> 1010             num_samples=num_samples,
   1011         )
   1012 

/lib/python3.7/site-packages/darts/utils/torch.py in decorator(self, *args, **kwargs)
     68         with fork_rng():
     69             manual_seed(self._random_instance.randint(0, high=MAX_TORCH_SEED_VALUE))
---> 70             return decorated(self, *args, **kwargs)
     71 
     72     return decorator

/lib/python3.7/site-packages/darts/models/forecasting/torch_forecasting_model.py in predict(self, n, series, past_covariates, future_covariates, trainer, batch_size, verbose, n_jobs, roll_size, num_samples, num_loader_workers)
   1076         )
   1077 
-> 1078         if self.encoders.encoding_available:
   1079             past_covariates, future_covariates = self.encoders.encode_inference(
   1080                 n=n,

AttributeError: 'NoneType' object has no attribute 'encoding_available'

To Reproduce

backtest_series = model.historical_forecasts(
    series_transformed,
    start=series_transformed.get_timestamp_at_point(0.6),
    forecast_horizon=7,
    stride=5,
    retrain=False,
    num_samples=1, 
    verbose=True,
)

Expected behavior: I expect to be able to backtest on the fitted model.

System

dennisbader commented 2 years ago

Hi @Joe-TheBro, I couldn't reproduce this error in version 0.17.1. Could you provide a code snippet to do so?

Joe-TheBro commented 2 years ago

Relevant code

series = TimeSeries.from_dataframe(data)

series = series.astype(np.float32)
train, val = series.split_after(0.6)
scaler = Scaler()
train_transformed = scaler.fit_transform(train)
val_transformed = scaler.transform(val)
series_transformed = scaler.transform(series)

trainer_kwargs={"accelerator": "gpu", "gpus": 1, "auto_select_gpus": True}

model = TCNModel(
    n_epochs=1, 
    input_chunk_length=400,
    output_chunk_length=50, 
    dropout=0, 
    dilation_base=2, 
    weight_norm=True,
    kernel_size=7,
    num_filters=4,
    random_state=RANDOM_STATE,
    save_checkpoints=True,
    pl_trainer_kwargs=trainer_kwargs
)

model.fit(series=train_transformed, val_series=val_transformed)

backtest_series = model.historical_forecasts(
    series_transformed,
    start=series_transformed.get_timestamp_at_point(0.6),
    # Split data 60 train / 40 validate-test --- value
    forecast_horizon=7,
    stride=5,
    retrain=False,
    num_samples=1, 
    verbose=True,
)

Then the following error is thrown:

---------------------------------------------------------------------------
AttributeError                            Traceback (most recent call last)
/tmp/ipykernel_1955/95313221.py in <module>
      7     retrain=False,
      8     num_samples=1,
----> 9     verbose=True,
     10 )

/pdrive/dt-env/lib/python3.7/site-packages/darts/utils/utils.py in sanitized_method(self, *args, **kwargs)
    170 
    171                 getattr(self, sanity_check_method)(*only_args.values(), **only_kwargs)
--> 172             return method_to_sanitize(self, *only_args.values(), **only_kwargs)
    173 
    174         return sanitized_method

/pdrive/dt-env/lib/python3.7/site-packages/darts/models/forecasting/forecasting_model.py in historical_forecasts(self, series, past_covariates, future_covariates, num_samples, start, forecast_horizon, stride, retrain, overlap_end, last_points_only, verbose)
    421                 past_covariates=past_covariates,
    422                 future_covariates=future_covariates,
--> 423                 num_samples=num_samples,
    424             )
    425 

/pdrive/dt-env/lib/python3.7/site-packages/darts/models/forecasting/forecasting_model.py in _predict_wrapper(self, n, series, past_covariates, future_covariates, num_samples)
   1008             past_covariates=past_covariates,
   1009             future_covariates=future_covariates,
-> 1010             num_samples=num_samples,
   1011         )
   1012 

/pdrive/dt-env/lib/python3.7/site-packages/darts/utils/torch.py in decorator(self, *args, **kwargs)
     68         with fork_rng():
     69             manual_seed(self._random_instance.randint(0, high=MAX_TORCH_SEED_VALUE))
---> 70             return decorated(self, *args, **kwargs)
     71 
     72     return decorator

/pdrive/dt-env/lib/python3.7/site-packages/darts/models/forecasting/torch_forecasting_model.py in predict(self, n, series, past_covariates, future_covariates, trainer, batch_size, verbose, n_jobs, roll_size, num_samples, num_loader_workers)
   1076         )
   1077 
-> 1078         if self.encoders.encoding_available:
   1079             past_covariates, future_covariates = self.encoders.encode_inference(
   1080                 n=n,

AttributeError: 'NoneType' object has no attribute 'encoding_available'

dennisbader commented 2 years ago

Hey @Joe-TheBro, I tried your code snippet and it runs without issues..

This seems strange to me. The encoders should be initialized when model.fit() is called.

Here are some questions:

Joe-TheBro commented 2 years ago

Questions in order

Thanks for the help!

dennisbader commented 2 years ago

Can you try model = TCNModel.load_model('dt-model.pth.tar') and see if this resolves the issue? load_model() is a class method and returns the loaded model as a new TCNModel object.

Joe-TheBro commented 2 years ago

After running

model = TCNModel.load_model('dt-model.pth.tar')
print(model.epochs_trained)
1

So it seems this does the trick to reinitialize the model. After this, backtesting succeeds!

So, in summary, by not specifying TCNModel when reloading the model, the program defaults to the torch forecasting object instead of the TCNModel object.

I appreciate all your help @dennisbader and commend your patience. As a suggestion going forward, I would either update the documentation to let people know they have to call load_model() on the specific model class when loading, or add a load_model() method to the codebase that checks which model class fits and instantiates it accordingly.

Joe-TheBro commented 2 years ago

Unfortunately, another bug has arisen, although I do not know if it is related. It occurs after running the following backtest code:

backtest_series = model.historical_forecasts(
    series=series_transformed,
    start=series_transformed.get_timestamp_at_point(0.9995),
    # Split data 60 train / 40 validate-test --- value
    forecast_horizon=7,
    stride=5,
    retrain=False,
    num_samples=1, 
    verbose=True
)

An error occurs after predicting over the entire specified length:

ValueError                                Traceback (most recent call last)
/tmp/ipykernel_2006/609873824.py in <module>
      7     retrain=False,
      8     num_samples=1,
----> 9     verbose=True
     10 )
     11 print(backtest_series)

/pdrive/dt-env/lib/python3.7/site-packages/darts/utils/utils.py in sanitized_method(self, *args, **kwargs)
    170 
    171                 getattr(self, sanity_check_method)(*only_args.values(), **only_kwargs)
--> 172             return method_to_sanitize(self, *only_args.values(), **only_kwargs)
    173 
    174         return sanitized_method

/pdrive/dt-env/lib/python3.7/site-packages/darts/models/forecasting/forecasting_model.py in historical_forecasts(self, series, past_covariates, future_covariates, num_samples, start, forecast_horizon, stride, retrain, overlap_end, last_points_only, verbose)
    443                         step=1,
    444                     ),
--> 445                     np.array(last_points_values),
    446                 )
    447 

/pdrive/dt-env/lib/python3.7/site-packages/darts/timeseries.py in from_times_and_values(cls, times, values, fill_missing_dates, freq, columns, fillna_value)
    604             coords[DIMS[1]] = columns
    605 
--> 606         xa = xr.DataArray(values, dims=(times_name,) + DIMS[-2:], coords=coords)
    607 
    608         return cls.from_xarray(

/pdrive/dt-env/lib/python3.7/site-packages/xarray/core/dataarray.py in __init__(self, data, coords, dims, name, attrs, indexes, fastpath)
    404             data = _check_data_shape(data, coords, dims)
    405             data = as_compatible_data(data)
--> 406             coords, dims = _infer_coords_and_dims(data.shape, coords, dims)
    407             variable = Variable(dims, data, attrs, fastpath=True)
    408             indexes = dict(

/pdrive/dt-env/lib/python3.7/site-packages/xarray/core/dataarray.py in _infer_coords_and_dims(shape, coords, dims)
    153             if s != sizes[d]:
    154                 raise ValueError(
--> 155                     f"conflicting sizes for dimension {d!r}: "
    156                     f"length {sizes[d]} on the data but length {s} on "
    157                     f"coordinate {k!r}"

ValueError: conflicting sizes for dimension 'time': length 972 on the data but length 4856 on coordinate 'time'