jdb78 / pytorch-forecasting

Time series forecasting with PyTorch
https://pytorch-forecasting.readthedocs.io/
MIT License
3.84k stars 608 forks source link

DeepAR.from_dataset: "AssertionError: target has to be real" #601

Open calyptis opened 3 years ago

calyptis commented 3 years ago

Expected behavior

I want to initialise a DeepAR model from a TimeSeriesDataSet object by calling DeepAR.from_dataset(). To do so, I am closely following an example from the official tutorials, with minor changes to the artificial dataset and with DeepAR used in place of NBeats.

Actual behavior

However, the initialisation fails with an AssertionError stating "target value has to be real" (where "value" is the name of my target column). Below is the code snippet to reproduce the error.

Code to reproduce the problem

import pandas as pd
from pytorch_forecasting import TimeSeriesDataSet, DeepAR
from pytorch_forecasting.data.examples import generate_ar_data
from pytorch_forecasting.data import NaNLabelEncoder

# Generate data
data = generate_ar_data(seasonality=10.0, timesteps=400, n_series=100, seed=42)
# Ensure that categoricals are not of numeric type
data["series"] = data.series.astype(str).astype("category")
# Adding one covariate to the data just to use more parameters when constructing TimeSeriesDataSet
data["quarter"] = (
    (pd.Timestamp("2020-01-01") + pd.to_timedelta(data.time_idx, "D"))
    .dt.quarter.astype(str).astype("category")
)
# In this example, targets should always be positive
data["value"] = data.value.abs()

data.info()
>>> output
        <class 'pandas.core.frame.DataFrame'>
        RangeIndex: 40000 entries, 0 to 39999
        Data columns (total 4 columns):
         #   Column    Non-Null Count  Dtype   
        ---  ------    --------------  -----   
         0   series    40000 non-null  category
         1   time_idx  40000 non-null  int64   
         2   value     40000 non-null  float64 
         3   quarter   40000 non-null  category
        dtypes: category(2), float64(1), int64(1)
        memory usage: 708.3 KB

# Setting parameters
max_prediction_length = 20  # Period of forecasting
max_encoder_length = 60  # Period to look back in time
training_cutoff = data["time_idx"].max() - max_prediction_length  # Last observation in training set

# Defining TimeSeriesDataSet
training = TimeSeriesDataSet(
    data[lambda x: x.time_idx <= training_cutoff],
    time_idx="time_idx",
    target="value",
    group_ids=["series"],
    max_encoder_length=max_encoder_length,
    max_prediction_length=max_prediction_length,
    static_categoricals=["series"],
    categorical_encoders={"series": NaNLabelEncoder().fit(data.series)},
    time_varying_known_reals=["time_idx"],
    time_varying_known_categoricals=["quarter"],
    add_relative_time_idx=True,
    target_normalizer=None  # Just to make sure the error is not due to a certain chosen normalizer
)

# Initialise DeepAR by keeping default parameters for brevity of the example
model = DeepAR.from_dataset(
    training
)
>>> output
---------------------------------------------------------------------------
AssertionError                            Traceback (most recent call last)
<ipython-input-15-817bdbd1bf02> in <module>
----> 1 model = DeepAR.from_dataset(
      2     training
      3 )

~/opt/anaconda3/envs/pytorch/lib/python3.9/site-packages/pytorch_forecasting/models/deepar/__init__.py in from_dataset(cls, dataset, allowed_encoder_known_variable_names, **kwargs)
    185             or all([not isinstance(normalizer, NaNLabelEncoder) for normalizer in dataset.target_normalizer])
    186         ), "target(s) should be continuous - categorical targets are not supported"  # todo: remove this restriction
--> 187         return super().from_dataset(
    188             dataset, allowed_encoder_known_variable_names=allowed_encoder_known_variable_names, **new_kwargs
    189         )

~/opt/anaconda3/envs/pytorch/lib/python3.9/site-packages/pytorch_forecasting/models/base_model.py in from_dataset(cls, dataset, allowed_encoder_known_variable_names, **kwargs)
   1353         )
   1354         new_kwargs.update(kwargs)
-> 1355         return super().from_dataset(dataset, **new_kwargs)
   1356 
   1357     def calculate_prediction_actual_by_variable(

~/opt/anaconda3/envs/pytorch/lib/python3.9/site-packages/pytorch_forecasting/models/base_model.py in from_dataset(cls, dataset, **kwargs)
   1635 
   1636         kwargs.setdefault("target_lags", {name: dataset._get_lagged_names(name) for name in lags})
-> 1637         return super().from_dataset(dataset, **kwargs)
   1638 
   1639     def output_to_prediction(

~/opt/anaconda3/envs/pytorch/lib/python3.9/site-packages/pytorch_forecasting/models/base_model.py in from_dataset(cls, dataset, **kwargs)
    905         if "output_transformer" not in kwargs:
    906             kwargs["output_transformer"] = dataset.target_normalizer
--> 907         net = cls(**kwargs)
    908         net.dataset_parameters = dataset.get_parameters()
    909         if dataset.multi_target:

~/opt/anaconda3/envs/pytorch/lib/python3.9/site-packages/pytorch_forecasting/models/deepar/__init__.py in __init__(self, cell_type, hidden_size, rnn_layers, dropout, static_categoricals, static_reals, time_varying_categoricals_encoder, time_varying_categoricals_decoder, categorical_groups, time_varying_reals_encoder, time_varying_reals_decoder, embedding_sizes, embedding_paddings, embedding_labels, x_reals, x_categoricals, n_validation_samples, n_plotting_samples, target, target_lags, loss, logging_metrics, **kwargs)
    132         ), "Encoder and decoder variables have to be the same apart from target variable"
    133         for targeti in to_list(target):
--> 134             assert (
    135                 targeti in time_varying_reals_encoder
    136             ), f"target {targeti} has to be real"  # todo: remove this restriction

AssertionError: target value has to be real

The column value, which is the target, is clearly a float and I am not using any transformation or normalisation. Hence, I don't really know why this error is thrown. Any support on this would be much appreciated.

jdb78 commented 3 years ago

This is a pretty misleading error message. Your target has to be listed in time_varying_unknown_reals (e.g. time_varying_unknown_reals=["value"] when constructing the TimeSeriesDataSet); otherwise, an autoregressive model makes little sense.