awslabs / gluonts

Probabilistic time series modeling in Python
https://ts.gluon.ai
Apache License 2.0
4.58k stars 750 forks source link

TreePredictor not compatible with real dynamic features #2221

Open axeng opened 2 years ago

axeng commented 2 years ago

Description

When training a `TreeEstimator` (which wraps the rotbaum `TreePredictor`) on a dataset that contains real-valued dynamic features (`feat_dynamic_real`), training crashes with a `TypeError`.

To Reproduce

import numpy as np
import pandas as pd

from gluonts.dataset.pandas import PandasDataset
from gluonts.model.rotbaum import TreeEstimator

# Generate dataset (see: https://ts.gluon.ai/stable/tutorials/data_manipulation/pandasdataframes.html#Include-static-and-dynamic-features)
def generate_single_ts(date_range, item_id=None) -> pd.DataFrame:
    """Build a noisy synthetic series as the sum of `n_f` sine waves.

    Every wave gets a randomly drawn amplitude and phase, and Gaussian noise
    is added to the summed signal. All draws use NumPy's global random state.

    `date_range` becomes the index of the returned frame and its length sets
    the number of observations. When `item_id` is not None it is stored in an
    extra "item_id" column next to "target".
    """
    n_f = 2
    n_steps = len(date_range)

    # Periods 24, 12, ... as a row vector so they broadcast against time.
    periods = (24 / np.arange(1, n_f + 1)).reshape(1, n_f)
    amplitudes = np.random.normal(1, 0.3, size=(1, n_f))
    phases = 2 * np.pi * np.random.uniform(size=(1, n_f))

    # Time steps as a column vector: (n_steps, 1) against (1, n_f).
    steps = np.arange(n_steps)[:, np.newaxis]
    waves = amplitudes * np.sin(np.pi * steps / periods + phases)
    noise = np.random.normal(0, 0.1, size=n_steps)
    target = waves.sum(axis=1) + noise

    frame = pd.DataFrame({"target": target}, index=date_range)
    if item_id is None:
        return frame
    frame["item_id"] = item_id
    return frame

def generate_single_ts_with_features(date_range, item_id) -> pd.DataFrame:
    """Return the synthetic series with one extra random feature column.

    The frame produced by `generate_single_ts` gains a "dynamic_real_1"
    column holding one standard-normal draw per row.
    """
    frame = generate_single_ts(date_range, item_id)
    n_rows = len(frame)

    frame["dynamic_real_1"] = np.random.normal(size=n_rows)

    return frame

# A single item with 50 daily observations starting on 2000-01-01.
dates = pd.date_range(start="2000-01-01", freq="D", periods=50)
ts = generate_single_ts_with_features(dates, item_id=0)

# Register the extra column as a real-valued dynamic feature.
ts_dataset = PandasDataset(ts, feat_dynamic_real=["dynamic_real_1"])

# Only the dynamic real features are switched on.
tree_options = dict(
    freq=ts_dataset.freq,
    prediction_length=10,
    use_feat_dynamic_cat=False,
    use_feat_dynamic_real=True,
    use_feat_static_real=False,
    method="QRX",
)
estimator = TreeEstimator(**tree_options)

# This call raises the TypeError shown in the traceback.
predictor = estimator.train(ts_dataset)

Error message or code output

---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
----> 1 predictor = estimator.train(ts_dataset)

~/.venv/lib/python3.7/site-packages/gluonts/model/rotbaum/_estimator.py in train(self, training_data, validation_dataset)
     40         self, training_data: Dataset, validation_dataset=None
     41     ) -> Predictor:
---> 42         return self.predictor.train(training_data)
     43 
     44 

~/.venv/lib/python3.7/site-packages/gluonts/model/rotbaum/_predictor.py in train(self, training_data, train_QRX_only_using_timestep)
    194             assert self.freq == next(iter(training_data))["start"].freq
    195         self.preprocess_object.preprocess_from_list(
--> 196             ts_list=list(training_data), change_internal_variables=True
    197         )
    198         feature_data, target_data = (

~/.venv/lib/python3.7/site-packages/gluonts/model/rotbaum/_preprocess.py in preprocess_from_list(self, ts_list, change_internal_variables)
    230         for time_series in ts_list:
    231             ts_feature_data, ts_target_data = self.preprocess_from_single_ts(
--> 232                 time_series=time_series
    233             )
    234             feature_data += list(ts_feature_data)

~/.venv/lib/python3.7/site-packages/gluonts/model/rotbaum/_preprocess.py in preprocess_from_single_ts(self, time_series)
    184             else:
    185                 featurized_data = self.make_features(
--> 186                     altered_time_series, starting_index
    187                 )
    188                 feature_data.append(featurized_data)

~/.venv/lib/python3.7/site-packages/gluonts/model/rotbaum/_preprocess.py in make_features(self, time_series, starting_index)
    458                 )
    459             )
--> 460             if self.use_feat_dynamic_real
    461             else []
    462         )

~/.venv/lib/python3.7/site-packages/gluonts/model/rotbaum/_preprocess.py in <listcomp>(.0)
    453                                 ]
    454                             )
--> 455                             for ts in time_series["feat_dynamic_real"]
    456                         ]
    457                     ]

TypeError: _pre_transform() missing 2 required positional arguments: 'subtract_mean' and 'count_nans'

Environment

lostella commented 2 years ago

@zoolhasson could you take a look? It looks like `_pre_transform` is being called without its required `subtract_mean` and `count_nans` arguments in the calls here and here, but I'm not sure what the intended behavior is.