
'pandas._libs.tslibs.offsets.YearBegin' object has no attribute '_period_dtype_code' #3153

Status: Open · baharian opened this issue 3 months ago

baharian commented 3 months ago

Description

Training a CanonicalRNNEstimator model leads to the following error: AttributeError: 'pandas._libs.tslibs.offsets.YearBegin' object has no attribute '_period_dtype_code'

Weirdly enough, training an MQRNNEstimator model on the same dataset (with its extra estimator parameters) works fine. I have made sure that the pandas DataFrame has a regular time index and that every time step for every item_id has a non-null value. The only difference (apart from the parameters specific to each estimator) is that my version of MQRNNEstimator sets the seeds, whereas I am using the vanilla CanonicalRNNEstimator from GluonTS.
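
In case it helps with triage: I believe the failure can be reproduced at the pandas level, because to_period() looks up a _period_dtype_code attribute on the frequency offset, and the YearBegin offset named in the traceback below does not define one (year-end offsets do). A minimal sketch of that assumption, independent of GluonTS:

import pandas as pd
from pandas.tseries.frequencies import to_offset

# Illustration of my assumption only, not a verified GluonTS code path:
# to_period() needs _period_dtype_code on the offset; YearEnd defines it,
# YearBegin (the offset named in the error below) does not.
year_end = to_offset('Y')     # YearEnd; may emit a FutureWarning on pandas >= 2.2
year_start = to_offset('YS')  # YearBegin
print(hasattr(year_end, '_period_dtype_code'))    # expected: True
print(hasattr(year_start, '_period_dtype_code'))  # expected: False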

To Reproduce

from gluonts.dataset.pandas import PandasDataset
from gluonts.mx import CanonicalRNNEstimator, Trainer
from gluonts.mx.trainer.callback import TrainingHistory

# train_df is a long-format DataFrame with columns 'timestamp', 'item_id', 'q' (target), and 'group'
train_ds = PandasDataset.from_long_dataframe(
    dataframe=train_df,
    target='q',
    item_id='item_id',
    timestamp='timestamp',
    freq='Y',
    static_feature_columns=['group'],
)

history = TrainingHistory()
estimator_params = {
    'freq': 'Y',
    'prediction_length': 1,
    'context_length': 4,
    'trainer': Trainer(epochs=50, learning_rate=1e-4, callbacks=[history]),
}

estimator = CanonicalRNNEstimator(**estimator_params)
predictor = estimator.train(train_ds)

Error message or code output

  0%|                                                                                                                | 0/50 [00:00<?, ?it/s]

---------------------------------------------------------------------------
AttributeError                            Traceback (most recent call last)
File <timed exec>:16

File ~/miniconda3/envs/gluonts_env/lib/python3.11/site-packages/gluonts/mx/model/estimator.py:239, in GluonEstimator.train(self, training_data, validation_data, shuffle_buffer_length, cache_data, **kwargs)
    231 def train(
    232     self,
    233     training_data: Dataset,
   (...)
    237     **kwargs,
    238 ) -> Predictor:
--> 239     return self.train_model(
    240         training_data=training_data,
    241         validation_data=validation_data,
    242         shuffle_buffer_length=shuffle_buffer_length,
    243         cache_data=cache_data,
    244     ).predictor

File ~/miniconda3/envs/gluonts_env/lib/python3.11/site-packages/gluonts/mx/model/estimator.py:216, in GluonEstimator.train_model(self, training_data, validation_data, from_predictor, shuffle_buffer_length, cache_data)
    213 else:
    214     copy_parameters(from_predictor.network, training_network)
--> 216 self.trainer(
    217     net=training_network,
    218     train_iter=training_data_loader,
    219     validation_iter=validation_data_loader,
    220 )
    222 with self.trainer.ctx:
    223     predictor = self.create_predictor(transformation, training_network)

File ~/miniconda3/envs/gluonts_env/lib/python3.11/site-packages/gluonts/mx/trainer/_base.py:420, in Trainer.__call__(self, net, train_iter, validation_iter)
    415 curr_lr = trainer.learning_rate
    416 logger.info(
    417     f"Epoch[{epoch_no}] Learning rate is {curr_lr}"
    418 )
--> 420 epoch_loss = loop(
    421     epoch_no,
    422     train_iter,
    423     num_batches_to_use=self.num_batches_per_epoch,
    424 )
    426 should_continue = self.callbacks.on_train_epoch_end(
    427     epoch_no=epoch_no,
    428     epoch_loss=loss_value(epoch_loss),
    429     training_network=net,
    430     trainer=trainer,
    431 )
    433 if is_validation_available:

File ~/miniconda3/envs/gluonts_env/lib/python3.11/site-packages/gluonts/mx/trainer/_base.py:275, in Trainer.__call__.<locals>.loop(epoch_no, batch_iter, num_batches_to_use, is_training)
    272 it = tqdm(batch_iter, total=num_batches_to_use)
    273 any_batches = False
--> 275 for batch_no, batch in enumerate(it, start=1):
    276     any_batches = True
    278     # `batch` here is expected to be a dictionary whose fields
    279     # should correspond 1-to-1 with the network inputs
    280     # see below how `batch.values()` is fed into the network

File ~/miniconda3/envs/gluonts_env/lib/python3.11/site-packages/tqdm/std.py:1182, in tqdm.__iter__(self)
   1179 time = self._time
   1181 try:
-> 1182     for obj in iterable:
   1183         yield obj
   1184         # Update and possibly print the progressbar.
   1185         # Note: does not call self.update(1) for speed optimisation.

File ~/miniconda3/envs/gluonts_env/lib/python3.11/site-packages/gluonts/itertools.py:415, in IterableSlice.__iter__(self)
    414 def __iter__(self):
--> 415     yield from itertools.islice(self.iterable, self.length)

File ~/miniconda3/envs/gluonts_env/lib/python3.11/site-packages/gluonts/transform/_base.py:111, in TransformedDataset.__iter__(self)
    110 def __iter__(self) -> Iterator[DataEntry]:
--> 111     yield from self.transformation(
    112         self.base_dataset, is_train=self.is_train
    113     )

File ~/miniconda3/envs/gluonts_env/lib/python3.11/site-packages/gluonts/transform/_base.py:132, in MapTransformation.__call__(self, data_it, is_train)
    129 def __call__(
    130     self, data_it: Iterable[DataEntry], is_train: bool
    131 ) -> Iterator:
--> 132     for data_entry in data_it:
    133         try:
    134             yield self.map_transform(data_entry.copy(), is_train)

File ~/miniconda3/envs/gluonts_env/lib/python3.11/site-packages/gluonts/dataset/loader.py:50, in Batch.__call__(self, data, is_train)
     49 def __call__(self, data, is_train):
---> 50     yield from batcher(data, self.batch_size)

File ~/miniconda3/envs/gluonts_env/lib/python3.11/site-packages/gluonts/itertools.py:131, in batcher.<locals>.get_batch()
    130 def get_batch():
--> 131     return list(itertools.islice(it, batch_size))

File ~/miniconda3/envs/gluonts_env/lib/python3.11/site-packages/gluonts/transform/_base.py:132, in MapTransformation.__call__(self, data_it, is_train)
    129 def __call__(
    130     self, data_it: Iterable[DataEntry], is_train: bool
    131 ) -> Iterator:
--> 132     for data_entry in data_it:
    133         try:
    134             yield self.map_transform(data_entry.copy(), is_train)

File ~/miniconda3/envs/gluonts_env/lib/python3.11/site-packages/gluonts/transform/_base.py:186, in FlatMapTransformation.__call__(self, data_it, is_train)
    182 def __call__(
    183     self, data_it: Iterable[DataEntry], is_train: bool
    184 ) -> Iterator:
    185     num_idle_transforms = 0
--> 186     for data_entry in data_it:
    187         num_idle_transforms += 1
    188         for result in self.flatmap_transform(data_entry.copy(), is_train):

File ~/miniconda3/envs/gluonts_env/lib/python3.11/site-packages/gluonts/itertools.py:87, in Cyclic.__iter__(self)
     85 at_least_one = False
     86 while True:
---> 87     for el in self.iterable:
     88         at_least_one = True
     89         yield el

File ~/miniconda3/envs/gluonts_env/lib/python3.11/site-packages/gluonts/transform/_base.py:111, in TransformedDataset.__iter__(self)
    110 def __iter__(self) -> Iterator[DataEntry]:
--> 111     yield from self.transformation(
    112         self.base_dataset, is_train=self.is_train
    113     )

File ~/miniconda3/envs/gluonts_env/lib/python3.11/site-packages/gluonts/transform/_base.py:132, in MapTransformation.__call__(self, data_it, is_train)
    129 def __call__(
    130     self, data_it: Iterable[DataEntry], is_train: bool
    131 ) -> Iterator:
--> 132     for data_entry in data_it:
    133         try:
    134             yield self.map_transform(data_entry.copy(), is_train)

File ~/miniconda3/envs/gluonts_env/lib/python3.11/site-packages/gluonts/transform/_base.py:132, in MapTransformation.__call__(self, data_it, is_train)
    129 def __call__(
    130     self, data_it: Iterable[DataEntry], is_train: bool
    131 ) -> Iterator:
--> 132     for data_entry in data_it:
    133         try:
    134             yield self.map_transform(data_entry.copy(), is_train)

    [... skipping similar frames: MapTransformation.__call__ at line 132 (1 times)]

File ~/miniconda3/envs/gluonts_env/lib/python3.11/site-packages/gluonts/transform/_base.py:132, in MapTransformation.__call__(self, data_it, is_train)
    129 def __call__(
    130     self, data_it: Iterable[DataEntry], is_train: bool
    131 ) -> Iterator:
--> 132     for data_entry in data_it:
    133         try:
    134             yield self.map_transform(data_entry.copy(), is_train)

File ~/miniconda3/envs/gluonts_env/lib/python3.11/site-packages/gluonts/dataset/pandas.py:217, in PandasDataset.__iter__(self)
    216 def __iter__(self):
--> 217     yield from self._data_entries
    218     self.unchecked = True

File ~/miniconda3/envs/gluonts_env/lib/python3.11/site-packages/gluonts/dataset/pandas.py:174, in PandasDataset._pair_to_dataentry(self, item_id, df)
    169     df.index = pd.DatetimeIndex(df[self.timestamp]).to_period(
    170         freq=self.freq
    171     )
    173 if not isinstance(df.index, pd.PeriodIndex):
--> 174     df = df.to_period(freq=self.freq)
    176 if not self.assume_sorted:
    177     df.sort_index(inplace=True)

File ~/miniconda3/envs/gluonts_env/lib/python3.11/site-packages/pandas/core/frame.py:11985, in DataFrame.to_period(self, freq, axis, copy)
  11982 if not isinstance(old_ax, DatetimeIndex):
  11983     raise TypeError(f"unsupported Type {type(old_ax).__name__}")
> 11985 new_ax = old_ax.to_period(freq=freq)
  11987 setattr(new_obj, axis_name, new_ax)
  11988 return new_obj

File ~/miniconda3/envs/gluonts_env/lib/python3.11/site-packages/pandas/core/indexes/extension.py:95, in _inherit_from_data.<locals>.method(self, *args, **kwargs)
     93 if "inplace" in kwargs:
     94     raise ValueError(f"cannot use inplace with {type(self).__name__}")
---> 95 result = attr(self._data, *args, **kwargs)
     96 if wrap:
     97     if isinstance(result, type(self._data)):

File ~/miniconda3/envs/gluonts_env/lib/python3.11/site-packages/pandas/core/arrays/datetimes.py:1224, in DatetimeArray.to_period(self, freq)
   1220         res = freq
   1222     freq = res
-> 1224 return PeriodArray._from_datetime64(self._ndarray, freq, tz=self.tz)

File ~/miniconda3/envs/gluonts_env/lib/python3.11/site-packages/pandas/core/arrays/period.py:322, in PeriodArray._from_datetime64(cls, data, freq, tz)
    307 @classmethod
    308 def _from_datetime64(cls, data, freq, tz=None) -> Self:
    309     """
    310     Construct a PeriodArray from a datetime64 array
    311 
   (...)
    320     PeriodArray[freq]
    321     """
--> 322     data, freq = dt64arr_to_periodarr(data, freq, tz)
    323     dtype = PeriodDtype(freq)
    324     return cls(data, dtype=dtype)

File ~/miniconda3/envs/gluonts_env/lib/python3.11/site-packages/pandas/core/arrays/period.py:1167, in dt64arr_to_periodarr(data, freq, tz)
   1165 reso = get_unit_from_dtype(data.dtype)
   1166 freq = Period._maybe_convert_freq(freq)
-> 1167 base = freq._period_dtype_code
   1168 return c_dt64arr_to_periodarr(data.view("i8"), base, tz, reso=reso), freq

AttributeError: 'pandas._libs.tslibs.offsets.YearBegin' object has no attribute '_period_dtype_code'
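
As a possible workaround on my side (an untested sketch, not something from the GluonTS docs): the gluonts/dataset/pandas.py frame above only calls df.to_period() when the index is not already a PeriodIndex, so building the period index up front before constructing the dataset might sidestep the YearBegin offset entirely:

import pandas as pd
from gluonts.dataset.pandas import PandasDataset

# Untested sketch: give the DataFrame a year-frequency PeriodIndex up front so
# the isinstance(df.index, pd.PeriodIndex) check shown in the traceback
# short-circuits and PandasDataset never calls to_period() itself. Assumes the
# 'timestamp' column parses cleanly into yearly periods.
train_df_p = train_df.copy()
train_df_p.index = pd.PeriodIndex(train_df_p['timestamp'], freq='Y')
train_ds = PandasDataset.from_long_dataframe(
    dataframe=train_df_p.drop(columns=['timestamp']),
    target='q',
    item_id='item_id',
    freq='Y',
    static_feature_columns=['group'],
)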

Environment