awslabs / gluonts

Probabilistic time series modeling in Python
https://ts.gluon.ai
Apache License 2.0
4.57k stars 749 forks source link

stable documentation SimpleFeedForwardEstimator example does not work #3103

Closed 300LiterPropofol closed 7 months ago

300LiterPropofol commented 8 months ago

Description

The stable documentation presents a SimpleFeedForwardEstimator example (https://ts.gluon.ai/stable/tutorials/forecasting/extended_tutorial.html#Configuring-an-estimator). However, the example cannot be executed: Python reports an error during execution.

To Reproduce

  1. Create a completely new environment, without installing anything else, from an Anaconda Prompt in Administrator mode: conda create --name mxnet_pure python=3.7
  2. Install only gluonts with the mxnet extra: pip install "gluonts[mxnet]"
  3. Copy and paste the example code from the link above; it fails with an error.

Python, the same code as in the documentation. Since mxnet cannot work with Jupyter Notebook, as reported here (https://github.com/awslabs/gluonts/issues/3102), the example code was copied into a .py file and executed from the Anaconda Prompt.

from gluonts.dataset.field_names import FieldName
from gluonts.dataset.common import ListDataset
from gluonts.dataset.util import to_pandas
# import matplotlib.pyplot as plt
from pprint import pprint
import numpy as np
import pandas as pd

# One "FieldName.<ATTR> = '<value>'" string per FieldName attribute whose
# name does not start with an underscore.
field_names = [
    f"FieldName.{attr_name} = '{attr_value}'"
    for attr_name, attr_value in vars(FieldName).items()
    if not attr_name.startswith("_")
]

"""
pprint(field_names)
["FieldName.ITEM_ID = 'item_id'",
 "FieldName.INFO = 'info'",
 "FieldName.START = 'start'",
 "FieldName.TARGET = 'target'",
 "FieldName.FEAT_STATIC_CAT = 'feat_static_cat'",
 "FieldName.FEAT_STATIC_REAL = 'feat_static_real'",
 "FieldName.FEAT_DYNAMIC_CAT = 'feat_dynamic_cat'",
 "FieldName.FEAT_DYNAMIC_REAL = 'feat_dynamic_real'",
 "FieldName.PAST_FEAT_DYNAMIC_CAT = 'past_feat_dynamic_cat'",
 "FieldName.PAST_FEAT_DYNAMIC_REAL = 'past_feat_dynamic_real'",
 "FieldName.FEAT_DYNAMIC_REAL_LEGACY = 'dynamic_feat'",
 "FieldName.FEAT_DYNAMIC = 'feat_dynamic'",
 "FieldName.PAST_FEAT_DYNAMIC = 'past_feat_dynamic'",
 "FieldName.FEAT_TIME = 'time_feat'",
 "FieldName.FEAT_CONST = 'feat_dynamic_const'",
 "FieldName.FEAT_AGE = 'feat_dynamic_age'",
 "FieldName.OBSERVED_VALUES = 'observed_values'",
 "FieldName.IS_PAD = 'is_pad'",
 "FieldName.FORECAST_START = 'forecast_start'",
 "FieldName.TARGET_DIM_INDICATOR = 'target_dimension_indicator'"]
"""

def create_dataset(num_series, num_steps, period=24, mu=1, sigma=0.3):
    """Generate noisy periodic series plus matching dynamic/static features.

    The first ``ceil(num_series / 2)`` series follow a sinusoid sampled over
    ``[-pi, pi]`` and the remaining ``floor(num_series / 2)`` series follow
    one sampled over ``[0, 2*pi]``. Gaussian noise is added to every series.

    Parameters
    ----------
    num_series : int
        Number of series (rows) to generate.
    num_steps : int
        Length of every series (columns). Expected to be at least
        ``period``; it does not have to be a multiple of ``period``.
    period : int, default 24
        Number of samples in one sinusoid cycle.
    mu, sigma : float
        Mean and standard deviation of the Gaussian noise.

    Returns
    -------
    target : numpy.ndarray, shape (num_series, num_steps)
        Noise plus sinusoidal pattern.
    feat_dynamic_real : numpy.ndarray, shape (num_series, num_steps)
        ``target`` shifted right by one period; the first ``period`` columns
        are zeros.
    feat_static_cat : numpy.ndarray, shape (num_series,)
        ``0.0`` for series of the first sinusoid type, ``1.0`` for the second.
    """
    # noise (drawn first so the random stream is consumed exactly as before)
    noise = np.random.normal(mu, sigma, size=(num_series, num_steps))

    # Tile enough whole periods to cover num_steps (ceil division), then trim,
    # so the pattern always has exactly num_steps columns.
    num_repeats = -(-num_steps // period)

    def _tiled_sinusoid(start, stop):
        # One cycle sampled with `period` points, repeated and cut to length.
        one_cycle = np.sin(np.linspace(start, stop, period))
        return np.tile(one_cycle, num_repeats)[:num_steps]

    # pattern - sinusoid with different phase; both use `period` samples per
    # cycle so the two halves always have the same width.
    sin_minusPi_Pi = _tiled_sinusoid(-np.pi, np.pi)
    sin_Zero_2Pi = _tiled_sinusoid(0, 2 * np.pi)

    num_first_type = int(np.ceil(num_series / 2))
    num_second_type = int(np.floor(num_series / 2))

    pattern = np.concatenate(
        (
            np.tile(sin_minusPi_Pi.reshape(1, -1), (num_first_type, 1)),
            np.tile(sin_Zero_2Pi.reshape(1, -1), (num_second_type, 1)),
        ),
        axis=0,
    )

    target = noise + pattern

    # create time features: use target one period earlier, append with zeros
    feat_dynamic_real = np.concatenate(
        (np.zeros((num_series, period)), target[:, :-period]), axis=1
    )

    # create categorical static feats: use the sinusoid type as a categorical feature
    feat_static_cat = np.concatenate(
        (np.zeros(num_first_type), np.ones(num_second_type)),
        axis=0,
    )

    return target, feat_dynamic_real, feat_static_cat

# define the parameters of the dataset
custom_ds_metadata = {
    "num_series": 100,
    "num_steps": 24 * 7,
    "prediction_length": 24,
    "freq": "1H",
}
# One start period per series. The count and frequency are derived from the
# entries above (instead of repeating the literals 100 and "1H") so they
# cannot drift apart when the parameters are edited.
custom_ds_metadata["start"] = [
    pd.Period("01-01-2019", freq=custom_ds_metadata["freq"])
    for _ in range(custom_ds_metadata["num_series"])
]

# The prediction length is passed as the third positional argument, i.e. it
# is used as the sinusoid period of the synthetic data.
data_out = create_dataset(
    custom_ds_metadata["num_series"],
    custom_ds_metadata["num_steps"],
    custom_ds_metadata["prediction_length"],
)

target, feat_dynamic_real, feat_static_cat = data_out

"""
print(target)
print(feat_dynamic_real)
print(feat_static_cat)

[[1.32015981 0.7612632  0.74272472 ... 1.45697607 1.40383953 1.43911399]
 [1.10845057 0.8962927  0.7400588  ... 1.91216216 0.95436735 1.31003036]
 [1.29672658 0.99477955 0.36300113 ... 1.71635751 1.18727139 0.62563487]
 ...
 [1.22932944 1.38499324 1.3858285  ... 0.74250023 0.31924427 1.18233007]
 [1.31808399 1.43191599 1.67850099 ... 1.11843641 0.95446815 0.85568816]
 [1.31388029 1.07654924 1.9318684  ... 0.33150953 0.47270366 1.37254985]]
[[0.         0.         0.         ... 1.75788996 1.06494913 0.73647041]
 [0.         0.         0.         ... 1.52808105 1.52726468 1.27582639]
 [0.         0.         0.         ... 1.45110741 1.36306829 1.10782492]
 ...
 [0.         0.         0.         ... 0.94085157 0.74588496 1.38034281]
 [0.         0.         0.         ... 0.34828403 0.62805565 1.56957228]
 [0.         0.         0.         ... 0.68219308 1.31306638 0.84959754]]
[0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
 1. 1. 1. 1.]
"""
# Training set: every series with its last `prediction_length` steps cut off
# (target and dynamic feature alike). The loop variables are named so they do
# not shadow the module-level `target` array.
train_ds = ListDataset(
    [
        {
            FieldName.TARGET: series,
            FieldName.START: series_start,
            FieldName.FEAT_DYNAMIC_REAL: [dynamic_real],
            FieldName.FEAT_STATIC_CAT: [static_cat],
        }
        for series, series_start, dynamic_real, static_cat in zip(
            target[:, : -custom_ds_metadata["prediction_length"]],
            custom_ds_metadata["start"],
            feat_dynamic_real[:, : -custom_ds_metadata["prediction_length"]],
            feat_static_cat,
        )
    ],
    freq=custom_ds_metadata["freq"],
)

# Test set: the same entries built from the full-length arrays.
test_ds = ListDataset(
    [
        {
            FieldName.TARGET: series,
            FieldName.START: series_start,
            FieldName.FEAT_DYNAMIC_REAL: [dynamic_real],
            FieldName.FEAT_STATIC_CAT: [static_cat],
        }
        for series, series_start, dynamic_real, static_cat in zip(
            target,
            custom_ds_metadata["start"],
            feat_dynamic_real,
            feat_static_cat,
        )
    ],
    freq=custom_ds_metadata["freq"],
)

# Pull the first entry of each dataset for inspection.
train_entry = next(iter(train_ds))
# print(train_entry.keys())

test_entry = next(iter(test_ds))
# print(test_entry.keys())

# test_series = to_pandas(test_entry)
# train_series = to_pandas(train_entry)

# fig, ax = plt.subplots(2, 1, sharex=True, sharey=True, figsize=(10, 7))

# train_series.plot(ax=ax[0])
# ax[0].grid(which="both")
# ax[0].legend(["train series"], loc="upper left")

# test_series.plot(ax=ax[1])
# ax[1].axvline(train_series.index[-1], color="r")  # end of train dataset
# ax[1].grid(which="both")
# ax[1].legend(["test series", "end of train series"], loc="upper left")

# plt.show()

from gluonts.transform import (
    AddAgeFeature, 
    AddObservedValuesIndicator,
    Chain,
    ExpectedNumInstanceSampler,
    InstanceSplitter,
    SetFieldIfNotPresent,
)

def create_transformation(freq, context_length, prediction_length):
    """Assemble the three-step transformation chain applied to each entry.

    The chain consists of, in order: ``AddObservedValuesIndicator``,
    ``AddAgeFeature`` and ``InstanceSplitter``.

    Parameters
    ----------
    freq
        Accepted for interface compatibility; not used inside this function.
    context_length
        Passed to ``InstanceSplitter`` as ``past_length``.
    prediction_length
        Passed as ``pred_length`` to ``AddAgeFeature``, as ``min_future`` to
        the instance sampler and as ``future_length`` to ``InstanceSplitter``.

    Returns
    -------
    Chain
        The composed transformation.
    """
    # Step 1: writes FieldName.OBSERVED_VALUES based on the target field.
    observed_values_step = AddObservedValuesIndicator(
        target_field=FieldName.TARGET,
        output_field=FieldName.OBSERVED_VALUES,
    )

    # Step 2: writes FieldName.FEAT_AGE (log-scaled) based on the target field.
    age_feature_step = AddAgeFeature(
        target_field=FieldName.TARGET,
        output_field=FieldName.FEAT_AGE,
        pred_length=prediction_length,
        log_scale=True,
    )

    # Step 3: splits the target and the listed time-series fields into
    # past/future pieces at indices chosen by the sampler.
    window_sampler = ExpectedNumInstanceSampler(
        num_instances=1,
        min_future=prediction_length,
    )
    fields_to_split = [
        FieldName.FEAT_AGE,
        FieldName.FEAT_DYNAMIC_REAL,
        FieldName.OBSERVED_VALUES,
    ]
    splitter_step = InstanceSplitter(
        target_field=FieldName.TARGET,
        is_pad_field=FieldName.IS_PAD,
        start_field=FieldName.START,
        forecast_start_field=FieldName.FORECAST_START,
        instance_sampler=window_sampler,
        past_length=context_length,
        future_length=prediction_length,
        time_series_fields=fields_to_split,
    )

    return Chain([observed_values_step, age_feature_step, splitter_step])

# Build the chain with a context window of twice the prediction length
# (can be any appropriate value).
transformation = create_transformation(
    freq=custom_ds_metadata["freq"],
    context_length=2 * custom_ds_metadata["prediction_length"],
    prediction_length=custom_ds_metadata["prediction_length"],
)

# Apply the chain in training mode and show the fields of the first entry.
train_tf = transformation(iter(train_ds), is_train=True)
train_tf_entry = next(iter(train_tf))
print(train_tf_entry.keys())

# Same for the test dataset, in non-training mode. Note that this rebinds
# `test_entry` to the transformed entry.
test_tf = transformation(iter(test_ds), is_train=False)
test_entry = next(iter(test_tf))
print(test_entry.keys())

from gluonts.mx import SimpleFeedForwardEstimator, Trainer
# Training configuration: 5 epochs of 100 batches each on CPU, without
# hybridization.
trainer = Trainer(
    ctx="cpu",
    epochs=5,
    learning_rate=1e-3,
    hybridize=False,
    num_batches_per_epoch=100,
)

# Feed-forward estimator with a single hidden layer of 10 units; the context
# window is twice the prediction length.
estimator = SimpleFeedForwardEstimator(
    num_hidden_dimensions=[10],
    prediction_length=custom_ds_metadata["prediction_length"],
    context_length=2 * custom_ds_metadata["prediction_length"],
    trainer=trainer,
)

# NOTE(review): the reported TypeError is raised from
# np.pad(a, pad_width, constant_values=value) inside gluonts, not from this
# script. The `mode` argument of np.pad presumably became optional only in
# NumPy 1.17, while the listed environment has numpy 1.16.6 - confirm and
# upgrade numpy if so.
predictor = estimator.train(train_ds)

Error message or code output

  0%|                                                                                               | 0/100 [00:00<?, ?it/s]
Traceback (most recent call last):
  File "custom_features.py", line 243, in <module>
    predictor = estimator.train(train_ds)
  File "C:\ProgramData\anaconda3\envs\mxnet_pure\lib\site-packages\gluonts\mx\model\estimator.py", line 243, in train
    cache_data=cache_data,
  File "C:\ProgramData\anaconda3\envs\mxnet_pure\lib\site-packages\gluonts\mx\model\estimator.py", line 219, in train_model
    validation_iter=validation_data_loader,
  File "C:\ProgramData\anaconda3\envs\mxnet_pure\lib\site-packages\gluonts\mx\trainer\_base.py", line 423, in __call__
    num_batches_to_use=self.num_batches_per_epoch,
  File "C:\ProgramData\anaconda3\envs\mxnet_pure\lib\site-packages\gluonts\mx\trainer\_base.py", line 275, in loop
    for batch_no, batch in enumerate(it, start=1):
  File "C:\ProgramData\anaconda3\envs\mxnet_pure\lib\site-packages\tqdm\std.py", line 1182, in __iter__
    for obj in iterable:
  File "C:\ProgramData\anaconda3\envs\mxnet_pure\lib\site-packages\gluonts\itertools.py", line 420, in __iter__
    yield from itertools.islice(self.iterable, self.length)
  File "C:\ProgramData\anaconda3\envs\mxnet_pure\lib\site-packages\gluonts\transform\_base.py", line 112, in __iter__
    self.base_dataset, is_train=self.is_train
  File "C:\ProgramData\anaconda3\envs\mxnet_pure\lib\site-packages\gluonts\transform\_base.py", line 132, in __call__
    for data_entry in data_it:
  File "C:\ProgramData\anaconda3\envs\mxnet_pure\lib\site-packages\gluonts\dataset\loader.py", line 50, in __call__
    yield from batcher(data, self.batch_size)
  File "C:\ProgramData\anaconda3\envs\mxnet_pure\lib\site-packages\gluonts\itertools.py", line 131, in get_batch
    return list(itertools.islice(it, batch_size))
  File "C:\ProgramData\anaconda3\envs\mxnet_pure\lib\site-packages\gluonts\transform\_base.py", line 132, in __call__
    for data_entry in data_it:
  File "C:\ProgramData\anaconda3\envs\mxnet_pure\lib\site-packages\gluonts\transform\_base.py", line 188, in __call__
    for result in self.flatmap_transform(data_entry.copy(), is_train):
  File "C:\ProgramData\anaconda3\envs\mxnet_pure\lib\site-packages\gluonts\transform\split.py", line 161, in flatmap_transform
    yield self._split_instance(entry, idx)
  File "C:\ProgramData\anaconda3\envs\mxnet_pure\lib\site-packages\gluonts\transform\split.py", line 134, in _split_instance
    past_piece, future_piece = self._split_array(entry[ts_field], idx)
  File "C:\ProgramData\anaconda3\envs\mxnet_pure\lib\site-packages\gluonts\transform\split.py", line 118, in _split_array
    value=self.dummy_value,
  File "C:\ProgramData\anaconda3\envs\mxnet_pure\lib\site-packages\gluonts\zebras\_util.py", line 50, in pad_axis
    return np.pad(a, pad_width, constant_values=value)
TypeError: pad() missing 1 required positional argument: 'mode'

Environment

The environment only has the below packages installed

Package            Version
------------------ ------------
annotated-types    0.5.0
certifi            2022.12.7
chardet            3.0.4
colorama           0.4.6
gluonts            0.14.3
graphviz           0.8.4
idna               2.6
importlib-metadata 6.7.0
mxnet              1.7.0.post2
numpy              1.16.6
orjson             3.9.7
pandas             1.2.5
pip                22.3.1
pydantic           2.5.3
pydantic_core      2.14.6
python-dateutil    2.8.2
pytz               2023.3.post1
requests           2.18.4
setuptools         65.6.3
six                1.16.0
toolz              0.12.0
tqdm               4.66.1
typing_extensions  4.7.1
urllib3            1.22
wheel              0.38.4
wincertstore       0.2
zipp               3.15.0
300LiterPropofol commented 8 months ago

TypeError: pad() missing 1 required positional argument: 'mode' seems to be a persistent issue across different examples; the same environment cannot run the anomaly_detection.py example in the /example folder either:

C:\ProgramData\anaconda3\envs\mxnet_pure\lib\site-packages\gluonts\dataset\jsonl.py:74: FutureWarning: Conversion of the second argument of issubdtype from `int` to `np.signedinteger` is deprecated. In future, it will be treated as `np.int32 == np.dtype(int).type`.
  if np.issubdtype(arg.dtype, int):
  0%|                                                                                          | 0/100 [00:00<?, ?it/s]
Traceback (most recent call last):
  File "anomaly_detection.py", line 48, in <module>
    train_output = estimator.train_model(dataset.train)
  File "C:\ProgramData\anaconda3\envs\mxnet_pure\lib\site-packages\gluonts\mx\model\estimator.py", line 219, in train_model
    validation_iter=validation_data_loader,
  File "C:\ProgramData\anaconda3\envs\mxnet_pure\lib\site-packages\gluonts\mx\trainer\_base.py", line 423, in __call__
    num_batches_to_use=self.num_batches_per_epoch,
  File "C:\ProgramData\anaconda3\envs\mxnet_pure\lib\site-packages\gluonts\mx\trainer\_base.py", line 275, in loop
    for batch_no, batch in enumerate(it, start=1):
  File "C:\ProgramData\anaconda3\envs\mxnet_pure\lib\site-packages\tqdm\std.py", line 1182, in __iter__
    for obj in iterable:
  File "C:\ProgramData\anaconda3\envs\mxnet_pure\lib\site-packages\gluonts\itertools.py", line 420, in __iter__
    yield from itertools.islice(self.iterable, self.length)
  File "C:\ProgramData\anaconda3\envs\mxnet_pure\lib\site-packages\gluonts\transform\_base.py", line 112, in __iter__
    self.base_dataset, is_train=self.is_train
  File "C:\ProgramData\anaconda3\envs\mxnet_pure\lib\site-packages\gluonts\transform\_base.py", line 132, in __call__
    for data_entry in data_it:
  File "C:\ProgramData\anaconda3\envs\mxnet_pure\lib\site-packages\gluonts\dataset\loader.py", line 50, in __call__
    yield from batcher(data, self.batch_size)
  File "C:\ProgramData\anaconda3\envs\mxnet_pure\lib\site-packages\gluonts\itertools.py", line 131, in get_batch
    return list(itertools.islice(it, batch_size))
  File "C:\ProgramData\anaconda3\envs\mxnet_pure\lib\site-packages\gluonts\transform\_base.py", line 132, in __call__
    for data_entry in data_it:
  File "C:\ProgramData\anaconda3\envs\mxnet_pure\lib\site-packages\gluonts\transform\_base.py", line 188, in __call__
    for result in self.flatmap_transform(data_entry.copy(), is_train):
  File "C:\ProgramData\anaconda3\envs\mxnet_pure\lib\site-packages\gluonts\transform\split.py", line 161, in flatmap_transform
    yield self._split_instance(entry, idx)
  File "C:\ProgramData\anaconda3\envs\mxnet_pure\lib\site-packages\gluonts\transform\split.py", line 134, in _split_instance
    past_piece, future_piece = self._split_array(entry[ts_field], idx)
  File "C:\ProgramData\anaconda3\envs\mxnet_pure\lib\site-packages\gluonts\transform\split.py", line 118, in _split_array
    value=self.dummy_value,
  File "C:\ProgramData\anaconda3\envs\mxnet_pure\lib\site-packages\gluonts\zebras\_util.py", line 50, in pad_axis
    return np.pad(a, pad_width, constant_values=value)
TypeError: pad() missing 1 required positional argument: 'mode'
300LiterPropofol commented 8 months ago

Anaconda does not seem to work; I managed to work around this by using Docker. This is my requirements.txt:

numpy==1.23.5
matplotlib==3.8.2
gluonts==0.14.3
mxnet==1.6.0
orjson==3.9.7

my Dockerfile

# Use an official Python runtime as a parent image
FROM python:3.9.18-bullseye

# Update the package index and install all system dependencies in a single
# layer. Keeping `apt-get update` and every `apt-get install` in the same RUN
# prevents a cached, stale package index from being reused by a later install
# layer. The apt lists are removed at the end to keep the layer small.
# Packages: shell and C++/OpenMP runtimes, C/C++ toolchain, Fortran compiler
# (gfortran) and musl.
RUN apt-get update && \
    apt-get upgrade -y && \
    apt-get install -y \
        bash \
        build-essential \
        gfortran \
        libgomp1 \
        libstdc++6 \
        musl && \
    rm -rf /var/lib/apt/lists/*

# Set the working directory to /app
WORKDIR /app

# Install any needed packages specified in requirements.txt
# (copied on its own first so this layer is only rebuilt when it changes)
COPY requirements.txt ./
RUN pip install --no-cache-dir -r requirements.txt

# Copy the current directory contents into the container at /app
COPY . .

# Make port 3000 available to the world outside this container
EXPOSE 3000

# Run app.py when the container launches
CMD ["python3", "app.py"]

my docker-compose.yml

# Compose file defining a single service, `app`, built from the current directory.
version: '3'
services:
  app:
    container_name: app
    # Build the image from the Dockerfile in this directory.
    build:
      context: .
    # Publish container port 3000 on host port 3000.
    ports:
      - "3000:3000"
    # Same command as the Dockerfile's CMD.
    command: [ "python3", "app.py" ]
    networks:
      - data-stack
    # Bind-mount the current directory at /app/ in the container.
    # NOTE(review): this presumably hides the files copied into /app at build
    # time, so the container runs the host's current sources - confirm.
    volumes:
      - .:/app/

networks:
  # External network
  # NOTE(review): declared external, so it presumably has to exist before
  # `docker compose up` (e.g. `docker network create data-stack`) - confirm.
  data-stack:
    external: true

Since Docker does not work well with interactive GUI plotting, I also changed every plt.show() into

# Save the figure to a file
plt.savefig("plot2.png")
plt.close()

Now I can run the example!

For anyone who has the same issue, try the exact versions above in Docker.