Closed Mr-Geekman closed 1 year ago
Script for old version (1.15.1):
import time
import random
import torch
import pandas as pd
import numpy as np
from loguru import logger
from etna.datasets.tsdataset import TSDataset
from etna.datasets import generate_ar_df
from etna.pipeline import Pipeline
from etna.metrics import SMAPE, MAPE, MAE
from etna.transforms import DateFlagsTransform
from etna.transforms import PytorchForecastingTransform
from etna.models.nn import TFTModel
# Number of steps to forecast: passed as the pipeline horizon and as max_prediction_length.
HORIZON = 7
def generate_tsdataset(dataset_config) -> TSDataset:
    """Build a synthetic daily ``TSDataset`` described by ``dataset_config``.

    ``dataset_config`` must provide the keys ``"periods"`` (length of the
    target series), ``"n_segments"``, ``"regressors"`` (truthy to add a
    ``regressor`` column that is declared as known in the future),
    ``"exogs"`` (truthy to add an ``exog`` column) and ``"horizon"`` (number
    of extra timestamps generated for the additional columns).

    Every combination of the ``exogs`` / ``regressors`` flags is supported;
    when both are falsy the dataset is created without additional data.

    Raises ``KeyError`` if ``dataset_config`` is a dict missing one of the
    required keys.
    """
    periods = dataset_config["periods"]
    n_segments = dataset_config["n_segments"]
    regressors = dataset_config["regressors"]
    exogs = dataset_config["exogs"]
    horizon = dataset_config["horizon"]

    def _generate(n_periods):
        # All generated frames share start, frequency and segment count.
        return generate_ar_df(
            periods=n_periods,
            start_time="2021-06-01",
            n_segments=n_segments,
            freq="D",
        )

    df = _generate(periods)

    df_exog = None
    if exogs:
        df_exog = _generate(periods + horizon).rename(columns={"target": "exog"})
    if regressors:
        df_regressors = _generate(periods + horizon).rename(columns={"target": "regressor"})
        if df_exog is None:
            # pd.concat silently drops None inputs, which would leave a frame
            # without the timestamp/segment columns; keep the full frame instead.
            df_exog = df_regressors
        else:
            df_exog = pd.concat((df_exog, df_regressors[["regressor"]]), axis=1)

    # Only convert to the wide format when there is something to convert.
    if df_exog is not None:
        df_exog = TSDataset.to_dataset(df_exog)

    df = TSDataset.to_dataset(df)
    ts = TSDataset(
        df=df,
        freq="D",
        df_exog=df_exog,
        known_future=["regressor"] if regressors else (),
    )
    return ts
def set_seed(seed: int = 42):
    """Seed Python's ``random``, NumPy and PyTorch (including all CUDA devices)."""
    seeders = (
        random.seed,
        np.random.seed,
        torch.manual_seed,
        torch.cuda.manual_seed_all,
    )
    for apply_seed in seeders:
        apply_seed(seed)
def main():
    """Backtest a TFT pipeline on synthetic data and log run time and mean MAE."""
    set_seed()

    # A CSV dataset can be loaded instead of the synthetic one:
    # original_df = pd.read_csv("examples/data/example_dataset.csv")
    # df = TSDataset.to_dataset(original_df)
    # ts = TSDataset(df, freq="D")
    ts = generate_tsdataset(
        {
            "n_segments": 100,
            "periods": 500,
            "exogs": True,
            "regressors": True,
            "horizon": 14,
        }
    )

    # Seed again after the data is generated, before transforms and model are built.
    set_seed()

    date_flags = DateFlagsTransform(
        day_number_in_week=True,
        day_number_in_month=False,
        out_column="regressor_dateflag",
    )
    pf_transform = PytorchForecastingTransform(
        max_encoder_length=21,
        min_encoder_length=21,
        max_prediction_length=HORIZON,
        time_varying_known_reals=["time_idx"],
        time_varying_known_categoricals=["regressor_dateflag_day_number_in_week"],
        time_varying_unknown_reals=["target"],
        static_categoricals=["segment"],
        target_normalizer=None,
    )
    model = TFTModel(trainer_kwargs={"max_epochs": 1})
    pipeline = Pipeline(
        model=model,
        transforms=[date_flags, pf_transform],
        horizon=HORIZON,
    )

    # Only the backtest itself is timed.
    started_at = time.perf_counter()
    metrics_df, _forecast_df, _fold_info_df = pipeline.backtest(
        ts,
        metrics=[SMAPE(), MAPE(), MAE()],
        n_folds=3,
        n_jobs=1,
    )
    elapsed = time.perf_counter() - started_at

    logger.info(f"Run time: {elapsed:.3f}")
    logger.info(f"Metrics: {metrics_df['MAE'].mean():.3f}")


if __name__ == "__main__":
    main()
Results:
Script for new version:
import time
import random
import torch
import pandas as pd
import numpy as np
from loguru import logger
from etna.datasets.tsdataset import TSDataset
from etna.datasets import generate_ar_df
from etna.pipeline import Pipeline
from etna.metrics import SMAPE, MAPE, MAE
from etna.transforms import DateFlagsTransform
from etna.models.nn.utils import PytorchForecastingDatasetBuilder
from etna.models.nn import TFTModel
# Number of steps to forecast: passed as the pipeline horizon and as max_prediction_length.
HORIZON = 7
def generate_tsdataset(dataset_config) -> TSDataset:
    """Build a synthetic daily ``TSDataset`` described by ``dataset_config``.

    ``dataset_config`` must provide the keys ``"periods"`` (length of the
    target series), ``"n_segments"``, ``"regressors"`` (truthy to add a
    ``regressor`` column that is declared as known in the future),
    ``"exogs"`` (truthy to add an ``exog`` column) and ``"horizon"`` (number
    of extra timestamps generated for the additional columns).

    Every combination of the ``exogs`` / ``regressors`` flags is supported;
    when both are falsy the dataset is created without additional data.

    Raises ``KeyError`` if ``dataset_config`` is a dict missing one of the
    required keys.
    """
    periods = dataset_config["periods"]
    n_segments = dataset_config["n_segments"]
    regressors = dataset_config["regressors"]
    exogs = dataset_config["exogs"]
    horizon = dataset_config["horizon"]

    def _generate(n_periods):
        # All generated frames share start, frequency and segment count.
        return generate_ar_df(
            periods=n_periods,
            start_time="2021-06-01",
            n_segments=n_segments,
            freq="D",
        )

    df = _generate(periods)

    df_exog = None
    if exogs:
        df_exog = _generate(periods + horizon).rename(columns={"target": "exog"})
    if regressors:
        df_regressors = _generate(periods + horizon).rename(columns={"target": "regressor"})
        if df_exog is None:
            # pd.concat silently drops None inputs, which would leave a frame
            # without the timestamp/segment columns; keep the full frame instead.
            df_exog = df_regressors
        else:
            df_exog = pd.concat((df_exog, df_regressors[["regressor"]]), axis=1)

    # Only convert to the wide format when there is something to convert.
    if df_exog is not None:
        df_exog = TSDataset.to_dataset(df_exog)

    df = TSDataset.to_dataset(df)
    ts = TSDataset(
        df=df,
        freq="D",
        df_exog=df_exog,
        known_future=["regressor"] if regressors else (),
    )
    return ts
def set_seed(seed: int = 42):
    """Seed Python's ``random``, NumPy and PyTorch (including all CUDA devices)."""
    seeders = (
        random.seed,
        np.random.seed,
        torch.manual_seed,
        torch.cuda.manual_seed_all,
    )
    for apply_seed in seeders:
        apply_seed(seed)
def main():
    """Backtest a TFT pipeline on synthetic data and log run time and mean MAE."""
    set_seed()

    # A CSV dataset can be loaded instead of the synthetic one:
    # original_df = pd.read_csv("examples/data/example_dataset.csv")
    # df = TSDataset.to_dataset(original_df)
    # ts = TSDataset(df, freq="D")
    ts = generate_tsdataset(
        {
            "n_segments": 100,
            "periods": 500,
            "exogs": True,
            "regressors": True,
            "horizon": 14,
        }
    )

    # Seed again after the data is generated, before the transform and model are built.
    set_seed()

    date_flags = DateFlagsTransform(
        day_number_in_week=True,
        day_number_in_month=False,
        out_column="regressor_dateflag",
    )
    dataset_builder = PytorchForecastingDatasetBuilder(
        max_encoder_length=21,
        min_encoder_length=21,
        max_prediction_length=HORIZON,
        time_varying_known_reals=["time_idx"],
        time_varying_known_categoricals=["regressor_dateflag_day_number_in_week"],
        time_varying_unknown_reals=["target"],
        static_categoricals=["segment"],
        target_normalizer=None,
    )
    model = TFTModel(
        dataset_builder=dataset_builder,
        trainer_params={"max_epochs": 1},
    )
    pipeline = Pipeline(model=model, transforms=[date_flags], horizon=HORIZON)

    # Only the backtest itself is timed.
    started_at = time.perf_counter()
    metrics_df, _forecast_df, _fold_info_df = pipeline.backtest(
        ts,
        metrics=[SMAPE(), MAPE(), MAE()],
        n_folds=3,
        n_jobs=1,
    )
    elapsed = time.perf_counter() - started_at

    logger.info(f"Run time: {elapsed:.3f}")
    logger.info(f"Metrics: {metrics_df['MAE'].mean():.3f}")


if __name__ == "__main__":
    main()
Results:
Merging #1322 (0f994a0) into master (75e8fc1) will increase coverage by 0.14%. The diff coverage is 100.00%.
:exclamation: Your organization is not using the GitHub App Integration. As a result you may experience degraded service beginning May 15th. Please install the Github App Integration for your organization. Read more.
@@ Coverage Diff @@
## master #1322 +/- ##
==========================================
+ Coverage 88.95% 89.09% +0.14%
==========================================
Files 193 204 +11
Lines 12319 12638 +319
==========================================
+ Hits 10958 11260 +302
- Misses 1361 1378 +17
Impacted Files | Coverage Δ | |
---|---|---|
etna/models/nn/utils.py | 85.61% <100.00%> (+0.40%) | :arrow_up: |
... and 11 files with indirect coverage changes
:mega: We’re building smart automated test selection to slash your CI/CD build times. Learn more
🚀 Deployed on https://deploy-preview-1322--etna-docs.netlify.app
Before submitting (must do checklist)
Proposed Changes
Optimize creation of the `time_idx` feature.
Closing issues