awslabs / gluonts

Probabilistic time series modeling in Python
https://ts.gluon.ai
Apache License 2.0

The prediction intervals are narrow but cannot cover the observed values well #1275

Open wangyueer97 opened 3 years ago

wangyueer97 commented 3 years ago

Hi, I'm using DeepAR for my dataset, but the results I got are rather frustrating. I'm confused about why the prediction intervals are so narrow and why they cannot cover the observed values well. Increasing the number of training epochs doesn't bring a better result. [attached forecast plots: 1; 3 (with scaling)]

I attach my code and dataset for reproduction.

import json

import matplotlib.pyplot as plt

from gluonts.dataset.common import FileDataset
from gluonts.distribution import StudentTOutput
from gluonts.evaluation import Evaluator
from gluonts.evaluation.backtest import make_evaluation_predictions
from gluonts.model import deepar
from gluonts.mx.trainer import Trainer

train_data = FileDataset(r'E:/1.13/train', freq="H")
test_data = FileDataset(r'E:/1.13/test', freq="H")

# trainer = Trainer(ctx="cpu", epochs=200, batch_size=32, num_batches_per_epoch=50, learning_rate=1e-3)
trainer = Trainer(ctx="cpu", epochs=10, num_batches_per_epoch=100, learning_rate=1e-3)

estimator = deepar.DeepAREstimator(
    num_layers=2,                 # number of RNN layers (default: 2)
    num_cells=40,                 # number of RNN cells per layer (default: 40)
    prediction_length=12,
    context_length=12,
    scaling=True,
    # distr_output=StudentTOutput(),
    use_feat_dynamic_real=False,  # whether to use the feat_dynamic_real field from the data (default: False)
    cell_type="lstm",             # either "lstm" or "gru"
    dropout_rate=0.1,             # dropout regularization parameter (default: 0.1)
    # num_parallel_samples=20,    # number of evaluation samples per time series to increase parallelism
    #                             # during inference; does not affect accuracy (default: 100)
    freq="H",                     # frequency of the data to train on and predict
    trainer=trainer,
)
predictor = estimator.train(training_data=train_data)

forecast_it, ts_it = make_evaluation_predictions(
    dataset=test_data,      # test dataset
    predictor=predictor,  # predictor
    num_samples=100,      # number of sample paths we want for evaluation
)

forecasts = list(forecast_it)   # materialize the sample forecasts
tss = list(ts_it)               # materialize the corresponding target series

# ts_entry = tss[0]
# forecast_entry = forecasts[0]

for i in range(10):
    plt.subplot(5, 2, i + 1)
    ts_entry = tss[i]
    forecast_entry = forecasts[i]
    ts_entry[0][-120:].plot()   # plot the last 120 observed values of the target
    forecast_entry.plot(color='g', prediction_intervals=(50.0, 90.0))
plt.show()

evaluator = Evaluator(quantiles=[0.1, 0.5, 0.9])  
agg_metrics, item_metrics = evaluator(iter(tss), iter(forecasts), num_series=len(test_data))

print(json.dumps(agg_metrics, indent=4))
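
To quantify how poorly the intervals cover the data, it can also help to look at the per-quantile coverage numbers the Evaluator reports. A minimal sketch, assuming agg_metrics exposes keys of the form Coverage[q] (the exact key names may vary across GluonTS versions):

for q in [0.1, 0.5, 0.9]:
    key = f"Coverage[{q}]"          # assumed key format; may differ by GluonTS version
    if key in agg_metrics:
        # For well-calibrated forecasts, Coverage[q] should be close to q itself.
        print(f"{key}: {agg_metrics[key]:.3f} (target: {q})")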

dataset: dataset.zip

kaijennissen commented 3 years ago

Hi. It looks like you are not providing any external features. There is also no obvious pattern that could be learned (compared to, e.g., the electricity dataset), so the forecasts are not as good as you might have hoped for. There are two things you could try: first, add external features; second, use another estimator (e.g. DeepState, which is able to learn latent patterns) -- see the sketch below.
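
For reference, a minimal sketch of the second suggestion, swapping in DeepStateEstimator for the same training and evaluation pipeline as above. The exact constructor arguments may differ between GluonTS versions; cardinality=[1] and use_feat_static_cat=False are assumptions for a dataset without static categorical features.

# Sketch only: DeepStateEstimator arguments may vary across GluonTS versions.
from gluonts.model.deepstate import DeepStateEstimator

deepstate_estimator = DeepStateEstimator(
    freq="H",
    prediction_length=12,
    cardinality=[1],              # assumption: no static categorical feature in the dataset
    use_feat_static_cat=False,    # assumption: dataset has no feat_static_cat field
    trainer=trainer,
)
deepstate_predictor = deepstate_estimator.train(training_data=train_data)

For the first suggestion, adding external features would mean writing a feat_dynamic_real array into each series of the dataset and setting use_feat_dynamic_real=True in the DeepAR estimator.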