Closed daihaozxn closed 9 months ago
Could you post the code? Your screenshot is too small to read.
Could you post the code? Your screenshot is too small to read.
Thank you for the quick reply again. The following are the data file (sst_tw.csv) and code: sst_tw.csv
from darts import TimeSeries
from darts.models import RNNModel, NBEATSModel
from darts.metrics import mape, smape, mae, rmse
from darts.utils.data import InferenceDataset, TrainingDataset, PastCovariatesTrainingDataset, PastCovariatesInferenceDataset
import torch
import torch.nn as nn
import torch.optim
import matplotlib.pyplot as plt
from torch.utils.data import Dataset, DataLoader
import pandas as pd
import os
from sklearn.preprocessing import StandardScaler
class MyDataset_Train(PastCovariatesTrainingDataset):
    """Sliding-window training dataset over a pre-scaled numpy array.

    The array is split into train/val/test segments via ``border1s`` /
    ``border2s``; samples are (input window, future window) pairs taken
    at stride 1 within the selected segment.
    """

    def __init__(self, data, border1s, border2s, seq_len, pred_len, flag='train'):
        super().__init__()
        self.seq_len = seq_len
        self.pred_len = pred_len
        assert flag in ['train', 'val', 'test']
        split_idx = {'train': 0, 'val': 1, 'test': 2}[flag]
        self.set_type = split_idx
        start, stop = border1s[split_idx], border2s[split_idx]
        # Inputs and targets are drawn from the same underlying segment.
        self.data_x = data[start:stop]
        self.data_y = data[start:stop]

    def __getitem__(self, index):
        in_end = index + self.seq_len
        out_end = in_end + self.pred_len
        seq_x = self.data_x[index:in_end]
        seq_y = self.data_y[in_end:out_end]
        # darts training tuple: (past_target, past_covariates,
        # static_covariates, future_target) — no covariates here.
        return (seq_x, None, None, seq_y)

    def __len__(self):
        # Number of full (seq_len + pred_len)-long windows at stride 1.
        return len(self.data_x) - self.seq_len - self.pred_len + 1
class MyDataset_Test(PastCovariatesInferenceDataset):
    """Sliding-window inference dataset over a darts ``TimeSeries``.

    Yields ``seq_len``-long input windows at stride 1, in the 5-tuple
    shape darts' inference collation expects:
    (past_target, past_covariates, future_past_covariates,
    static_covariates, target_series).
    """

    def __init__(self, target_series, seq_len):
        # NOTE(review): the base-class __init__ is intentionally not called,
        # as in the original; the base constructor builds its own sample
        # index, which this subclass replaces entirely.
        self.target_series = target_series
        self.seq_len = seq_len

    def __getitem__(self, index):
        window = self.target_series[index:(index + self.seq_len)]
        past_target = window.values(copy=False, sample=0)
        return (past_target, None, None, None, window)

    def __len__(self):
        # BUG FIX: the original returned len(self.target_series), so the last
        # seq_len - 1 indices produced windows shorter than seq_len and
        # batching failed with "ValueError: all input arrays must have the
        # same shape". Only full-length windows are valid samples.
        return len(self.target_series) - self.seq_len + 1
# ---- Configuration ----
root_path = './dataset/sst/'
data_path = 'sst_tw.csv'
features = 'M'   # 'M': all columns (multivariate); 'S': single target column
target = 'OT'
scale = True
seq_len = 360
pred_len = 30

# ---- Load the raw CSV and compute the train/val/test boundaries ----
df_raw = pd.read_csv(os.path.join(root_path, data_path))
num_train = int(len(df_raw) * 0.7)
num_test = int(len(df_raw) * 0.2)
num_vali = len(df_raw) - num_train - num_test
# Val/test segments start seq_len earlier so their first windows have history.
border1s = [0, num_train - seq_len, len(df_raw) - num_test - seq_len]
border2s = [num_train, num_train + num_vali, len(df_raw)]

# ---- Select the feature columns ----
if features == 'M':
    df_data = df_raw[df_raw.columns[1:]]   # drop the first (timestamp) column
elif features == 'S':
    df_data = df_raw[[target]]

# ---- Standardize using statistics from the training segment only ----
scaler = StandardScaler()
if scale:
    train_data = df_data[border1s[0]:border2s[0]]
    scaler.fit(train_data.values)
    data = scaler.transform(df_data.values)
else:
    data = df_data.values

# ---- Wrap the scaled array in the custom darts training datasets ----
train_data = MyDataset_Train(data=data, border1s=border1s, border2s=border2s,
                             seq_len=seq_len, pred_len=pred_len, flag='train')
val_data = MyDataset_Train(data=data, border1s=border1s, border2s=border2s,
                           seq_len=seq_len, pred_len=pred_len, flag='val')
# ---- Build the generic N-BEATS model ----
trainer_config = {'accelerator': 'gpu', 'devices': [0]}
model_NBEATS = NBEATSModel(
    input_chunk_length=seq_len,
    output_chunk_length=pred_len,
    generic_architecture=True,
    num_stacks=2,
    num_blocks=1,
    num_layers=2,
    layer_widths=64,
    expansion_coefficient_dim=2,
    trend_polynomial_degree=2,
    dropout=0.0,
    batch_size=1024,
    activation='ReLU',
    nr_epochs_val_period=1,
    model_name="nbeats_run",
    n_epochs=10,
    loss_fn=nn.MSELoss(),
    save_checkpoints=False,
    optimizer_cls=torch.optim.Adam,
    optimizer_kwargs={'lr': 0.001},
    random_state=None,
    pl_trainer_kwargs=trainer_config,
)

# Train directly from the custom datasets instead of TimeSeries objects.
model_NBEATS.fit_from_dataset(train_dataset=train_data, val_dataset=val_data, verbose=True)

# ---- Build the inference dataset from the test portion of the scaled array ----
target_series = TimeSeries.from_values(
    data[-num_test:],
    columns=None,
    fillna_value=None,
    static_covariates=None,
    hierarchy=None,
)
test_data = MyDataset_Test(target_series=target_series, seq_len=seq_len)

# Forecast pred_len steps ahead for every window in the test dataset.
forecast_NBEATS = model_NBEATS.predict_from_dataset(
    n=pred_len,
    input_series_dataset=test_data,
    trainer=None,
    batch_size=None,
    verbose=True,
    n_jobs=1,
    roll_size=None,
    num_samples=1,
    num_loader_workers=0,
    mc_dropout=False,
)
I think I have found the cause of the problem. When I modify the class MyDataset_Test to:
class MyDataset_Test(PastCovariatesInferenceDataset):
    """Fixed inference dataset: yields only full seq_len-long windows.

    Samples follow darts' inference 5-tuple shape: (past_target,
    past_covariates, future_past_covariates, static_covariates,
    target_series).
    """

    def __init__(self, target_series, seq_len, pred_len):
        # Base-class __init__ deliberately not called (as in the original);
        # this subclass manages its own sample indexing.
        self.target_series = target_series
        self.seq_len = seq_len
        self.pred_len = pred_len

    def __getitem__(self, index):
        window = self.target_series[index:(index + self.seq_len)]
        return (window.values(copy=False, sample=0), None, None, None, window)

    def __len__(self):
        # Stop early enough that every slice taken in __getitem__ is exactly
        # seq_len long, with pred_len points of headroom left at the end.
        return len(self.target_series) - self.seq_len - self.pred_len + 1
and modify the corresponding two lines of code as follows:
# Build the test TimeSeries over the test segment defined by border1s/border2s
# (which, per the borders above, includes seq_len points of history before the
# test split) and wrap it in the fixed inference dataset.
target_series = TimeSeries.from_values(data[border1s[2]:border2s[2]], columns=None, fillna_value=None, static_covariates=None, hierarchy=None)
test_data = MyDataset_Test(target_series=target_series, seq_len=seq_len, pred_len=pred_len)
then it works.
Hi @daihaozxn,
As you found a solution to your problem, I am going to close this issue but feel free to reopen it if something remains unclear.
I use the model NBEATS and the function predict_from_dataset to make predictions on the test dataset. During the prediction, an error occurred (ValueError: all input arrays must have the same shape), as shown in the figure below:
It seems to be due to the inconsistent shape of the samples in the last batch. When the sequence length is less than input_chunk_length (i.e. 360), it will still construct the sample with stride=1, resulting in multiple samples with a shape of(359, 25),(358, 25),......,(3, 25),(2, 25), (1, 25). How can I solve this problem? Thx.