Open · Zhen-Bu-Chuo opened this issue 1 week ago
Hi, we have tried Corrformer before and did not seem to run into this problem. Could you paste more of your code so that it is easier for us to debug?
arch code:
import numpy
import pandas as pd
import torch
import torch.nn as nn
from baselines.Corrformer_ori.arch.Corr_layers.Embed import DataEmbedding
from baselines.Corrformer_ori.arch.Corr_layers.Causal_Conv import CausalConv
from baselines.Corrformer_ori.arch.Corr_layers.Multi_Correlation import AutoCorrelation, AutoCorrelationLayer, CrossCorrelation, CrossCorrelationLayer, \
MultiCorrelation
from baselines.Corrformer_ori.arch.Corr_layers.Corrformer_EncDec import Encoder, Decoder, EncoderLayer, DecoderLayer, \
my_Layernorm, series_decomp
from datetime import datetime, timedelta
from baselines.Corrformer.arch.utils.timefeatures import time_features
torch.autograd.set_detect_anomaly(True)
class Corrformer_ori(nn.Module):
def __init__(self, **model_args):
super(Corrformer_ori, self).__init__()
self.seq_len = model_args["input_len"]
self.label_len = model_args["label_len"]
self.pred_len = model_args["output_len"]
self.num_nodes = model_args["num_nodes"]
self.node_num = model_args["num_node"]
self.node_list = model_args["node_list"] # node_num = node_list[0]*node_list[1]*node_list[2]...
self.node_list = [int(x) for x in self.node_list.split(',')]
self.device = model_args["device"]
self.freq = model_args["freq"]
self.output_attention = model_args["output_attention"]
# Decomp
kernel_size = model_args["moving_avg"]
self.decomp = series_decomp(kernel_size)
# Encoding
self.enc_embedding = DataEmbedding(model_args["enc_in"], model_args["d_model"], model_args["root_path"],
model_args["num_nodes"], model_args["embed"], model_args["freq"],
model_args["dropout"])
self.dec_embedding = DataEmbedding(model_args["dec_in"], model_args["d_model"], model_args["root_path"],
model_args["num_nodes"], model_args["embed"], model_args["freq"],
model_args["dropout"])
# Encoder
self.encoder = Encoder(
[
EncoderLayer(
MultiCorrelation(
AutoCorrelationLayer(
AutoCorrelation(False, model_args["factor_temporal"], attention_dropout=model_args["dropout"],
output_attention=model_args["output_attention"]),
model_args["d_model"], model_args["n_heads"]),
CrossCorrelationLayer(
CrossCorrelation(
CausalConv(
num_inputs=model_args["d_model"] // model_args["n_heads"] * self.seq_len,
num_channels=[model_args["d_model"] // model_args["n_heads"] * self.seq_len] \
* model_args["dec_tcn_layers"],
kernel_size=3),
False, model_args["factor_spatial"], attention_dropout=model_args["dropout"],
output_attention=self.output_attention),
model_args["d_model"], model_args["n_heads"]),
self.node_num,
self.node_list,
dropout=model_args["dropout"],
),
model_args["d_model"],
model_args["d_ff"],
moving_avg=model_args["moving_avg"],
dropout=model_args["dropout"],
activation=model_args["activation"]
) for l in range(model_args["e_layers"])
],
norm_layer=my_Layernorm(model_args["d_model"])
)
# Decoder
self.decoder = Decoder(
[
DecoderLayer(
MultiCorrelation(
AutoCorrelationLayer(
AutoCorrelation(True, model_args["factor_temporal"], attention_dropout=model_args["dropout"],
output_attention=False),
model_args["d_model"], model_args["n_heads"]),
CrossCorrelationLayer(
CrossCorrelation(
CausalConv(
num_inputs=model_args["d_model"] // model_args["n_heads"] * (self.label_len + self.pred_len),
num_channels=[model_args["d_model"] // model_args["n_heads"] * (self.label_len + self.pred_len)] \
* model_args["dec_tcn_layers"],
kernel_size=3),
False, model_args["factor_spatial"], attention_dropout=model_args["dropout"],
output_attention=self.output_attention),
model_args["d_model"], model_args["n_heads"]),
self.node_num,
self.node_list,
dropout=model_args["dropout"],
),
MultiCorrelation(
AutoCorrelationLayer(
AutoCorrelation(False, model_args["factor_temporal"], attention_dropout=model_args["dropout"],
output_attention=False),
model_args["d_model"], model_args["n_heads"]),
CrossCorrelationLayer(
CrossCorrelation(
CausalConv(
num_inputs=model_args["d_model"] // model_args["n_heads"] * (self.label_len + self.pred_len),
num_channels=[model_args["d_model"] // model_args["n_heads"] * (self.label_len + self.pred_len)] \
* model_args["dec_tcn_layers"],
kernel_size=3),
False, model_args["factor_spatial"], attention_dropout=model_args["dropout"],
output_attention=self.output_attention),
model_args["d_model"], model_args["n_heads"]),
self.node_num,
self.node_list,
dropout=model_args["dropout"],
),
model_args["d_model"],
model_args["c_out"],
model_args["d_ff"],
moving_avg=model_args["moving_avg"],
dropout=model_args["dropout"],
activation=model_args["activation"],
)
for l in range(model_args["d_layers"])
],
norm_layer=my_Layernorm(model_args["d_model"]),
projection=nn.Linear(model_args["d_model"], model_args["c_out"], bias=True)
)
self.affine_weight = nn.Parameter(torch.ones(1, 1, model_args["enc_in"]))
self.affine_bias = nn.Parameter(torch.zeros(1, 1, model_args["enc_in"]))
self.tran_oriDim_to_tarDim = nn.Conv1d(in_channels=self.num_nodes, out_channels=self.node_num, kernel_size=1)
self.tran_tarDim_to_oriDim = nn.Conv1d(in_channels=self.node_num, out_channels=self.num_nodes, kernel_size=1)
def forward(self, history_data,
enc_self_mask=None, dec_self_mask=None, dec_enc_mask=None,
future_data: torch.Tensor = None, batch_seen: int = None, **kwargs) -> torch.Tensor:
# init & normalization
b,l,n,_ = history_data.shape
x_enc = history_data[:, :, :, 0]
# decoder input
x_dec = torch.zeros_like(future_data[:, :, :, 0]).float()
x_dec = torch.cat([x_enc[:, -self.label_len: , :], x_dec], dim=1).float().to(self.device)
#encoder mark
x_mark_enc = history_data[:, :, :, 3]
x_mark_enc = float_array_to_date_array(x_mark_enc)
x_mark_enc = pd.to_datetime(x_mark_enc)
x_mark_enc = time_features(pd.to_datetime(x_mark_enc), freq=self.freq)
x_mark_enc = x_mark_enc.transpose(1,0)
x_mark_enc = torch.tensor(x_mark_enc.reshape(b,l,x_mark_enc.shape[-1]), dtype=torch.float).to(self.device)
#decoder mark
x_mark_dec = future_data[:, :, :, 3]
x_mark_dec = float_array_to_date_array(x_mark_dec)
x_mark_dec = pd.to_datetime(x_mark_dec)
x_mark_dec = time_features(pd.to_datetime(x_mark_dec), freq=self.freq)
x_mark_dec = x_mark_dec.transpose(1, 0)
x_mark_dec = torch.tensor(x_mark_dec.reshape(b, l, x_mark_dec.shape[-1]), dtype=torch.float).to(self.device)
x_mark_dec = torch.cat((x_mark_enc[:, -self.label_len:, :], x_mark_dec), dim=1)
# init & normalization
means = x_enc.mean(1, keepdim=True).detach()
x_enc = x_enc - means
stdev = torch.sqrt(torch.var(x_enc, dim=1, keepdim=True, unbiased=False) + 1e-5)
x_enc /= stdev
x_enc = x_enc * self.affine_weight.repeat(1, 1, self.node_num) + self.affine_bias.repeat(1, 1, self.node_num)
# decomp
mean = torch.mean(x_enc, dim=1).unsqueeze(1).repeat(1, self.pred_len, 1)
zeros = torch.zeros([x_dec.shape[0], self.pred_len, x_dec.shape[2]]).cuda()
seasonal_init, trend_init = self.decomp(x_enc)
# decoder input init
trend_init = torch.cat([trend_init[:, -self.label_len:, :], mean], dim=1)
seasonal_init = torch.cat([seasonal_init[:, -self.label_len:, :], zeros], dim=1)
# enc
B, L, D = x_enc.shape
_, _, C = x_mark_enc.shape
x_enc = x_enc.view(B, L, self.node_num, -1).permute(0, 2, 1, 3).contiguous() \
.view(B * self.node_num, L, D // self.node_num)
x_mark_enc = x_mark_enc.unsqueeze(1).repeat(1, self.node_num, 1, 1).view(B * self.node_num, L, C)
enc_out = self.enc_embedding(x_enc, x_mark_enc)
enc_out = self.encoder(enc_out, attn_mask=enc_self_mask)
# dec
B, L, D = seasonal_init.shape
_, _, C = x_mark_dec.shape
seasonal_init = seasonal_init.view(B, L, self.node_num, -1).permute(0, 2, 1, 3).contiguous() \
.view(B * self.node_num, L, D // self.node_num)
trend_init = trend_init.view(B, L, self.node_num, -1).permute(0, 2, 1, 3).contiguous() \
.view(B * self.node_num, L, D // self.node_num)
x_mark_dec = x_mark_dec.unsqueeze(1).repeat(1, self.node_num, 1, 1).view(B * self.node_num, L, C)
dec_out = self.dec_embedding(seasonal_init, x_mark_dec)
seasonal_part, trend_part = self.decoder(dec_out, enc_out, x_mask=dec_self_mask, cross_mask=dec_enc_mask,
trend=trend_init)
# final
dec_out = trend_part + seasonal_part
dec_out = dec_out[:, -self.pred_len:, :] \
.view(B, self.node_num, self.pred_len, D // self.node_num).permute(0, 2, 1, 3).contiguous() \
.view(B, self.pred_len, D) # B L D
# scale back
dec_out = dec_out - self.affine_bias.repeat(1, 1, self.node_num)
dec_out = dec_out / (self.affine_weight.repeat(1, 1, self.node_num) + 1e-10)
dec_out = dec_out * (stdev[:, 0, :].unsqueeze(1).repeat(1, self.pred_len, 1))
dec_out = dec_out + (means[:, 0, :].unsqueeze(1).repeat(1, self.pred_len, 1))
dec_out = dec_out.unsqueeze(-1)
return dec_out # [B, L, D]
def float_array_to_date_array(float_array, start_date=datetime(1970, 1, 1)):
temp = float_array
res = []
b, l, n = temp.shape
# print(temp.shape)
for i in range(b):
for j in range(l):
# print((start_date + timedelta(seconds=float(temp[i,j,0]))).strftime('%Y-%m-%d %H:%M:%S'))
# for k in range(n):
# print(type(float_value))
# print(float_value)
res.append((start_date + timedelta(seconds=float(temp[i,j,0]))).strftime('%Y-%m-%d %H:%M:%S'))
res = numpy.array(res)
# res = numpy.reshape(res, (b,l,n))
return res
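(Aside: the per-timestep Python loop in float_array_to_date_array can be replaced by a vectorized pandas conversion. A minimal sketch, assuming the values really are POSIX seconds and that any CUDA tensor is moved to CPU first; the function name is hypothetical, not part of the repository.)
import numpy as np
import pandas as pd
def float_array_to_date_array_vectorized(float_array):
    # float_array: (B, L, N) array of POSIX seconds; like the original loop,
    # only node 0 is used, producing one date string per (b, l) pair.
    seconds = np.asarray(float_array)[:, :, 0].reshape(-1)
    dates = pd.to_datetime(seconds, unit='s', origin='unix')
    return dates.strftime('%Y-%m-%d %H:%M:%S').to_numpy()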
generate data code:
import os
import json
import shutil
import time
import numpy as np
from generate_adj_mx import generate_adj_pems04 as generate_adj
from datetime import datetime, timedelta
# Hyperparameters
dataset_name = 'PEMS04'
data_file_path = f'datasets/raw_data/{dataset_name}/{dataset_name}.npz'
graph_file_path = f'datasets/raw_data/{dataset_name}/adj_{dataset_name}.pkl'
output_dir = f'datasets/{dataset_name}'
target_channel = [0] # Target traffic flow channel
add_time_of_day = True # Add time of day as a feature
add_day_of_week = True # Add day of the week as a feature
steps_per_day = 288 # Number of time steps per day
frequency = 1440 // steps_per_day
domain = 'traffic flow'
feature_description = [domain, 'time of day', 'day of week']
regular_settings = {
'INPUT_LEN': 12,
'OUTPUT_LEN': 12,
'TRAIN_VAL_TEST_RATIO': [0.6, 0.2, 0.2],
'NORM_EACH_CHANNEL': False,
'RESCALE': True,
'METRICS': ['MAE', 'RMSE', 'MAPE'],
'NULL_VAL': 0.0
}
def load_and_preprocess_data():
'''Load and preprocess raw data, selecting the specified channel(s).'''
print(np.load(data_file_path))
data = np.load(data_file_path)['data']
data = data[..., target_channel]
print(f'Raw time series shape: {data.shape}')
return data
def seconds_to_time_series(iter):
time_lst = []
# date_lst = []
base_time = datetime(2018, 1, 1)
for i in range(iter):
time_str = (base_time + timedelta(seconds=i*5*60)).strftime('%Y-%m-%d %H:%M:%S')
datetime_obj = datetime.strptime(time_str, '%Y-%m-%d %H:%M:%S')
# date_lst.append([datetime_obj])
timestamp = time.mktime(datetime_obj.timetuple())
time_lst.append([timestamp])
# time_lst.append([i])
# date_lst = np.array(date_lst).repeat(307, axis=1)
# date_lst = np.reshape(date_lst, (date_lst.shape[0], date_lst.shape[1], 1))
# print(date_lst)
time_lst = np.array(time_lst).repeat(307, axis=1)
time_lst = np.reshape(time_lst, (time_lst.shape[0], time_lst.shape[1], 1))
print(time_lst)
return time_lst
def add_temporal_features(data):
'''Add time of day and day of week as features to the data.'''
l, n, _ = data.shape
feature_list = [data]
if add_time_of_day:
time_of_day = np.array([i % steps_per_day / steps_per_day for i in range(l)])
time_of_day_tiled = np.tile(time_of_day, [1, n, 1]).transpose((2, 1, 0))
feature_list.append(time_of_day_tiled)
if add_day_of_week:
day_of_week = np.array([(i // steps_per_day) % 7 / 7 for i in range(l)])
day_of_week_tiled = np.tile(day_of_week, [1, n, 1]).transpose((2, 1, 0))
feature_list.append(day_of_week_tiled)
time_list = seconds_to_time_series(16992)
feature_list.append(time_list)
data_with_features = np.concatenate(feature_list, axis=-1) # L x N x C
return data_with_features
def save_data(data):
'''Save the preprocessed data to a binary file.'''
if not os.path.exists(output_dir):
os.makedirs(output_dir)
file_path = os.path.join(output_dir, 'data.dat')
fp = np.memmap(file_path, dtype='float32', mode='w+', shape=data.shape)
fp[:] = data[:]
fp.flush()
del fp
print(f'Data saved to {file_path}')
def save_graph():
'''Save the adjacency matrix to the output directory, generating it if necessary.'''
output_graph_path = os.path.join(output_dir, 'adj_mx.pkl')
if os.path.exists(graph_file_path):
shutil.copyfile(graph_file_path, output_graph_path)
else:
generate_adj()
shutil.copyfile(graph_file_path, output_graph_path)
print(f'Adjacency matrix saved to {output_graph_path}')
def save_description(data):
'''Save a description of the dataset to a JSON file.'''
description = {
'name': dataset_name,
'domain': domain,
'shape': data.shape,
'num_time_steps': data.shape[0],
'num_nodes': data.shape[1],
'num_features': data.shape[2],
'feature_description': feature_description,
'has_graph': graph_file_path is not None,
'frequency (minutes)': frequency,
'regular_settings': regular_settings
}
description_path = os.path.join(output_dir, 'desc.json')
with open(description_path, 'w') as f:
json.dump(description, f, indent=4)
print(f'Description saved to {description_path}')
print(description)
def main():
# Load and preprocess data
data = load_and_preprocess_data()
# Add temporal features
data_with_features = add_temporal_features(data)
# Save processed data
save_data(data_with_features)
# Copy or generate and save adjacency matrix
save_graph()
# Save dataset description
save_description(data_with_features)
if __name__ == '__main__':
main()
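(For a quick sanity check of the saved artifacts, data.dat can be mapped back with np.memmap using the shape that save_description() writes to desc.json. A minimal sketch under the paths used in this script; nothing here is a BasicTS API.)
import json
import numpy as np
with open('datasets/PEMS04/desc.json') as f:
    shape = tuple(json.load(f)['shape'])  # (L, N, C), e.g. (16992, 307, 4) for this setup
data = np.memmap('datasets/PEMS04/data.dat', dtype='float32', mode='r', shape=shape)
print(data.shape)     # should match the shape recorded in desc.json
print(data[0, 0, :])  # first node, first step: [flow, time-of-day, day-of-week, timestamp]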
parameter settings:
import os
import sys
import torch
from easydict import EasyDict
sys.path.append(os.path.abspath(__file__ + '/../../..'))
from basicts.metrics import masked_mae, masked_mape, masked_rmse
from basicts.data import TimeSeriesForecastingDataset
from basicts.runners import SimpleTimeSeriesForecastingRunner
from basicts.scaler import ZScoreScaler
from basicts.utils import get_regular_settings, load_adj
from .arch import Corrformer_ori
############################## Hot Parameters ##############################
# Dataset & Metrics configuration
DATA_NAME = 'PEMS04' # Dataset name
regular_settings = get_regular_settings(DATA_NAME)
INPUT_LEN = regular_settings['INPUT_LEN'] # Length of input sequence
OUTPUT_LEN = regular_settings['OUTPUT_LEN'] # Length of output sequence
TRAIN_VAL_TEST_RATIO = regular_settings['TRAIN_VAL_TEST_RATIO'] # Train/Validation/Test split ratios
NORM_EACH_CHANNEL = regular_settings['NORM_EACH_CHANNEL'] # Whether to normalize each channel of the data
RESCALE = regular_settings['RESCALE'] # Whether to rescale the data
NULL_VAL = regular_settings['NULL_VAL'] # Null value in the data
# Model architecture and parameters
MODEL_ARCH = Corrformer_ori
MODEL_PARAM = {
"input_len": INPUT_LEN,
"label_len": INPUT_LEN // 2,
"output_len": OUTPUT_LEN,
"num_nodes": 307,
"num_node": 307,
"node_list": '307',
"output_attention": False,
"moving_avg": 25,
"enc_in": 1,
"dec_in": 1,
"c_out": 1,
"d_model": 1,
"root_path": "datasets/" + DATA_NAME,
"embed": "timeF",
# freq for time features encoding, options:[s:secondly, t:minutely, h:hourly, d:daily, b:business days, w:weekly, m:monthly],
# you can also use more detailed freq like 15min or 3h
"freq": "h",
"dropout": 0.05,
"factor_temporal": 1,
"n_heads": 1,
"enc_tcn_layers": 1,
"dec_tcn_layers": 1,
"factor_spatial": 1,
"d_ff": 1,
"activation": 'gelu',
"e_layers": 2,
"d_layers": 1,
"device": 0,
}
NUM_EPOCHS = 150
############################## General Configuration ##############################
CFG = EasyDict()
# General settings
CFG.DESCRIPTION = 'An Example Config'
CFG.GPU_NUM = 1 # Number of GPUs to use (0 for CPU mode)
# Runner
CFG.RUNNER = SimpleTimeSeriesForecastingRunner
############################## Dataset Configuration ##############################
CFG.DATASET = EasyDict()
# Dataset settings
CFG.DATASET.NAME = DATA_NAME
CFG.DATASET.TYPE = TimeSeriesForecastingDataset
CFG.DATASET.PARAM = EasyDict({
'dataset_name': DATA_NAME,
'train_val_test_ratio': TRAIN_VAL_TEST_RATIO,
'input_len': INPUT_LEN,
'output_len': OUTPUT_LEN,
# 'mode' is automatically set by the runner
})
############################## Scaler Configuration ##############################
CFG.SCALER = EasyDict()
# Scaler settings
CFG.SCALER.TYPE = ZScoreScaler # Scaler class
CFG.SCALER.PARAM = EasyDict({
'dataset_name': DATA_NAME,
'train_ratio': TRAIN_VAL_TEST_RATIO[0],
'norm_each_channel': NORM_EACH_CHANNEL,
'rescale': RESCALE,
})
############################## Model Configuration ##############################
CFG.MODEL = EasyDict()
# Model settings
CFG.MODEL.NAME = MODEL_ARCH.__name__
CFG.MODEL.ARCH = MODEL_ARCH
CFG.MODEL.PARAM = MODEL_PARAM
CFG.MODEL.FORWARD_FEATURES = [0, 1, 2, 3]
CFG.MODEL.TARGET_FEATURES = [0]
############################## Metrics Configuration ##############################
CFG.METRICS = EasyDict()
# Metrics settings
CFG.METRICS.FUNCS = EasyDict({
'MAE': masked_mae,
'MAPE': masked_mape,
'RMSE': masked_rmse,
})
CFG.METRICS.TARGET = 'MAE'
CFG.METRICS.NULL_VAL = NULL_VAL
############################## Training Configuration ##############################
CFG.TRAIN = EasyDict()
CFG.TRAIN.NUM_EPOCHS = NUM_EPOCHS
CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
'checkpoints',
MODEL_ARCH.__name__,
'_'.join([DATA_NAME, str(CFG.TRAIN.NUM_EPOCHS), str(INPUT_LEN), str(OUTPUT_LEN)])
)
CFG.TRAIN.LOSS = masked_mae
# Optimizer settings
CFG.TRAIN.OPTIM = EasyDict()
CFG.TRAIN.OPTIM.TYPE = "Adam"
CFG.TRAIN.OPTIM.PARAM = {
"lr": 0.001,
"weight_decay": 0.0001,
}
# Learning rate scheduler settings
CFG.TRAIN.LR_SCHEDULER = EasyDict()
CFG.TRAIN.LR_SCHEDULER.TYPE = "MultiStepLR"
CFG.TRAIN.LR_SCHEDULER.PARAM = {
"milestones": [1, 50, 80],
"gamma": 0.5
}
CFG.TRAIN.CLIP_GRAD_PARAM = {
'max_norm': 5.0
}
# Train data loader settings
CFG.TRAIN.DATA = EasyDict()
CFG.TRAIN.DATA.BATCH_SIZE = 64
CFG.TRAIN.DATA.SHUFFLE = True
############################## Validation Configuration ##############################
CFG.VAL = EasyDict()
CFG.VAL.INTERVAL = 1
CFG.VAL.DATA = EasyDict()
CFG.VAL.DATA.BATCH_SIZE = 64
############################## Test Configuration ##############################
CFG.TEST = EasyDict()
CFG.TEST.INTERVAL = 16
CFG.TEST.DATA = EasyDict()
CFG.TEST.DATA.BATCH_SIZE = 64
############################## Evaluation Configuration ##############################
CFG.EVAL = EasyDict()
# Evaluation parameters
CFG.EVAL.HORIZONS = [3, 6, 12] # Prediction horizons for evaluation. Default: []
CFG.EVAL.USE_GPU = True # Whether to use GPU for evaluation. Default: True
I removed the positional-encoding part of the original code.
Sorry, I cannot reproduce your error; it runs fine on my side. Which version of PyTorch are you using?
torch 1.10.0+cu113, torchvision 0.11.0+cu113, numpy 1.21.6, easy-torch 1.3.2
What is your environment? I will switch to your environment and try it.
After switching environments I still run into this problem.
Here is my training log:
2024-11-20 14:17:27,067 - easytorch-training - INFO - Initializing training.
2024-11-20 14:17:27,067 - easytorch-training - INFO - Set clip grad, param: {'max_norm': 5.0}
2024-11-20 14:17:27,068 - easytorch-training - INFO - Building training data loader.
2024-11-20 14:17:27,106 - easytorch-training - INFO - Train dataset length: 10173
2024-11-20 14:17:29,087 - easytorch-training - INFO - Set optim: Adam (
Parameter Group 0
amsgrad: False
betas: (0.9, 0.999)
capturable: False
differentiable: False
eps: 1e-08
foreach: None
fused: None
lr: 0.001
maximize: False
weight_decay: 0.0001
)
2024-11-20 14:17:29,088 - easytorch-training - INFO - Set lr_scheduler: <torch.optim.lr_scheduler.MultiStepLR object at 0x00000269BCB7A890>
2024-11-20 14:17:29,092 - easytorch-training - INFO - Initializing validation.
2024-11-20 14:17:29,093 - easytorch-training - INFO - Building val data loader.
2024-11-20 14:17:29,116 - easytorch-training - INFO - Validation dataset length: 3375
2024-11-20 14:17:29,147 - easytorch-training - INFO - Test dataset length: 3375
2024-11-20 14:17:29,148 - easytorch-training - INFO - Number of parameters: 195067
2024-11-20 14:17:29,149 - easytorch-training - INFO - Epoch 1 / 150
2024-11-20 14:23:17,190 - easytorch-training - INFO - Result
Strange. Please send your code to zezhishao@gmail.com, I need it to debug. You can also join the WeChat group listed in the README, which makes communication easier.
Hi, after porting the Corrformer code into BasicTS I ran into the error "Function 'WeightNormCudaInterfaceBackward0' returned nan values". Have you encountered a similar situation before?
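(For anyone hitting the same 'WeightNormCudaInterfaceBackward0' message: besides torch.autograd.set_detect_anomaly(True), which is already enabled in the arch code above, a generic way to find the first module whose gradient goes non-finite is to attach backward hooks. A minimal sketch using only standard PyTorch calls; the helper name and the usage lines are placeholders, not part of BasicTS or Corrformer.)
import torch
def register_nan_grad_hooks(model: torch.nn.Module):
    # Report the first module whose output gradient contains NaN/Inf during backward.
    def make_hook(name):
        def hook(module, grad_input, grad_output):
            for g in grad_output:
                if g is not None and not torch.isfinite(g).all():
                    print(f'[non-finite grad] module: {name}')
        return hook
    for name, module in model.named_modules():
        module.register_full_backward_hook(make_hook(name))
# usage sketch:
#   register_nan_grad_hooks(model)
#   loss.backward()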