PaddlePaddle / PaddleTS

Awesome Easy-to-Use Deep Time Series Modeling based on PaddlePaddle, including comprehensive functionality modules like TSDataset, Analysis, Transform, Models, AutoTS, and Ensemble, etc., supporting versatile tasks like time series forecasting, representation learning, and anomaly detection, etc., featured with quick tracking of SOTA deep models.
Apache License 2.0
477 stars 116 forks source link

NHiTSModel模型fit时候加入验证集报错。 #458

Closed suntao2015005848 closed 6 months ago

suntao2015005848 commented 10 months ago
#Pipeline训练

import pandas as pd

from paddlets.pipeline.pipeline import Pipeline
import numpy as np
import matplotlib.pyplot as plt
import datetime
import paddlets
from paddlets import TSDataset
from paddlets import TimeSeries
from paddlets.models.forecasting.dl import * #引入了全部预测模型
from paddlets.models.forecasting import * #引入了全部预测模型
from paddlets.transform import OneHot, StandardScaler,TimeFeatureGenerator ,KSigma,MinMaxScaler
from paddlets.metrics import MSE, MAE
import warnings
warnings.filterwarnings('ignore')
from paddlets.automl.autots import AutoTS
import os 
from paddlets.automl.autots import SearchSpaceConfiger
from ray.tune import uniform, qrandint, choice,quniform
from paddlets.transform import TimeFeatureGenerator

# Read the CSV file and keep only the timestamp and target columns.
df = pd.read_csv('/home/aistudio/ydl/fh_power_data.csv')
df = df.filter(items=['monitorTime', 'presentValue'])

# Build the initial dataset. Note that 'monitorTime' is passed both as the
# time column and as a known covariate.
# NOTE(review): presumably this keeps 'monitorTime' available as a regular
# column after to_dataframe() so it can be reused as time_col below — confirm.
# NOTE(review): fillna_method='pre' presumably fills from the previous value,
# which would leave leading gaps as NaN — verify against the PaddleTS docs.
target_cov_dataset = TSDataset.load_from_dataframe(
    df,
    time_col='monitorTime',
    target_cols='presentValue',
    known_cov_cols=['monitorTime'],
    freq='5min',
    fill_missing_dates=True,
    fillna_method='pre'
)    
# Generate time-derived features: workday flag, day of week and hour.
time_feature_generator = TimeFeatureGenerator(feature_cols=['is_workday','weekday','hour'])
target_cov_dataset = time_feature_generator.fit_transform(target_cov_dataset)

# Round-trip through a DataFrame so that 'weekday' and 'hour' can be cast to float.
df_2 =  target_cov_dataset.to_dataframe()
df_2['weekday'] = df_2['weekday'].astype(float)
df_2['hour'] = df_2['hour'].astype(float)

# Rebuild the training dataset with the generated features as known covariates.
target_cov_dataset_1 = TSDataset.load_from_dataframe(
    df_2,
    time_col='monitorTime',
    target_cols='presentValue',
    known_cov_cols=['is_workday','weekday','hour'],
    freq='5min',
    fill_missing_dates=True,
    fillna_method='pre'
)  

# Load the validation dataset from a separate CSV using the same column layout
# as the training dataset.
# NOTE(review): this file is expected to already contain the 'is_workday',
# 'weekday' and 'hour' columns; no TimeFeatureGenerator is applied here.
val_dataset = TSDataset.load_from_csv(
    filepath_or_buffer="/home/aistudio/ydl/val_test.csv",
    time_col='monitorTime',
    target_cols='presentValue',
    known_cov_cols=['is_workday','weekday','hour'],
    freq='5min',
    fill_missing_dates=True,
    fillna_method='pre'
)  

# Pipeline definition: a single NHiTSModel step with its constructor arguments.
pipeline_list = [
    (NHiTSModel , {
    'eval_metrics': ["mse", "mae"],
    'batch_size': 256, 
    'max_epochs': 10, 
    'patience': 10,
    'sampling_stride': 12,
    'in_chunk_len': 288,# length of the input sequence
    'out_chunk_len':288,# length of the output sequence
    'verbose':1,# log verbosity level, set to 1 here
    })
]

pipeline =  Pipeline(pipeline_list)

# Only the training dataset is passed to fit(); val_dataset loaded above is
# not used in this call.
pipeline.fit(target_cov_dataset_1)
pipeline.save(path="/home/aistudio/ydl/model_nhits/in288_out288_autots/")

报错:

/opt/conda/envs/python35-paddle120-env/lib/python3.7/site-packages/sklearn/utils/validation.py in check_array(array, accept_sparse, accept_large_sparse, dtype, order, copy, force_all_finite, ensure_2d, allow_nd, ensure_min_samples, ensure_min_features, estimator)
    719         if force_all_finite:
    720             _assert_all_finite(array,
--> 721                                allow_nan=force_all_finite == 'allow-nan')
    722 
    723     if ensure_min_samples > 0:

/opt/conda/envs/python35-paddle120-env/lib/python3.7/site-packages/sklearn/utils/validation.py in _assert_all_finite(X, allow_nan, msg_dtype)
    104                     msg_err.format
    105                     (type_err,
--> 106                      msg_dtype if msg_dtype is not None else X.dtype)
    107             )
    108     # for object dtype data, we only check for NaNs (GH-13254)

ValueError: Input contains NaN, infinity or a value too large for dtype('float32').
Sunting78 commented 7 months ago

您好,请根据报错提示检查一下数据中是否包含空值(NaN)或无穷大值。