awslabs / gluonts

Probabilistic time series modeling in Python
https://ts.gluon.ai
Apache License 2.0

ValueError: setting an array element with a sequence. The requested array has an inhomogeneous #3135

Open moghadas76 opened 9 months ago

moghadas76 commented 9 months ago

ValueError: setting an array element with a sequence. The requested array has an inhomogeneous shape after 1 dimensions. The detected shape was (32,) + inhomogeneous part.
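For context, this `ValueError` is raised by NumPy itself (since 1.24) whenever a list of unequal-length sequences is stacked into a single array; the `(32,)` matches `batch_size=32`, which suggests the failure happens while collating a training batch. A minimal sketch, not part of the original report, that reproduces the same message:

```python
import numpy as np

# Equal-length rows stack into a regular 2-D array:
print(np.array([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]]).shape)  # (2, 3)

# Rows of different lengths cannot be stacked; NumPy >= 1.24 raises the
# same "inhomogeneous shape" ValueError instead of building an object array.
try:
    np.array([[1.0, 2.0, 3.0], [4.0, 5.0]])
except ValueError as err:
    print(err)
```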

```python
from typing import Any, Dict, Iterable, Optional
from gluonts.dataset.loader import TrainDataLoader
from gluonts.itertools import Cached
from gluonts.torch.batchify import batchify
import pytorch_lightning as pl
import torch
from gluonts.core.component import validated
from gluonts.dataset.common import Dataset
from gluonts.dataset.field_names import FieldName
from gluonts.dataset.loader import as_stacked_batches
from gluonts.dataset.stat import calculate_dataset_statistics
from gluonts.itertools import Cyclic
from gluonts.time_feature import (
    get_lags_for_frequency,
    time_features_from_frequency_str,
)
from gluonts.torch.model.estimator import PyTorchLightningEstimator
from gluonts.torch.model.predictor import PyTorchPredictor
from gluonts.torch.modules.loss import DistributionLoss, NegativeLogLikelihood
from gluonts.transform import (
    AddObservedValuesIndicator,
    AddTimeFeatures,
    Chain,
    DummyValueImputation,
    ExpectedNumInstanceSampler,
    InstanceSampler,
    InstanceSplitter,
    TestSplitSampler,
    Transformation,
    ValidationSplitSampler,
    VstackFeatures,
)
from peft import LoraConfig, get_peft_model

from gluonts.torch.model.deepar import DeepAREstimator
from gluonts.torch.distributions import StudentTOutput, NormalOutput
from gluon_utils.gluon_ts_distributions.implicit_quantile_network import (
    ImplicitQuantileNetworkOutput,
)

from lag_llama.gluon.lightning_module import LagLlamaLightningModule

PREDICTION_INPUT_NAMES = [
    "past_target",
    "past_observed_values",
]
TRAINING_INPUT_NAMES = PREDICTION_INPUT_NAMES + [
    "future_target",
    "future_observed_values",
]

class LagLlamaEstimator(PyTorchLightningEstimator):
    """
    An estimator training a ConvTSMixer model for forecasting.

This class uses the model defined in ``ConvTSMixerModel``,
and wraps it into a ``ConvTSMixerLightningModule`` for training
purposes: training is performed using PyTorch Lightning's ``pl.Trainer``
class.

Parameters
----------
prediction_length
    Length of the prediction horizon.
context_length
    Number of time steps prior to prediction time that the model
    takes as inputs (default: ``10 * prediction_length``).
lr
    Learning rate (default: ``1e-3``).
weight_decay
    Weight decay regularization parameter (default: ``1e-8``).
distr_output
    Distribution to use to evaluate observations and sample predictions
    (default: StudentTOutput()).
loss
    Loss to be optimized during training
    (default: ``NegativeLogLikelihood()``).
batch_norm
    Whether to apply batch normalization.
batch_size
    The size of the batches to be used for training (default: 32).
num_batches_per_epoch
    Number of batches to be processed in each training epoch
        (default: 50).
trainer_kwargs
    Additional arguments to provide to ``pl.Trainer`` for construction.
train_sampler
    Controls the sampling of windows during training.
validation_sampler
    Controls the sampling of windows during validation.
"""

@validated()
def __init__(
    self,
    prediction_length: int,
    context_length: Optional[int] = None,
    input_size: int = 1,
    n_layer: int = 1,
    n_embd_per_head: int = 32,
    n_head: int = 4,
    max_context_length: int = 2048,
    rope_scaling=None,
    scaling: Optional[str] = "mean",
    lr: float = 1e-3,
    weight_decay: float = 1e-8,
    # Augmentations arguments
    aug_prob: float = 0.1,
    freq_mask_rate: float = 0.1,
    freq_mixing_rate: float = 0.1,
    jitter_prob: float = 0.0,
    jitter_sigma: float = 0.03,
    scaling_prob: float = 0.0,
    scaling_sigma: float = 0.1,
    rotation_prob: float = 0.0,
    permutation_prob: float = 0.0,
    permutation_max_segments: int = 5,
    permutation_seg_mode: str = "equal",
    magnitude_warp_prob: float = 0.0,
    magnitude_warp_sigma: float = 0.2,
    magnitude_warp_knot: int = 4,
    time_warp_prob: float = 0.0,
    time_warp_sigma: float = 0.2,
    time_warp_knot: int = 4,
    window_slice_prob: float = 0.0,
    window_slice_reduce_ratio: float = 0.9,
    window_warp_prob: float = 0.0,
    window_warp_window_ratio: float = 0.1,
    window_warp_scales: list = [0.5, 2.0],
    # Continuing model arguments
    distr_output: str = "studentT",
    loss: DistributionLoss = NegativeLogLikelihood(),
    num_parallel_samples: int = 100,
    batch_size: int = 32,
    num_batches_per_epoch: int = 50,
    trainer_kwargs: Optional[Dict[str, Any]] = None,
    train_sampler: Optional[InstanceSampler] = None,
    validation_sampler: Optional[InstanceSampler] = None,
    time_feat: bool = False,
    dropout: float = 0.0,
    lags_seq: list = ["Q", "M", "W", "D", "H", "T", "S"],
    data_id_to_name_map: dict = {},
    use_cosine_annealing_lr: bool = False,
    cosine_annealing_lr_args: dict = {},
    track_loss_per_series: bool = False,
    ckpt_path: Optional[str] = None,
    use_feat_dynamic_real=True,
) -> None:
    default_trainer_kwargs = {"max_epochs": 100}
    if trainer_kwargs is not None:
        default_trainer_kwargs.update(trainer_kwargs)
    super().__init__(trainer_kwargs=default_trainer_kwargs)

    self.scaling = scaling
    self.input_size = input_size
    self.prediction_length = prediction_length
    self.context_length = context_length
    self.max_context_length = max_context_length

    lag_indices = []
    for freq in lags_seq:
        lag_indices.extend(
            get_lags_for_frequency(freq_str=freq, num_default_lags=1)
        )

    if len(lag_indices):
        self.lags_seq = sorted(set(lag_indices))
        self.lags_seq = [lag_index - 1 for lag_index in self.lags_seq] # len 83, max: 1092
    else:
        self.lags_seq = []

    self.n_head = n_head
    self.n_layer = n_layer
    self.n_embd_per_head = n_embd_per_head
    self.rope_scaling = rope_scaling

    self.lr = lr
    self.weight_decay = weight_decay
    if distr_output == "studentT":
        distr_output = StudentTOutput()
    elif distr_output == "iqn":
        distr_output = ImplicitQuantileNetworkOutput()
    self.distr_output = distr_output
    self.num_parallel_samples = num_parallel_samples
    self.loss = loss
    self.batch_size = batch_size # 32
    self.num_batches_per_epoch = num_batches_per_epoch # 50

    self.train_sampler = train_sampler or ExpectedNumInstanceSampler(
        num_instances=1.0, min_future=prediction_length
    )
    self.validation_sampler = validation_sampler or ValidationSplitSampler(
        min_future=prediction_length
    )

    self.aug_prob = aug_prob
    self.freq_mask_rate = freq_mask_rate
    self.freq_mixing_rate = freq_mixing_rate
    self.jitter_prob = jitter_prob
    self.jitter_sigma = jitter_sigma
    self.scaling_prob = scaling_prob
    self.scaling_sigma = scaling_sigma
    self.rotation_prob = rotation_prob
    self.permutation_prob = permutation_prob
    self.permutation_max_segments = permutation_max_segments
    self.permutation_seg_mode = permutation_seg_mode
    self.magnitude_warp_prob = magnitude_warp_prob
    self.magnitude_warp_sigma = magnitude_warp_sigma
    self.magnitude_warp_knot = magnitude_warp_knot
    self.time_warp_prob = time_warp_prob
    self.time_warp_sigma = time_warp_sigma
    self.time_warp_knot = time_warp_knot
    self.window_slice_prob = window_slice_prob
    self.window_slice_reduce_ratio = window_slice_reduce_ratio
    self.window_warp_prob = window_warp_prob
    self.window_warp_window_ratio = window_warp_window_ratio
    self.window_warp_scales = window_warp_scales
    self.track_loss_per_series = track_loss_per_series

    self.time_feat = time_feat
    self.dropout = dropout
    self.data_id_to_name_map = data_id_to_name_map
    self.ckpt_path = ckpt_path

    self.use_cosine_annealing_lr = use_cosine_annealing_lr
    self.cosine_annealing_lr_args = cosine_annealing_lr_args
    # self.transformation = self.create_transformation()

@classmethod
def derive_auto_fields(cls, train_iter):
    stats = calculate_dataset_statistics(train_iter)

    return {
        "num_feat_dynamic_real": stats.num_feat_dynamic_real,
        "num_feat_static_cat": len(stats.feat_static_cat),
        "cardinality": [len(cats) for cats in stats.feat_static_cat],
    }

def create_transformation(self) -> Transformation:
    if self.time_feat:
        return Chain(
            [
                AddTimeFeatures(
                    start_field=FieldName.START,
                    target_field=FieldName.TARGET,
                    output_field=FieldName.FEAT_TIME,
                    time_features=time_features_from_frequency_str("S"),
                    pred_length=self.prediction_length,
                ),
                # VstackFeatures(
                #     output_field=FieldName.FEAT_TIME,
                #     input_fields=[FieldName.FEAT_TIME] + [FieldName.FEAT_DYNAMIC_REAL]
                # ),
                # FilterTransformation(lambda x: sum(abs(x[FieldName.TARGET])) > 0),
                AddObservedValuesIndicator(
                    target_field=FieldName.TARGET,
                    output_field=FieldName.OBSERVED_VALUES,
                    imputation_method=DummyValueImputation(0.0),
                ),

            ]
        )
    else:
        return Chain(
            [
                AddObservedValuesIndicator(
                    target_field=FieldName.TARGET,
                    output_field=FieldName.OBSERVED_VALUES,
                    imputation_method=DummyValueImputation(0.0),
                ),
            ]
        )

def create_lightning_module(self, use_kv_cache: bool = False) -> pl.LightningModule:
    model_kwargs = {
        "input_size": self.input_size,
        "context_length": self.context_length,
        "max_context_length": self.max_context_length,
        "lags_seq": self.lags_seq,
        "n_layer": self.n_layer,
        "n_embd_per_head": self.n_embd_per_head,
        "n_head": self.n_head,
        "scaling": self.scaling,
        "distr_output": self.distr_output,
        "num_parallel_samples": self.num_parallel_samples,
        "rope_scaling": self.rope_scaling,
        "time_feat": self.time_feat,
        "dropout": self.dropout,
    }
    if self.ckpt_path is not None:
        module = LagLlamaLightningModule.load_from_checkpoint(
            checkpoint_path=self.ckpt_path,
            loss=self.loss,
            lr=self.lr,
            weight_decay=self.weight_decay,
            context_length=self.context_length,
            prediction_length=self.prediction_length,
            model_kwargs=model_kwargs,
            # Augmentations
            aug_prob=self.aug_prob,
            freq_mask_rate=self.freq_mask_rate,
            freq_mixing_rate=self.freq_mixing_rate,
            jitter_prob=self.jitter_prob,
            jitter_sigma=self.jitter_sigma,
            scaling_prob=self.scaling_prob,
            scaling_sigma=self.scaling_sigma,
            rotation_prob=self.rotation_prob,
            permutation_prob=self.permutation_prob,
            permutation_max_segments=self.permutation_max_segments,
            permutation_seg_mode=self.permutation_seg_mode,
            magnitude_warp_prob=self.magnitude_warp_prob,
            magnitude_warp_sigma=self.magnitude_warp_sigma,
            magnitude_warp_knot=self.magnitude_warp_knot,
            time_warp_prob=self.time_warp_prob,
            time_warp_sigma=self.time_warp_sigma,
            time_warp_knot=self.time_warp_knot,
            window_slice_prob=self.window_slice_prob,
            window_slice_reduce_ratio=self.window_slice_reduce_ratio,
            window_warp_prob=self.window_warp_prob,
            window_warp_window_ratio=self.window_warp_window_ratio,
            window_warp_scales=self.window_warp_scales,
            use_kv_cache=use_kv_cache,
            data_id_to_name_map=self.data_id_to_name_map,
            use_cosine_annealing_lr=self.use_cosine_annealing_lr,
            cosine_annealing_lr_args=self.cosine_annealing_lr_args,
            track_loss_per_series=self.track_loss_per_series,
        )
        # config = LoraConfig(
        #     r=16,
        #     lora_alpha=16,
        #     target_modules=["q_proj", "kv_proj"],
        #     lora_dropout=0.1,
        #     # use_original_init=False,
        #     bias="none",
        #     modules_to_save=["classifier"],
        # )
        # lora_model = get_peft_model(module.model, config)
        # module.model = lora_model
        return module
    else:
        return LagLlamaLightningModule(
            loss=self.loss,
            lr=self.lr,
            weight_decay=self.weight_decay,
            context_length=self.context_length,
            prediction_length=self.prediction_length,
            model_kwargs=model_kwargs,
            # Augmentations
            aug_prob=self.aug_prob,
            freq_mask_rate=self.freq_mask_rate,
            freq_mixing_rate=self.freq_mixing_rate,
            jitter_prob=self.jitter_prob,
            jitter_sigma=self.jitter_sigma,
            scaling_prob=self.scaling_prob,
            scaling_sigma=self.scaling_sigma,
            rotation_prob=self.rotation_prob,
            permutation_prob=self.permutation_prob,
            permutation_max_segments=self.permutation_max_segments,
            permutation_seg_mode=self.permutation_seg_mode,
            magnitude_warp_prob=self.magnitude_warp_prob,
            magnitude_warp_sigma=self.magnitude_warp_sigma,
            magnitude_warp_knot=self.magnitude_warp_knot,
            time_warp_prob=self.time_warp_prob,
            time_warp_sigma=self.time_warp_sigma,
            time_warp_knot=self.time_warp_knot,
            window_slice_prob=self.window_slice_prob,
            window_slice_reduce_ratio=self.window_slice_reduce_ratio,
            window_warp_prob=self.window_warp_prob,
            window_warp_window_ratio=self.window_warp_window_ratio,
            window_warp_scales=self.window_warp_scales,
            use_kv_cache=use_kv_cache,
            data_id_to_name_map=self.data_id_to_name_map,
            use_cosine_annealing_lr=self.use_cosine_annealing_lr,
            cosine_annealing_lr_args=self.cosine_annealing_lr_args,
            track_loss_per_series=self.track_loss_per_series,
        )

def _create_instance_splitter(self, module: LagLlamaLightningModule, mode: str):
    assert mode in ["training", "validation", "test"]

    instance_sampler = {
        "training": self.train_sampler,
        "validation": self.validation_sampler,
        "test": TestSplitSampler(),
    }[mode]

    return InstanceSplitter(
        target_field=FieldName.TARGET,
        is_pad_field=FieldName.IS_PAD,
        start_field=FieldName.START,
        forecast_start_field=FieldName.FORECAST_START,
        instance_sampler=instance_sampler,
        past_length=self.context_length + max(self.lags_seq),
        future_length=self.prediction_length,
        time_series_fields=[FieldName.FEAT_TIME, FieldName.OBSERVED_VALUES]
        if self.time_feat
        else [FieldName.OBSERVED_VALUES],
        dummy_value=self.distr_output.value_in_support,
    )

def create_training_data_loader(
    self,
    data: Dataset,
    module: LagLlamaLightningModule,
    shuffle_buffer_length: Optional[int] = None,
    **kwargs,
) -> Iterable:
    data = Cyclic(data).stream()
    instances = self._create_instance_splitter(module, "training").apply(
        data, is_train=True
    )
    if self.time_feat:
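        # as_stacked_batches collates `batch_size` sampled windows into one
        # tensor per field; a batch whose per-window arrays differ in length
        # would fail here with the reported "inhomogeneous shape" ValueError.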
        return as_stacked_batches(
            instances,
            batch_size=self.batch_size,
            shuffle_buffer_length=shuffle_buffer_length,
            field_names=TRAINING_INPUT_NAMES
            + ["past_time_feat", "future_time_feat", "data_id", "item_id"],
            # + ["past_time_feat", "future_time_feat"],
            output_type=torch.tensor,
            num_batches_per_epoch=self.num_batches_per_epoch,
        )

    else:
        return as_stacked_batches(
            instances,
            batch_size=self.batch_size,
            shuffle_buffer_length=shuffle_buffer_length,
            # field_names=TRAINING_INPUT_NAMES,
            field_names=TRAINING_INPUT_NAMES + ["data_id", "item_id"],
            output_type=torch.tensor,
            num_batches_per_epoch=self.num_batches_per_epoch,
        )

def create_validation_data_loader(
    self,
    data: Dataset,
    module: LagLlamaLightningModule,
    **kwargs,
) -> Iterable:
    instances = self._create_instance_splitter(module, "validation").apply(
        data, is_train=True
    )
    if self.time_feat:
        return as_stacked_batches(
            instances,
            batch_size=self.batch_size,
            field_names=TRAINING_INPUT_NAMES
            + ["past_time_feat", "future_time_feat", "data_id", "item_id"],
            # + ["past_time_feat", "future_time_feat"],
            output_type=torch.tensor,
        )
    else:
        return as_stacked_batches(
            instances,
            batch_size=self.batch_size,
            field_names=TRAINING_INPUT_NAMES + ["data_id", "item_id"],
            # field_names=TRAINING_INPUT_NAMES,
            output_type=torch.tensor,
        )

def create_trainer_dl(self, dataset, module):
    # instances = self._create_instance_splitter(module, "training").apply(
    #     dataset, is_train=True
    # )
    if self.time_feat:
        # return as_stacked_batches(
        #     instances,
        #     batch_size=self.batch_size,
        #     field_names=TRAINING_INPUT_NAMES
        #     + ["past_time_feat", "future_time_feat", "data_id", "item_id"],
        #     # + ["past_time_feat", "future_time_feat"],
        #     output_type=torch.tensor,
        # )
        data_loader = TrainDataLoader(
            # We cache the dataset, to make training faster
            Cached(dataset),
            batch_size=self.batch_size,
            stack_fn=batchify,
            transform=self.create_transformation(),
            num_batches_per_epoch=100,
        )
        return data_loader

def create_predictor(
    self,
    transformation: Transformation,
    module,
) -> PyTorchPredictor:
    prediction_splitter = self._create_instance_splitter(module, "test")
    if self.time_feat:
        return PyTorchPredictor(
            input_transform=transformation + prediction_splitter,
            input_names=PREDICTION_INPUT_NAMES
            + ["past_time_feat", "future_time_feat"],
            prediction_net=module,
            batch_size=self.batch_size,
            prediction_length=self.prediction_length,
            device="cuda" if torch.cuda.is_available() else "cpu",
        )
    else:
        return PyTorchPredictor(
            input_transform=transformation + prediction_splitter,
            input_names=PREDICTION_INPUT_NAMES,
            prediction_net=module,
            batch_size=self.batch_size,
            prediction_length=self.prediction_length,
            device="cuda" if torch.cuda.is_available() else "cpu",
        )

```
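For reference, this estimator follows the standard GluonTS estimator interface, so the error typically surfaces once `train()` starts pulling batches. A minimal sketch of how it is usually driven; the dataset name and hyperparameters below are illustrative assumptions, not taken from the report:

```python
from gluonts.dataset.repository.datasets import get_dataset

# Hypothetical example: any univariate GluonTS dataset would do here.
dataset = get_dataset("electricity")

estimator = LagLlamaEstimator(
    prediction_length=dataset.metadata.prediction_length,
    context_length=32,              # assumed value for illustration
    batch_size=32,
    num_batches_per_epoch=50,
    trainer_kwargs={"max_epochs": 1},
)

# PyTorchLightningEstimator.train() builds the transformation, the data
# loaders defined above, and the Lightning module, then fits the model.
predictor = estimator.train(training_data=dataset.train)
```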

Usama-Samad commented 7 months ago

Hello, I'm also getting the same error. Can someone please take a look?

```python
dataset = get_dataset("solar_nips", regenerate=False)
dataset.metadata

train_grouper = MultivariateGrouper(
    max_target_dim=int(dataset.metadata.feat_static_cat[0].cardinality)
)
test_grouper = MultivariateGrouper(
    num_test_dates=int(len(dataset.test) / len(dataset.train)),
    max_target_dim=int(dataset.metadata.feat_static_cat[0].cardinality),
)

dataset_train = train_grouper(dataset.train)
dataset_test = test_grouper(dataset.test)
```


```
ValueError                                Traceback (most recent call last)
Cell In[7], line 2
      1 dataset_train = train_grouper(dataset.train)
----> 2 dataset_test = test_grouper(dataset.test)

File ~\anaconda3\envs\Thesis_2\lib\site-packages\gluonts\dataset\multivariate_grouper.py:87, in MultivariateGrouper.__call__(self, dataset)
     85 def __call__(self, dataset: Dataset) -> Dataset:
     86     self._preprocess(dataset)
---> 87     return self._group_all(dataset)

File ~\anaconda3\envs\Thesis_2\lib\site-packages\gluonts\dataset\multivariate_grouper.py:125, in MultivariateGrouper._group_all(self, dataset)
    123     grouped_dataset = self._prepare_train_data(dataset)
    124 else:
--> 125     grouped_dataset = self._prepare_test_data(dataset)
    126 return grouped_dataset

File ~\anaconda3\envs\Thesis_2\lib\site-packages\gluonts\dataset\multivariate_grouper.py:152, in MultivariateGrouper._prepare_test_data(self, dataset)
    148 assert self.num_test_dates is not None
    150 logging.info("group test time series to datasets")
--> 152 grouped_data = self._transform_target(self._left_pad_data, dataset)
    153 # splits test dataset with rolling date into N R^d time series where
    154 # N is the number of rolling evaluation dates
    155 split_dataset = np.split(
    156     grouped_data[FieldName.TARGET], self.num_test_dates
    157 )

File ~\anaconda3\envs\Thesis_2\lib\site-packages\gluonts\dataset\multivariate_grouper.py:205, in MultivariateGrouper._transform_target(funcs, dataset)
    203 @staticmethod
    204 def _transform_target(funcs, dataset: Dataset) -> DataEntry:
--> 205     return {FieldName.TARGET: np.array([funcs(data) for data in dataset])}

ValueError: setting an array element with a sequence. The requested array has an inhomogeneous shape after 1 dimensions. The detected shape was (959,) + inhomogeneous part.
```
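The traceback ends in `MultivariateGrouper._transform_target`, which stacks every grouped series' target with `np.array([...])`; on NumPy >= 1.24 that call fails as soon as the padded targets do not all share the same length. A quick diagnostic sketch, assuming the `dataset` from the snippet above (this only checks lengths, it is not a fix):

```python
from collections import Counter

# Count the distinct target lengths in the raw test split. More than one
# distinct length means the grouper cannot stack them into a 2-D array.
lengths = Counter(len(entry["target"]) for entry in dataset.test)
print(lengths)
```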

Shiv716 commented 5 months ago

Hello, I have the error as well.

```python
# Load the tokenized datasets
train_dataset = load_from_disk('train_dataset')
eval_dataset = load_from_disk('eval_dataset')

# Load CLang-8 dataset
df = pd.read_csv('Data/clang8.csv')

# Create Hugging Face Dataset
dataset = Dataset.from_pandas(df)

# Split dataset
train_test_split = dataset.train_test_split(test_size=0.1)
train_dataset = train_test_split['train']
eval_dataset = train_test_split['test']

# Load pretrained model from the specific checkpoint
model = T5ForConditionalGeneration.from_pretrained('./results/checkpoint-3000')

# Load tokenizer
tokenizer = AutoTokenizer.from_pretrained('t5-small')

# Define training arguments (no need to set training-specific args for evaluation)
training_args = TrainingArguments(
    output_dir='./results',
    per_device_eval_batch_size=2,  # Reduced batch size for evaluation
)

# Initialize Trainer
trainer = Trainer(
    model=model,
    args=training_args,
    eval_dataset=eval_dataset,
    tokenizer=tokenizer,
)

# Function to evaluate in chunks
def evaluate_in_chunks(trainer, dataset, chunk_size=100):
    total_size = len(dataset)
    all_predictions = []
    for i in range(0, total_size, chunk_size):
        chunk = dataset.select(range(i, min(i + chunk_size, total_size)))
        predictions = trainer.predict(chunk)
        all_predictions.append(predictions.predictions)
    return all_predictions

# Evaluate the model in chunks
predictions = evaluate_in_chunks(trainer, eval_dataset, chunk_size=100)

# Generate predictions
predictions = trainer.predict(eval_dataset)

# -- Check if predictions are real or not
print("Checking predictions: " + str(len(predictions)))
print(predictions)

predictions = np.asarray(predictions, dtype='object')

# --
pred_texts = tokenizer.batch_decode(np.squeeze(predictions), skip_special_tokens=True)

pred_texts = []
for prediction in predictions:
    pred_texts.append(tokenizer.decode(prediction[prediction], skip_special_token=True))

# --
print(predictions.shape)

# --
# Save predictions and references
with open('predictions.txt', 'w') as pred_file, open('references.txt', 'w') as ref_file:
    for pred, ref in zip(pred_texts, eval_dataset['Column2']):
        pred_file.write(pred + '\n')
        ref_file.write(ref.strip() + '\n')

# Initialize ERRANT
annotator = errant.load('en')

# Align predictions and references
with open('predictions.txt', 'r') as pred_file, open('references.txt', 'r') as ref_file:
    pred_sents = pred_file.readlines()
    ref_sents = ref_file.readlines()

aligned = []
for pred, ref in zip(pred_sents, ref_sents):
    pred_sent = annotator.parse(pred.strip())
    ref_sent = annotator.parse(ref.strip())
    aligned.append(annotator.align(pred_sent, ref_sent))

# Evaluate using ERRANT
P, R, F = errant.scorer(aligned)
print(f'Precision: {P:.2f}, Recall: {R:.2f}, F0.5: {F:.2f}')
```
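The same NumPy behaviour is likely at play here: `trainer.predict(...).predictions` can be a tuple of arrays (or arrays with differing shapes) for seq2seq models, and calling `np.asarray` on such a mixed structure raises the inhomogeneous-shape error. A small inspection sketch, assuming the Hugging Face `Trainer` from the snippet above:

```python
# Inspect what predict() actually returned before trying to stack it.
out = trainer.predict(eval_dataset)
preds = out.predictions

if isinstance(preds, tuple):
    # Seq2seq models often return (logits, ...) as a tuple of arrays.
    for i, p in enumerate(preds):
        print(i, getattr(p, "shape", type(p)))
else:
    print(getattr(preds, "shape", type(preds)))
```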