sktime / pytorch-forecasting

Time series forecasting with PyTorch
https://pytorch-forecasting.readthedocs.io/
MIT License

StopIteration error #244

Closed. kevkid closed this issue 3 years ago.

kevkid commented 3 years ago

Expected behavior

I am trying to predict future closing prices of bitcoin from its past opening prices. I have downloaded a dataset of all open and close prices for the past 6 years. The format of the data is shown in a screenshot attached to the original issue (a CSV with Date, Open, and Close columns).

Here is my code:

import pytorch_lightning as pl
from pytorch_lightning.callbacks import EarlyStopping, LearningRateMonitor
from pytorch_forecasting.metrics import QuantileLoss
from pytorch_forecasting import TimeSeriesDataSet, TemporalFusionTransformer
import pandas as pd
data = pd.read_csv('BTC-USD.csv')
data = data[['Date','Open', 'Close']]
data['time_idx'] = data.index
data['groups'] = 0
# define dataset
max_encode_length = 1
max_prediction_length = 1
training_cutoff = "2019-12-31"  # day for cutoff

training = TimeSeriesDataSet(
    data[lambda x: x.Date <= training_cutoff],
    time_idx= 'time_idx',
    target= 'Close',
    group_ids=['groups'],
    #min_encoder_length = 1,
    max_encoder_length=max_encode_length,
    max_prediction_length=max_prediction_length,
    allow_missings=True,
    #static_categoricals=[ ... ],
    #static_reals=[ ... ],
    #time_varying_known_categoricals=[ ... ],
    #time_varying_known_reals=[ ... ],
    #time_varying_unknown_categoricals=[ ... ],
    #time_varying_unknown_reals=[ ... ],
)
validation = TimeSeriesDataSet.from_dataset(training, data, min_prediction_idx=training.index.time.max() + 1, stop_randomization=True)
batch_size = 16
train_dataloader = training.to_dataloader(train=True, batch_size=batch_size, num_workers=8)
val_dataloader = validation.to_dataloader(train=False, batch_size=batch_size, num_workers=8)

early_stop_callback = EarlyStopping(monitor="val_loss", min_delta=1e-4, patience=1, verbose=False, mode="min")
lr_logger = LearningRateMonitor()
trainer = pl.Trainer(
    max_epochs=100,
    gpus=None,
    gradient_clip_val=0.1,
    limit_train_batches=30,
    callbacks=[lr_logger, early_stop_callback],
)

tft = TemporalFusionTransformer.from_dataset(
    training,
    learning_rate=0.03,
    hidden_size=32,
    attention_head_size=1,
    dropout=0.1,
    hidden_continuous_size=16,
    output_size=7,
    loss=QuantileLoss(),
    log_interval=2,
    reduce_on_plateau_patience=4
)
tft.cpu()
print(f"Number of parameters in network: {tft.size()/1e3:.1f}k")
trainer.fit(
    tft, train_dataloader=train_dataloader, val_dataloaders=val_dataloader,
)

Here is what I get as output:

   | Name                               | Type                            | Params
----------------------------------------------------------------------------------------
0  | loss                               | QuantileLoss                    | 0     
1  | logging_metrics                    | ModuleList                      | 0     
2  | input_embeddings                   | MultiEmbedding                  | 0     
3  | prescalers                         | ModuleDict                      | 0     
4  | static_variable_selection          | VariableSelectionNetwork        | 0     
5  | encoder_variable_selection         | VariableSelectionNetwork        | 0     
6  | decoder_variable_selection         | VariableSelectionNetwork        | 0     
7  | static_context_variable_selection  | GatedResidualNetwork            | 4.3 K 
8  | static_context_initial_hidden_lstm | GatedResidualNetwork            | 4.3 K 
9  | static_context_initial_cell_lstm   | GatedResidualNetwork            | 4.3 K 
10 | static_context_enrichment          | GatedResidualNetwork            | 4.3 K 
11 | lstm_encoder                       | LSTM                            | 8.4 K 
12 | lstm_decoder                       | LSTM                            | 8.4 K 
13 | post_lstm_gate_encoder             | GatedLinearUnit                 | 2.1 K 
14 | post_lstm_add_norm_encoder         | AddNorm                         | 64    
15 | static_enrichment                  | GatedResidualNetwork            | 5.3 K 
16 | multihead_attn                     | InterpretableMultiHeadAttention | 4.2 K 
17 | post_attn_gate_norm                | GateAddNorm                     | 2.2 K 
18 | pos_wise_ff                        | GatedResidualNetwork            | 4.3 K 
19 | pre_output_gate_norm               | GateAddNorm                     | 2.2 K 
20 | output_layer                       | Linear                          | 231   
----------------------------------------------------------------------------------------
54.6 K    Trainable params
0         Non-trainable params
54.6 K    Total params
Validation sanity check: 0it [00:00, ?it/s]
---------------------------------------------------------------------------
StopIteration                             Traceback (most recent call last)
<ipython-input-46-e7b6a836a434> in <module>
----> 1 trainer.fit(
      2     tft, train_dataloader=train_dataloader, val_dataloaders=val_dataloader,
      3 )

~/projects/crypto_trading/crypto-env/lib/python3.8/site-packages/pytorch_lightning/trainer/trainer.py in fit(self, model, train_dataloader, val_dataloaders, datamodule)
    468         self.call_hook('on_fit_start')
    469 
--> 470         results = self.accelerator_backend.train()
    471         self.accelerator_backend.teardown()
    472 

~/projects/crypto_trading/crypto-env/lib/python3.8/site-packages/pytorch_lightning/accelerators/cpu_accelerator.py in train(self)
     60 
     61         # train or test
---> 62         results = self.train_or_test()
     63         return results
     64 

~/projects/crypto_trading/crypto-env/lib/python3.8/site-packages/pytorch_lightning/accelerators/accelerator.py in train_or_test(self)
     67             results = self.trainer.run_test()
     68         else:
---> 69             results = self.trainer.train()
     70         return results
     71 

~/projects/crypto_trading/crypto-env/lib/python3.8/site-packages/pytorch_lightning/trainer/trainer.py in train(self)
    490 
    491     def train(self):
--> 492         self.run_sanity_check(self.get_model())
    493 
    494         # set stage for logging

~/projects/crypto_trading/crypto-env/lib/python3.8/site-packages/pytorch_lightning/trainer/trainer.py in run_sanity_check(self, ref_model)
    688 
    689             # run eval step
--> 690             _, eval_results = self.run_evaluation(test_mode=False, max_batches=self.num_sanity_val_batches)
    691 
    692             # allow no returns from eval

~/projects/crypto_trading/crypto-env/lib/python3.8/site-packages/pytorch_lightning/trainer/trainer.py in run_evaluation(self, test_mode, max_batches)
    604                 # lightning module methods
    605                 with self.profiler.profile("evaluation_step_and_end"):
--> 606                     output = self.evaluation_loop.evaluation_step(test_mode, batch, batch_idx, dataloader_idx)
    607                     output = self.evaluation_loop.evaluation_step_end(output)
    608 

~/projects/crypto_trading/crypto-env/lib/python3.8/site-packages/pytorch_lightning/trainer/evaluation_loop.py in evaluation_step(self, test_mode, batch, batch_idx, dataloader_idx)
    176         else:
    177             model_ref._current_fx_name = "validation_step"
--> 178             output = self.trainer.accelerator_backend.validation_step(args)
    179 
    180         # capture any logged information

~/projects/crypto_trading/crypto-env/lib/python3.8/site-packages/pytorch_lightning/accelerators/cpu_accelerator.py in validation_step(self, args)
     75 
     76     def validation_step(self, args):
---> 77         return self._step(self.trainer.model.validation_step, args)
     78 
     79     def test_step(self, args):

~/projects/crypto_trading/crypto-env/lib/python3.8/site-packages/pytorch_lightning/accelerators/cpu_accelerator.py in _step(self, model_step, args)
     68                 output = model_step(*args)
     69         else:
---> 70             output = model_step(*args)
     71         return output
     72 

~/projects/crypto_trading/crypto-env/lib/python3.8/site-packages/pytorch_forecasting/models/base_model.py in validation_step(self, batch, batch_idx)
    205     def validation_step(self, batch, batch_idx):
    206         x, y = batch
--> 207         log, _ = self.step(x, y, batch_idx)  # log loss
    208         self.log("val_loss", log["loss"], on_step=False, on_epoch=True, prog_bar=True)
    209         return log

~/projects/crypto_trading/crypto-env/lib/python3.8/site-packages/pytorch_forecasting/models/temporal_fusion_transformer/__init__.py in step(self, x, y, batch_idx)
    545         """
    546         # extract data and run model
--> 547         log, out = super().step(x, y, batch_idx)
    548         # calculate interpretations etc for latter logging
    549         if self.log_interval > 0:

~/projects/crypto_trading/crypto-env/lib/python3.8/site-packages/pytorch_forecasting/models/base_model.py in step(self, x, y, batch_idx, **kwargs)
    283             loss = loss * (1 + monotinicity_loss)
    284         else:
--> 285             out = self(x, **kwargs)
    286             out["prediction"] = self.transform_output(out)
    287 

~/projects/crypto_trading/crypto-env/lib/python3.8/site-packages/torch/nn/modules/module.py in _call_impl(self, *input, **kwargs)
    725             result = self._slow_forward(*input, **kwargs)
    726         else:
--> 727             result = self.forward(*input, **kwargs)
    728         for hook in itertools.chain(
    729                 _global_forward_hooks.values(),

~/projects/crypto_trading/crypto-env/lib/python3.8/site-packages/pytorch_forecasting/models/temporal_fusion_transformer/__init__.py in forward(self, x)
    452             name: input_vectors[name][:, :max_encoder_length] for name in self.encoder_variables
    453         }
--> 454         embeddings_varying_encoder, encoder_sparse_weights = self.encoder_variable_selection(
    455             embeddings_varying_encoder,
    456             static_context_variable_selection[:, :max_encoder_length],

~/projects/crypto_trading/crypto-env/lib/python3.8/site-packages/torch/nn/modules/module.py in _call_impl(self, *input, **kwargs)
    725             result = self._slow_forward(*input, **kwargs)
    726         else:
--> 727             result = self.forward(*input, **kwargs)
    728         for hook in itertools.chain(
    729                 _global_forward_hooks.values(),

~/projects/crypto_trading/crypto-env/lib/python3.8/site-packages/pytorch_forecasting/models/temporal_fusion_transformer/sub_modules.py in forward(self, x, context)
    339             outputs = outputs.sum(dim=-1)
    340         else:  # for one input, do not perform variable selection but just encoding
--> 341             name = next(iter(self.single_variable_grns.keys()))
    342             variable_embedding = x[name]
    343             if name in self.prescalers:

StopIteration: 

I thought it had something to do with the train/validation loaders, so I looped over one to check:

for batch, _ in val_dataloader:
    output = tft(batch)
    print(batch['encoder_target'][0])
    print(batch['decoder_target'][0])
    print(output['prediction'][0])
    break

and got:

StopIteration                             Traceback (most recent call last)
<ipython-input-8-cb65078db993> in <module>
      1 for batch, _ in val_dataloader:
----> 2       output = tft(batch)
      3       print(batch['encoder_target'][0])
      4       print(batch['decoder_target'][0])
      5       print(output['prediction'][0])

~/projects/crypto_trading/crypto-env/lib/python3.8/site-packages/torch/nn/modules/module.py in _call_impl(self, *input, **kwargs)
    725             result = self._slow_forward(*input, **kwargs)
    726         else:
--> 727             result = self.forward(*input, **kwargs)
    728         for hook in itertools.chain(
    729                 _global_forward_hooks.values(),

~/projects/crypto_trading/crypto-env/lib/python3.8/site-packages/pytorch_forecasting/models/temporal_fusion_transformer/__init__.py in forward(self, x)
    452             name: input_vectors[name][:, :max_encoder_length] for name in self.encoder_variables
    453         }
--> 454         embeddings_varying_encoder, encoder_sparse_weights = self.encoder_variable_selection(
    455             embeddings_varying_encoder,
    456             static_context_variable_selection[:, :max_encoder_length],

~/projects/crypto_trading/crypto-env/lib/python3.8/site-packages/torch/nn/modules/module.py in _call_impl(self, *input, **kwargs)
    725             result = self._slow_forward(*input, **kwargs)
    726         else:
--> 727             result = self.forward(*input, **kwargs)
    728         for hook in itertools.chain(
    729                 _global_forward_hooks.values(),

~/projects/crypto_trading/crypto-env/lib/python3.8/site-packages/pytorch_forecasting/models/temporal_fusion_transformer/sub_modules.py in forward(self, x, context)
    339             outputs = outputs.sum(dim=-1)
    340         else:  # for one input, do not perform variable selection but just encoding
--> 341             name = next(iter(self.single_variable_grns.keys()))
    342             variable_embedding = x[name]
    343             if name in self.prescalers:

StopIteration: 

but my batch looks like:

{'encoder_cat': tensor([], size=(16, 1, 0), dtype=torch.int64),
 'encoder_cont': tensor([], size=(16, 1, 0)),
 'encoder_target': tensor([[7292.9951],
         [7193.5991],
         [7200.1743],
         [6985.4702],
         [7344.8843],
         [7410.6567],
         [7411.3174],
         [7769.2192],
         [8163.6924],
         [8079.8628],
         [7879.0713],
         [8166.5542],
         [8037.5376],
         [8192.4941],
         [8144.1943],
         [8827.7646]]),
 'encoder_lengths': tensor([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]),
 'decoder_cat': tensor([], size=(16, 1, 0), dtype=torch.int64),
 'decoder_cont': tensor([], size=(16, 1, 0)),
 'decoder_target': tensor([[7193.5991],
         [7200.1743],
         [6985.4702],
         [7344.8843],
         [7410.6567],
         [7411.3174],
         [7769.2192],
         [8163.6924],
         [8079.8628],
         [7879.0713],
         [8166.5542],
         [8037.5376],
         [8192.4941],
         [8144.1943],
         [8827.7646],
         [8807.0107]]),
 'decoder_lengths': tensor([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]),
 'decoder_time_idx': tensor([[1931],
         [1932],
         [1933],
         [1934],
         [1935],
         [1936],
         [1937],
         [1938],
         [1939],
         [1940],
         [1941],
         [1942],
         [1943],
         [1944],
         [1945],
         [1946]]),
 'groups': tensor([[0],
         [0],
         [0],
         [0],
         [0],
         [0],
         [0],
         [0],
         [0],
         [0],
         [0],
         [0],
         [0],
         [0],
         [0],
         [0]]),
 'target_scale': tensor([[3763.6758, 3957.4490],
         [3763.6758, 3957.4490],
         [3763.6758, 3957.4490],
         [3763.6758, 3957.4490],
         [3763.6758, 3957.4490],
         [3763.6758, 3957.4490],
         [3763.6758, 3957.4490],
         [3763.6758, 3957.4490],
         [3763.6758, 3957.4490],
         [3763.6758, 3957.4490],
         [3763.6758, 3957.4490],
         [3763.6758, 3957.4490],
         [3763.6758, 3957.4490],
         [3763.6758, 3957.4490],
         [3763.6758, 3957.4490],
         [3763.6758, 3957.4490]])}

Any idea why this occurs? I am not sure whether my dataloader is in the wrong format.

jdb78 commented 3 years ago

The reason seems to be missing input: encoder_cont and encoder_cat are both empty, so the variable selection network has no variable to select (the next(iter(...)) in the traceback fails on an empty dict, hence the StopIteration). I suggest specifying at least the target in time_varying_unknown_reals. You probably also want to add the time_idx there as well.
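
A quick way to verify this (a sketch, assuming the reals and categoricals properties of TimeSeriesDataSet, which list the continuous and categorical inputs the model will actually receive):

print(training.reals)         # continuous model inputs; empty with the configuration above
print(training.categoricals)  # categorical model inputs; likewise empty here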

kevkid commented 3 years ago

@jdb78 Thank you for that hint. I apologize, I am new to time series data. I would like to understand this structure a little better, so let me ask a few questions to clear things up. Why does the target need to appear in time_varying_unknown_reals when the dataset setup already takes a target parameter? When would one use time_varying_unknown_reals rather than time_varying_known_reals? One final question: where does my time index go? I made a column that increments by 1 for each row and set time_idx='time_idx'.

EDIT: I tried again with your suggestions and things seem to be working. Out of curiosity, does this look like the right approach?

# define dataset
max_encode_length = 36
max_prediction_length = 6
training_cutoff = "2019-12-31"  # day for cutoff

training = TimeSeriesDataSet(
    data[lambda x: x.Date <= training_cutoff],
    time_idx= 'time_idx',
    target= 'Close',
    group_ids=['groups'],
    #min_encoder_length = 1,
    max_encoder_length=max_encode_length,
    max_prediction_length=max_prediction_length,
    allow_missings=True,
    #static_categoricals=[ ... ],
    #static_reals=[ ... ],
    #time_varying_known_categoricals=[ ... ],
    time_varying_known_reals=['time_idx'],  # could also add 'High', 'Low', 'Volume'
    #time_varying_unknown_categoricals=[ ... ],
    time_varying_unknown_reals=['Close', 'Open'],
)

validation = TimeSeriesDataSet.from_dataset(training, data, min_prediction_idx=training.index.time.max() + 1, stop_randomization=True)
batch_size = 32
train_dataloader = training.to_dataloader(train=True, batch_size=batch_size, num_workers=8)
val_dataloader = validation.to_dataloader(train=False, batch_size=batch_size, num_workers=8)

early_stop_callback = EarlyStopping(monitor="val_loss", min_delta=1e-4, patience=1, verbose=False, mode="min")
lr_logger = LearningRateMonitor()
trainer = pl.Trainer(
    max_epochs=100,
    gpus=None,
    gradient_clip_val=0.1,
    limit_train_batches=30,
    callbacks=[lr_logger, early_stop_callback],
)

tft = TemporalFusionTransformer.from_dataset(
    training,
    learning_rate=0.01,
    hidden_size=16,
    attention_head_size=1,
    dropout=0.1,
    hidden_continuous_size=16,
    output_size=7,
    loss=QuantileLoss(),
    log_interval=2,
    reduce_on_plateau_patience=4
)
tft.cpu()
print(f"Number of parameters in network: {tft.size()/1e3:.1f}k")
jdb78 commented 3 years ago

Yes :) Looks good to me.

paapu88 commented 3 years ago

Thanks for this issue! Sorted out my problems also. Markus

LiuNull commented 2 years ago

Thanks for this issue! Sorted out my problems too.

JustusMzB commented 1 year ago

Using the time idx as a time-varying known real led to a particular kind of overfitting for me: the TFT starts memorizing the target values that go with specific time idx values. I therefore can't recommend this workaround.
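
A sketch of one alternative, assuming the add_relative_time_idx option of TimeSeriesDataSet is available in your version: it adds a per-sample relative position feature, so the model sees where a step sits inside its window rather than an absolute, memorizable date index.

training = TimeSeriesDataSet(
    data[lambda x: x.Date <= training_cutoff],
    time_idx='time_idx',
    target='Close',
    group_ids=['groups'],
    max_encoder_length=max_encode_length,
    max_prediction_length=max_prediction_length,
    # relative window position instead of the absolute time_idx as a known real,
    # so the network cannot key its predictions to specific dates
    add_relative_time_idx=True,
    time_varying_unknown_reals=['Close', 'Open'],
)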