awslabs / gluonts

Probabilistic time series modeling in Python
https://ts.gluon.ai
Apache License 2.0
4.41k stars 740 forks source link

CategoricalOutput not working in DeepAREstimator #3201

Open DushyantSahoo opened 1 week ago

DushyantSahoo commented 1 week ago

Description

I am trying to predict discrete integer values between 1 and 22. I started with a Gaussian distribution, but the predictions were far outside the expected range (1 to 22). Next, I moved to NegativeBinomial, but the model has a hard time converging and the predictions are off. I am now trying to use CategoricalOutput, but there seems to be a bug in it.

estimator = DeepAREstimator(
                                 prediction_length=12,
                                 context_length = context_length,
                                 freq="M",
    num_layers=4,
    scaling = True,
    use_feat_dynamic_real=True,
    lags_seq = [1,2,3,6,12,24],
    dropout_rate = 0.2,
    distr_output = CategoricalOutput(num_cats=22),
    num_cells = 30,
    batch_size = batch_size,

    trainer=Trainer(ctx="cpu",epochs=100, learning_rate = 1e-3))

Error message

File /opt/omniai/work/instance1/jupyter/ssm-env/lib/python3.8/site-packages/gluonts/mx/model/estimator.py:239, in GluonEstimator.train(self, training_data, validation_data, shuffle_buffer_length, cache_data, kwargs) 231 def train( 232 self, 233 training_data: Dataset, (...) 237 kwargs, 238 ) -> Predictor: --> 239 return self.train_model( 240 training_data=training_data, 241 validation_data=validation_data, 242 shuffle_buffer_length=shuffle_buffer_length, 243 cache_data=cache_data, 244 ).predictor

File /opt/omniai/work/instance1/jupyter/ssm-env/lib/python3.8/site-packages/gluonts/mx/model/estimator.py:216, in GluonEstimator.train_model(self, training_data, validation_data, from_predictor, shuffle_buffer_length, cache_data) 213 else: 214 copy_parameters(from_predictor.network, training_network) --> 216 self.trainer( 217 net=training_network, 218 train_iter=training_data_loader, 219 validation_iter=validation_data_loader, 220 ) 222 with self.trainer.ctx: 223 predictor = self.create_predictor(transformation, training_network)

File /opt/omniai/work/instance1/jupyter/ssm-env/lib/python3.8/site-packages/gluonts/mx/trainer/_base.py:420, in Trainer.call(self, net, train_iter, validation_iter) 415 curr_lr = trainer.learning_rate 416 logger.info( 417 f"Epoch[{epoch_no}] Learning rate is {curr_lr}" 418 ) --> 420 epoch_loss = loop( 421 epoch_no, 422 train_iter, 423 num_batches_to_use=self.num_batches_per_epoch, 424 ) 426 should_continue = self.callbacks.on_train_epoch_end( 427 epoch_no=epoch_no, 428 epoch_loss=loss_value(epoch_loss), 429 training_network=net, 430 trainer=trainer, 431 ) 433 if is_validation_available:

File /opt/omniai/work/instance1/jupyter/ssm-env/lib/python3.8/site-packages/gluonts/mx/trainer/_base.py:286, in Trainer.__call__.<locals>.loop(epoch_no, batch_iter, num_batches_to_use, is_training) 284 if first_forward: 285 first_forward = False --> 286 _ = net(*batch.values()) 288 self.callbacks.on_network_initializing_end( 289 training_network=net 290 ) 292 # Call the batch start callback as the model was not 293 # compiled before

File /opt/omniai/work/instance1/jupyter/ssm-env/lib/python3.8/site-packages/mxnet/gluon/block.py:825, in Block.call(self, args) 822 for hook in self._forward_pre_hooks.values(): 823 hook(self, args) --> 825 out = self.forward(args) 827 for hook in self._forward_hooks.values(): 828 hook(self, args, out)

File /opt/omniai/work/instance1/jupyter/ssm-env/lib/python3.8/site-packages/mxnet/gluon/block.py:1492, in HybridBlock.forward(self, x, args) 1486 raise ValueError('Find multiple contexts in the input, ' 1487 'After hybridized, the HybridBlock only supports one input ' 1488 'context. You can print the ele.ctx in the ' 1489 'input arguments to inspect their contexts. ' 1490 'Find all contexts = {}'.format(ctx_set)) 1491 with ctx: -> 1492 return self._call_cached_op(x, args) 1493 with ctx: 1494 try:

File /opt/omniai/work/instance1/jupyter/ssm-env/lib/python3.8/site-packages/mxnet/gluon/block.py:1200, in HybridBlock._call_cached_op(self, args) 1198 def _call_cached_op(self, args): 1199 if self._cached_op is None: -> 1200 self._build_cache(*args) 1201 assert self._cached_op, "Gluon failed to build the cache. " \ 1202 "This should never happen. " \ 1203 "Please submit an issue on Github" \ 1204 " https://github.com/apache/incubator-mxnet." 1205 if self._callback:

File /opt/omniai/work/instance1/jupyter/ssm-env/lib/python3.8/site-packages/mxnet/gluon/block.py:1068, in HybridBlock._build_cache(self, args) 1067 def _build_cache(self, args): -> 1068 data, out = self._get_graph(*args) 1069 data_names = {data.name: i for i, data in enumerate(data)} 1070 input_names = out.list_inputs()

File /opt/omniai/work/instance1/jupyter/ssm-env/lib/python3.8/site-packages/mxnet/gluon/block.py:1060, in HybridBlock._get_graph(self, args) 1058 params = {i: j.var() for i, j in self._reg_params.items()} 1059 with self.name_scope(): -> 1060 out = self.hybrid_forward(symbol, grouped_inputs, **params) # pylint: disable=no-value-for-parameter 1061 out, self._out_format = _flatten(out, "output") 1063 self._cached_graph = symbol_inputs, symbol.Group(out, _check_same_symbol_type(out))

File /opt/omniai/work/instance1/jupyter/ssm-env/lib/python3.8/site-packages/gluonts/mx/model/deepar/_network.py:928, in DeepARTrainingNetwork.hybrid_forward(self, F, feat_static_cat, feat_static_real, past_time_feat, past_target, past_observed_values, past_is_pad, future_time_feat, future_target, future_observed_values) 894 def hybrid_forward( 895 self, 896 F, (...) 905 future_observed_values: Tensor, 906 ) -> Tensor: 907 """ 908 Computes the loss for training DeepAR, all inputs tensors representing 909 time series have NTC layout. (...) 925 ------- 926 """ --> 928 outputs = self.distribution( 929 feat_static_cat=feat_static_cat, 930 feat_static_real=feat_static_real, 931 past_time_feat=past_time_feat, 932 past_target=past_target, 933 past_observed_values=past_observed_values, 934 past_is_pad=past_is_pad, 935 future_time_feat=future_time_feat, 936 future_target=future_target, 937 future_observed_values=future_observed_values, 938 return_rnn_outputs=True, 939 ) 940 # since return_rnn_outputs=True, assert: 941 assert isinstance(outputs, tuple)

File /opt/omniai/work/instance1/jupyter/ssm-env/lib/python3.8/site-packages/gluonts/mx/model/deepar/_network.py:887, in DeepARTrainingNetwork.distribution(self, feat_static_cat, feat_static_real, past_time_feat, past_target, past_observed_values, past_is_pad, future_time_feat, future_target, future_observed_values, return_rnn_outputs) 880 distr_args = self.proj_distr_args(rnn_outputs) 882 # return the output of rnn layers if return_rnn_outputs=True, so that 883 # it can be used for regularization later assume no dropout for 884 # outputs, so can be directly used for activation regularization 885 return ( 886 ( --> 887 self.distr_output.distribution(distr_args, scale=scale), 888 rnn_outputs, 889 ) 890 if return_rnn_outputs 891 else self.distr_output.distribution(distr_args, scale=scale) 892 )

File /opt/omniai/work/instance1/jupyter/ssm-env/lib/python3.8/site-packages/gluonts/mx/distribution/categorical.py:120, in CategoricalOutput.distribution(self, distr_args, loc, scale, **kwargs) 117 def distribution( 118 self, distr_args, loc=None, scale=None, **kwargs 119 ) -> Distribution: --> 120 distr = Categorical(distr_args) 121 return distr

File /opt/omniai/work/instance1/jupyter/ssm-env/lib/python3.8/site-packages/gluonts/core/component.py:364, in validated.<locals>.validator.<locals>.init_wrapper(*args, **kwargs) 361 self.__class__.__getnewargs_ex__ = validated_getnewargs_ex 362 self.__class__.__repr__ = validated_repr --> 364 return init(self, **all_args)

File /opt/omniai/work/instance1/jupyter/ssm-env/lib/python3.8/site-packages/gluonts/mx/distribution/categorical.py:42, in Categorical.__init__(self, log_probs) 40 super().__init__() 41 self.log_probs = log_probs ---> 42 self.num_cats = self.log_probs.shape[-1] 43 self.cats = self.F.arange(self.num_cats) 44 self._probs = None

AttributeError: 'Symbol' object has no attribute 'shape'

Environment