I am trying to predict discrete integer values between 1 and 22. I started with a Gaussian distribution, but the predictions were way outside the valid 1–22 range. Next, I moved to NegativeBinomial, but the model has a hard time converging and the predictions are off. I am now trying to use CategoricalOutput, but there seems to be a bug in it.
File /opt/omniai/work/instance1/jupyter/ssm-env/lib/python3.8/site-packages/gluonts/mx/trainer/_base.py:286, in Trainer.__call__.<locals>.loop(epoch_no, batch_iter, num_batches_to_use, is_training)
284 if first_forward:
285 first_forward = False
--> 286 _ = net(*batch.values())
288 self.callbacks.on_network_initializing_end(
289 training_network=net
290 )
292 # Call the batch start callback as the model was not
293 # compiled before
File /opt/omniai/work/instance1/jupyter/ssm-env/lib/python3.8/site-packages/mxnet/gluon/block.py:825, in Block.__call__(self, *args)
822 for hook in self._forward_pre_hooks.values():
823 hook(self, args)
--> 825 out = self.forward(*args)
827 for hook in self._forward_hooks.values():
828 hook(self, args, out)
File /opt/omniai/work/instance1/jupyter/ssm-env/lib/python3.8/site-packages/mxnet/gluon/block.py:1492, in HybridBlock.forward(self, x, *args)
1486 raise ValueError('Find multiple contexts in the input, '
1487 'After hybridized, the HybridBlock only supports one input '
1488 'context. You can print the ele.ctx in the '
1489 'input arguments to inspect their contexts. '
1490 'Find all contexts = {}'.format(ctx_set))
1491 with ctx:
-> 1492 return self._call_cached_op(x, *args)
1493 with ctx:
1494 try:
File /opt/omniai/work/instance1/jupyter/ssm-env/lib/python3.8/site-packages/mxnet/gluon/block.py:1200, in HybridBlock._call_cached_op(self, *args)
1198 def _call_cached_op(self, *args):
1199 if self._cached_op is None:
-> 1200 self._build_cache(*args)
1201 assert self._cached_op, "Gluon failed to build the cache. " \
1202 "This should never happen. " \
1203 "Please submit an issue on Github" \
1204 " https://github.com/apache/incubator-mxnet."
1205 if self._callback:
File /opt/omniai/work/instance1/jupyter/ssm-env/lib/python3.8/site-packages/mxnet/gluon/block.py:1068, in HybridBlock._build_cache(self, *args)
1067 def _build_cache(self, *args):
-> 1068 data, out = self._get_graph(*args)
1069 data_names = {data.name: i for i, data in enumerate(data)}
1070 input_names = out.list_inputs()
File /opt/omniai/work/instance1/jupyter/ssm-env/lib/python3.8/site-packages/mxnet/gluon/block.py:1060, in HybridBlock._get_graph(self, *args)
1058 params = {i: j.var() for i, j in self._reg_params.items()}
1059 with self.name_scope():
-> 1060 out = self.hybrid_forward(symbol, *grouped_inputs, **params) # pylint: disable=no-value-for-parameter
1061 out, self._out_format = _flatten(out, "output")
1063 self._cached_graph = symbol_inputs, symbol.Group(out, _check_same_symbol_type(out))
File /opt/omniai/work/instance1/jupyter/ssm-env/lib/python3.8/site-packages/gluonts/mx/model/deepar/_network.py:928, in DeepARTrainingNetwork.hybrid_forward(self, F, feat_static_cat, feat_static_real, past_time_feat, past_target, past_observed_values, past_is_pad, future_time_feat, future_target, future_observed_values)
894 def hybrid_forward(
895 self,
896 F,
(...)
905 future_observed_values: Tensor,
906 ) -> Tensor:
907 """
908 Computes the loss for training DeepAR, all inputs tensors representing
909 time series have NTC layout.
(...)
925 -------
926 """
--> 928 outputs = self.distribution(
929 feat_static_cat=feat_static_cat,
930 feat_static_real=feat_static_real,
931 past_time_feat=past_time_feat,
932 past_target=past_target,
933 past_observed_values=past_observed_values,
934 past_is_pad=past_is_pad,
935 future_time_feat=future_time_feat,
936 future_target=future_target,
937 future_observed_values=future_observed_values,
938 return_rnn_outputs=True,
939 )
940 # since return_rnn_outputs=True, assert:
941 assert isinstance(outputs, tuple)
File /opt/omniai/work/instance1/jupyter/ssm-env/lib/python3.8/site-packages/gluonts/mx/model/deepar/_network.py:887, in DeepARTrainingNetwork.distribution(self, feat_static_cat, feat_static_real, past_time_feat, past_target, past_observed_values, past_is_pad, future_time_feat, future_target, future_observed_values, return_rnn_outputs)
880 distr_args = self.proj_distr_args(rnn_outputs)
882 # return the output of rnn layers if return_rnn_outputs=True, so that
883 # it can be used for regularization later assume no dropout for
884 # outputs, so can be directly used for activation regularization
885 return (
886 (
--> 887 self.distr_output.distribution(distr_args, scale=scale),
888 rnn_outputs,
889 )
890 if return_rnn_outputs
891 else self.distr_output.distribution(distr_args, scale=scale)
892 )
Description
I am trying to predict discrete integer values between 1 and 22. I started with a Gaussian distribution, but the predictions were way outside the valid 1–22 range. Next, I moved to NegativeBinomial, but the model has a hard time converging and the predictions are off. I am now trying to use CategoricalOutput, but there seems to be a bug in it.
Error message
File /opt/omniai/work/instance1/jupyter/ssm-env/lib/python3.8/site-packages/gluonts/mx/model/estimator.py:239, in GluonEstimator.train(self, training_data, validation_data, shuffle_buffer_length, cache_data, **kwargs) 231 def train( 232 self, 233 training_data: Dataset, (...) 237 **kwargs, 238 ) -> Predictor: --> 239 return self.train_model( 240 training_data=training_data, 241 validation_data=validation_data, 242 shuffle_buffer_length=shuffle_buffer_length, 243 cache_data=cache_data, 244 ).predictor
File /opt/omniai/work/instance1/jupyter/ssm-env/lib/python3.8/site-packages/gluonts/mx/model/estimator.py:216, in GluonEstimator.train_model(self, training_data, validation_data, from_predictor, shuffle_buffer_length, cache_data) 213 else: 214 copy_parameters(from_predictor.network, training_network) --> 216 self.trainer( 217 net=training_network, 218 train_iter=training_data_loader, 219 validation_iter=validation_data_loader, 220 ) 222 with self.trainer.ctx: 223 predictor = self.create_predictor(transformation, training_network)
File /opt/omniai/work/instance1/jupyter/ssm-env/lib/python3.8/site-packages/gluonts/mx/trainer/_base.py:420, in Trainer.__call__(self, net, train_iter, validation_iter) 415 curr_lr = trainer.learning_rate 416 logger.info( 417 f"Epoch[{epoch_no}] Learning rate is {curr_lr}" 418 ) --> 420 epoch_loss = loop( 421 epoch_no, 422 train_iter, 423 num_batches_to_use=self.num_batches_per_epoch, 424 ) 426 should_continue = self.callbacks.on_train_epoch_end( 427 epoch_no=epoch_no, 428 epoch_loss=loss_value(epoch_loss), 429 training_network=net, 430 trainer=trainer, 431 ) 433 if is_validation_available:
File /opt/omniai/work/instance1/jupyter/ssm-env/lib/python3.8/site-packages/gluonts/mx/trainer/_base.py:286, in Trainer.__call__.<locals>.loop(epoch_no, batch_iter, num_batches_to_use, is_training)
284 if first_forward:
285 first_forward = False
--> 286 _ = net(*batch.values())
288 self.callbacks.on_network_initializing_end(
289 training_network=net
290 )
292 # Call the batch start callback as the model was not
293 # compiled before
File /opt/omniai/work/instance1/jupyter/ssm-env/lib/python3.8/site-packages/mxnet/gluon/block.py:825, in Block.__call__(self, *args) 822 for hook in self._forward_pre_hooks.values(): 823 hook(self, args) --> 825 out = self.forward(*args) 827 for hook in self._forward_hooks.values(): 828 hook(self, args, out)
File /opt/omniai/work/instance1/jupyter/ssm-env/lib/python3.8/site-packages/mxnet/gluon/block.py:1492, in HybridBlock.forward(self, x, *args) 1486 raise ValueError('Find multiple contexts in the input, ' 1487 'After hybridized, the HybridBlock only supports one input ' 1488 'context. You can print the ele.ctx in the ' 1489 'input arguments to inspect their contexts. ' 1490 'Find all contexts = {}'.format(ctx_set)) 1491 with ctx: -> 1492 return self._call_cached_op(x, *args) 1493 with ctx: 1494 try:
File /opt/omniai/work/instance1/jupyter/ssm-env/lib/python3.8/site-packages/mxnet/gluon/block.py:1200, in HybridBlock._call_cached_op(self, *args) 1198 def _call_cached_op(self, *args): 1199 if self._cached_op is None: -> 1200 self._build_cache(*args) 1201 assert self._cached_op, "Gluon failed to build the cache. " \ 1202 "This should never happen. " \ 1203 "Please submit an issue on Github" \ 1204 " https://github.com/apache/incubator-mxnet." 1205 if self._callback:
File /opt/omniai/work/instance1/jupyter/ssm-env/lib/python3.8/site-packages/mxnet/gluon/block.py:1068, in HybridBlock._build_cache(self, *args) 1067 def _build_cache(self, *args): -> 1068 data, out = self._get_graph(*args) 1069 data_names = {data.name: i for i, data in enumerate(data)} 1070 input_names = out.list_inputs()
File /opt/omniai/work/instance1/jupyter/ssm-env/lib/python3.8/site-packages/mxnet/gluon/block.py:1060, in HybridBlock._get_graph(self, *args) 1058 params = {i: j.var() for i, j in self._reg_params.items()} 1059 with self.name_scope(): -> 1060 out = self.hybrid_forward(symbol, *grouped_inputs, **params) # pylint: disable=no-value-for-parameter 1061 out, self._out_format = _flatten(out, "output") 1063 self._cached_graph = symbol_inputs, symbol.Group(out, _check_same_symbol_type(out))
File /opt/omniai/work/instance1/jupyter/ssm-env/lib/python3.8/site-packages/gluonts/mx/model/deepar/_network.py:928, in DeepARTrainingNetwork.hybrid_forward(self, F, feat_static_cat, feat_static_real, past_time_feat, past_target, past_observed_values, past_is_pad, future_time_feat, future_target, future_observed_values) 894 def hybrid_forward( 895 self, 896 F, (...) 905 future_observed_values: Tensor, 906 ) -> Tensor: 907 """ 908 Computes the loss for training DeepAR, all inputs tensors representing 909 time series have NTC layout. (...) 925 ------- 926 """ --> 928 outputs = self.distribution( 929 feat_static_cat=feat_static_cat, 930 feat_static_real=feat_static_real, 931 past_time_feat=past_time_feat, 932 past_target=past_target, 933 past_observed_values=past_observed_values, 934 past_is_pad=past_is_pad, 935 future_time_feat=future_time_feat, 936 future_target=future_target, 937 future_observed_values=future_observed_values, 938 return_rnn_outputs=True, 939 ) 940 # since return_rnn_outputs=True, assert: 941 assert isinstance(outputs, tuple)
File /opt/omniai/work/instance1/jupyter/ssm-env/lib/python3.8/site-packages/gluonts/mx/model/deepar/_network.py:887, in DeepARTrainingNetwork.distribution(self, feat_static_cat, feat_static_real, past_time_feat, past_target, past_observed_values, past_is_pad, future_time_feat, future_target, future_observed_values, return_rnn_outputs) 880 distr_args = self.proj_distr_args(rnn_outputs) 882 # return the output of rnn layers if return_rnn_outputs=True, so that 883 # it can be used for regularization later assume no dropout for 884 # outputs, so can be directly used for activation regularization 885 return ( 886 ( --> 887 self.distr_output.distribution(distr_args, scale=scale), 888 rnn_outputs, 889 ) 890 if return_rnn_outputs 891 else self.distr_output.distribution(distr_args, scale=scale) 892 )
File /opt/omniai/work/instance1/jupyter/ssm-env/lib/python3.8/site-packages/gluonts/mx/distribution/categorical.py:120, in CategoricalOutput.distribution(self, distr_args, loc, scale, **kwargs) 117 def distribution( 118 self, distr_args, loc=None, scale=None, **kwargs 119 ) -> Distribution: --> 120 distr = Categorical(distr_args) 121 return distr
File /opt/omniai/work/instance1/jupyter/ssm-env/lib/python3.8/site-packages/gluonts/core/component.py:364, in validated.<locals>.validator.<locals>.init_wrapper(*args, **kwargs)
361 self.__class__.__getnewargs_ex__ = validated_getnewargs_ex
362 self.__class__.__repr__ = validated_repr
--> 364 return init(self, **all_args)
File /opt/omniai/work/instance1/jupyter/ssm-env/lib/python3.8/site-packages/gluonts/mx/distribution/categorical.py:42, in Categorical.__init__(self, log_probs) 40 super().__init__() 41 self.log_probs = log_probs ---> 42 self.num_cats = self.log_probs.shape[-1] 43 self.cats = self.F.arange(self.num_cats) 44 self._probs = None
AttributeError: 'Symbol' object has no attribute 'shape'
Environment