here is the error I receive:
0%| | 0/1 [00:00<?, ?it/s]2017-10-21 18:46:13,638 - INFO - allennlp.training.trainer - Training Traceback (most recent call last): File "/informatik2/wtm/home/lakomkin/anaconda3/envs/summarization/lib/python3.6/runpy.py", line 193, in _run_module_as_main "__main__", mod_spec) File "/informatik2/wtm/home/lakomkin/anaconda3/envs/summarization/lib/python3.6/runpy.py", line 85, in _run_code exec(code, run_globals) File "/informatik2/wtm/home/lakomkin/distr/allennlp/allennlp/run.py", line 13, in <module> main(prog="python -m allennlp.run") File "/informatik2/wtm/home/lakomkin/distr/allennlp/allennlp/commands/__init__.py", line 61, in main args.func(args) File "/informatik2/wtm/home/lakomkin/distr/allennlp/allennlp/commands/train.py", line 63, in _train_model_from_args train_model_from_file(args.param_path, args.serialization_dir) File "/informatik2/wtm/home/lakomkin/distr/allennlp/allennlp/commands/train.py", line 83, in train_model_from_file return train_model(params, serialization_dir) File "/informatik2/wtm/home/lakomkin/distr/allennlp/allennlp/commands/train.py", line 168, in train_model trainer.train() File "/informatik2/wtm/home/lakomkin/distr/allennlp/allennlp/training/trainer.py", line 370, in train train_metrics = self._train_epoch(epoch) File "/informatik2/wtm/home/lakomkin/distr/allennlp/allennlp/training/trainer.py", line 221, in _train_epoch loss = self._batch_loss(batch, for_training=True) File "/informatik2/wtm/home/lakomkin/distr/allennlp/allennlp/training/trainer.py", line 176, in _batch_loss output_dict = self._forward(batch, for_training=for_training) File "/informatik2/wtm/home/lakomkin/distr/allennlp/allennlp/training/trainer.py", line 411, in _forward return self._model.forward(**tensor_batch) File "/informatik2/wtm/home/lakomkin/distr/allennlp/allennlp/models/encoder_decoders/simple_seq2seq.py", line 159, in forward (decoder_hidden, decoder_context)) File 
"/informatik2/wtm/home/lakomkin/anaconda3/envs/summarization/lib/python3.6/site-packages/torch/nn/modules/module.py", line 224, in __call__ result = self.forward(*input, **kwargs) File "/informatik2/wtm/home/lakomkin/anaconda3/envs/summarization/lib/python3.6/site-packages/torch/nn/modules/rnn.py", line 608, in forward self.bias_ih, self.bias_hh, File "/informatik2/wtm/home/lakomkin/anaconda3/envs/summarization/lib/python3.6/site-packages/torch/nn/_functions/rnn.py", line 28, in LSTMCell return state(igates, hgates, hidden[1]) if b_ih is None else state(igates, hgates, hidden[1], b_ih, b_hh) File "/informatik2/wtm/home/lakomkin/anaconda3/envs/summarization/lib/python3.6/site-packages/torch/nn/_functions/thnn/rnnFusedPointwise.py", line 78, in forward cx, hy, cy) TypeError: CudaLSTMFused_updateOutput received an invalid combination of arguments - got (int, torch.cuda.FloatTensor, torch.cuda.FloatTensor, torch.cuda.FloatTensor, torch.cuda.FloatTensor, torch.FloatTensor, torch.cuda.FloatTensor, torch.cuda.FloatTensor), but expected (int state, torch.cuda.FloatTensor input, torch.cuda.FloatTensor hidden, [torch.cuda.FloatTensor bias1 or None], [torch.cuda.FloatTensor bias2 or None], torch.cuda.FloatTensor cx, torch.cuda.FloatTensor hy, torch.cuda.FloatTensor cy)
Maybe the problem is that the LSTM's hidden state is not moved to CUDA, but I am not sure.
I tried to run the simple_seq2seq experiment and everything works fine until I change cuda_device from -1 to 0:
here is the error I receive:
0%| | 0/1 [00:00<?, ?it/s]2017-10-21 18:46:13,638 - INFO - allennlp.training.trainer - Training Traceback (most recent call last): File "/informatik2/wtm/home/lakomkin/anaconda3/envs/summarization/lib/python3.6/runpy.py", line 193, in _run_module_as_main "__main__", mod_spec) File "/informatik2/wtm/home/lakomkin/anaconda3/envs/summarization/lib/python3.6/runpy.py", line 85, in _run_code exec(code, run_globals) File "/informatik2/wtm/home/lakomkin/distr/allennlp/allennlp/run.py", line 13, in <module> main(prog="python -m allennlp.run") File "/informatik2/wtm/home/lakomkin/distr/allennlp/allennlp/commands/__init__.py", line 61, in main args.func(args) File "/informatik2/wtm/home/lakomkin/distr/allennlp/allennlp/commands/train.py", line 63, in _train_model_from_args train_model_from_file(args.param_path, args.serialization_dir) File "/informatik2/wtm/home/lakomkin/distr/allennlp/allennlp/commands/train.py", line 83, in train_model_from_file return train_model(params, serialization_dir) File "/informatik2/wtm/home/lakomkin/distr/allennlp/allennlp/commands/train.py", line 168, in train_model trainer.train() File "/informatik2/wtm/home/lakomkin/distr/allennlp/allennlp/training/trainer.py", line 370, in train train_metrics = self._train_epoch(epoch) File "/informatik2/wtm/home/lakomkin/distr/allennlp/allennlp/training/trainer.py", line 221, in _train_epoch loss = self._batch_loss(batch, for_training=True) File "/informatik2/wtm/home/lakomkin/distr/allennlp/allennlp/training/trainer.py", line 176, in _batch_loss output_dict = self._forward(batch, for_training=for_training) File "/informatik2/wtm/home/lakomkin/distr/allennlp/allennlp/training/trainer.py", line 411, in _forward return self._model.forward(**tensor_batch) File "/informatik2/wtm/home/lakomkin/distr/allennlp/allennlp/models/encoder_decoders/simple_seq2seq.py", line 159, in forward (decoder_hidden, decoder_context)) File 
"/informatik2/wtm/home/lakomkin/anaconda3/envs/summarization/lib/python3.6/site-packages/torch/nn/modules/module.py", line 224, in __call__ result = self.forward(*input, **kwargs) File "/informatik2/wtm/home/lakomkin/anaconda3/envs/summarization/lib/python3.6/site-packages/torch/nn/modules/rnn.py", line 608, in forward self.bias_ih, self.bias_hh, File "/informatik2/wtm/home/lakomkin/anaconda3/envs/summarization/lib/python3.6/site-packages/torch/nn/_functions/rnn.py", line 28, in LSTMCell return state(igates, hgates, hidden[1]) if b_ih is None else state(igates, hgates, hidden[1], b_ih, b_hh) File "/informatik2/wtm/home/lakomkin/anaconda3/envs/summarization/lib/python3.6/site-packages/torch/nn/_functions/thnn/rnnFusedPointwise.py", line 78, in forward cx, hy, cy) TypeError: CudaLSTMFused_updateOutput received an invalid combination of arguments - got (int, torch.cuda.FloatTensor, torch.cuda.FloatTensor, torch.cuda.FloatTensor, torch.cuda.FloatTensor, torch.FloatTensor, torch.cuda.FloatTensor, torch.cuda.FloatTensor), but expected (int state, torch.cuda.FloatTensor input, torch.cuda.FloatTensor hidden, [torch.cuda.FloatTensor bias1 or None], [torch.cuda.FloatTensor bias2 or None], torch.cuda.FloatTensor cx, torch.cuda.FloatTensor hy, torch.cuda.FloatTensor cy)
Maybe the problem is that the LSTM's hidden state is not moved to CUDA, but I am not sure.