Exception occurs when processing SZ002252_2013-09-25_63107665
Traceback (most recent call last):
File "/data/jelly/src/nlp/event_extraction/Doc2EDAG/dee/dee_task.py", line 238, in get_loss_on_batch
doc_batch_dict, features, use_gold_span=use_gold_span, train_flag=True, teacher_prob=teacher_prob
File "/home/ubuntu/Anaconda/anaconda3/envs/pytorch/lib/python3.6/site-packages/torch/nn/modules/module.py", line 532, in call
result = self.forward(*input, kwargs)
File "/home/ubuntu/Anaconda/anaconda3/envs/pytorch/lib/python3.6/site-packages/torch/nn/parallel/distributed.py", line 447, in forward
output = self.module(*inputs[0], **kwargs[0])
File "/home/ubuntu/Anaconda/anaconda3/envs/pytorch/lib/python3.6/site-packages/torch/nn/modules/module.py", line 532, in __call__
result = self.forward(*input, **kwargs)
File "/data/jelly/src/nlp/event_extraction/Doc2EDAG/dee/dee_model.py", line 641, in forward
doc_span_info_list[batch_idx],
File "/data/jelly/src/nlp/event_extraction/Doc2EDAG/dee/dee_model.py", line 407, in get_loss_on_doc
doc_sent_context, batch_span_context, event_idx, field_idx2pre_path2cur_span_idx_set,
File "/data/jelly/src/nlp/event_extraction/Doc2EDAG/dee/dee_model.py", line 329, in get_field_mle_loss_list
event_idx, field_idx, prev_decode_context, batch_span_context
File "/data/jelly/src/nlp/event_extraction/Doc2EDAG/dee/dee_model.py", line 305, in conduct_field_level_reasoning
total_cand_emb = self.field_context_encoder(total_cand_emb, None).squeeze(0)
File "/home/ubuntu/Anaconda/anaconda3/envs/pytorch/lib/python3.6/site-packages/torch/nn/modules/module.py", line 532, in call
result = self.forward(*input, kwargs)
File "/data/jelly/src/nlp/event_extraction/Doc2EDAG/dee/transformer.py", line 86, in forward
x = layer(x, mask)
File "/home/ubuntu/Anaconda/anaconda3/envs/pytorch/lib/python3.6/site-packages/torch/nn/modules/module.py", line 532, in call
result = self.forward(*input, *kwargs)
File "/data/jelly/src/nlp/event_extraction/Doc2EDAG/dee/transformer.py", line 117, in forward
x = self.sublayer[0](x, lambda x: self.self_attn(x, x, x, mask))
File "/home/ubuntu/Anaconda/anaconda3/envs/pytorch/lib/python3.6/site-packages/torch/nn/modules/module.py", line 532, in call
result = self.forward(input, kwargs)
File "/data/jelly/src/nlp/event_extraction/Doc2EDAG/dee/transformer.py", line 102, in forward
return x + self.dropout(sublayer(self.norm(x)))
File "/data/jelly/src/nlp/event_extraction/Doc2EDAG/dee/transformer.py", line 117, in
x = self.sublayer[0](x, lambda x: self.self_attn(x, x, x, mask))
File "/home/ubuntu/Anaconda/anaconda3/envs/pytorch/lib/python3.6/site-packages/torch/nn/modules/module.py", line 532, in call
result = self.forward(*input, *kwargs)
File "/data/jelly/src/nlp/event_extraction/Doc2EDAG/dee/transformer.py", line 195, in forward
for l, x in zip(self.linears, (query, key, value))
File "/data/jelly/src/nlp/event_extraction/Doc2EDAG/dee/transformer.py", line 195, in
for l, x in zip(self.linears, (query, key, value))
File "/home/ubuntu/Anaconda/anaconda3/envs/pytorch/lib/python3.6/site-packages/torch/nn/modules/module.py", line 532, in call
result = self.forward(input, **kwargs)
File "/home/ubuntu/Anaconda/anaconda3/envs/pytorch/lib/python3.6/site-packages/torch/nn/modules/linear.py", line 87, in forward
return F.linear(input, self.weight, self.bias)
File "/home/ubuntu/Anaconda/anaconda3/envs/pytorch/lib/python3.6/site-packages/torch/nn/functional.py", line 1372, in linear
output = input.matmul(weight.t())
RuntimeError: CUDA out of memory. Tried to allocate 2.00 MiB (GPU 0; 10.76 GiB total capacity; 9.36 GiB already allocated; 2.62 MiB free; 9.97 GiB reserved in total by PyTorch)
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "run_dee_task.py", line 69, in
dee_task.train(save_cpt_flag=in_argv.save_cpt_flag)
File "/data/jelly/src/nlp/event_extraction/Doc2EDAG/dee/dee_task.py", line 294, in train
base_epoch_idx=resume_base_epoch,
File "/data/jelly/src/nlp/event_extraction/Doc2EDAG/dee/base_task.py", line 528, in base_train
loss = get_loss_func(self, batch, **kwargs_dict1)
File "/data/jelly/src/nlp/event_extraction/Doc2EDAG/dee/dee_task.py", line 244, in get_loss_on_batch
raise Exception('Cannot get the loss')
Exception: Cannot get the loss
Traceback (most recent call last):
File "/home/ubuntu/Anaconda/anaconda3/envs/pytorch/lib/python3.6/runpy.py", line 193, in _run_module_as_main
"main", mod_spec)
File "/home/ubuntu/Anaconda/anaconda3/envs/pytorch/lib/python3.6/runpy.py", line 85, in _run_code
exec(code, run_globals)
File "/home/ubuntu/Anaconda/anaconda3/envs/pytorch/lib/python3.6/site-packages/torch/distributed/launch.py", line 263, in
main()
File "/home/ubuntu/Anaconda/anaconda3/envs/pytorch/lib/python3.6/site-packages/torch/distributed/launch.py", line 259, in main
cmd=cmd)
subprocess.CalledProcessError: Command '['/home/ubuntu/Anaconda/anaconda3/envs/pytorch/bin/python', '-u', 'run_dee_task.py', '--local_rank=0', '--task_name', 'ee']' returned non-zero exit status 1.
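
For reference, the failure chain shown above is: F.linear triggers a CUDA out-of-memory error, get_loss_on_batch in dee_task.py catches it, logs the offending document id, and re-raises Exception('Cannot get the loss'), which aborts the whole run under torch.distributed.launch. Below is a minimal sketch, not the actual Doc2EDAG code, of how such a wrapper could skip an OOM batch instead of aborting; safe_loss_on_batch, model, batch, and doc_id are hypothetical stand-ins.

```python
import torch

def safe_loss_on_batch(model, batch, doc_id):
    """Sketch: compute the loss for one batch, skipping it on CUDA OOM.

    Hypothetical stand-in; the real get_loss_on_batch in dee_task.py
    re-raises 'Cannot get the loss' instead of skipping the batch.
    """
    try:
        return model(batch)  # forward pass that produced the OOM above
    except RuntimeError as e:
        if 'out of memory' in str(e):
            print('Exception occurs when processing {}'.format(doc_id))
            torch.cuda.empty_cache()  # release cached blocks before the next batch
            return None  # caller treats None as "skip this batch"
        raise
```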