Closed minji-o-j closed 2 years ago
RuntimeError Traceback (most recent call last)
torch.tensor(index_of_words).to(device) — .to(device) 호출 과정에서 에러 발생
RuntimeError: cuda runtime error (710) : device-side assert triggered at /pytorch/aten/src/THC/THCCachingHostAllocator.cpp:278
CPU로 돌려보니 자세한 에러가 나왔다.
---------------------------------------------------------------------------
IndexError Traceback (most recent call last)
/tmp/ipykernel_24681/686626782.py in <module>
45 for i, data in enumerate(train_loader):
46 optimizer.zero_grad()
---> 47 outputs = model(data, labels=data)
48 _, logits = outputs[:2]
49
/opt/conda/envs/lightweight/lib/python3.7/site-packages/torch/nn/modules/module.py in _call_impl(self, *input, **kwargs)
720 result = self._slow_forward(*input, **kwargs)
721 else:
--> 722 result = self.forward(*input, **kwargs)
723 for hook in itertools.chain(
724 _global_forward_hooks.values(),
/tmp/ipykernel_24681/3889550879.py in forward(self, input, labels)
38 def forward(self, input, labels = None):
39 if labels is not None:
---> 40 outputs = self.kogpt2(input, labels=labels)
41 else:
42 outputs = self.kogpt2(input)
/opt/conda/envs/lightweight/lib/python3.7/site-packages/torch/nn/modules/module.py in _call_impl(self, *input, **kwargs)
720 result = self._slow_forward(*input, **kwargs)
721 else:
--> 722 result = self.forward(*input, **kwargs)
723 for hook in itertools.chain(
724 _global_forward_hooks.values(),
/opt/conda/envs/lightweight/lib/python3.7/site-packages/transformers/models/gpt2/modeling_gpt2.py in forward(self, input_ids, past_key_values, attention_mask, token_type_ids, position_ids, head_mask, inputs_embeds, encoder_hidden_states, encoder_attention_mask, labels, use_cache, output_attentions, output_hidden_states, return_dict)
1053 output_attentions=output_attentions,
1054 output_hidden_states=output_hidden_states,
-> 1055 return_dict=return_dict,
1056 )
1057 hidden_states = transformer_outputs[0]
/opt/conda/envs/lightweight/lib/python3.7/site-packages/torch/nn/modules/module.py in _call_impl(self, *input, **kwargs)
720 result = self._slow_forward(*input, **kwargs)
721 else:
--> 722 result = self.forward(*input, **kwargs)
723 for hook in itertools.chain(
724 _global_forward_hooks.values(),
/opt/conda/envs/lightweight/lib/python3.7/site-packages/transformers/models/gpt2/modeling_gpt2.py in forward(self, input_ids, past_key_values, attention_mask, token_type_ids, position_ids, head_mask, inputs_embeds, encoder_hidden_states, encoder_attention_mask, use_cache, output_attentions, output_hidden_states, return_dict)
827
828 if inputs_embeds is None:
--> 829 inputs_embeds = self.wte(input_ids)
830 position_embeds = self.wpe(position_ids)
831 hidden_states = inputs_embeds + position_embeds
/opt/conda/envs/lightweight/lib/python3.7/site-packages/torch/nn/modules/module.py in _call_impl(self, *input, **kwargs)
720 result = self._slow_forward(*input, **kwargs)
721 else:
--> 722 result = self.forward(*input, **kwargs)
723 for hook in itertools.chain(
724 _global_forward_hooks.values(),
/opt/conda/envs/lightweight/lib/python3.7/site-packages/torch/nn/modules/sparse.py in forward(self, input)
124 return F.embedding(
125 input, self.weight, self.padding_idx, self.max_norm,
--> 126 self.norm_type, self.scale_grad_by_freq, self.sparse)
127
128 def extra_repr(self) -> str:
/opt/conda/envs/lightweight/lib/python3.7/site-packages/torch/nn/functional.py in embedding(input, weight, padding_idx, max_norm, norm_type, scale_grad_by_freq, sparse)
1812 # remove once script supports set_grad_enabled
1813 _no_grad_embedding_renorm_(weight, input, max_norm, norm_type)
-> 1814 return torch.embedding(weight, input, padding_idx, scale_grad_by_freq, sparse)
1815
1816
IndexError: index out of range in self
print(f'max: {max(index_of_words)}, min: {min(index_of_words)}')
위 출력을 해보니 max 값이 50000 이상일 때 에러가 뜨는 것을 확인하였다. 즉 토큰 인덱스가 모델 임베딩(vocab) 크기를 벗어나서 IndexError가 발생한 것이다. 따라서 max가 50000 이상일 경우 해당 샘플을 data에 넣지 않았다.
RuntimeError: CUDA error: device-side assert triggered CUDA kernel errors might be asynchronously reported at some other API call,so the stacktrace below might be incorrect. For debugging consider passing CUDA_LAUNCH_BLOCKING=1.