Hi, I am hitting different errors on different GPUs when running get_emb.py with my own generated dataset.
If I run it on a GTX 1080 Ti, the error is:
```
File /gpfs/gibbs/project/zhao/tl688/conda_envs/scgpt/lib/python3.8/site-packages/transformers/models/bert/modeling_bert.py:495, in BertLayer.forward(self, hidden_states, attention_mask, head_mask, encoder_hidden_states, encoder_attention_mask, past_key_value, output_attentions)
483 def forward(
484 self,
485 hidden_states: torch.Tensor,
(...)
492 ) -> Tuple[torch.Tensor]:
493 # decoder uni-directional self-attention cached key/values tuple is at positions 1,2
494 self_attn_past_key_value = past_key_value[:2] if past_key_value is not None else None
--> 495 self_attention_outputs = self.attention(
496 hidden_states,
497 attention_mask,
498 head_mask,
499 output_attentions=output_attentions,
500 past_key_value=self_attn_past_key_value,
501 )
502 attention_output = self_attention_outputs[0]
504 # if decoder, the last output is tuple of self-attn cache
File /gpfs/gibbs/project/zhao/tl688/conda_envs/scgpt/lib/python3.8/site-packages/torch/nn/modules/module.py:1190, in Module._call_impl(self, *input, **kwargs)
1186 # If we don't have any hooks, we want to skip the rest of the logic in
1187 # this function, and just call forward.
1188 if not (self._backward_hooks or self._forward_hooks or self._forward_pre_hooks or _global_backward_hooks
1189 or _global_forward_hooks or _global_forward_pre_hooks):
-> 1190 return forward_call(*input, **kwargs)
1191 # Do not call functions when jit is used
1192 full_backward_hooks, non_full_backward_hooks = [], []
File /gpfs/gibbs/project/zhao/tl688/conda_envs/scgpt/lib/python3.8/site-packages/transformers/models/bert/modeling_bert.py:425, in BertAttention.forward(self, hidden_states, attention_mask, head_mask, encoder_hidden_states, encoder_attention_mask, past_key_value, output_attentions)
415 def forward(
416 self,
417 hidden_states: torch.Tensor,
(...)
423 output_attentions: Optional[bool] = False,
424 ) -> Tuple[torch.Tensor]:
--> 425 self_outputs = self.self(
426 hidden_states,
427 attention_mask,
428 head_mask,
429 encoder_hidden_states,
430 encoder_attention_mask,
431 past_key_value,
432 output_attentions,
433 )
434 attention_output = self.output(self_outputs[0], hidden_states)
435 outputs = (attention_output,) + self_outputs[1:] # add attentions if we output them
File /gpfs/gibbs/project/zhao/tl688/conda_envs/scgpt/lib/python3.8/site-packages/torch/nn/modules/module.py:1190, in Module._call_impl(self, *input, **kwargs)
1186 # If we don't have any hooks, we want to skip the rest of the logic in
1187 # this function, and just call forward.
1188 if not (self._backward_hooks or self._forward_hooks or self._forward_pre_hooks or _global_backward_hooks
1189 or _global_forward_hooks or _global_forward_pre_hooks):
-> 1190 return forward_call(*input, **kwargs)
1191 # Do not call functions when jit is used
1192 full_backward_hooks, non_full_backward_hooks = [], []
File /gpfs/gibbs/project/zhao/tl688/conda_envs/scgpt/lib/python3.8/site-packages/transformers/models/bert/modeling_bert.py:284, in BertSelfAttention.forward(self, hidden_states, attention_mask, head_mask, encoder_hidden_states, encoder_attention_mask, past_key_value, output_attentions)
274 def forward(
275 self,
276 hidden_states: torch.Tensor,
(...)
282 output_attentions: Optional[bool] = False,
283 ) -> Tuple[torch.Tensor]:
--> 284 mixed_query_layer = self.query(hidden_states)
286 # If this is instantiated as a cross-attention module, the keys
287 # and values come from an encoder; the attention mask needs to be
288 # such that the encoder's padding tokens are not attended to.
289 is_cross_attention = encoder_hidden_states is not None
File /gpfs/gibbs/project/zhao/tl688/conda_envs/scgpt/lib/python3.8/site-packages/torch/nn/modules/module.py:1190, in Module._call_impl(self, *input, **kwargs)
1186 # If we don't have any hooks, we want to skip the rest of the logic in
1187 # this function, and just call forward.
1188 if not (self._backward_hooks or self._forward_hooks or self._forward_pre_hooks or _global_backward_hooks
1189 or _global_forward_hooks or _global_forward_pre_hooks):
-> 1190 return forward_call(*input, **kwargs)
1191 # Do not call functions when jit is used
1192 full_backward_hooks, non_full_backward_hooks = [], []
File /gpfs/gibbs/project/zhao/tl688/conda_envs/scgpt/lib/python3.8/site-packages/torch/nn/modules/linear.py:114, in Linear.forward(self, input)
113 def forward(self, input: Tensor) -> Tensor:
--> 114 return F.linear(input, self.weight, self.bias)
RuntimeError: CUDA error: CUBLAS_STATUS_EXECUTION_FAILED when calling `cublasLtMatmul( ltHandle, computeDesc.descriptor(), &alpha_val, mat1_ptr, Adesc.descriptor(), mat2_ptr, Bdesc.descriptor(), &beta_val, result_ptr, Cdesc.descriptor(), result_ptr, Cdesc.descriptor(), &heuristicResult.algo, workspace.data_ptr(), workspaceSize, at::cuda::getCurrentCUDAStream())`
```
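In case it helps with reproducing: a cuBLAS failure on an older card like the 1080 Ti can mask an earlier indexing problem, so one way to localize it is to rerun the same forward pass on CPU, where an out-of-range index raises a readable IndexError instead of an opaque cuBLAS error. A minimal sketch (`model`, `input_id`, `values`, and `species` are the objects from my script; the call signature is copied from the second trace below):

```python
import torch

# Sketch: rerun the failing forward pass on CPU, where an out-of-range
# index raises a readable IndexError instead of an opaque cuBLAS error.
# `model`, `input_id`, `values`, and `species` are the objects from my
# script; the call signature is copied from the second trace below.
model_cpu = model.to("cpu").eval()
with torch.no_grad():
    emb = model_cpu.bert.forward(
        input_ids=input_id.cpu(),
        values=values.cpu(),
        species=species.cpu(),
    )[0]
```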
If I run it on a higher-end GPU, the error is:
```
Traceback (most recent call last):
File "test_get_emb.py", line 44, in <module>
EmbeddingGenerator.get_GeneCompass_cls_new_embedding(Path = Path, dataset_path=dataset_path, checkpoint_path=checkpoint_path, get_emb = True, emb_file_path = emb_file_path)
File "/gpfs/gibbs/pi/zhao/tl688/GeneCompass/downstream_tasks/gene_expression_profiling/EmbeddingGenerator.py", line 101, in get_GeneCompass_cls_new_embedding
new_emb = model.bert.forward(input_ids=input_id, values= values, species=species)[0]
File "/gpfs/gibbs/pi/zhao/tl688/GeneCompass/downstream_tasks/gene_expression_profiling/../../genecompass/modeling_bert.py", line 394, in forward
embedding_output = self.embeddings(
File "/gpfs/gibbs/project/zhao/tl688/conda_envs/scgpt/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1190, in _call_impl
return forward_call(*input, **kwargs)
File "/gpfs/gibbs/pi/zhao/tl688/GeneCompass/downstream_tasks/gene_expression_profiling/../../genecompass/knowledge_embeddings.py", line 194, in forward
input_ids_shifted[species.squeeze(1) == 1] = self.homologous_index[input_ids_shifted[species.squeeze(1) == 1]]
RuntimeError: CUDA error: device-side assert triggered
CUDA kernel errors might be asynchronously reported at some other API call,so the stacktrace below might be incorrect.
For debugging consider passing CUDA_LAUNCH_BLOCKING=1.
```
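Since the assert fires inside the homologous-index lookup, I suspect some token ids in my tokenized dataset fall outside the valid range. This is the sanity check I plan to run (a sketch; `dataset`, the `"input_ids"` column name, and `config.vocab_size` are assumptions about my setup, and rerunning with `CUDA_LAUNCH_BLOCKING=1 python get_emb.py` should also pinpoint the failing kernel):

```python
import torch

# Sketch: verify every tokenized gene id is a valid embedding index.
# `dataset` is my tokenized dataset and `model` the loaded checkpoint;
# config.vocab_size and the "input_ids" column name are assumptions.
ids = torch.cat([torch.as_tensor(x) for x in dataset["input_ids"]])
print("id range:", ids.min().item(), "->", ids.max().item())
print("vocab size:", model.config.vocab_size)  # every id must be < this
assert ids.min() >= 0 and ids.max() < model.config.vocab_size
```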
Did I miss anything in the pre-processing stage? Thanks.