MAGICS-LAB / DNABERT_2

[ICLR 2024] DNABERT-2: Efficient Foundation Model and Benchmark for Multi-Species Genome
Apache License 2.0

While Doing Quick Start, I Encountered an AssertionError: #66

Closed: Jingyao711 closed this issue 3 months ago

Jingyao711 commented 8 months ago

I was running the code from the Quick Start section, but I got this error:

```
AssertionError                            Traceback (most recent call last)
Cell In[8], line 3
      1 dna = "ACGTAGCATCGGATCTATCTATCGACACTTGGTTATCGATCTACGAGCATCTCGTTAGC"
      2 inputs = DNA2tokenizer(dna, return_tensors = 'pt')["input_ids"]
----> 3 hidden_states = DNA2model(inputs)[0] # [1, sequence_length, 768]
      5 # embedding with mean pooling
      6 embedding_mean = torch.mean(hidden_states[0], dim=0)

File ~/miniconda3/envs/dna/lib/python3.8/site-packages/torch/nn/modules/module.py:1518, in Module._wrapped_call_impl(self, *args, **kwargs)
   1516     return self._compiled_call_impl(*args, **kwargs)  # type: ignore[misc]
   1517 else:
-> 1518     return self._call_impl(*args, **kwargs)

File ~/miniconda3/envs/dna/lib/python3.8/site-packages/torch/nn/modules/module.py:1527, in Module._call_impl(self, *args, **kwargs)
   1522 # If we don't have any hooks, we want to skip the rest of the logic in
   1523 # this function, and just call forward.
   1524 if not (self._backward_hooks or self._backward_pre_hooks or self._forward_hooks or self._forward_pre_hooks
   1525         or _global_backward_pre_hooks or _global_backward_hooks
   1526         or _global_forward_hooks or _global_forward_pre_hooks):
-> 1527     return forward_call(*args, **kwargs)
   1529 try:
   1530     result = None

File ~/.cache/huggingface/modules/transformers_modules/zhihan1996/DNABERT-2-117M/25abaf0bd247444fcfa837109f12088114898d98/bert_layers.py:608, in BertModel.forward(self, input_ids, token_type_ids, attention_mask, position_ids, output_all_encoded_layers, masked_tokens_mask, **kwargs)
    605     first_col_mask[:, 0] = True
    606     subset_mask = masked_tokens_mask | first_col_mask
--> 608 encoder_outputs = self.encoder(
    609     embedding_output,
    610     attention_mask,
    611     output_all_encoded_layers=output_all_encoded_layers,
    612     subset_mask=subset_mask)
    614 if masked_tokens_mask is None:
    615     sequence_output = encoder_outputs[-1]

File ~/miniconda3/envs/dna/lib/python3.8/site-packages/torch/nn/modules/module.py:1518, in Module._wrapped_call_impl(self, *args, **kwargs)
   (...)
-> 1518     return self._call_impl(*args, **kwargs)

File ~/miniconda3/envs/dna/lib/python3.8/site-packages/torch/nn/modules/module.py:1527, in Module._call_impl(self, *args, **kwargs)
   (...)
-> 1527     return forward_call(*args, **kwargs)

File ~/.cache/huggingface/modules/transformers_modules/zhihan1996/DNABERT-2-117M/25abaf0bd247444fcfa837109f12088114898d98/bert_layers.py:446, in BertEncoder.forward(self, hidden_states, attention_mask, output_all_encoded_layers, subset_mask)
    444 if subset_mask is None:
    445     for layer_module in self.layer:
--> 446         hidden_states = layer_module(hidden_states,
    447                                      cu_seqlens,
    448                                      seqlen,
    449                                      None,
    450                                      indices,
    451                                      attn_mask=attention_mask,
    452                                      bias=alibi_attn_mask)
    453         if output_all_encoded_layers:
    454             all_encoder_layers.append(hidden_states)

File ~/miniconda3/envs/dna/lib/python3.8/site-packages/torch/nn/modules/module.py:1518, in Module._wrapped_call_impl(self, *args, **kwargs)
   (...)
-> 1518     return self._call_impl(*args, **kwargs)

File ~/miniconda3/envs/dna/lib/python3.8/site-packages/torch/nn/modules/module.py:1527, in Module._call_impl(self, *args, **kwargs)
   (...)
-> 1527     return forward_call(*args, **kwargs)

File ~/.cache/huggingface/modules/transformers_modules/zhihan1996/DNABERT-2-117M/25abaf0bd247444fcfa837109f12088114898d98/bert_layers.py:327, in BertLayer.forward(self, hidden_states, cu_seqlens, seqlen, subset_idx, indices, attn_mask, bias)
    305 def forward(
    306     self,
    307     hidden_states: torch.Tensor,
   (...)
    313     bias: Optional[torch.Tensor] = None,
    314 ) -> torch.Tensor:
    315     """Forward pass for a BERT layer, including both attention and MLP.
    316 
    317     Args:
   (...)
    325         bias: None or (batch, heads, max_seqlen_in_batch, max_seqlen_in_batch)
    326     """
--> 327     attention_output = self.attention(hidden_states, cu_seqlens, seqlen,
    328                                       subset_idx, indices, attn_mask, bias)
    329     layer_output = self.mlp(attention_output)
    330     return layer_output

File ~/miniconda3/envs/dna/lib/python3.8/site-packages/torch/nn/modules/module.py:1518, in Module._wrapped_call_impl(self, *args, **kwargs)
   (...)
-> 1518     return self._call_impl(*args, **kwargs)

File ~/miniconda3/envs/dna/lib/python3.8/site-packages/torch/nn/modules/module.py:1527, in Module._call_impl(self, *args, **kwargs)
   (...)
-> 1527     return forward_call(*args, **kwargs)

File ~/.cache/huggingface/modules/transformers_modules/zhihan1996/DNABERT-2-117M/25abaf0bd247444fcfa837109f12088114898d98/bert_layers.py:240, in BertUnpadAttention.forward(self, input_tensor, cu_seqlens, max_s, subset_idx, indices, attn_mask, bias)
    218 def forward(
    219     self,
    220     input_tensor: torch.Tensor,
   (...)
    226     bias: Optional[torch.Tensor] = None,
    227 ) -> torch.Tensor:
    228     """Forward pass for scaled self-attention without padding.
    229 
    230     Arguments:
   (...)
    238         bias: None or (batch, heads, max_seqlen_in_batch, max_seqlen_in_batch)
    239     """
--> 240     self_output = self.self(input_tensor, cu_seqlens, max_s, indices,
    241                             attn_mask, bias)
    242     if subset_idx is not None:
    243         return self.output(index_first_axis(self_output, subset_idx),
    244                            index_first_axis(input_tensor, subset_idx))

File ~/miniconda3/envs/dna/lib/python3.8/site-packages/torch/nn/modules/module.py:1518, in Module._wrapped_call_impl(self, *args, **kwargs)
   (...)
-> 1518     return self._call_impl(*args, **kwargs)

File ~/miniconda3/envs/dna/lib/python3.8/site-packages/torch/nn/modules/module.py:1527, in Module._call_impl(self, *args, **kwargs)
   (...)
-> 1527     return forward_call(*args, **kwargs)

File ~/.cache/huggingface/modules/transformers_modules/zhihan1996/DNABERT-2-117M/25abaf0bd247444fcfa837109f12088114898d98/bert_layers.py:181, in BertUnpadSelfAttention.forward(self, hidden_states, cu_seqlens, max_seqlen_in_batch, indices, attn_mask, bias)
    179     bias_dtype = bias.dtype
    180     bias = bias.to(torch.float16)
--> 181     attention = flash_attn_qkvpacked_func(qkv, bias)
    182     attention = attention.to(orig_dtype)
    183     bias = bias.to(bias_dtype)

File ~/miniconda3/envs/dna/lib/python3.8/site-packages/torch/autograd/function.py:539, in Function.apply(cls, *args, **kwargs)
    536 if not torch._C._are_functorch_transforms_active():
    537     # See NOTE: [functorch vjp and autograd interaction]
    538     args = _functorch.utils.unwrap_dead_wrappers(args)
--> 539     return super().apply(*args, **kwargs)  # type: ignore[misc]
    541 if cls.setup_context == _SingleLevelFunction.setup_context:
    542     raise RuntimeError(
    543         "In order to use an autograd.Function with functorch transforms "
    544         "(vmap, grad, jvp, jacrev, ...), it must override the setup_context "
    545         "staticmethod. For more details, please see "
    546         "https://pytorch.org/docs/master/notes/extending.func.html"
    547     )

File ~/.cache/huggingface/modules/transformers_modules/zhihan1996/DNABERT-2-117M/25abaf0bd247444fcfa837109f12088114898d98/flash_attn_triton.py:1021, in _FlashAttnQKVPackedFunc.forward(ctx, qkv, bias, causal, softmax_scale)
   1019 if qkv.stride(-1) != 1:
   1020     qkv = qkv.contiguous()
-> 1021 o, lse, ctx.softmax_scale = _flash_attn_forward(
   1022     qkv[:, :, 0],
   1023     qkv[:, :, 1],
   1024     qkv[:, :, 2],
   1025     bias=bias,
   1026     causal=causal,
   1027     softmax_scale=softmax_scale)
   1028 ctx.save_for_backward(qkv, o, lse, bias)
   1029 ctx.causal = causal

File ~/.cache/huggingface/modules/transformers_modules/zhihan1996/DNABERT-2-117M/25abaf0bd247444fcfa837109f12088114898d98/flash_attn_triton.py:781, in _flash_attn_forward(q, k, v, bias, causal, softmax_scale)
    778 assert q.dtype == k.dtype == v.dtype, 'All tensors must have the same type'
    779 assert q.dtype in [torch.float16,
    780                    torch.bfloat16], 'Only support fp16 and bf16'
--> 781 assert q.is_cuda and k.is_cuda and v.is_cuda
    782 softmax_scale = softmax_scale or 1.0 / math.sqrt(d)
    784 has_bias = bias is not None

AssertionError: 
```
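The check that actually fails is the `assert q.is_cuda and k.is_cuda and v.is_cuda` at the bottom of the trace: the triton flash-attention kernel shipped with the model code only accepts CUDA tensors. A minimal diagnostic sketch, reusing the variable names from the cell above (these checks are only illustrative and not part of the Quick Start):

```python
# Illustrative only: if either of these prints "cpu", the triton kernel's
# CUDA assertion above will fire during the forward pass.
print(inputs.device)                        # device of the tokenized input_ids
print(next(DNA2model.parameters()).device)  # device of the model weights
```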

I thought it might be because I was not running on a GPU, but the same error still exists even after I changed my device to GPU.

This is my device information (screenshot attached).
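For completeness, switching to GPU only avoids this assertion if both the model and the tokenized `input_ids` are actually moved onto the CUDA device before the forward pass. A minimal sketch of that step, again reusing the variable names from the cell above (illustrative only):

```python
import torch

# Illustrative sketch: both the weights and the inputs must live on CUDA,
# otherwise the triton kernel still sees CPU tensors and the same assertion fires.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
DNA2model = DNA2model.to(device)
inputs = inputs.to(device)

with torch.no_grad():
    hidden_states = DNA2model(inputs)[0]  # [1, sequence_length, 768]
```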

Zhihan1996 commented 7 months ago

Please try `pip uninstall triton` and run it again.
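For anyone who lands here later: with `triton` uninstalled, the remote model code can no longer import its triton flash-attention kernel and should fall back to a plain PyTorch attention implementation, which also runs on CPU. A minimal sketch of the Quick Start flow under that assumption (model name and arguments as in the repository README; treat it as a sketch, not an exact transcript):

```python
import torch
from transformers import AutoTokenizer, AutoModel

# Assumes `pip uninstall triton` has been run in this environment, so the
# remote code falls back to a non-triton attention implementation.
tokenizer = AutoTokenizer.from_pretrained("zhihan1996/DNABERT-2-117M", trust_remote_code=True)
model = AutoModel.from_pretrained("zhihan1996/DNABERT-2-117M", trust_remote_code=True)

dna = "ACGTAGCATCGGATCTATCTATCGACACTTGGTTATCGATCTACGAGCATCTCGTTAGC"
inputs = tokenizer(dna, return_tensors="pt")["input_ids"]

with torch.no_grad():
    hidden_states = model(inputs)[0]  # [1, sequence_length, 768]

# embedding with mean pooling over the token dimension
embedding_mean = torch.mean(hidden_states[0], dim=0)
print(embedding_mean.shape)  # expected: torch.Size([768])
```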

Jingyao711 commented 4 months ago

> Please try `pip uninstall triton` and run it again.

Thank you, it works!