RuntimeError: CUDA error: CUBLAS_STATUS_INVALID_VALUE when calling `cublasSgemmStridedBatched`

RuntimeError: CUDA error: CUBLAS_STATUS_INVALID_VALUE when calling `cublasSgemmStridedBatched( handle, opa, opb, m, n, k, &alpha, a, lda, stridea, b, ldb, strideb, &beta, c, ldc, stridec, num_batches)`

CUDA version: 11.6. GPU: NVIDIA A100.

I get the above error when running the example code from the Hugging Face model page:

# Minimal reproduction script: load databricks/dolly-v2-12b and run a single
# instruction through the instruct pipeline. According to the traceback in
# this report, the RuntimeError is raised from the generate_text(...) call.
import torch
from instruct_pipeline import InstructionTextGenerationPipeline
from transformers import AutoModelForCausalLM, AutoTokenizer

# The tokenizer is configured to pad on the left side.
tokenizer = AutoTokenizer.from_pretrained("databricks/dolly-v2-12b", padding_side="left")
# Weights are requested in bfloat16. NOTE(review): device_map="auto" presumably
# lets accelerate decide device placement (accelerate hooks appear in the
# traceback) -- confirm against the transformers documentation.
model = AutoModelForCausalLM.from_pretrained("databricks/dolly-v2-12b", device_map="auto", torch_dtype=torch.bfloat16)

# Wrap the loaded model and tokenizer in the project's instruction pipeline.
generate_text = InstructionTextGenerationPipeline(model=model, tokenizer=tokenizer)
# Run one prompt; the result is indexed as a list of dicts with a
# "generated_text" key.
res = generate_text("Explain to me the difference between nuclear fission and fusion.")
print(res[0]["generated_text"])

Traceback (most recent call last):
  File "/data/vkuttichikeloth/llm_experiments/dolly_2.0/generate.py", line 34, in <module>
    res = generate_text("Explain to me the difference between nuclear fission and fusion.")
  File "/home/vkuttichikeloth/.conda/envs/venv_dolly/lib/python3.10/site-packages/transformers/pipelines/base.py", line 1109, in __call__
    return self.run_single(inputs, preprocess_params, forward_params, postprocess_params)
  File "/home/vkuttichikeloth/.conda/envs/venv_dolly/lib/python3.10/site-packages/transformers/pipelines/base.py", line 1116, in run_single
    model_outputs = self.forward(model_inputs, **forward_params)
  File "/home/vkuttichikeloth/.conda/envs/venv_dolly/lib/python3.10/site-packages/transformers/pipelines/base.py", line 1015, in forward
    model_outputs = self._forward(model_inputs, **forward_params)
  File "/data/vkuttichikeloth/llm_experiments/dolly_2.0/instruct_pipeline.py", line 133, in _forward
    generated_sequence = self.model.generate(
  File "/home/vkuttichikeloth/.conda/envs/venv_dolly/lib/python3.10/site-packages/torch/autograd/grad_mode.py", line 27, in decorate_context
    return func(*args, **kwargs)
  File "/home/vkuttichikeloth/.conda/envs/venv_dolly/lib/python3.10/site-packages/transformers/generation/utils.py", line 1485, in generate
    return self.sample(
  File "/home/vkuttichikeloth/.conda/envs/venv_dolly/lib/python3.10/site-packages/transformers/generation/utils.py", line 2524, in sample
    outputs = self(
  File "/home/vkuttichikeloth/.conda/envs/venv_dolly/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1194, in _call_impl
    return forward_call(*input, **kwargs)
  File "/home/vkuttichikeloth/.conda/envs/venv_dolly/lib/python3.10/site-packages/accelerate/hooks.py", line 165, in new_forward
    output = old_forward(*args, **kwargs)
  File "/home/vkuttichikeloth/.conda/envs/venv_dolly/lib/python3.10/site-packages/transformers/models/gpt_neox/modeling_gpt_neox.py", line 662, in forward
    outputs = self.gpt_neox(
  File "/home/vkuttichikeloth/.conda/envs/venv_dolly/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1194, in _call_impl
    return forward_call(*input, **kwargs)
  File "/home/vkuttichikeloth/.conda/envs/venv_dolly/lib/python3.10/site-packages/transformers/models/gpt_neox/modeling_gpt_neox.py", line 553, in forward
    outputs = layer(
  File "/home/vkuttichikeloth/.conda/envs/venv_dolly/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1194, in _call_impl
    return forward_call(*input, **kwargs)
  File "/home/vkuttichikeloth/.conda/envs/venv_dolly/lib/python3.10/site-packages/accelerate/hooks.py", line 165, in new_forward
    output = old_forward(*args, **kwargs)
  File "/home/vkuttichikeloth/.conda/envs/venv_dolly/lib/python3.10/site-packages/transformers/models/gpt_neox/modeling_gpt_neox.py", line 320, in forward
    attention_layer_outputs = self.attention(
  File "/home/vkuttichikeloth/.conda/envs/venv_dolly/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1194, in _call_impl
    return forward_call(*input, **kwargs)
  File "/home/vkuttichikeloth/.conda/envs/venv_dolly/lib/python3.10/site-packages/accelerate/hooks.py", line 165, in new_forward
    output = old_forward(*args, **kwargs)
  File "/home/vkuttichikeloth/.conda/envs/venv_dolly/lib/python3.10/site-packages/transformers/models/gpt_neox/modeling_gpt_neox.py", line 152, in forward
    attn_output, attn_weights = self._attn(query, key, value, attention_mask, head_mask)
  File "/home/vkuttichikeloth/.conda/envs/venv_dolly/lib/python3.10/site-packages/transformers/models/gpt_neox/modeling_gpt_neox.py", line 206, in _attn
    attn_scores = torch.baddbmm(
RuntimeError: CUDA error: CUBLAS_STATUS_INVALID_VALUE when calling `cublasSgemmStridedBatched( handle, opa, opb, m, n, k, &alpha, a, lda, stridea, b, ldb, strideb, &beta, c, ldc, stridec, num_batches)`

databrickslabs / dolly

RuntimeError: CUDA error: CUBLAS_STATUS_INVALID_VALUE when calling `cublasSgemmStridedBatched` #124