ridgerchu / matmulfreellm

Implementation for MatMul-free LM.
Apache License 2.0

LLVM ERROR: Cannot select: intrinsic %llvm.nvvm.shfl.sync.bfly.i32 #34

Open · jeisonmp opened this issue 4 days ago

jeisonmp commented 4 days ago

When I run with GPT-2 models, everything works. But when I run with any of the models ridger/MMfreeLM-370M, MMfreeLM-1.3B, or MMfreeLM-2.7B, this error occurs. Why? Can anyone help me?

Error:

LLVM ERROR: Cannot select: intrinsic %llvm.nvvm.shfl.sync.bfly.i32
[1] 93105 IOT instruction python3 generate_text.py
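For context: GPT-2 runs on stock PyTorch ops, while the MMfreeLM models go through this repo's custom Triton GPU kernels. %llvm.nvvm.shfl.sync.bfly.i32 is a CUDA warp-shuffle intrinsic, and a "Cannot select" error of this kind usually means the kernels are being compiled for a GPU or CUDA/Triton combination that cannot lower it. A minimal environment check along these lines (a sketch, assuming PyTorch and Triton are importable) can show what hardware and versions the kernels are targeting:

# Minimal environment check (assumes PyTorch, and optionally Triton, are installed).
import torch

print("PyTorch:", torch.__version__)
print("CUDA available:", torch.cuda.is_available())
if torch.cuda.is_available():
    print("GPU:", torch.cuda.get_device_name(0))
    print("Compute capability:", torch.cuda.get_device_capability(0))
    print("CUDA toolkit (PyTorch build):", torch.version.cuda)
try:
    import triton
    print("Triton:", triton.__version__)
except ImportError:
    print("Triton is not installed")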

import os
os.environ["TOKENIZERS_PARALLELISM"] = "false"
import mmfreelm
from transformers import AutoModelForCausalLM, AutoTokenizer

# Name of the pretrained model
#name = 'ridger/MMfreeLM-370M'
name = 'ridger/MMfreeLM-1.3B'
#name = 'ridger/MMfreeLM-2.7B'
#name = 'openai-community/gpt2'

# Load the tokenizer and the model (on the GPU, in half precision)
tokenizer = AutoTokenizer.from_pretrained(name)
model = AutoModelForCausalLM.from_pretrained(name).cuda().half()

# input_prompt = "In a shocking finding, scientist discovered a herd of unicorns living in a remote, "
# input_ids = tokenizer(input_prompt, return_tensors="pt").input_ids.cuda()
# outputs = model.generate(input_ids, max_length=32,  do_sample=True, top_p=0.4, temperature=0.6)
# print(tokenizer.batch_decode(outputs, skip_special_tokens=True)[0])

# Tokenize the prompt, move the tensors to the GPU, and sample a short reply.
def generate_response(prompt):
    inputs = tokenizer(prompt, return_tensors="pt")
    input_ids = inputs.input_ids.cuda()
    attention_mask = inputs.attention_mask.cuda()
    outputs = model.generate(input_ids, attention_mask=attention_mask, max_length=32,
                             do_sample=True, top_p=0.4, temperature=0.6,
                             pad_token_id=tokenizer.eos_token_id)
    return tokenizer.batch_decode(outputs, skip_special_tokens=True)[0]

# Simple chat loop: type 'exit' or 'quit' to stop.
while True:
    prompt = input("You: ")
    if prompt.lower() in ['exit', 'quit']:
        break
    response = generate_response(prompt)
    print(f"Model: {response}")
jeisonmp commented 3 days ago

Do you know if it runs in WSL 2 on Windows 10?
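(CUDA can work under WSL 2 when the Windows host has an NVIDIA driver with WSL support installed. A quick sanity check from inside the WSL distribution, assuming PyTorch is installed there, would be:)

# Sanity check for GPU passthrough inside WSL 2 (assumes the Windows host
# has an NVIDIA driver with WSL support and PyTorch is installed in WSL).
import torch

print("CUDA visible:", torch.cuda.is_available())
if torch.cuda.is_available():
    print("GPU:", torch.cuda.get_device_name(0))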