from optimum.amd.ryzenai import RyzenAIModelForCausalLM
from transformers import AutoTokenizer
from tests.ryzenai.testing_utils import DEFAULT_VAIP_CONFIG_TRANSFORMERS
# Example: greedy text generation with a Brevitas-quantized OPT/LLaMA model on RyzenAI.

# Path to an OPT/LLaMA model quantized using Brevitas — fill in before running.
# (The original line assigned nothing and was a syntax error.)
model_path = "path/to/brevitas-quantized-opt-or-llama"
vaip_config = DEFAULT_VAIP_CONFIG_TRANSFORMERS

model = RyzenAIModelForCausalLM.from_pretrained(model_path, vaip_config=vaip_config)
tokenizer = AutoTokenizer.from_pretrained(model_path)

prompt = "Hey, are you conscious? Can you talk to me?"
inputs = tokenizer(prompt, return_tensors="pt")

# generate() returns token ids, not text — decode before printing.
# do_sample=False makes the output deterministic (greedy decoding).
generated_ids = model.generate(**inputs, max_new_tokens=30, do_sample=False)
generated_text = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)
print(generated_text)
As per the title: add support for running Brevitas-quantized OPT/LLaMA causal-LM models with RyzenAI.
Example usage (see the snippet above):