I am trying to run llama2 gguf on windows 11 Version 22H2. I have python 3.11 installed on my local machine.
Below is the code:
import gradio as gr
from ctransformers import AutoModelForCausalLM
def load_llm():
    """Load the local Llama 2 GGUF model and return the model object.

    Prints progress markers before/after loading. On failure the original
    exception is printed and then re-raised so the caller sees the real error.
    """
    print("loading")
    try:
        model = AutoModelForCausalLM.from_pretrained(
            "./llama-2-7b.Q4_0.gguf",
            model_type="llama",
            max_new_tokens=512,
            repetition_penalty=1.13,
            temperature=0.1,
        )
    except Exception as err:
        print(err)
        print("failed to load")
        raise err
    else:
        print("loaded")
        return model
def llm_function(message, chat_history):
    """Chat callback: generate a model response for *message*.

    Args:
        message: The user's prompt string.
        chat_history: Prior conversation turns (unused here; required by the
            gr.ChatInterface callback signature).

    Returns:
        The model's generated text for *message*.

    Fix: the original called ``load_llm()`` on every message, re-loading the
    multi-gigabyte GGUF model from disk per request. The loaded model is now
    memoized on the function object so it is loaded exactly once.
    """
    print("message: ", message)
    llm = getattr(llm_function, "_llm", None)
    if llm is None:
        # First call only: load and cache the model.
        llm = load_llm()
        llm_function._llm = llm
    return llm(message)
# UI metadata for the (currently disabled) Gradio chat interface.
title = "Codellama"
description = "Codellama is a chatbot developed by IsolatedFalcon. Ask me anything!"

# NOTE: two alternative entry points (a gr.ChatInterface UI and an
# interactive input() loop) existed here as commented-out code; the active
# behavior is a single hard-coded prompt, preserved below.
llm = load_llm()
print(llm("What is the capital of India? Tell me something about that."))
While running the file, I am getting this output:
loading
exception: access violation writing 0x0000019A7D97B000
failed to load
Traceback (most recent call last):
File "D:\llama2\main.py", line 36, in <module>
llm = load_llm()
^^^^^^^^^^
File "D:\llama2\main.py", line 17, in load_llm
raise e
File "D:\llama2\main.py", line 7, in load_llm
llm = AutoModelForCausalLM.from_pretrained(
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "D:\llama2\llama\Lib\site-packages\ctransformers\hub.py", line 173, in from_pretrained
    (rest of traceback truncated)
Kindly let me know if I am missing something.