Hi,
I'm getting this error when trying to summarize a document using Llama-3 (via llama_cpp and LlamaIndex). I've found that it's a llama_cpp error, not a LlamaIndex error: if I downgrade to llama_cpp_python 0.2.62 it works, but it fails with the latest version. Can you please look into it?
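As a workaround in the meantime, pinning the last working version with pip install llama-cpp-python==0.2.62 avoids the error for me. A quick sanity check that the pinned version is actually the one being loaded (just a one-liner, in case multiple environments are involved):

import llama_cpp
print(llama_cpp.__version__)  # 0.2.62 works for me; the latest version fails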
Here's the minimal code to reproduce the error:
import torch
from llama_index.core import Settings, SimpleDirectoryReader
from llama_index.llms.llama_cpp import LlamaCPP
from llama_index.core.node_parser import SentenceSplitter
from llama_index.core.response_synthesizers import TreeSummarize

def llm_summary(text_chunks):
    synth = TreeSummarize()
    summary = synth.get_response("Summarize the provided text in about 200 words or less", text_chunks)
    return summary

def completion_to_prompt_llama3(completion: str) -> str:
    # Wrap the completion in the Llama-3 instruct chat template
    system_prompt_str = ""
    return (
        f"<|begin_of_text|><|start_header_id|>system<|end_header_id|>\n"
        f"{system_prompt_str.strip()}<|eot_id|><|start_header_id|>user<|end_header_id|>\n"
        f"{completion.strip()}<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n"
    )

filepath = "Mount Everest.docx"
device = torch.device("cuda:0") if torch.cuda.is_available() else torch.device("cpu")
n_gpu_layers = 0 if device.type == "cpu" else -1  # compare device.type, not the device object, to the string

llm = LlamaCPP(
    model_url="https://huggingface.co/QuantFactory/Meta-Llama-3-8B-Instruct-GGUF/resolve/main/Meta-Llama-3-8B-Instruct.Q4_K_M.gguf",
    model_path=None,  # llama-2-7b-chat.Q4_K_M.gguf
    temperature=0.1,
    max_new_tokens=256,
    context_window=8000,
    generate_kwargs={},
    model_kwargs={"n_gpu_layers": n_gpu_layers, "offload_kqv": True},
    completion_to_prompt=completion_to_prompt_llama3,
    verbose=True,
)
Settings.llm = llm

reader = SimpleDirectoryReader(input_files=[filepath], filename_as_id=True)  # , file_metadata=extract_metadata
documents = reader.load_data()

nodes = SentenceSplitter(chunk_size=512, chunk_overlap=20).get_nodes_from_documents(documents)
text_chunks = [node.text for node in nodes]
summary_llm = llm_summary(text_chunks)
print(f"LLM Summary => {summary_llm}")
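To confirm LlamaIndex isn't involved, a bare llama_cpp call along these lines should hit the same error. This is a minimal sketch, assuming the GGUF above has already been downloaded to a local file (the model_path and the chunk text are placeholders):

from llama_cpp import Llama

# Load the same quantized Llama-3 model directly, bypassing LlamaIndex
llm_raw = Llama(
    model_path="Meta-Llama-3-8B-Instruct.Q4_K_M.gguf",  # assumed local path to the GGUF above
    n_ctx=8000,
    n_gpu_layers=-1,
    offload_kqv=True,
    verbose=True,
)

prompt = completion_to_prompt_llama3("Summarize the provided text in about 200 words or less: <chunk text here>")
out = llm_raw(prompt, max_tokens=256, temperature=0.1)
print(out["choices"][0]["text"])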
Here's the input file: Mount Everest.docx
Thanks!