llmware-ai / llmware

Unified framework for building enterprise RAG pipelines with small, specialized models
https://llmware-ai.github.io/llmware/
Apache License 2.0
5.52k stars 1.37k forks source link

'set' object has no attribute 'items' #202

Open khalilxg opened 10 months ago

khalilxg commented 10 months ago

AttributeError Traceback (most recent call last) Cell In[1], line 63 59 if name == "main": 61 model = "llmware/bling-1b-0.1" ---> 63 contract_analysis_on_laptop(model)

Cell In[1], line 36 33 print("\nAnalyzing contract: ", str(i+1), contract) 35 print("LLM Responses:") ---> 36 for key, value in query_list.items(): 37 38 # contract is parsed, text-chunked, and then filtered by topic key 39 source = prompter.add_source_document(contracts_path, contract, query=key) 41 # calling the LLM with 'source' information from the contract automatically packaged into the prompt

AttributeError: 'set' object has no attribute 'items'

"""This example demonstrats doing an analysis across contracts entirely on on a laptop using local models """

import os import re from llmware.prompts import Prompt, HumanInTheLoop from llmware.setup import Setup from llmware.configs import LLMWareConfig

def contract_analysis_on_laptop (model_name, from_hf=False):

# Load the llmware sample files
print (f"\n > Loading the llmware sample files...")
contracts_path = ("/home/ubuntu/Documents/ags/")
# query list
query_list = {"what is tunisia oblications of contacts?"}

print (f"\n > Loading model {model_name}...")
# Note: Some newer models use local custom code in their HF repos which is not trusted by default
#  For now, you can pass in a dummy api_key and we'll set the right config to trust that code
#  This will likely be changing in the future
if from_hf:
    # local cpu open source model
    prompter = Prompt().load_model(model_name,from_hf=True)
else:
    # e.g., 'gpt-4'
    prompter = Prompt().load_model(model_name)

for i, contract in enumerate(os.listdir(contracts_path)):

    print("\nAnalyzing contract: ", str(i+1), contract)

    print("LLM Responses:")
    for key, value in query_list.items():

        # contract is parsed, text-chunked, and then filtered by topic key
        source = prompter.add_source_document(contracts_path, contract, query=key)

        # calling the LLM with 'source' information from the contract automatically packaged into the prompt
        responses = prompter.prompt_with_source(value, prompt_name="just_the_facts", temperature=0.3)

        for r, response in enumerate(responses):
            print(key, ":", re.sub("[\n]"," ", response["llm_response"]).strip())

        # We're done with this contract, clear the source from the prompt
        prompter.clear_source_materials()

# Save jsonl report to jsonl to /prompt_history folder
print("\nPrompt state saved at: ", os.path.join(LLMWareConfig.get_prompt_path(),prompter.prompt_id))
prompter.save_state()

#Save csv report that includes the model, response, prompt, and evidence for human-in-the-loop review
csv_output = HumanInTheLoop(prompter).export_current_interaction_to_csv()
print("csv output - ", csv_output)

if name == "main":

model = "llmware/bling-1b-0.1"

contract_analysis_on_laptop(model)
doberst commented 10 months ago

we just updated this example - could you try again? I ran it a few times locally, and no issues, so hopefully you see the same on your end.

khalilxg commented 10 months ago

Can you please paste the example here! My ("/home/ubuntu/Documents/ags/") had only 1 pdf.

doberst commented 10 months ago

Sure - for future reference, this is in the Examples/RAG/ folder:

"""This example demonstrates a basic contract analysis workflow run entirely on on a laptop using a RAG-finetuned small specialized instruct BLING model """

import os import re from llmware.prompts import Prompt, HumanInTheLoop from llmware.setup import Setup from llmware.configs import LLMWareConfig

def contract_analysis_on_laptop (model_name):

# Load the llmware sample files
print (f"\n > Loading the llmware sample files...")
sample_files_path = Setup().load_sample_files()
contracts_path = os.path.join(sample_files_path,"Agreements")

# query list
query_list = {"executive employment agreement": "What are the name of the two parties?",
              "base salary": "What is the executive's base salary?",
              "governing law": "What is the governing law?"}

print (f"\n > Loading model {model_name}...")

prompter = Prompt().load_model(model_name)

for i, contract in enumerate(os.listdir(contracts_path)):

    # exclude potential mac os created file artifact in folder path
    if contract != ".DS_Store":

        print("\nAnalyzing contract: ", str(i+1), contract)

        print("LLM Responses:")

        for key, value in query_list.items():

            # contract is parsed, text-chunked, and then filtered by topic key
            source = prompter.add_source_document(contracts_path, contract, query=key)

            # calling the LLM with 'source' information from the contract automatically packaged into the prompt
            responses = prompter.prompt_with_source(value, prompt_name="just_the_facts", temperature=0.3)

            for r, response in enumerate(responses):
                print(key, ":", re.sub("[\n]"," ", response["llm_response"]).strip())

            # We're done with this contract, clear the source from the prompt
            prompter.clear_source_materials()

# Save jsonl report to jsonl to /prompt_history folder
print("\nPrompt state saved at: ", os.path.join(LLMWareConfig.get_prompt_path(),prompter.prompt_id))
prompter.save_state()

#Save csv report that includes the model, response, prompt, and evidence for human-in-the-loop review
csv_output = HumanInTheLoop(prompter).export_current_interaction_to_csv()
print("csv output - ", csv_output)

if name == "main":

bling_models = ["llmware/bling-1b-0.1", "llmware/bling-1.4b-0.1", "llmware/bling-falcon-1b-0.1",
                "llmware/bling-sheared-llama-2.7b-0.1", "llmware/bling-sheared-llama-1.3b-0.1",
                "llmware/bling-red-pajamas-3b-0.1", "llmware/bling-stable-lm-3b-4e1t-0.1"]

# use local cpu model
model = bling_models[0]

contract_analysis_on_laptop(model)
khalilxg commented 10 months ago

next error after this update > Loading the llmware sample files...

Loading model llmware/bling-1b-0.1...

Analyzing contract: 1 Code des obligations et contrats.pdf LLM Responses: ERROR:root:error: to use prompt_with_source, there must be a loaded source - try '.add_sources' first

KeyError Traceback (most recent call last) Cell In[1], line 69 63 if name == "main": 64 65 66 # use local cpu model 67 model = "llmware/bling-1b-0.1" ---> 69 contract_analysis_on_laptop(model)

Cell In[1], line 49 46 responses = prompter.prompt_with_source(value, prompt_name="just_the_facts", temperature=0.3) 48 for r, response in enumerate(responses): ---> 49 print(key, ":", re.sub("[\n]"," ", response["llm_response"]).strip()) 51 # We're done with this contract, clear the source from the prompt 52 prompter.clear_source_materials()

KeyError: 'llm_response'

full code : """This example demonstrats doing an analysis across contracts entirely on on a laptop using local models """

import os import re from llmware.prompts import Prompt, HumanInTheLoop from llmware.setup import Setup from llmware.configs import LLMWareConfig

def contract_analysis_on_laptop (model_name, from_hf=False):

# Load the llmware sample files
print (f"\n > Loading the llmware sample files...")
sample_files_path = Setup().load_sample_files()
contracts_path = ("/home/ubuntu/Documents/ags/")

# query list
query_list = {"executive employment agreement": "What are the name of the two parties?",
              "base salary": "What is the executive's base salary?",
              "governing law": "What is the governing law?"}

print (f"\n > Loading model {model_name}...")
# Note: Some newer models use local custom code in their HF repos which is not trusted by default
#  For now, you can pass in a dummy api_key and we'll set the right config to trust that code
#  This will likely be changing in the future
if from_hf:
    # local cpu open source model
    prompter = Prompt().load_model(model_name,from_hf=True)
else:
    # e.g., 'gpt-4'
    prompter = Prompt().load_model(model_name)

for i, contract in enumerate(os.listdir(contracts_path)):

    print("\nAnalyzing contract: ", str(i+1), contract)

    print("LLM Responses:")
    for key, value in query_list.items():

        # contract is parsed, text-chunked, and then filtered by topic key
        source = prompter.add_source_document(contracts_path, contract, query=key)

        # calling the LLM with 'source' information from the contract automatically packaged into the prompt
        responses = prompter.prompt_with_source(value, prompt_name="just_the_facts", temperature=0.3)

        for r, response in enumerate(responses):
            print(key, ":", re.sub("[\n]"," ", response["llm_response"]).strip())

        # We're done with this contract, clear the source from the prompt
        prompter.clear_source_materials()

# Save jsonl report to jsonl to /prompt_history folder
print("\nPrompt state saved at: ", os.path.join(LLMWareConfig.get_prompt_path(),prompter.prompt_id))
prompter.save_state()

#Save csv report that includes the model, response, prompt, and evidence for human-in-the-loop review
csv_output = HumanInTheLoop(prompter).export_current_interaction_to_csv()
print("csv output - ", csv_output)

if name == "main":

# use local cpu model
model = "llmware/bling-1b-0.1"

contract_analysis_on_laptop(model)