abetlen / llama-cpp-python

Python bindings for llama.cpp
https://llama-cpp-python.readthedocs.io
MIT License
8.01k stars 950 forks source link

LlamaCPP Usage #1035

Open 4entertainment opened 10 months ago

4entertainment commented 10 months ago

i use this guide for develop a RAG system: https://github.com/marklysze/LlamaIndex-RAG-WSL-CUDA/blob/master/LlamaIndex_Mixtral_8x7B-RAG.ipynb

i use the following code for use my local llm:

llm = LlamaCPP(
    model_url=None, # We'll load locally.
    # model_path='./Models/mistral-7b-instruct-v0.1.Q6_K.gguf', # 6-bit model
    model_path="my_local_llm_path",
    temperature=0.1,
    max_new_tokens=1024, # Increasing to support longer responses
    context_window=8192, # Mistral7B has an 8K context-window
    generate_kwargs={},
    # set to at least 1 to use GPU
    model_kwargs={"n_gpu_layers": 40}, # 40 was a good amount of layers for the RTX 3090, you may need to decrease yours if you have less VRAM than 24GB
    # messages_to_prompt=messages_to_prompt,
    # completion_to_prompt=completion_to_prompt,
    messages_to_prompt=system_prompt,
    completion_to_prompt=query_wrapper_prompt,
    #system_prompt=system_prompt,
    #query_wrapper_prompt=query_wrapper_prompt,
    verbose=True
)

Can anyone give me information about LlamaCPP structure? What should i do for use my local model?

kmlob commented 10 months ago

You can find an example in https://github.com/imartinez/privateGPT.git inside llm_component.py.

develperbayman commented 9 months ago

what about a python example? for example i feel like im like a few lines of code off originally this was a openai chatbot that used the openai api im trying to convert it to use huggingface but im a little stumped


import openai
import threading
import time
import sys
import chat_commands
from gtts import gTTS
import os
import tkinter as tk
from tkinter import filedialog
from tkinter import messagebox
from tkinter import *
from flask import Flask, request, render_template
from PIL import Image, ImageTk
import torch
import torchvision.models as models
import speech_recognition as sr
import pygame
import speech_recognition as sr
import webbrowser
import re
import subprocess
import os
#import guifunc
#import pythonide
import pipgui
#from tkinter import *
#from tkinter.filedialog import asksaveasfilename, askopenfilename
from transformers import AutoModel

# Load model directly
model = AutoModel.from_pretrained("TheBloke/Llama-2-7B-Chat-GGUF")

doListenToCommand = True
listening = False

# List with common departures to end the while loop
despedida = ["Goodbye", "goodbye", "bye", "Bye", "See you later", "see you later"]

# Create the GUI window
window = tk.Tk()
window.title("Computer:AI")
window.geometry("400x400")

# Create the text entry box
text_entry = tk.Entry(window, width=50)
text_entry.pack(side=tk.BOTTOM)

# Create the submit button
submit_button = tk.Button(window, text="Submit", command=lambda: submit())
submit_button.pack(side=tk.BOTTOM)

# Create the text output box
text_output = tk.Text(window, height=300, width=300)
text_output.pack(side=tk.BOTTOM)

def submit(event=None, text_input=None):
    global doListenToCommand
    global listening

    # Get the user input and check if the input matches the list of goodbyes
    if text_input != "":
        usuario = text_input
    else:
        usuario = text_entry.get()

    if usuario in despedida:
        on_closing()
    else:
        prompt = f"You are ChatGPT and answer my following message: {usuario}"

    # Getting responses using the OpenAI API
    response = openai.Completion.create(
        engine="text-davinci-003",
        prompt=prompt,
        max_tokens=2049
    )

    respuesta = response["choices"][0]["text"]

    # Converting text to audio
    texto = str(respuesta)
    tts = gTTS(texto, lang='en', tld='ie')
    tts.save("audio.mp3")

    # Displaying the answer on the screen
    text_output.insert(tk.END, "ChatGPT: " + respuesta + "\n")

    # Clear the input text
    text_entry.delete(0, tk.END)

    # Playing the audio
    doListenToCommand = False
    time.sleep(1)
    os.system("play audio.mp3")
    doListenToCommand = True

    # Call function to listen to the user
    # if listening == False:
    #     listen_to_command()

# Bind the Enter key to the submit function
window.bind("<Return>", submit)
#pygame.mixer.music.load("audio.mp3")
#pygame.mixer.music.play()
#termux audio
#os.system("mpg123 audio.mp3")
# Flask app
app = Flask(__name__, template_folder='templates')

@app.route("/", methods=["GET", "POST"])
def index():
    if request.method == "POST":
        file = request.files["file"]
        file.save(file.filename)
        openai.api_key = request.form["apikey"]
        return "Model file and API key saved."
    return render_template("index.html")

def run_as_normal_app():
    window.update()

def run_on_flask():
    app.run()

def listen_to_command():
    global doListenToCommand
    global listening

    # If we are not to be listening then exit the function.
    if doListenToCommand == True:
        # Initialize the recognizer
        r = sr.Recognizer()

        # Use the default microphone as the audio source
        with sr.Microphone() as source:
            print("Listening...")
            listening = True
            audio = r.listen(source)
            listening = False

        try:
            # Use speech recognition to convert speech to text
            command = r.recognize_google(audio)
            print("You said:", command)
            text_output.insert(tk.END, "You: " + command + "\n")
            text_entry.delete(0, tk.END)

            # Check if the command is a "generate image" instruction
            # if "generate image" in command.lower():
            #   # Call the function to generate the image
            #   generate_image()

            # Process the commands
            # Prepare object to be passed.
            class passed_commands:
                tk = tk
                text_output = text_output
                submit = submit

            chat_commands.process_commands(passed_commands,command)

        except sr.UnknownValueError:
            print("Speech recognition could not understand audio.")
        except sr.RequestError as e:
            print("Could not request results from Google Speech Recognition service:", str(e))

        listen_to_command()
        listening = False

def on_closing():
    if tk.messagebox.askokcancel("Quit", "Do you want to quit?"):
        window.destroy()

def pythonide():
    command = pythonide
    process = subprocess.run(["python3","pythonide.py"])        

window.protocol("WM_DELETE_WINDOW", on_closing)

if __name__ == "__main__":
    # Create the menu bar
    menu_bar = tk.Menu(window)

    # Create the "File" menu
    file_menu = tk.Menu(menu_bar, tearoff=0)
    file_menu.add_command(label="Open LLM", command=lambda: filedialog.askopenfilename())
    file_menu.add_command(label="Save LLM", command=lambda: filedialog.asksaveasfilename())
    file_menu.add_separator()
    #file_menu.add_command(label="Exit", command=window.quit)
    file_menu.add_command(label="Exit", command=on_closing)
    menu_bar.add_cascade(label="File", menu=file_menu)

    # Create the "Run" menu
    run_menu = tk.Menu(menu_bar, tearoff=0)
    run_menu.add_command(label="Run as normal app", command=run_as_normal_app)
    run_menu.add_command(label="Run on Flask", command=run_on_flask)
    run_menu.add_command(label='Python Ide', command=pythonide)
    menu_bar.add_cascade(label="Run", menu=run_menu)

    # Set the menu bar
    window.config(menu=menu_bar)

    # Start the main program loop
    start_listening_thread = threading.Thread(target=listen_to_command)
    start_listening_thread.daemon = True
    start_listening_thread.start()
    window.mainloop()