Integrating Groq LPU Inference Engine API with Verba #140

bakongi commented 2 months ago

Hi all!

Here are instructions for integrating the Groq API with Verba.

Obtain an API key from the Groq console: https://console.groq.com/keys

  1. pip install groq
  2. Create "GroqGenerator.py" in the goldenverba/components/generation folder and paste this code:
    import os
    from dotenv import load_dotenv
    from groq import AsyncGroq

from collections.abc import Iterator

from goldenverba.components.interfaces import Generator

# Read variables from a local .env file (if one exists) into the process
# environment before they are looked up below.
load_dotenv()

# API key handed to the Groq client; None when the variable is not set.
GROQ_API_KEY = os.getenv("GROQ_API_KEY")

class GroqGenerator(Generator):
    """
    Groq Generator.

    Streams chat completions from the Groq API through an AsyncGroq client.
    The model is selected with the GROQ_MODEL environment variable and
    defaults to "llama3-8b-8192".
    """

    def __init__(self):
        super().__init__()
        self.name = "GroqGenerator"
        self.description = "Generator using Groq's LPU INFERENCE ENGINE"
        self.requires_library = ["groq"]
        self.requires_env = ["GROQ_API_KEY"]
        self.streamable = True
        self.model_name = os.getenv("GROQ_MODEL", "llama3-8b-8192")
        self.context_window = 8192

        # Initialize Groq client. It is only built when a key is configured,
        # so that merely instantiating this generator (the generator manager
        # does so unconditionally) does not depend on GROQ_API_KEY being set.
        self.client = AsyncGroq(api_key=GROQ_API_KEY) if GROQ_API_KEY else None

        # Define model details
        self.models = {
            "gemma-7b-it": {"tokens": 8192, "developer": "Google"},
            "llama2-70b-4096": {"tokens": 4096, "developer": "Meta"},
            "llama3-70b-8192": {"tokens": 8192, "developer": "Meta"},
            "llama3-8b-8192": {"tokens": 8192, "developer": "Meta"},
            "mixtral-8x7b-32768": {"tokens": 32768, "developer": "Mistral"},
        }

        # Select a model. A GROQ_MODEL value that is not listed above falls
        # back to the default context window instead of raising KeyError.
        model_details = self.models.get(self.model_name)
        self.max_tokens = (
            model_details["tokens"] if model_details else self.context_window
        )

    async def generate_stream(
        self, queries: list[str], context: list[str], conversation: dict = None
    ):
        """
        Generate a stream of response dictionaries based on a list of queries, a list of contexts, and the conversational context. The response is requested from the asynchronous Groq client and yielded incrementally as chunks arrive.

        @parameter queries: list[str] - A list of user queries to which the generator should respond.
        @parameter context: list[str] - Contextual information relevant to the queries.
        @parameter conversation - The previous conversation messages; treated as empty when None.

        @returns: Iterator[dict] - Yields dictionaries with a 'message' key holding the generated text and a 'finish_reason' key that is '' while text is still being produced and carries the API's finish reason otherwise.

        @raises ValueError when no Groq API key was configured; any error raised by the API call is printed and re-raised.
        """
        if conversation is None:
            conversation = []
        messages = self.prepare_messages(queries, context, conversation)

        if self.client is None:
            raise ValueError("GROQ_API_KEY environment variable is not set.")

        try:
            # NOTE(review): only the arguments required for a streamed chat
            # completion are passed; add temperature / max_tokens if needed.
            chat_completion = await self.client.chat.completions.create(
                messages=messages,
                model=self.model_name,
                stream=True,
            )

            async for chunk in chat_completion:
                finish_reason = chunk.choices[0].finish_reason
                if chunk.choices[0].delta.content:
                    yield {
                        "message": chunk.choices[0].delta.content,
                        "finish_reason": "",
                    }
                else:
                    # Chunks without text still report the finish reason.
                    yield {
                        "message": "",
                        "finish_reason": finish_reason,
                    }

        except Exception as e:
            print(f"An error occurred: {str(e)}")
            # Bare raise keeps the original traceback intact.
            raise

    def prepare_messages(
        self, queries: list[str], context: list[str], conversation: list
    ) -> list[dict[str, str]]:
        """
        Prepares a list of messages formatted for a Retrieval Augmented Generation chatbot system, including system instructions, previous conversation, and a new user query with context.

        @parameter queries: A list of strings representing the user queries to be answered.
        @parameter context: A list of strings representing the context information provided for the queries.
        @parameter conversation: A list of previous conversation messages that include the role and content.

        @returns A list of message dictionaries formatted for the chatbot. This includes an initial system message, the previous conversation messages, and the new user query encapsulated with the provided context.
        """
        messages = [
            {
                "role": "system",
                "content": "You are a Retrieval Augmented Generation chatbot. Please answer user queries only their provided context. If the provided documentation does not provide enough information, say so. If the answer requires code examples encapsulate them with ```programming-language-name ```. Don't do pseudo-code.",
            }
        ]

        for message in conversation:
            # NOTE(review): assumes each conversation item exposes `.type`
            # (the role) and `.content` attributes -- confirm against the
            # conversation item type Verba passes in.
            messages.append({"role": message.type, "content": message.content})

        query = " ".join(queries)
        user_context = " ".join(context)

        messages.append(
            {
                "role": "user",
                "content": f"Please answer this query: '{query}' with this provided context: {user_context}",
            }
        )

        return messages
3. Modify "manager.py":

from goldenverba.components.generation.ClaudeGenerator import ClaudeGenerator
from goldenverba.components.generation.GroqGenerator import GroqGenerator


class GeneratorManager:
    def __init__(self):
        # Registry of the available generators, keyed by generator name.
        self.generators: dict[str, Generator] = {
            "ClaudeGenerator": ClaudeGenerator(),
            "GPT4Generator": GPT4Generator(),
            "GPT3Generator": GPT3Generator(),
            "CohereGenerator": CohereGenerator(),
            "Llama2Generator": Llama2Generator(),
            "GroqGenerator": GroqGenerator(),
        }
        # ... rest of GeneratorManager is unchanged ...

Here we added `from goldenverba.components.generation.GroqGenerator import GroqGenerator` and `"GroqGenerator": GroqGenerator(),`.

4. Modify "verba_manager.py" by adding:
    # Check Groq ENV KEY
    try:
        import groq

        groq_key = os.environ.get("GROQ_API_KEY", "")

        if groq_key:
            self.environment_variables["GROQ_API_KEY"] = True
            # NOTE(review): this assigns to self.client -- confirm it does not
            # overwrite a client attribute the manager already relies on.
            self.client = groq.Groq(api_key=groq_key)
        else:
            self.environment_variables["GROQ_API_KEY"] = False
            # Raised on purpose so the handler below reports the missing key.
            raise ValueError("GROQ_API_KEY environment variable is not set.")

    except Exception as e:
        # Reached when the groq package is missing, the key is not set, or
        # the client could not be created.
        self.environment_variables["GROQ_API_KEY"] = False
        print(f"Error initializing Groq API: {e}")
Add the snippet above somewhere around line 200 or later.

        # Record whether the groq package can be imported.
        try:
            import groq

            self.installed_libraries["groq"] = True
        except Exception:
            self.installed_libraries["groq"] = False
Add the snippet above somewhere around line 300 or later.

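5. Add these variables to your environment or .env file:

    GROQ_API_KEY=<your Groq API key>
    GROQ_MODEL=llama3-8b-8192

GROQ_MODEL is optional; the generator defaults to llama3-8b-8192 when it is not set.
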
GROQ_MODEL can be one of:

gemma-7b-it, llama2-70b-4096, llama3-70b-8192, llama3-8b-8192, mixtral-8x7b-32768

thomashacker commented 2 months ago

Thanks! Feel free to create a PR