instructor-ai / instructor

structured outputs for llms
https://python.useinstructor.com/
MIT License

llm_validator with llama-cpp-python fails #1058

Closed mobeetle closed 4 days ago

mobeetle commented 1 week ago

Hi,

I use an example derived from your page: https://python.useinstructor.com/examples/self_critique/?h=before#adding-custom-validation , patched to work with llama-cpp-python.

I'm getting an error saying that the function does not contain a key 'chat'.

I found out that I need to patch your validators.py so that it does not refer to 'client.chat.completions.create' or 'client.moderations.create' but just calls 'client' directly; with that change it works correctly with llama-cpp-python.
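In short, the two call sites change roughly like this (just a sketch of the diff; the full files are below):

# Before (upstream validators.py):
resp = client.chat.completions.create(response_model=Validator, messages=messages, model=model, temperature=temperature)
response = client.moderations.create(input=v)

# After (patched so the callable returned by instructor.patch(create=...) is invoked directly):
resp = client(response_model=Validator, messages=messages, model=model, temperature=temperature)
response = client(input=v)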

Full code of the patched example is here:

import llama_cpp
from llama_cpp.llama_speculative import LlamaPromptLookupDecoding

import instructor
from instructor import llm_validator

from pydantic import BaseModel, Field, BeforeValidator
from typing import List, Annotated
from rich.console import Console

llama = llama_cpp.Llama(
    model_path="/Users/flauros/Documents/CODING/models/Qwen2.5-7B-Instruct-Q4_K_M.gguf",
    n_gpu_layers=-1,
    n_ctx= 8 * 1024,
    f16_kv=True,
    offload_kqv=True,
    flash_attn=True,
    logits_all=True,
    chat_format="chatml",
    draft_model=LlamaPromptLookupDecoding(
                    num_pred_tokens=9,
                    max_ngram_size=3,
    ),    
    verbose=True,
)

client = instructor.patch(
    create=llama.create_chat_completion_openai_v1,
    mode=instructor.Mode.JSON_SCHEMA,  # Weak performance
    # mode=instructor.Mode.TOOLS,  # Good
    # mode=instructor.Mode.FUNCTIONS,  # Best
)

question = "What is the meaning of life?"
context = "According to the devil, the meaning of life is to live a life of sin and debauchery."

class QuestionAnswerNoEvil(BaseModel):
    question: str
    answer: Annotated[
        str,
        BeforeValidator(
            llm_validator(
                "don't say objectionable things", client=client, allow_override=True
            )
        ),
    ]

try:    
    qa: QuestionAnswerNoEvil = (client(
        model="gpt-3.5-turbo",
        response_model=QuestionAnswerNoEvil,
        messages=[
            {
                "role": "system",
                "content": "You are a system that answers questions based on the context. answer exactly what the question asks using the context.",
            },
            {
                "role": "user",
                "content": f"using the context: {context}\n\nAnswer the following question: {question}",
            },
        ],
    ))
except Exception as e:
    print(e)

and the patched validators.py file is here:

from typing import Callable, Optional

from openai import OpenAI
from pydantic import Field

from instructor.function_calls import OpenAISchema
from instructor.client import Instructor

class Validator(OpenAISchema):
    """
    Validate if an attribute is correct and if not,
    return a new value with an error message
    """

    is_valid: bool = Field(
        default=True,
        description="Whether the attribute is valid based on the requirements",
    )
    reason: Optional[str] = Field(
        default=None,
        description="The error message if the attribute is not valid, otherwise None",
    )
    fixed_value: Optional[str] = Field(
        default=None,
        description="If the attribute is not valid, suggest a new value for the attribute",
    )

def llm_validator(
    statement: str,
    client: Instructor,
    allow_override: bool = False,
    model: str = "gpt-3.5-turbo",
    temperature: float = 0,
) -> Callable[[str], str]:
    """
    Create a validator that uses the LLM to validate an attribute

    ## Usage

    ```python
    from typing import Annotated

    from instructor import llm_validator
    from pydantic import BaseModel, BeforeValidator, Field, ValidationError

    class User(BaseModel):
        name: Annotated[str, BeforeValidator(llm_validator("The name must be a full name all lowercase"))]
        age: int = Field(description="The age of the person")

    try:
        user = User(name="Jason Liu", age=20)
    except ValidationError as e:
        print(e)
    ```

    ```
    1 validation error for User
    name
        The name is valid but not all lowercase (type=value_error.llm_validator)
    ```

    Note that here the error message is written by the LLM, and the error type is `value_error.llm_validator`.

    Parameters:
        statement (str): The statement to validate
        client (Instructor): The patched client to use for validation
        allow_override (bool): Whether to return the LLM's suggested fixed_value instead of the original (default: False)
        model (str): The LLM to use for validation (default: "gpt-3.5-turbo")
        temperature (float): The temperature to use for the LLM (default: 0)
    """

    def llm(v: str) -> str:
        # resp = client.chat.completions.create(
        resp = client(
            response_model=Validator,
            messages=[
                {
                    "role": "system",
                    "content": "You are a world class validation model. Capable to determine if the following value is valid for the statement, if it is not, explain why and suggest a new value.",
                },
                {
                    "role": "user",
                    "content": f"Does `{v}` follow the rules: {statement}",
                },
            ],
            model=model,
            temperature=temperature,
        )

        # If the response is not valid, return the reason; this could be used in
        # the future to generate a better response, via a reasking mechanism.
        assert resp.is_valid, resp.reason

        if allow_override and not resp.is_valid and resp.fixed_value is not None:
            # If the value is not valid, but we allow override, return the fixed value
            return resp.fixed_value
        return v

    return llm

def openai_moderation(client: OpenAI) -> Callable[[str], str]:
    """
    Validates a message using the OpenAI moderation model.

    Should only be used for monitoring inputs and outputs of OpenAI APIs;
    other use cases are disallowed as per:
    https://platform.openai.com/docs/guides/moderation/overview

    Example:
    ```python
    from instructor import openai_moderation

    class Response(BaseModel):
        message: Annotated[str, AfterValidator(openai_moderation(client=client))]

    Response(message="I hate you")
    ```

    ```
    ValidationError: 1 validation error for Response
    message
      Value error, `I hate you.` was flagged for ['harassment'] [type=value_error, input_value='I hate you.', input_type=str]
    ```

    Parameters:
        client (OpenAI): The OpenAI client to use, must be sync (default: None)
    """

    def validate_message_with_openai_mod(v: str) -> str:
        # response = client.moderations.create(
        response = client(input=v)
        out = response.results[0]
        cats = out.categories.model_dump()
        if out.flagged:
            raise ValueError(
                f"`{v}` was flagged for {', '.join(cat for cat in cats if cats[cat])}"
            )

        return v

    return validate_message_with_openai_mod


I just don't want to interfere with your code, because I don't know how to distinguish when llama-cpp-python is used and when another backend is used. 
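For illustration only, this is roughly the kind of dispatch I have in mind (a rough sketch, not something I have tested across backends; it assumes that the object returned by instructor.patch(create=...) is a plain callable without a .chat attribute):

def _resolve_create(client):
    # Hypothetical helper: patched create functions, e.g. from
    # instructor.patch(create=llama.create_chat_completion_openai_v1),
    # are plain callables with no `.chat` attribute, so call them directly.
    if not hasattr(client, "chat"):
        return client
    # OpenAI-style clients expose the usual chat completions endpoint.
    return client.chat.completions.create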

Could you please check it and address it, if convenient?
ivanleomk commented 1 week ago

Hey @mobeetle, in this specific case, the client here will be what you pass into the llm_validator. There's no other client used.

I'd suggest just using a separate validator in this case if you're worried, though; that way you won't need to monkeypatch the llm_validator file itself.

from pydantic import BaseModel, field_validator, Field
from typing import Optional
from openai import OpenAI
import instructor

client = instructor.from_openai(OpenAI())

class Validator(BaseModel):
    """
    Validate if an attribute is correct and if not,
    return a new value with an error message
    """

    is_valid: bool = Field(
        default=True,
        description="Whether the attribute is valid based on the requirements",
    )
    reason: Optional[str] = Field(
        default=None,
        description="The error message if the attribute is not valid, otherwise None",
    )
    fixed_value: Optional[str] = Field(
        default=None,
        description="If the attribute is not valid, suggest a new value for the attribute",
    )

class QuestionAnswerNoEvil(BaseModel):
    question: str
    answer: str

    @field_validator("answer")
    def validate_answer(cls, v):
        print(f"validating answer of {v}")
        resp = client.chat.completions.create(
            model="gpt-4o-mini",
            response_model=Validator,
            messages=[
                {
                    "role": "system",
                    "content": f"Make sure that {v} doesn't contain objectionable things that go against any sort of spiritual or moral code.",
                },
            ],
        )

        assert resp.is_valid, f"Answer is not valid: {resp.reason}"

        # Keep the original answer if the validator did not suggest a fixed value
        return resp.fixed_value if resp.fixed_value is not None else v

question = "What is the meaning of life?"
context = "The meaning of life is to live a life of sin and debauchery and harm as many people as possible"

qa: QuestionAnswerNoEvil = client.chat.completions.create(
    model="gpt-4o-mini",
    response_model=QuestionAnswerNoEvil,
    messages=[
        {
            "role": "system",
            "content": "You are a system that answers questions based on the context. answer exactly what the question asks using the context. Do not change the context or hallucinate a response.",
        },
        {
            "role": "user",
            "content": f"using the context: {context}\n\nAnswer the following question: {question}",
        },
    ],
    max_retries=1,
)

# Error is thrown here
#> answer
#>   Assertion failed, Answer is not valid: This statement promotes harmful behavior and goes against moral and spiritual values. [type=assertion_error, input_value='The meaning of life is t...any people as possible.', input_type=str]
#>     For further information visit https://errors.pydantic.dev/2.9/v/assertion_error
ivanleomk commented 4 days ago

Closing this due to inactivity. Generally I'd recommend writing your own validator, as shown above, to have a bit more control over the output.