Same Seed, Different Result using LM Studio API

I'm working on a project , and I need to track the seed used in each generation so that I can reproduce the output when needed using the same config ( and same seed ). However, I find that it's not always the case.

I tried using the seed 42, and it gave me the exact same result each time with the same config. When I tried a larger number 1715852364 ( which I usually get from the epoch time ) I found out that it gives different results.

Output with seed 42 (Exactly the same twice ):
Output with seed 1715852364 ( Completly different ):

Here is the code I used to produce this bug (which is part of my code project): lm_studio.py :

import requests
try :
    from base import BaseModel
except :
    from models.ai.base import BaseModel
from typing import Any, Dict
from openai import OpenAI
import json

class LMStudioModel(BaseModel):
    def __init__(self, api_url: str, headers: Dict[str, str], config: Dict[str, Any]) -> None:
        super().__init__(api_url, headers, config)
        self.client = OpenAI(base_url=api_url, api_key="not-needed")

    def __str__(self) -> str:
        return "LMStudioModel"

    def __repr__(self) -> str:
        return f"{self.__class__.__name__}(api_url={self.api_url}, headers={self.headers}, config={self.config})"

    @staticmethod
    def load_config(config_path: str) -> Dict[str, Any]:
        return super(LMStudioModel, LMStudioModel).load_config(config_path)

    def generate_text(self, prompt: str, parameters: Dict[str, Any]) -> Any:
        # Adjust parameters based on the method signature and expected parameters
        data = {
            "messages": [
                {"role": "system", "content": parameters.get("instructions", "You are an intelligent assistant. You always provide well-reasoned answers that are both correct and helpful.")},
                {"role": "user", "content": prompt}
            ],
            "temperature": parameters.get("temperature", 0.7),
            "max_tokens": parameters.get("max_tokens", -1),
            "stream": parameters.get("stream", False)
        }
        response = requests.post(self.api_url + "/chat/completions", headers=self.headers, json=data)
        try:
            response.raise_for_status()
            return response.json()
        except Exception as e:
            return {"error": str(e)}

    def predict(self, prompt: str, params: Dict[str, Any] = None) -> Any:
        if params is None:
            params = self.config.get('default_parameters', {})
        response = self.generate_text(prompt, params)
        return response

    def inference(self, prompt, seed=None) -> str:
        chat_completion = self.client.chat.completions.create(
            messages=[
                {
                    "role": "user",
                    "content": prompt.strip(),
                }
            ],
            model="not-needed", # unused 
            seed=seed
        )
        return chat_completion.choices[0].message.content

    def sys_inference(self, sys_prompt: str, usr_prompt: str, seed=None) -> str:
        print("Using seed %s with type %s" % (seed, type(seed)))
        chat_completion = self.client.chat.completions.create(
            messages=[
                {"role": "system", "content": sys_prompt},
                {
                    "role": "user",
                    "content": usr_prompt.strip(),
                }
            ],
            model="not-needed", # unused 
            temperature=0.7,
            seed=seed
        )
        return chat_completion.choices[0].message.content

    def interactive_prompt(self):
        print("You are now chatting with the intelligent assistant. Type something to start the conversation.")
        history = [
            {"role": "system", "content": "You are an intelligent assistant. You always provide well-reasoned answers that are both correct and helpful."},
            {"role": "user", "content": "Hello, introduce yourself to someone opening this program for the first time. Be concise."},
        ]

        while True:
            messages = history[-2:]  # Consider only the last system message and the last user message for brevity
            completion = self.client.chat.completions.create(
                model="local-model",  # this field is currently unused
                messages=messages,
                temperature=0.7,
                max_tokens=150,
                stream=True
            )   
            new_message = {"role": "assistant", "content": ""}
            for chunk in completion:
                if chunk.choices[0].delta.content:
                    print(chunk.choices[0].delta.content, end="", flush=True)
                    new_message["content"] += chunk.choices[0].delta.content

            history.append(new_message)
            print()
            # Capture user input
            user_input = input("> ")
            if user_input.lower() == 'quit':
                print("Exiting interactive prompt...")
                break
            history.append({"role": "user", "content": user_input})

    def update_token(self, new_token: str) -> None:
        self.headers['Authorization'] = f"Bearer {new_token}"

    def calc_tokens(self, prompt: str) -> int:
        # Simplified token calculation; you might want to adjust this according to your actual tokenization logic
        return len(prompt.split())

    @classmethod
    def setup_from_config(cls, config_path: str):
        config = cls.load_config(config_path)
        api_url = config.get("api_url", "http://localhost:1234/v1")  # Default to example URL
        headers = {"Content-Type": "application/json"}  # Default header for JSON content
        headers.update(config.get("headers", {}))  # Update with any additional headers from config
        return cls(api_url=api_url, headers=headers, config=config)

    @classmethod
    def setup_from_dict(cls, config_json: Dict[str, Any] | str ):
        if isinstance(config_json, dict):
            api_url = config_json.get("api_url", "http://localhost:1234/v1")  # Default to example URL
            headers = {"Content-Type": "application/json"}  # Default header for JSON content
            headers.update(config_json.get("headers", {}))  # Update with any additional headers from config
            return cls(api_url=api_url, headers=headers, config=config_json)
        elif isinstance(config_json, str): # if it's a string, convert it to a dict
            config : dict = json.loads(config_json)
            return cls.setup_from_dict(config)

# Example usage
if __name__ == '__main__':
    config_path = "configs/lm_studio.config.json"
    lm_studio = LMStudioModel.setup_from_config(config_path)
    # print(lm_studio.sys_inference(sys_prompt="You are a helpful assistant", usr_prompt="Hello there", seed=42))
    print(lm_studio.sys_inference(sys_prompt="You are a helpful assistant", usr_prompt="Hello there", seed=1715852364))
    # lm_studio.interactive_prompt()

base.py :

from abc import ABC, abstractmethod
from typing import Any, Dict
import json 
import os 
import sys 

class BaseModel(ABC):
    """
    Abstract base class for models to interact with APIs and perform data processing.
    """

    def __init__(self, api_url: str, headers: Dict[str, str], config: Dict[str, Any]) -> None:
        self.api_url = api_url
        self.headers = headers
        self.config = config

    @staticmethod
    @abstractmethod
    def load_config(config_path: str) -> Dict[str, Any]:
        """
        Loads configuration from a specified path.
        """
        with open(config_path, 'r') as file:
            return json.load(file)

    @abstractmethod
    def generate_text(self, prompt: str, parameters: Dict[str, Any]) -> Any:
        """
        Generates text based on a prompt and parameters.
        This method needs to be implemented by the subclass.
        """
        pass

    @abstractmethod
    def predict(self, prompt: str, params: Dict[str, Any]) -> Any:
        """
        Processes a prompt and returns a prediction.
        This method needs to be implemented by the subclass.
        """
        pass

    @abstractmethod
    def inference(self) -> str:
        """
        Performs inference using the model.
        This method needs to be implemented by the subclass.
        """
        pass
    @abstractmethod
    def sys_inference(self, sys_prompt, user_prompt, seed:int | None =None) -> str:
        """
        Performs inference using the model with system prompt .
        This method needs to be implemented by the subclass.
        """
        pass

    @abstractmethod
    def update_token(self, new_token: str) -> None:
        """
        Updates the API token used for authentication.
        This method needs to be implemented by the subclass.
        """
        pass

    @abstractmethod
    def calc_tokens(self, prompt: str) -> int:
        """
        Calculates the number of tokens in a prompt.
        This method needs to be implemented by the subclass.
        """
        pass

    def interactive_prompt(self) -> None:
        """
        Optional: Implement an interactive prompt for testing purposes.
        This method can be overridden by subclasses for specific interactive functionality.
        """
        print("This method can be overridden by subclasses.")

    @classmethod
    def setup_from_config(cls, config_path: str):
        """
        Sets up the model based on the specified configuration.
        This method must be implemented by subclasses.
        """
        pass

    def setup_from_dict(cls, config_json: Dict[str, Any] | str ):
        """
        Sets up the model based on the specified configuration.
        This method must be implemented by subclasses.
        """
        pass

configs/lm_studio.config.json :

{
    "api_url": "http://localhost:1234/v1",
    "instructions": "You are a helpful AI Assistant.",
    "default_parameters":{
        "temperature": 0.7,
        "max_tokens": -1,
        "stream": false
    }
}

I only know that LM Studio uses llama.cpp, but not sure if it has to do with the size of the seed, if so what's the maximum integer where the same seed will always give the same results ?

lmstudio-ai / lmstudio-bug-tracker

Same Seed, Different Result using LM Studio API #19