princeton-nlp / SWE-agent

[NeurIPS 2024] SWE-agent takes a GitHub issue and tries to automatically fix it, using GPT-4, or your LM of choice. It can also be employed for offensive cybersecurity or competitive coding challenges.
https://swe-agent.com
MIT License

FORMAT ERROR : Your output was not formatted correctly. You must always include one discussion and one command as part of your response. #454

Closed: MAhmadUzair closed this 4 months ago

MAhmadUzair commented 4 months ago

Describe the bug

I am trying to run SWE-agent with a local LLM, without an OpenAI key. To do this, I have modified the OpenAIModel class to call a Gradio URL; in Gradio I have hosted the model "Code Qwen 0.5B" (Qwen/Qwen1.5-0.5B-Chat in the code below).

Below is the modified OpenAIModel class with the Gradio integration.

```python
class OpenAIModel(BaseModel):
    MODELS = {
        "gpt-3.5-turbo-0125": {
            "max_context": 16_385,
            "cost_per_input_token": 5e-07,
            "cost_per_output_token": 1.5e-06,
        },
        "gpt-3.5-turbo-1106": {
            "max_context": 16_385,
            "cost_per_input_token": 1.5e-06,
            "cost_per_output_token": 2e-06,
        },
        "gpt-3.5-turbo-16k-0613": {
            "max_context": 16_385,
            "cost_per_input_token": 1.5e-06,
            "cost_per_output_token": 2e-06,
        },
        "gpt-4-32k-0613": {
            "max_context": 32_768,
            "cost_per_input_token": 6e-05,
            "cost_per_output_token": 0.00012,
        },
        "gpt-4-0613": {
            "max_context": 8_192,
            "cost_per_input_token": 3e-05,
            "cost_per_output_token": 6e-05,
        },
        "gpt-4-1106-preview": {
            "max_context": 128_000,
            "cost_per_input_token": 1e-05,
            "cost_per_output_token": 3e-05,
        },
        "gpt-4-0125-preview": {
            "max_context": 128_000,
            "cost_per_input_token": 1e-05,
            "cost_per_output_token": 3e-05,
        },
        "gpt-4-turbo-2024-04-09": {
            "max_context": 128_000,
            "cost_per_input_token": 1e-05,
            "cost_per_output_token": 3e-05,
        },
    }

    SHORTCUTS = {
        "gpt3": "gpt-3.5-turbo-1106",
        "gpt3-legacy": "gpt-3.5-turbo-16k-0613",
        "gpt4": "gpt-4-1106-preview",
        "gpt4-legacy": "gpt-4-0613",
        "gpt4-0125": "gpt-4-0125-preview",
        "gpt3-0125": "gpt-3.5-turbo-0125",
        "gpt4-turbo": "gpt-4-turbo-2024-04-09",
    }

    def __init__(self, args: ModelArguments, commands: list[Command]):
        super().__init__(args, commands)

        # Set OpenAI key
        cfg = config.Config(os.path.join(os.getcwd(), "keys.cfg"))
        api_key = cfg.get("OPENAI_API_KEY", None)
        if api_key:
            self.client = OpenAI(api_key=api_key, base_url="https://api.openai.com")
        else:
            self.api_url = "https://451edefe4e25c41ffb.gradio.live/api/predict/"

    def history_to_messages(
        self,
        history: list[dict[str, str]],
        is_demonstration: bool = False,
    ) -> str | list[dict[str, str]]:
        """
        Create `messages` by filtering out all keys except for role/content per `history` turn
        """
        # Remove system messages if it is a demonstration
        if is_demonstration:
            history = [entry for entry in history if entry["role"] != "system"]
            return "\n".join([entry["content"] for entry in history])
        # Return history components with just role, content fields
        return [{k: v for k, v in entry.items() if k in ["role", "content"]} for entry in history]

    def query(self, history: list[dict[str, str]], is_demonstration: bool = False) -> str:
        """Query the custom Gradio API with the given prompt and return the response."""
        # Format the history into messages
        messages = self.history_to_messages(history, is_demonstration)

        # For demonstration, messages is a single string
        if is_demonstration:
            prompt = messages
        else:
            prompt = json.dumps(messages)

        try:
            response = requests.post(self.api_url, json={"prompt": prompt})
            if response.status_code == 200:
                return response.json()  # Adjust depending on the actual response structure
            else:
                return "Failed to get response: " + response.text
        except Exception as e:
            return f"An error occurred: {str(e)}"
```
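Note: the 422 error quoted further down in this report is the Gradio endpoint rejecting this request body. Gradio's REST API expects inputs wrapped as `{"data": [...]}`, one list entry per input component, rather than a `prompt` key. A minimal sketch of a corrected `query` under that assumption (Gradio 3.x `/api/predict/` endpoint, single-textbox interface as in the hosting code below; `requests` and `json` are already imported in models.py):

```python
def query(self, history: list[dict[str, str]], is_demonstration: bool = False) -> str:
    """Query the Gradio endpoint; inputs must be wrapped in a "data" list."""
    messages = self.history_to_messages(history, is_demonstration)
    prompt = messages if is_demonstration else json.dumps(messages)
    try:
        response = requests.post(self.api_url, json={"data": [prompt]}, timeout=300)
        response.raise_for_status()
        # Gradio returns {"data": [<one output per component>], ...}; take the text
        return response.json()["data"][0]
    except Exception as e:
        return f"An error occurred: {e}"
```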

And below is the Gradio code for the hosted model.

```python
from transformers import AutoModelForCausalLM, AutoTokenizer

device = "cuda"  # the device to load the model onto

model = AutoModelForCausalLM.from_pretrained(
    "Qwen/Qwen1.5-0.5B-Chat",
    torch_dtype="auto",
    device_map="auto",
)
tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen1.5-0.5B-Chat")

prompt = "Give me a short introduction to large language model."
messages = [
    {"role": "system", "content": "You are a helpful assistant."},
    {"role": "user", "content": prompt},
]
text = tokenizer.apply_chat_template(
    messages,
    tokenize=False,
    add_generation_prompt=True,
)
model_inputs = tokenizer([text], return_tensors="pt").to(device)

generated_ids = model.generate(
    model_inputs.input_ids,
    max_new_tokens=512,
)
generated_ids = [
    output_ids[len(input_ids):]
    for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]

response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]

from transformers import AutoModelForCausalLM, AutoTokenizer
from gradio import Interface
import torch

# Load the Qwen model (assuming you have transformers installed)
device = "cuda" if torch.cuda.is_available() else "cpu"  # Use GPU if available
model = AutoModelForCausalLM.from_pretrained(
    "Qwen/Qwen1.5-0.5B-Chat",
    torch_dtype="auto",
    device_map="auto",
)
tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen1.5-0.5B-Chat")

def generate_introduction(prompt):
    """
    Generates an introduction to large language models using the Qwen model,
    formatted for a specific system.

    Args:
        prompt: A string prompt for the model.

    Returns:
        A string containing the formatted response as required by the system.
    """
    # Generate the model response
    messages = [
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": prompt},
    ]
    text = tokenizer.apply_chat_template(
        messages,
        tokenize=False,
        add_generation_prompt=True,
    )
    model_inputs = tokenizer([text], return_tensors="pt").to(device)

    generated_ids = model.generate(
        model_inputs.input_ids,
        max_new_tokens=512,
    )
    generated_ids = [
        output_ids[len(input_ids):]
        for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
    ]

    response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]

    return response.strip()

# Define the Gradio interface
interface = Interface(
    fn=generate_introduction,
    inputs="text",
    outputs="text",
    title="Introduction to Large Language Models",
    description="Enter a prompt to get a short introduction to large language models generated by the Qwen model.",
)

# Launch the Gradio interface
interface.launch()
```

After pasting the Gradio URL into the OpenAIModel class in models.py, I start the project with the following command:

```
python3.9 run.py --model_name gpt4 --data_path /home/uzair/project/test-repo/problem_statements/1.md --repo_path /home/uzair/project/test-repo --config_file config/default_from_url.yaml --apply_patch_locally
```

The agent starts and copies the issue from the local repo; after copying, it shows the format error below:

````
WARNING  FORMAT ERROR  Your output was not formatted correctly. You must always include one discussion and one command as part of your response. Make sure you do not have multiple discussion/command tags. Please make sure your output precisely matches the following format:
DISCUSSION
Discuss here with yourself about what your planning and what you're going to do in this step.

```
command(s) that you're going to run
```

WARNING  Malformat limit reached: Failed to get response: {"detail":[{"type":"missing","loc":["body","data"],"msg":"Field required","input":{"prompt":"[{\"role\": \"system\", \"content\": \"SETTING: You are an autonomous programmer,
````
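Two distinct failures appear in this log. The second warning is the HTTP 422 discussed above: Gradio rejected the request body because it lacked a `data` field, so the agent received an error string instead of model output. The first warning comes from SWE-agent's output parser, which requires each model response to contain exactly one discussion block followed by exactly one fenced command block. Purely for illustration, a response shaped like the following would pass the format check (the command shown is a hypothetical example):

````
DISCUSSION
First I will look around the repository to find the file that is related to the issue.

```
ls -a
```
````

Even with the payload fixed, a 0.5B chat model is unlikely to produce this format reliably, which is why the malformat limit is reached.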

Questions

1. Can I run SWE-agent with a local LLM?
2. Can you help resolve this error?
3. Is it possible to achieve what I am trying to do?

Steps/commands/code to Reproduce

```
python3.9 run.py --model_name gpt4 --data_path /home/uzair/project/test-repo/problem_statements/1.md --repo_path /home/uzair/project/test-repo --config_file config/default_from_url.yaml --apply_patch_locally
```

Error message/results

````
WARNING  FORMAT ERROR  Your output was not formatted correctly. You must always include one discussion and one command as part of your response. Make sure you do not have multiple discussion/command tags. Please make sure your output precisely matches the following format:
DISCUSSION
Discuss here with yourself about what your planning and what you're going to do in this step.

```
command(s) that you're going to run
```

WARNING  Malformat limit reached: Failed to get response: {"detail":[{"type":"missing","loc":["body","data"],"msg":"Field required","input":{"prompt":"[{\"role\": \"system\", \"content\": \"SETTING: You are an autonomous programmer,
````

System Information

Windows, Core i7 8th gen


ofirpress commented 4 months ago

Hi!

SWE-agent is complex and will probably not run well on small, local LMs. For now, rather than breaking our heads trying to figure out how to make this work, we're going to wait a few months to let local LMs get better before we try to make SWE-agent work with them.

Closing this for now, as we will prioritize other topics, but if this is something you care about then we think it's a cool topic to work on and you should continue. We're sure someone will get this to work at some point.
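For readers who want to pursue local models anyway, a common pattern, not official SWE-agent support, is to serve the model behind an OpenAI-compatible HTTP API (vLLM, Ollama, and llama.cpp's server all provide one) instead of a hand-rolled Gradio bridge, so the stock OpenAI client can be reused without custom request code. A minimal sketch; the localhost URL, port, and server choice are illustrative assumptions:

```python
# Sketch only: assumes an OpenAI-compatible server is already running locally,
# e.g. a vLLM or Ollama instance serving Qwen/Qwen1.5-0.5B-Chat on port 8000.
from openai import OpenAI

client = OpenAI(api_key="not-needed", base_url="http://localhost:8000/v1")
completion = client.chat.completions.create(
    model="Qwen/Qwen1.5-0.5B-Chat",
    messages=[
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": "Say hello."},
    ],
)
print(completion.choices[0].message.content)
```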