openai / openai-python

The official Python library for the OpenAI API
https://pypi.org/project/openai/
Apache License 2.0
22.31k stars 3.11k forks source link

I couldn't upload a file and use it in a thread normally via the openai library... #1775

Open alex-deus opened 3 days ago

alex-deus commented 3 days ago

Confirm this is an issue with the Python library and not an underlying OpenAI API

Describe the bug

I uploaded a file to a vector store, but I couldn't use the file in threads; the response was: "It seems there was an error while trying to search the uploaded files. Could you please try uploading the file again, or let me know if there is a specific file you want me to look into?"

To Reproduce

  1. create vector_store
  2. create assistant
  3. upload file
  4. wait for the upload to finish
  5. attach file to vector store
  6. create thread
  7. create run
  8. wait for the run to complete
  9. get messages
  10. take the run's message

Code snippets

import json
import os
import time

from openai import OpenAI

# Reproduction script: upload a small JSON file, attach it to a vector store,
# then start a run with the file_search tool and print the text of the
# message produced by that run.
client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))

vector_store = client.beta.vector_stores.create(name="Test")

assistant = client.beta.assistants.create(
    description="Test",
    model="gpt-4o",
    tools=[{"type": "file_search"}],
    tool_resources={"file_search": {"vector_store_ids": [vector_store.id]}},
    temperature=0.4,
)

# Upload file
file = client.files.create(file=("data.json", json.dumps({"name": "Alexbabaliks"}).encode()), purpose="assistants")
while True:
    file_status = client.files.retrieve(file_id=file.id)
    if file_status.status == "processed":
        break
    if file_status.status == "error":
        # Terminal state: polling any further would never finish.
        raise RuntimeError(f"File {file.id} ended in status 'error'")
    time.sleep(1)

# Attach the file and wait for *this file's* ingestion result. Polling the
# attached file (rather than the store as a whole) lets a failed ingest be
# reported here instead of showing up later as a file_search error.
client.beta.vector_stores.files.create(vector_store_id=vector_store.id, file_id=file.id)
while True:
    store_file = client.beta.vector_stores.files.retrieve(vector_store_id=vector_store.id, file_id=file.id)
    if store_file.status == "completed":
        break
    if store_file.status in ("failed", "cancelled"):
        ingest_error = store_file.last_error
        ingest_detail = f" ({ingest_error.code}: {ingest_error.message})" if ingest_error is not None else ""
        raise RuntimeError(
            f"File {file.id} was not indexed in vector store {vector_store.id}: "
            f"status {store_file.status!r}{ingest_detail}"
        )
    time.sleep(1)

thread = client.beta.threads.create(tool_resources={"file_search": {"vector_store_ids": [vector_store.id]}})

# NOTE(review): the question is passed as run-level `instructions` and no user
# message is ever added to the thread -- confirm this is intended.
run = client.beta.threads.runs.create(
    instructions="What is my name??? Take it from JSON file and return JSON in format {'name': '<name>'}",
    thread_id=thread.id,
    assistant_id=assistant.id,
    model="gpt-4o",
    temperature=0.4,
    tools=[{"type": "file_search"}],
)

# Keep polling only while the run can still make progress; every other status
# is treated as final so the loop cannot spin forever.
active_run_statuses = ("queued", "in_progress", "cancelling")
while True:
    run_status = client.beta.threads.runs.retrieve(thread_id=thread.id, run_id=run.id)
    if run_status.status not in active_run_statuses:
        break

    time.sleep(2)

if run_status.status != "completed":
    # Surface the failure instead of silently printing an empty answer.
    run_error = run_status.last_error
    run_detail = f" ({run_error.code}: {run_error.message})" if run_error is not None else ""
    raise RuntimeError(f"Run {run.id} ended with status {run_status.status!r}{run_detail}")

answer = ""
messages = client.beta.threads.messages.list(thread_id=thread.id)
for message in messages.data:
    # Only messages created by this run are of interest.
    if run.id != message.run_id:
        continue

    # Take the first text part of the message; the `break` leaves only this
    # inner loop, so a later matching message overwrites `answer`.
    for content in message.content:
        if content.type == "text":
            answer = content.text.value
            break

print(answer)

OS

Linux

Python version

3.11.1

Library version

openai v1.51.0

alex-deus commented 2 days ago

I came up with a solution, but it doesn’t work well for large JSON files (around 5MB), and sometimes, even for a small, simple file, it returns: {"n": "I couldn't find your given name in the provided documents."} :-D

import json
import os
import time

from openai import OpenAI

# Sampling temperature used both when creating the assistant in main() and for
# every run started by ask().
TEMPERATURE = 0.2

def ask(
    client: OpenAI,
    assistant_id: str,
    thread_id: str,
    file_id: str,
    instruction: str,
    *,
    poll_interval: float = 4,
    max_polls: int = 30,
) -> str:
    """Ask the assistant one question about an attached file and return its reply.

    Posts ``instruction`` to the thread as a user message with ``file_id``
    attached for the ``file_search`` tool, starts a run, polls the run until
    it completes, and then reads the text of the message created by that run.

    Args:
        client: OpenAI client used for every API call.
        assistant_id: Assistant that executes the run.
        thread_id: Thread that receives the message and hosts the run.
        file_id: Uploaded file attached to the user message.
        instruction: Text of the user message.
        poll_interval: Seconds to sleep between run status checks.
        max_polls: Number of sleeps allowed before giving up on the run.

    Returns:
        The first text part of the message belonging to the run, or ``""``
        when no such message is found. If several listed messages belong to
        the run, the one listed last wins.

    Raises:
        RuntimeError: The run stopped in a status other than ``"completed"``.
        TimeoutError: The run was still active after ``max_polls`` waits.
    """
    client.beta.threads.messages.create(
        thread_id=thread_id,
        content=instruction,
        role="user",
        attachments=[{"file_id": file_id, "tools": [{"type": "file_search"}]}],
    )

    run = client.beta.threads.runs.create(
        thread_id=thread_id,
        model="gpt-4o",
        tools=[{"type": "file_search"}],
        assistant_id=assistant_id,
        temperature=TEMPERATURE,
    )

    polls = 0
    while True:
        run = client.beta.threads.runs.retrieve(thread_id=thread_id, run_id=run.id)
        if run.status == "completed":
            break
        if run.status not in ("queued", "in_progress", "cancelling"):
            # Any other status is final; waiting longer cannot change it.
            if run.status == "failed" and run.last_error is not None:
                raise RuntimeError(f"{run.last_error.code}: {run.last_error.message}")
            raise RuntimeError(f"Run {run.id} ended with status {run.status!r}")

        polls += 1
        if polls > max_polls:
            # This is a local timeout, not an API rate-limit response.
            raise TimeoutError(f"Run {run.id} still {run.status!r} after {polls} status checks")
        time.sleep(poll_interval)

    answer = ""
    messages = client.beta.threads.messages.list(thread_id=thread_id)
    for message in messages.data:
        # Skip messages that were not produced by this run.
        if run.id != message.run_id:
            continue

        # The `break` only leaves this inner loop, so scanning continues with
        # the next message in the listing.
        for content in message.content:
            if content.type == "text":
                answer = content.text.value
                break

    return answer

def main() -> None:
    """Create an assistant, upload a tiny JSON file, and ask two questions about it.

    Both questions go to the same thread through ask(); each reply is printed
    as soon as it is returned.
    """
    api = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))

    # Assistant with the file_search tool enabled.
    helper = api.beta.assistants.create(
        description="Test",
        instructions="Return all answer in JSON format",
        model="gpt-4o",
        tools=[{"type": "file_search"}],
        temperature=TEMPERATURE,
    )

    # In-memory JSON document uploaded under the name "data.json".
    payload = json.dumps({"given_name": "John", "family_name": "Smit"}).encode()
    uploaded = api.files.create(file=("data.json", payload), purpose="assistants")

    conversation = api.beta.threads.create()

    questions = (
        "What is my given_name? Answer format {'n': '<name>'}",
        "What is my last family_name? Answer format {'l': '<last name>'}",
    )
    for question in questions:
        # Reported by the author: the first reply is sometimes
        # {"n": "I couldn't find your given name in the provided documents."}
        reply = ask(api, helper.id, conversation.id, uploaded.id, question)
        print(reply)


if __name__ == "__main__":
    main()