I was looking at forking this and implementing chunking, since most of us no longer have access to the GPT-4-32K model (it stopped being offered weeks ago). Updating the code to include some form of chunking would definitely help. Thoughts?
def make_openai_request(prompt, tokens=500, chunk_size=500):
    """Send ``prompt`` to the chat model in fixed-size chunks and join the replies.

    The prompt is sliced into pieces of ``chunk_size`` characters. Each piece
    is sent as its own chat completion (a system message plus that piece as
    the user message), so the model sees no context from the other pieces.
    The stripped reply texts are concatenated in chunk order.

    Args:
        prompt: Text to send. Assumed to be a ``str`` (it is sliced and
            printed) -- confirm against callers.
        tokens: Passed through as ``max_tokens`` for every chunk request.
        chunk_size: Chunk length in *characters* (not tokens). Must be
            positive.

    Returns:
        The concatenated reply text, or ``None`` when the prompt is empty,
        when three failures (exceptions or empty responses) occur, or when
        every reply is blank.

    Raises:
        ValueError: If ``chunk_size`` is not positive.
    """
    model = "gpt-4"  # Update this to the correct GPT-4 model identifier since we can't use 32k...
    max_failures = 3

    if chunk_size <= 0:
        raise ValueError("chunk_size must be a positive integer")

    if not prompt:
        # No chunks would be produced; without this guard the retry loop
        # could never make progress or count a failure.
        print("Prompt is empty. No API request made.")
        return None

    chunks = [
        prompt[start:start + chunk_size]
        for start in range(0, len(prompt), chunk_size)
    ]
    replies = []  # replies[i] is the stripped answer for chunks[i]
    failures = 0

    print(f"Making API request for prompt: {prompt[:50]}...")
    while len(replies) < len(chunks) and failures < max_failures:
        # Resume at the first unanswered chunk so that a retry never
        # re-sends chunks that already succeeded.
        chunk = chunks[len(replies)]
        try:
            response = openai.ChatCompletion.create(
                model=model,
                messages=[{
                    "role": "system",
                    "content": "You are a helpful assistant."
                }, {
                    "role": "user",
                    "content": chunk
                }],
                max_tokens=tokens
            )
            choices = response['choices'] if response else None
            reply = choices[0]['message']['content'].strip() if choices else None
        except Exception as e:
            print(f"An error occurred during the API call: {e}")
            failures += 1
            time.sleep(2)  # Wait before retrying
            continue

        if reply is None:
            print("API response empty. Retrying...")
            failures += 1
            continue

        replies.append(reply)

    if len(replies) < len(chunks):
        # Never hand back a partial answer as if it were complete.
        print(f"Failed to get a successful API response after {max_failures} tries.")
        return None

    combined_response = "".join(replies)
    if not combined_response:
        print("API response empty.")
        return None

    print(f"API request successful for prompt: {prompt[:50]}")
    return combined_response
I was looking at forking this and implementing chunking, since most of us no longer have access to the GPT-4-32K model (it stopped being offered weeks ago). Updating the code to include some form of chunking would definitely help. Thoughts?