Closed truebit closed 8 months ago
Would be exciting! Feel free to add a custom client for it by following this documentation or by mirroring the existing LM client integrations.
The current integration of the OpenAI SDK in dsp/modules/databricks.py will be useful in adding the vision LM integration as well.
I have created a custom module, the sample code like below:
class GPT4V(dspy.Module):
    """DSPy module that answers a question about optional images via chain of thought."""

    def __init__(self):
        super().__init__()
        # Plain question -> answer chain-of-thought predictor.
        self.generate_answer = dspy.ChainOfThought("question -> answer")

    def forward(self, question, *images):
        # context = self.retrieve(question).passages
        # Images are forwarded via `config` so they reach the LM client as
        # extra keyword arguments.
        # NOTE(review): this requires the configured LM client to accept an
        # `images` kwarg — the stock OpenAI client does not (see traceback below).
        config = {'images': images}
        answer = self.generate_answer(question=question, config=config)
        return answer
Note: use the `config` argument to pass custom parameters through to the custom model.
@truebit
Hi, thanks for this.
I'm having trouble running this with:
import cv2
import numpy as np
import base64
#The current integration of the OpenAI SDK in dsp/modules/databricks.py will be useful in adding the vision LM integration as well.
#
# https://github.com/stanfordnlp/dspy/issues/459
def encode_image_to_string_from_path(image_path: str) -> str:
    """Read an image from *image_path*, re-encode it as JPEG, and return it as a base64 string.

    Args:
        image_path: Filesystem path to the image to load.

    Returns:
        The JPEG-encoded image as a UTF-8 base64 string.

    Raises:
        ValueError: If the image cannot be read or cannot be JPEG-encoded.
    """
    # `cv2` and `base64` are imported at the top of the file; the original
    # function-local re-imports were redundant and have been removed.
    image = cv2.imread(image_path)
    if image is None:
        raise ValueError("Image not found or the path is incorrect")
    # Encode the image to JPEG bytes in memory; check the success flag so a
    # failed encode does not silently produce garbage output.
    ok, buffer = cv2.imencode('.jpg', image)
    if not ok:
        raise ValueError("Failed to encode image as JPEG")
    # Base64-encode the JPEG buffer for transport as a string.
    return base64.b64encode(buffer).decode("utf-8")
class GPT4V(dspy.Module):
    """Chain-of-thought question answering over zero or more input images."""

    def __init__(self):
        super().__init__()
        self.generate_answer = dspy.ChainOfThought("question -> answer")

    def forward(self, question, *images):
        # context = self.retrieve(question).passages
        # Extra model parameters (here, the images) travel via the `config` kwarg.
        return self.generate_answer(
            question=question,
            config={'images': images},
        )
# Set up the LM
# NOTE(review): 'gpt-4-vision-preview' is a preview model name — confirm it is
# still available on the account before running.
gpt_vision = dspy.OpenAI(model='gpt-4-vision-preview', max_tokens=1800)
dspy.settings.configure(lm=gpt_vision)
# construct the module
gptv_mod = GPT4V()
# Call with input arguments.
# The image is passed as a base64 string; forward() gathers it into *images.
img = encode_image_to_string_from_path("./chicken_cluster.jpg")
response = gptv_mod("What is the main subject of the image?", img)
# Inspect the generated answer(s).
response.completions.answer
With the following output:
Traceback (most recent call last):
File "/home/antoan/dev/ai/dspy/main.py", line 47, in <module>
response = gptv_mod("What is the main subject of the image?", img)
File "/home/antoan/dev/ai/dspy/.venv/lib/python3.10/site-packages/dspy/primitives/program.py", line 26, in __call__
return self.forward(*args, **kwargs)
File "/home/antoan/dev/ai/dspy/main.py", line 36, in forward
answer = self.generate_answer(question=question, config=config)
File "/home/antoan/dev/ai/dspy/.venv/lib/python3.10/site-packages/dspy/predict/predict.py", line 49, in __call__
return self.forward(**kwargs)
File "/home/antoan/dev/ai/dspy/.venv/lib/python3.10/site-packages/dspy/predict/chain_of_thought.py", line 59, in forward
return super().forward(signature=signature, **kwargs)
File "/home/antoan/dev/ai/dspy/.venv/lib/python3.10/site-packages/dspy/predict/predict.py", line 91, in forward
x, C = dsp.generate(template, **config)(x, stage=self.stage)
File "/home/antoan/dev/ai/dspy/.venv/lib/python3.10/site-packages/dsp/primitives/predict.py", line 77, in do_generate
completions: list[dict[str, Any]] = generator(prompt, **kwargs)
File "/home/antoan/dev/ai/dspy/.venv/lib/python3.10/site-packages/dsp/modules/gpt3.py", line 184, in __call__
response = self.request(prompt, **kwargs)
File "/home/antoan/dev/ai/dspy/.venv/lib/python3.10/site-packages/backoff/_sync.py", line 105, in retry
ret = target(*args, **kwargs)
File "/home/antoan/dev/ai/dspy/.venv/lib/python3.10/site-packages/dsp/modules/gpt3.py", line 150, in request
return self.basic_request(prompt, **kwargs)
File "/home/antoan/dev/ai/dspy/.venv/lib/python3.10/site-packages/dsp/modules/gpt3.py", line 123, in basic_request
response = chat_request(**kwargs)
File "/home/antoan/dev/ai/dspy/.venv/lib/python3.10/site-packages/dsp/modules/gpt3.py", line 271, in chat_request
return v1_cached_gpt3_turbo_request_v2_wrapped(**kwargs).model_dump()
File "/home/antoan/dev/ai/dspy/.venv/lib/python3.10/site-packages/dsp/modules/cache_utils.py", line 16, in wrapper
return func(*args, **kwargs)
File "/home/antoan/dev/ai/dspy/.venv/lib/python3.10/site-packages/dsp/modules/gpt3.py", line 264, in v1_cached_gpt3_turbo_request_v2_wrapped
return v1_cached_gpt3_turbo_request_v2(**kwargs)
File "/home/antoan/dev/ai/dspy/.venv/lib/python3.10/site-packages/joblib/memory.py", line 655, in __call__
return self._cached_call(args, kwargs)[0]
File "/home/antoan/dev/ai/dspy/.venv/lib/python3.10/site-packages/joblib/memory.py", line 598, in _cached_call
out, metadata = self.call(*args, **kwargs)
File "/home/antoan/dev/ai/dspy/.venv/lib/python3.10/site-packages/joblib/memory.py", line 856, in call
output = self.func(*args, **kwargs)
File "/home/antoan/dev/ai/dspy/.venv/lib/python3.10/site-packages/dsp/modules/gpt3.py", line 258, in v1_cached_gpt3_turbo_request_v2
return openai.chat.completions.create(**kwargs)
File "/home/antoan/dev/ai/dspy/.venv/lib/python3.10/site-packages/openai/_utils/_utils.py", line 275, in wrapper
return func(*args, **kwargs)
TypeError: Completions.create() got an unexpected keyword argument 'images'
Would this not require creating a client wrapper to pass the input images through, similar to the Databricks integration mentioned earlier in this thread?
It's because you have to make a change here: https://github.com/stanfordnlp/dspy/pull/675
DSPy currently only supports text-only LLMs, but the OpenAI library also supports vision language models. Could we also support GPT-4V requests?