langchain-ai / langchain-google

MIT License
117 stars 147 forks source link

Failed to chat image with base64 format. #612

Closed pleomax0730 closed 29 minutes ago

pleomax0730 commented 23 hours ago

Passed case

Environment:

langchain                                 0.3.4
langchain-community                       0.3.3
langchain-core                            0.3.19
langchain-google-community                2.0.1
langchain-google-genai                    2.0.1
langchain-google-vertexai                 2.0.7

Code to reproduce the error:

import base64
import os
from pathlib import Path
from urllib.error import URLError
from urllib.request import urlretrieve

from langchain_core.messages import HumanMessage, SystemMessage
from langchain_google_vertexai.chat_models import ChatVertexAI

# Fetch the sample image(s) used by the repro from the langfuse-python repo.
REPO_URL = "https://github.com/langfuse/langfuse-python"
download_path = "static"
os.makedirs(download_path, exist_ok=True)

test_files = ["puton.jpg"]
raw_url = f"{REPO_URL}/raw/main/{download_path}"

for name in test_files:
    source = f"{raw_url}/{name}"
    destination = f"{download_path}/{name}"
    try:
        urlretrieve(source, destination)
    except URLError as err:
        # Network-level failure (DNS, HTTP error, ...).
        print(f"Failed to download {name}: {err}")
    except OSError as err:
        # Local filesystem failure while writing the file.
        print(f"Failed to save {name}: {err}")
    else:
        print(f"Successfully downloaded: {name}")

# Base64-encode the downloaded image for use in an inline data URL.
file_path = Path("static/puton.jpg")
image_data = base64.b64encode(file_path.read_bytes()).decode("utf-8")

query = "What's in the picture?"

# Deterministic generation settings for the reproduction.
model_kwargs = {
    "temperature": 0.0,
    "max_output_tokens": 1000,
    "top_p": 0.95,
    "top_k": None,
}

llm = ChatVertexAI(model_name="gemini-1.5-flash-002", **model_kwargs)

# Multimodal prompt: one text part plus the image as an inline base64 JPEG
# data URL (the path that triggers the MIME-type detection in the library).
text_part = {
    "type": "text",
    "text": query,
}
image_part = {
    "type": "image_url",
    "image_url": {"url": f"data:image/jpeg;base64,{image_data}"},
}
messages = [
    SystemMessage(
        content="""Describe the image in detail. Example: {"description": "..."}"""
    ),
    HumanMessage([text_part, image_part]),
]
llm.invoke(messages)

Error:

Traceback (most recent call last):
  File "C:\Users\User\Desktop\code\test_fail.py", line 40, in <module>
    llm.invoke(
  File "C:\Users\User\miniconda3\envs\llm\lib\site-packages\langchain_core\language_models\chat_models.py", line 286, in invoke
    self.generate_prompt(
  File "C:\Users\User\miniconda3\envs\llm\lib\site-packages\langchain_core\language_models\chat_models.py", line 786, in generate_prompt
    return self.generate(prompt_messages, stop=stop, callbacks=callbacks, **kwargs)
  File "C:\Users\User\miniconda3\envs\llm\lib\site-packages\langchain_core\language_models\chat_models.py", line 643, in generate
    raise e
  File "C:\Users\User\miniconda3\envs\llm\lib\site-packages\langchain_core\language_models\chat_models.py", line 633, in generate
    self._generate_with_cache(
  File "C:\Users\User\miniconda3\envs\llm\lib\site-packages\langchain_core\language_models\chat_models.py", line 851, in _generate_with_cache
    result = self._generate(
  File "C:\Users\User\miniconda3\envs\llm\lib\site-packages\langchain_google_vertexai\chat_models.py", line 1220, in _generate
    return self._generate_gemini(
  File "C:\Users\User\miniconda3\envs\llm\lib\site-packages\langchain_google_vertexai\chat_models.py", line 1388, in _generate_gemini
    request = self._prepare_request_gemini(messages=messages, stop=stop, **kwargs)
  File "C:\Users\User\miniconda3\envs\llm\lib\site-packages\langchain_google_vertexai\chat_models.py", line 1301, in _prepare_request_gemini
    system_instruction, contents = _parse_chat_history_gemini(messages)
  File "C:\Users\User\miniconda3\envs\llm\lib\site-packages\langchain_google_vertexai\chat_models.py", line 307, in _parse_chat_history_gemini
    parts = _convert_to_parts(message)
  File "C:\Users\User\miniconda3\envs\llm\lib\site-packages\langchain_google_vertexai\chat_models.py", line 275, in _convert_to_parts
    part = _convert_to_prompt(raw_part)
  File "C:\Users\User\miniconda3\envs\llm\lib\site-packages\langchain_google_vertexai\chat_models.py", line 222, in _convert_to_prompt
    return ImageBytesLoader(project=project).load_gapic_part(path)
  File "C:\Users\User\miniconda3\envs\llm\lib\site-packages\langchain_google_vertexai\_image_utils.py", line 127, in load_gapic_part
    part = self.load_part(image_string)
  File "C:\Users\User\miniconda3\envs\llm\lib\site-packages\langchain_google_vertexai\_image_utils.py", line 124, in load_part
    return Part.from_image(Image.from_bytes(bytes_))
  File "C:\Users\User\miniconda3\envs\llm\lib\site-packages\vertexai\generative_models\_generative_models.py", line 2443, in from_image
    return Part.from_data(data=image.data, mime_type=image._mime_type)
  File "C:\Users\User\miniconda3\envs\llm\lib\site-packages\vertexai\generative_models\_generative_models.py", line 2901, in _mime_type
    return _FORMAT_TO_MIME_TYPE[self._pil_image.format.lower()]
KeyError: 'mpo'
jzaldi commented 1 hour ago

Cannot reproduce this error, seems to work fine.

May be a problem with google-cloud-aiplatform version

pleomax0730 commented 54 minutes ago

Cannot reproduce this error, seems to work fine.

May be a problem with google-cloud-aiplatform version

Hi @jzaldi , Thanks for the reply.

Found that Pillow's `PIL.Image` recognizes the image format as MPO rather than JPEG.

Forcing a re-save of the file as JPEG worked.

from PIL import Image

# Workaround: re-encode the file in place so Pillow identifies it as JPEG
# (the original bytes were detected as MPO, which the SDK cannot map to a
# MIME type).
img = Image.open('static/puton.jpg')
try:
    img.save('static/puton.jpg', format='JPEG')
finally:
    img.close()