Add task types that are messages, to enable multi-modal tasks.

Resolves #4090

import asyncio
import base64
from io import BytesIO
import PIL.Image
from autogen_ext.models import OpenAIChatCompletionClient
from autogen_core.components import Image
from autogen_agentchat.messages import MultiModalMessage
from autogen_agentchat.agents import AssistantAgent

def gen_image():
    """Return a base64 string of a small PNG showing a horizontal line.

    The picture is a 100x10 white RGB image with a one-pixel black line
    drawn across its middle row. It is serialized as PNG in memory and the
    resulting bytes are base64-encoded and decoded to a UTF-8 ``str``.
    """
    size = (100, 10)  # (width, height): a tiny canvas is enough for one line
    canvas = PIL.Image.new("RGB", size, (255, 255, 255))  # white background

    # Paint every pixel of the middle row black
    mid_row = size[1] // 2
    for col in range(size[0]):
        canvas.putpixel((col, mid_row), (0, 0, 0))

    # Serialize to PNG in memory rather than touching the filesystem
    with BytesIO() as png_buffer:
        canvas.save(png_buffer, format="PNG")
        png_bytes = png_buffer.getvalue()

    return base64.b64encode(png_bytes).decode("utf-8")

async def main() -> None:
    """Run an assistant agent on a multi-modal task and print the result.

    The task passed to ``agent.run`` is a ``MultiModalMessage`` whose only
    content item is the image produced by ``gen_image``.
    """
    model_client = OpenAIChatCompletionClient(model="gpt-4o")
    assistant = AssistantAgent("assistant", model_client=model_client)

    # Wrap the generated picture in a message so it can be used as the task
    picture = Image.from_base64(gen_image())
    task_message = MultiModalMessage(content=[picture], source="user")

    print(await assistant.run(task=task_message))

# Script entry point: run the main() coroutine on a new event loop until it completes.
asyncio.run(main())

source='user' models_usage=None content=[<autogen_core.components._image.Image object at 0x7f36b0116c30>]
source='assistant' models_usage=RequestUsage(prompt_tokens=294, completion_tokens=18) content='It appears to be a thin horizontal black line. How can I assist you with it?'
TaskResult(messages=[MultiModalMessage(source='user', models_usage=None, content=[<autogen_core.components._image.Image object at 0x7f36b0116c30>]), TextMessage(source='assistant', models_usage=RequestUsage(prompt_tokens=294, completion_tokens=18), content='It appears to be a thin horizontal black line. How can I assist you with it?')], stop_reason=None)

microsoft / autogen

Add task types that are messages, to enable multi-modal tasks. #4091