I use Anthropic's Claude models via Amazon Bedrock. I wanted to try the new Claude 3.5 Sonnet v2 model with PDF attachment support, but it's not working. I am using Python and have tried both the boto3 library (bedrock-runtime client) and anthropic.AnthropicBedrock. I could not find any documentation from either AWS or Anthropic on how to attach a PDF file, so I'm proceeding by trial and error.

Boto

For example, I tried the Bedrock playground, and it works in the UI. I exported the messages as JSON (see screenshot) and then wrote the same request in Python using boto3.

import os
import json
from io import BytesIO
from base64 import b64encode
from dotenv import load_dotenv

import boto3
from botocore.config import Config
from anthropic import AnthropicBedrock

def query_claude_v2():
    """Ask Claude 3.5 Sonnet v2 on Bedrock to process a PDF via boto3 ``invoke_model``.

    Calls ``load_dotenv()``, reads ``release/data/11.pdf`` and posts a single
    user message made of a text prompt followed by a ``document`` content
    block to a ``bedrock-runtime`` client. The AWS region and credentials are
    read from ``os.environ``.

    Returns:
        The ``text`` field of the first entry in the response body's
        ``content`` list.
    """
    load_dotenv()

    pdf_path = "release/data/11.pdf"
    with open(pdf_path, "rb") as pdf_file:
        # list() over bytes yields integer byte values, which json.dumps can
        # serialize (raw bytes cannot be).
        pdf_byte_values = list(pdf_file.read())

    text_block = {"type": "text", "text": "extract this file"}
    # NOTE(review): this "attrs" shape presumably mirrors the JSON exported
    # from the Bedrock playground -- confirm it is valid for invoke_model.
    document_block = {
        "type": "document",
        "attrs": {
            "format": "pdf",
            "name": "11.pdf",
            "source": {"bytes": pdf_byte_values},
        },
    }

    env = os.environ
    bedrock_runtime = boto3.Session().client(
        service_name="bedrock-runtime",
        region_name=env["region"],
        aws_access_key_id=env["AWS_ACCESS_KEY_ID"],
        aws_secret_access_key=env["AWS_SECRET_ACCESS_KEY"],
        aws_session_token=env["AWS_SESSION_TOKEN"],
        config=Config(read_timeout=600),  # 600 seconds = 10 minutes
    )

    payload = json.dumps(
        {
            "anthropic_version": "bedrock-2023-05-31",
            "max_tokens": 8192,
            "messages": [
                {"role": "user", "content": [text_block, document_block]}
            ],
            "temperature": 0.2,
        }
    )
    reply = bedrock_runtime.invoke_model(
        modelId="anthropic.claude-3-5-sonnet-20241022-v2:0",
        body=payload,
    )

    # The response body is a stream; read it fully before decoding the JSON.
    parsed_reply = json.loads(reply["body"].read())
    return parsed_reply["content"][0]["text"]

# Run the boto3 variant and print the text it returns.
print(query_claude_v2())

It gives me this error:

botocore.errorfactory.ValidationException: An error occurred (ValidationException) when calling the InvokeModel operation: messages.0.content.1: Input tag 'document' found using 'type' does not match any of the expected tags: 'text', 'image', 'tool_use', 'tool_result'

Maybe AWS needs to add this support in their validator.

Anthropic

I also tried the Anthropic SDK for Python with AnthropicBedrock. I based the code on PR #721 and wrote the following in Python:

import os
import json
from io import BytesIO
from base64 import b64encode
from dotenv import load_dotenv

import boto3
from botocore.config import Config
from anthropic import AnthropicBedrock

def query_claude_v2_anthropic():
    """Ask Claude 3.5 Sonnet v2 to process a PDF through ``AnthropicBedrock``.

    Calls ``load_dotenv()``, reads ``release/data/11.pdf``, base64-encodes it
    and sends it as a ``document`` content block after a text prompt using
    ``messages.create``. The AWS region and credentials are read from
    ``os.environ``.

    Returns:
        The ``text`` attribute of the first item in ``response.content``.
    """
    load_dotenv()

    with open("release/data/11.pdf", "rb") as pdf_file:
        pdf_bytes = pdf_file.read()
    # The document source carries the PDF as a base64 string, not raw bytes.
    encoded_pdf = b64encode(pdf_bytes).decode("utf-8")

    user_message = {
        "role": "user",
        "content": [
            {"type": "text", "text": "extract this file"},
            {
                "type": "document",
                "source": {
                    "type": "base64",
                    "media_type": "application/pdf",
                    "data": encoded_pdf,
                },
            },
        ],
    }

    env = os.environ
    client = AnthropicBedrock(
        aws_access_key=env["AWS_ACCESS_KEY_ID"],
        aws_secret_key=env["AWS_SECRET_ACCESS_KEY"],
        aws_session_token=env["AWS_SESSION_TOKEN"],
        aws_region=env["region"],
        timeout=600,
    )

    reply = client.messages.create(
        model="anthropic.claude-3-5-sonnet-20241022-v2:0",
        max_tokens=8192,
        temperature=0.2,
        messages=[user_message],
    )
    return reply.content[0].text

# Run the AnthropicBedrock variant and print the text it returns.
print(query_claude_v2_anthropic())

It gives me this error:

anthropic.BadRequestError: Error code: 400 - {'message': "messages.0.content.1: Input tag 'document' found using 'type' does not match any of the expected tags: 'text', 'image', 'tool_use', 'tool_result'"}

Thank you @RobertCraigie. I have modified both functions to include the anthropic-beta header, but I am still getting the same error:

import os
import json
from io import BytesIO
from base64 import b64encode
from dotenv import load_dotenv

import boto3
from botocore.config import Config
from anthropic import AnthropicBedrock

def query_claude_v2():
    """Send a PDF to Claude 3.5 Sonnet v2 via boto3, adding an ``anthropic-beta`` header.

    Calls ``load_dotenv()``, reads the PDF from a hard-coded local path and
    posts one user message (text prompt plus ``document`` block) with
    ``invoke_model``. A ``before-send`` event handler sets the
    ``anthropic-beta`` header on the outgoing request and prints the request.

    Returns:
        The ``text`` field of the first entry in the response body's
        ``content`` list.
    """
    load_dotenv()

    user_content = [{"type": "text", "text": "extract this file"}]
    with open("/Users/shivam/Downloads/11-1-43-9-11.pdf", "rb") as pdf_file:
        raw_pdf = pdf_file.read()
    user_content.append(
        {
            "type": "document",
            "attrs": {
                "format": "pdf",
                # list() turns the bytes into JSON-serializable integers.
                "source": {"bytes": list(raw_pdf)},
            },
        }
    )

    session = boto3.Session()
    runtime = session.client(
        service_name="bedrock-runtime",
        region_name=os.environ["region"],
        aws_access_key_id=os.environ["AWS_ACCESS_KEY_ID"],
        aws_secret_access_key=os.environ["AWS_SECRET_ACCESS_KEY"],
        aws_session_token=os.environ["AWS_SESSION_TOKEN"],
        config=Config(read_timeout=600),  # 600 seconds = 10 minutes
    )

    def _attach_beta_header(request, **kwargs):
        # Keep the parameter named ``request``: presumably botocore passes it
        # by keyword when the event fires -- verify before renaming.
        request.headers['anthropic-beta'] = 'pdfs-2024-09-25'
        print(request)

    runtime.meta.events.register(
        'before-send.bedrock-runtime.InvokeModel',
        _attach_beta_header,
    )

    payload = json.dumps(
        {
            "anthropic_version": "bedrock-2023-05-31",
            "max_tokens": 8192,
            "messages": [{"role": "user", "content": user_content}],
            "temperature": 0.2,
        }
    )
    reply = runtime.invoke_model(
        modelId="anthropic.claude-3-5-sonnet-20241022-v2:0",
        body=payload,
    )

    parsed = json.loads(reply["body"].read())
    return parsed["content"][0]["text"]

def query_claude_v2_anthropic():
    """Send a PDF to Claude 3.5 Sonnet v2 via ``AnthropicBedrock`` using the beta messages API.

    Calls ``load_dotenv()``, reads the PDF from a hard-coded local path,
    base64-encodes it and submits it as a ``document`` content block through
    ``beta.messages.create`` with ``betas=['pdfs-2024-09-25']``. The AWS
    region and credentials are read from ``os.environ``.

    Returns:
        The ``text`` attribute of the first item in ``response.content``.
    """
    load_dotenv()

    with open("/Users/shivam/Downloads/11-1-43-9-11.pdf", "rb") as pdf_file:
        pdf_bytes = pdf_file.read()
    # The document source carries the PDF as a base64 string, not raw bytes.
    pdf_b64 = b64encode(pdf_bytes).decode("utf-8")

    document_block = {
        "type": "document",
        "source": {
            "type": "base64",
            "media_type": "application/pdf",
            "data": pdf_b64,
        },
    }
    conversation = [
        {
            "role": "user",
            "content": [
                {"type": "text", "text": "extract this file"},
                document_block,
            ],
        }
    ]

    env = os.environ
    client = AnthropicBedrock(
        aws_access_key=env["AWS_ACCESS_KEY_ID"],
        aws_secret_key=env["AWS_SECRET_ACCESS_KEY"],
        aws_session_token=env["AWS_SESSION_TOKEN"],
        aws_region=env["region"],
        timeout=600,
    )

    reply = client.beta.messages.create(
        model="anthropic.claude-3-5-sonnet-20241022-v2:0",
        max_tokens=8192,
        temperature=0.2,
        betas=['pdfs-2024-09-25'],
        messages=conversation,
    )
    return reply.content[0].text

# Only the AnthropicBedrock variant is run here; the boto3 call is left commented out.
# print(query_claude_v2())
print(query_claude_v2_anthropic())

Error

anthropic.BadRequestError: Error code: 400 - {'message': "messages.0.content.1: Input tag 'document' found using 'type' does not match any of the expected tags: 'text', 'image', 'tool_use', 'tool_result'"}

I suppose AWS or Anthropic is running some kind of validation that does not allow the "document" content type (type: document).

anthropics / anthropic-sdk-python

Bedrock Claude 3.5 Sonnet v2 is not supporting new attachments (PDF) #725

Boto

Anthropic