InternLM / InternLM-XComposer

InternLM-XComposer-2.5: A Versatile Large Vision Language Model Supporting Long-Contextual Input and Output
Apache License 2.0
2.47k stars 153 forks source link

fastapi部署报错 #267

Closed Zking668 closed 5 months ago

Zking668 commented 5 months ago

这是我的服务端调用代码

import os
import io
import json
from PIL import Image
import torch
import numpy as np
from transformers import AutoModelForCausalLM, AutoTokenizer
from fastapi import FastAPI, Request
import datetime
import uvicorn
from fastapi.middleware.cors import CORSMiddleware
import base64

# Initialize FastAPI app
app = FastAPI()

# Allow cross-origin requests from any host; credentials are enabled, so in
# production the wildcard origin should be narrowed to trusted domains.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["GET", "POST", "PUT", "DELETE", "OPTIONS"],
    allow_headers=["*"],
)

# Load the model and tokenizer once at startup (module import time).
# trust_remote_code=True executes the custom modelling code shipped with the
# checkpoint; .half() casts weights to fp16 and .eval() disables dropout etc.
code_path = '/root/models/internlm-xcomposer2-vl-7b'
tokenizer = AutoTokenizer.from_pretrained(code_path, trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained(code_path, device_map='cuda', trust_remote_code=True).half().eval()

# Define endpoint for handling POST requests
@app.post('/')
async def generate_text(request: Request):
    """Generate a chat response from an optional base64 image plus a text query.

    Expected JSON body:
        history: prior chat turns (or None for a fresh conversation)
        image:   image encoded as base64, optionally with a
                 "data:image/...;base64," data-URI prefix; may be absent
        text:    the user's query string

    Returns:
        dict with "response" (model reply string) and "history"
        (updated conversation state from model.chat).
    """
    # request.json() already returns a parsed dict — no need for the
    # original dumps/loads round-trip.
    request_data = await request.json()
    print("Start to process request")

    # Parse JSON data
    history = request_data.get("history")
    image_base64 = request_data.get("image")
    query = request_data.get("text")

    print(query)

    image_tensor = None
    if image_base64:
        # Log only a prefix; guarded so a missing image can't raise TypeError.
        print(image_base64[:100])
        # Accept both "data:image/...;base64,XXXX" and bare base64 payloads
        # (the original split(',')[1] raised IndexError on bare base64).
        if ',' in image_base64:
            image_base64 = image_base64.split(',', 1)[1]
        image_bytes = base64.b64decode(image_base64)
        image = Image.open(io.BytesIO(image_bytes)).convert('RGB')

        # The model expects its own preprocessing (resize/normalize), not raw
        # uint8 HWC pixels — per the maintainer, the image argument should be a
        # path string or a properly processed tensor. Use the checkpoint's
        # vis_processor when available; fall back to the raw conversion.
        # NOTE(review): batch dim / dtype assumed from the checkpoint's custom
        # code — confirm against the model card.
        if hasattr(model, 'vis_processor'):
            image_tensor = model.vis_processor(image).unsqueeze(0).to(model.device).half()
        else:
            image_tensor = torch.tensor(np.array(image)).permute(2, 0, 1)

        # Only log the shape when a tensor exists (None.shape crashed before).
        print(image_tensor.shape)

    # Call chat function to generate response
    response, updated_history = model.chat(
        tokenizer=tokenizer,
        query=query,
        image=image_tensor,
        history=history,
        max_new_tokens=150,
        do_sample=True,
        temperature=0.9,
        top_p=0.9,
        repetition_penalty=1.2
    )

    # Prepare response data
    response_data = {
        "response": response,
        "history": updated_history
    }

    return response_data

# Run the FastAPI app
if __name__ == "__main__":
    # Bind on all interfaces; workers=1 because the model is loaded in this
    # process and cannot be shared across forked workers.
    uvicorn.run(app, host='0.0.0.0', port=6006, workers=1)

上传图片和文字后,报错信息如下:[报错截图]

[截图] 我想请问这里接受的 tensor 的要求是什么?应该怎样处理接收到的图片?

yuhangzang commented 5 months ago

Please refer to this function for the expected tensor shape of the input image(s); the image argument can be either a path string or a torch tensor.