DAMO-NLP-SG / VideoLLaMA2

VideoLLaMA 2: Advancing Spatial-Temporal Modeling and Audio Understanding in Video-LLMs
Apache License 2.0

Deployment on huggingface endpoints #86

Open aliayub40995 opened 2 months ago

aliayub40995 commented 2 months ago

I want to deploy the model on Hugging Face Inference Endpoints and run inference against it. I have created a handler.py file to deploy the model and a test.py file that runs inference via an API call to the endpoint. The model deploys successfully. What are the available ways to pass a video to the endpoint?

Handler.py:

from typing import Dict, List, Any
import sys
sys.path.append('./')
import logging

from videollama2 import model_init, mm_infer
from videollama2.utils import disable_torch_init

class EndpointHandler:
    def __init__(self, path: str = ""):
        disable_torch_init()
        self.model_path = 'DAMO-NLP-SG/VideoLLaMA2-7B'
        # model_init returns the model, a dict of per-modal preprocessors, and the tokenizer.
        self.model, self.processor, self.tokenizer = model_init(self.model_path)

    def __call__(self, data: Dict[str, Any]) -> List[Dict[str, Any]]:
        logging.info(f"Received data: {data}")

        # Hugging Face endpoints wrap the payload in an "inputs" key; fall back to the raw dict.
        inputs = data.get("inputs", data)
        modal = inputs.get("modal", "video")
        modal_path = inputs.get("modal_path", "")
        instruct = inputs.get("instruct", "")

        logging.info(f"Modal: {modal}, Modal Path: {modal_path}, Instruct: {instruct}")

        if not modal_path or not instruct:
            raise ValueError("Both 'modal_path' and 'instruct' must be provided in the input data.")

        # Preprocess the input file with the modal-specific processor, then run inference.
        output = mm_infer(
            self.processor[modal](modal_path),
            instruct,
            model=self.model,
            tokenizer=self.tokenizer,
            do_sample=False,
            modal=modal
        )

        return [{"output": output}]

Test.py:

import requests

# Fill in the endpoint URL and a "Bearer <token>" Authorization header before running.
API_URL = ""
headers = {
    "Accept": "application/json",
    "Authorization": "",
    "Content-Type": "application/json"
}

def query(payload):
    response = requests.post(API_URL, headers=headers, json=payload)
    return response.json()

payload = {
    "inputs": {
        "modal": "video",
        "modal_path": "",  # how to supply the video here is the open question
        "instruct": "Describe what is happening in the video along with timestamps"
    }
}

output = query(payload)
print(output)
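
For completeness, here is a client-side sketch that matches the resolver above: it base64-encodes a local video so it fits inside the JSON body. The `modal_b64` field is the same hypothetical name, and the URL, token, and filename are placeholders.

import base64
import requests

API_URL = "https://<your-endpoint>.endpoints.huggingface.cloud"  # placeholder endpoint URL
headers = {
    "Accept": "application/json",
    "Authorization": "Bearer <hf_token>",  # placeholder access token
    "Content-Type": "application/json",
}

# Read a local video and base64-encode it so it can travel inside a JSON body.
with open("my_video.mp4", "rb") as f:  # placeholder filename
    video_b64 = base64.b64encode(f.read()).decode("utf-8")

payload = {
    "inputs": {
        "modal": "video",
        "modal_b64": video_b64,  # hypothetical field consumed by the resolver sketch above
        "instruct": "Describe what is happening in the video along with timestamps",
    }
}

print(requests.post(API_URL, headers=headers, json=payload).json())

Keep in mind that base64 inflates the payload by roughly a third, so for longer videos a downloadable URL is usually the more practical option.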
clownrat6 commented 2 months ago

Could you provide us with a deployment demo for testing, or describe the problem you encounter when inputting video?

LiangMeng89 commented 3 days ago

Hello, I'm a PhD student from ZJU. I also use VideoLLaMA2 in my own research. We created a WeChat group to discuss VideoLLaMA2 issues and help each other; could you join us? Please contact me: WeChat number == LiangMeng19357260600, phone number == +86 19357260600, e-mail == liangmeng89@zju.edu.cn.