bytedance / Shot2Story

A new multi-shot video understanding benchmark Shot2Story with comprehensive video summaries and detailed shot-level captions.
https://mingfei.info/shot2story
90 stars 6 forks source link

run demo_video.py error #16

Open lujuncong2000 opened 2 days ago

lujuncong2000 commented 2 days ago

Hello! I modified demo_video.py as follows: import argparse import os import random

import numpy as np import torch import torch.backends.cudnn as cudnn import gradio as gr

import lavis.tasks as tasks

from lavis.common.config import Config from lavis.common.dist_utils import get_rank from lavis.common.registry import registry from lavis.conversation.conversation import Chat, CONV_VISION_MS, CONV_VISION_MS_TEXT # imports modules for registration

from lavis.datasets.builders import * from lavis.models import * from lavis.processors import * from lavis.runners import * from lavis.tasks import *

def parse_args(): parser = argparse.ArgumentParser(description="Demo") parser.add_argument("--cfg-path", default="lavis/projects/blip2/eval/demo.yaml", help="path to configuration file.") parser.add_argument("--gpu-id", type=int, default=0, help="specify the gpu to load the model.") parser.add_argument( "--options", nargs="+", help="override some settings in the used config, the key-value pair " "in xxx=yyy format will be merged into config file (deprecate), " "change to --cfg-options instead.", ) args = parser.parse_args() return args

def setup_seeds(config): seed = config.run_cfg.seed + get_rank()

random.seed(seed) np.random.seed(seed) torch.manual_seed(seed)

cudnn.benchmark = False cudnn.deterministic = True

print('Initializing Chat') args = parse_args() cfg = Config(args)

model_config = cfg.model_cfg model_config.device_8bit = args.gpu_id model_cls = registry.get_model_class(model_config.arch) print("model_cls", model_cls) model = model_cls.from_config(model_config).to('cuda:{}'.format(args.gpu_id))

TODO

task = tasks.setup_task(cfg) dataset = task.build_datasets(cfg)

print(cfg.dict) pre_cfg = cfg.config.preprocess

vis_processor_cfg = pre_cfg.vis_processor.eval print(vis_processor_cfg.dict) vis_processor = registry.get_processor_class(vis_processor_cfg.name).from_config(vis_processor_cfg)

vis_processors, txt_processors = load_preprocess(pre_cfg)

vis_processor = vis_processors['eval']

chat = Chat(model, vis_processor, task=task, dataset=dataset, device='cuda:{}'.format(args.gpu_id)) print('Initialization Finished')

def upload_vid(gr_vid, temperature=0.1, input_splits=""): chat_state = CONV_VISION_MS.copy() if input_splits == 'Automatic detection': input_splits = '' img_list = [] llm_message = chat.upload_video_ms_standalone(gr_vid, chat_state, img_list, input_splits=input_splits) chat.ask("Please describe this video in detail.", chat_state) summary = chat.answer(conv=chat_state, num_beams=1, temperature=temperature, max_new_tokens=650, max_length=2048)[0][0] print(gr_vid, summary) return summary

import os

video_path = "/media/cv/09C1B27DA5EB573A/ASIT" video_child_path = os.listdir(video_path)

for v_id in video_child_path: upload_vid(video_child_path)

But the error is: Initialization Finished /hy-tmp/Shot2Story-temp/code/lavis/conversation/conversation.py:210: UserWarning: The given NumPy array is not writable, and PyTorch does not support non-writable tensors. This means writing to this tensor will result in undefined behavior. You may want to copy the array to protect its data or make it writable before converting it to a tensor. This type of warning will be suppressed for the rest of this program. (Triggered internally at /opt/conda/conda-bld/pytorch_1646755903507/work/torch/csrc/utils/tensor_numpy.cpp:178.) resized_frame = torch.from_numpy(frame).permute(2, 0, 1) ERROR:pyscenedetect:VideoManager is deprecated and will be removed. INFO:pyscenedetect:Loaded 1 video, framerate: 59.940 FPS, resolution: 1920 x 1080 INFO:pyscenedetect:Downscale factor set to 7, effective resolution: 274 x 154 INFO:pyscenedetect:Detecting scenes... ERROR:pyscenedetect:base_timecode argument is deprecated and has no effect. Scenes from /hy-tmp/dataset/ASIT/gBR_sFM_c01_d04_mBR0_ch01.mp4: New scene detection results 1 0 flexible_sampling Traceback (most recent call last): File "demo.py", line 83, in upload_video(video_path) File "demo.py", line 70, in upload_video llm_message = chat.upload_video_ms_standalone(gr_vid, chat_state, img_list, input_splits=input_splits) File "/hy-tmp/Shot2Story-temp/code/lavis/conversation/conversation.py", line 645, in upload_video_ms_standalone self.samples = get_split(video, self.vis_processor.transform, dataset, self.transnet_model, self.asr_model, sampling='headtail', input_splits=input_splits) File "/hy-tmp/Shot2Story-temp/code/lavis/conversation/conversation.py", line 467, in get_split frms = transform(frms) File "/usr/local/miniconda3/envs/shot2story/lib/python3.8/site-packages/torchvision/transforms/transforms.py", line 95, in call img = t(img) File "/hy-tmp/Shot2Story-temp/code/lavis/processors/transforms_video.py", line 129, in call return F.normalize(clip, self.mean, self.std, self.inplace) 
File "/hy-tmp/Shot2Story-temp/code/lavis/processors/functionalvideo.py", line 108, in normalize clip.sub(mean[:, None, None, None]).div_(std[:, None, None, None]) RuntimeError: The size of tensor a (224) must match the size of tensor b (3) at non-singleton dimension 0

What should I do? I need your help. Also, when I try your online demo, it doesn't work. Thank you very much!

youthHan commented 2 days ago

Let me handle the online demo first.

UPD: the demo is fixed.

lujuncong2000 commented 2 days ago

Thank you a lot!

发自我的iPhone

------------------ Original ------------------ From: Mingfei Han @.> Date: Tue,Sep 24,2024 11:36 PM To: bytedance/Shot2Story @.> Cc: Briefness @.>, Author @.> Subject: Re: [bytedance/Shot2Story] run demo_video.py error (Issue #16)

Let me handle the online demo first.

— Reply to this email directly, view it on GitHub, or unsubscribe. You are receiving this because you authored the thread.Message ID: @.***>