Open lujuncong2000 opened 2 days ago
Let me handle the online demo first.
UPD: the demo is fixed.
Thank you a lot!
发自我的iPhone
------------------ Original ------------------ From: Mingfei Han @.> Date: Tue,Sep 24,2024 11:36 PM To: bytedance/Shot2Story @.> Cc: Briefness @.>, Author @.> Subject: Re: [bytedance/Shot2Story] run demo_video.py error (Issue #16)
Let me handle the online demo first.
— Reply to this email directly, view it on GitHub, or unsubscribe. You are receiving this because you authored the thread.Message ID: @.***>
Hello! I modified demo_video.py as follows: import argparse import os import random
import numpy as np import torch import torch.backends.cudnn as cudnn import gradio as gr
import lavis.tasks as tasks
from lavis.common.config import Config from lavis.common.dist_utils import get_rank from lavis.common.registry import registry from lavis.conversation.conversation import Chat, CONV_VISION_MS, CONV_VISION_MS_TEXT # imports modules for registration
from lavis.datasets.builders import * from lavis.models import * from lavis.processors import * from lavis.runners import * from lavis.tasks import *
def parse_args(): parser = argparse.ArgumentParser(description="Demo") parser.add_argument("--cfg-path", default="lavis/projects/blip2/eval/demo.yaml", help="path to configuration file.") parser.add_argument("--gpu-id", type=int, default=0, help="specify the gpu to load the model.") parser.add_argument( "--options", nargs="+", help="override some settings in the used config, the key-value pair " "in xxx=yyy format will be merged into config file (deprecate), " "change to --cfg-options instead.", ) args = parser.parse_args() return args
def setup_seeds(config): seed = config.run_cfg.seed + get_rank()
random.seed(seed) np.random.seed(seed) torch.manual_seed(seed)
cudnn.benchmark = False cudnn.deterministic = True
print('Initializing Chat') args = parse_args() cfg = Config(args)
model_config = cfg.model_cfg model_config.device_8bit = args.gpu_id model_cls = registry.get_model_class(model_config.arch) print("model_cls", model_cls) model = model_cls.from_config(model_config).to('cuda:{}'.format(args.gpu_id))
TODO
task = tasks.setup_task(cfg) dataset = task.build_datasets(cfg)
print(cfg.__dict__) pre_cfg = cfg.config.preprocess
vis_processor_cfg = pre_cfg.vis_processor.eval print(vis_processor_cfg.__dict__) vis_processor = registry.get_processor_class(vis_processor_cfg.name).from_config(vis_processor_cfg)
vis_processors, txt_processors = load_preprocess(pre_cfg)
vis_processor = vis_processors['eval']
chat = Chat(model, vis_processor, task=task, dataset=dataset, device='cuda:{}'.format(args.gpu_id)) print('Initialization Finished')
def upload_vid(gr_vid, temperature=0.1, input_splits=""): chat_state = CONV_VISION_MS.copy() if input_splits == 'Automatic detection': input_splits = '' img_list = [] llm_message = chat.upload_video_ms_standalone(gr_vid, chat_state, img_list, input_splits=input_splits) chat.ask("Please describe this video in detail.", chat_state) summary = chat.answer(conv=chat_state, num_beams=1, temperature=temperature, max_new_tokens=650, max_length=2048)[0][0] print(gr_vid, summary) return summary
import os
video_path = "/media/cv/09C1B27DA5EB573A/ASIT" video_child_path = os.listdir(video_path)
for v_id in video_child_path: upload_vid(video_child_path)
But the error is: Initialization Finished /hy-tmp/Shot2Story-temp/code/lavis/conversation/conversation.py:210: UserWarning: The given NumPy array is not writable, and PyTorch does not support non-writable tensors. This means writing to this tensor will result in undefined behavior. You may want to copy the array to protect its data or make it writable before converting it to a tensor. This type of warning will be suppressed for the rest of this program. (Triggered internally at /opt/conda/conda-bld/pytorch_1646755903507/work/torch/csrc/utils/tensor_numpy.cpp:178.) resized_frame = torch.from_numpy(frame).permute(2, 0, 1) ERROR:pyscenedetect:VideoManager is deprecated and will be removed. INFO:pyscenedetect:Loaded 1 video, framerate: 59.940 FPS, resolution: 1920 x 1080 INFO:pyscenedetect:Downscale factor set to 7, effective resolution: 274 x 154 INFO:pyscenedetect:Detecting scenes... ERROR:pyscenedetect:base_timecode argument is deprecated and has no effect. Scenes from /hy-tmp/dataset/ASIT/gBR_sFM_c01_d04_mBR0_ch01.mp4: New scene detection results 1 0 flexible_sampling Traceback (most recent call last): File "demo.py", line 83, in upload_video(video_path) File "demo.py", line 70, in upload_video llm_message = chat.upload_video_ms_standalone(gr_vid, chat_state, img_list, input_splits=input_splits) File "/hy-tmp/Shot2Story-temp/code/lavis/conversation/conversation.py", line 645, in upload_video_ms_standalone self.samples = get_split(video, self.vis_processor.transform, dataset, self.transnet_model, self.asr_model, sampling='headtail', input_splits=input_splits) File "/hy-tmp/Shot2Story-temp/code/lavis/conversation/conversation.py", line 467, in get_split frms = transform(frms) File "/usr/local/miniconda3/envs/shot2story/lib/python3.8/site-packages/torchvision/transforms/transforms.py", line 95, in __call__ img = t(img) File "/hy-tmp/Shot2Story-temp/code/lavis/processors/transforms_video.py", line 129, in __call__ return F.normalize(clip, self.mean, self.std, self.inplace)
File "/hy-tmp/Shot2Story-temp/code/lavis/processors/functional_video.py", line 108, in normalize clip.sub_(mean[:, None, None, None]).div_(std[:, None, None, None]) RuntimeError: The size of tensor a (224) must match the size of tensor b (3) at non-singleton dimension 0
What should I do? I need your help. Also, when I try your online demo, it doesn't work. Thank you very much!