Closed varungupta31 closed 2 months ago
A recent commit (by @ZhangYuanhan-AI) on the Video Demo Code has added multiple syntactical bugs.
The commit https://github.com/LLaVA-VL/LLaVA-NeXT/commit/066ea451b263f408a00d5fbfa0daa1ef71c33796
def parse_args(): """ Parse command-line arguments. """ parser = argparse.ArgumentParser() # Define the command-line arguments parser.add_argument("--video_path", help="Path to the video files.", required=True) parser.add_argument("--output_dir", help="Directory to save the model results JSON.", required=True) parser.add_argument("--output_name", help="Name of the file for storing results JSON.", required=True) parser.add_argument("--model-path", type=str, default="facebook/opt-350m") parser.add_argument("--model-base", type=str, default=None) parser.add_argument("--conv-mode", type=str, default=None) parser.add_argument("--chunk-idx", type=int, default=0) parser.add_argument("--mm_resampler_type", type=str, default="spatial_pool") parser.add_argument("--mm_spatial_pool_stride", type=int, default=4) parser.add_argument("--mm_spatial_pool_out_channels", type=int, default=1024) parser.add_argument("--mm_spatial_pool_mode", type=str, default="average") parser.add_argument("--image_aspect_ratio", type=str, default="anyres") parser.add_argument("--image_grid_pinpoints", type=str, default="[(224, 448), (224, 672), (224, 896), (448, 448), (448, 224), (672, 224), (896, 224)]") parser.add_argument("--mm_patch_merge_type", type=str, default="spatial_unpad") parser.add_argument("--overwrite", type=lambda x: (str(x).lower() == 'true'), default=True) parser.add_argument("--for_get_frames_num", type=int, default=4) parser.add_argument("--load_8bit", type=lambda x: (str(x).lower() == 'true'), default=False) parser.add_argument("--prompt", type=str, default=None) parser.add_argument("--api_key", type=str, help="OpenAI API key") parser.add_argument("--mm_newline_position", type=str, default="no_token") parser.add_argument("--force_sample", type=lambda x: (str(x).lower() == 'true'), default=False) parser.add_argument("--add_time_instruction", type=str, default=False) return parser.parse_args() def load_video(video_path,args): if max_frames_num == 0: return np.zeros((1, 336, 336, 3)) vr = VideoReader(video_path, ctx=cpu(0),num_threads=1) total_frame_num = len(vr) video_time = total_frame_num / vr.get_avg_fps() fps = round(vr.get_avg_fps()/fps) frame_idx = [i for i in range(0, len(vr), fps)] frame_time = [i/fps for i in frame_idx] if len(frame_idx) > args.for_get_frames_num or args.force_sample: sample_fps = max_frames_num uniform_sampled_frames = np.linspace(0, total_frame_num - 1, sample_fps, dtype=int) frame_idx = uniform_sampled_frames.tolist() frame_time = [i/vr.get_avg_fps() for i in frame_idx] frame_time = ",".join([f"{i:.2f}s" for i in frame_time]) spare_frames = vr.get_batch(frame_idx).asnumpy() # import pdb;pdb.set_trace() return spare_frames,frame_time,video_time
the def load_video has inconsistent tab spaces.
def load_video
Further, to name a few:
max_frames_num
fps
https://github.com/LLaVA-VL/LLaVA-NeXT/pull/205
Waiting for this merge.
I see, thank you @ZhangYuanhan-AI :)
A recent commit (by @ZhangYuanhan-AI) on the Video Demo Code has added multiple syntactical bugs.
The commit https://github.com/LLaVA-VL/LLaVA-NeXT/commit/066ea451b263f408a00d5fbfa0daa1ef71c33796
the
def load_video
has inconsistent tab spaces.Further, to name a few:
max_frames_num
is not defined anywhere.fps
is referenced before the assignment