How to fine-tune with more sampled frames?
The comment in the Video-LLaVA fine-tuning script says:
# We sample 8 frames for tuning following the original paper
# But we can increase the number of frames for longer videos and check out if it helps performance
# Change the below "8" to any number of frames you want, and note that more frames -> more computational resources needed
indices = np.linspace(start_id, end_id, 8).astype(int)
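For reference, this is how I read that snippet in isolation (the wrapper function and example values below are my own; only the np.linspace line comes from the script):

```python
import numpy as np

def sample_frame_indices(start_id, end_id, num_frames=8):
    # Uniformly sample `num_frames` indices from [start_id, end_id].
    # `num_frames` is the "8" the comment says you can change; the
    # function wrapper itself is just my sketch, not the script's code.
    return np.linspace(start_id, end_id, num_frames).astype(int)

# e.g. for a clip spanning frames 0..299:
print(sample_frame_indices(0, 299, num_frames=8))   # 8 evenly spaced indices
print(sample_frame_indices(0, 299, num_frames=30))  # the value I tried
```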
However, after I set it to 30 and fine-tuned, it shows:
Traceback (most recent call last):
  File "videollava_finetune_original_100.py", line 505, in <module>
    trainer.fit(model_module)
  File "/usr/local/lib/python3.8/dist-packages/lightning/pytorch/trainer/trainer.py", line 544, in fit
    call._call_and_handle_interrupt(
  File "/usr/local/lib/python3.8/dist-packages/lightning/pytorch/trainer/call.py", line 44, in _call_and_handle_interrupt
    return trainer_fn(*args, **kwargs)
  File "/usr/local/lib/python3.8/dist-packages/lightning/pytorch/trainer/trainer.py", line 580, in _fit_impl
    self._run(model, ckpt_path=ckpt_path)
  File "/usr/local/lib/python3.8/dist-packages/lightning/pytorch/trainer/trainer.py", line 987, in _run
    results = self._run_stage()
  File "/usr/local/lib/python3.8/dist-packages/lightning/pytorch/trainer/trainer.py", line 1031, in _run_stage
    self._run_sanity_check()
  File "/usr/local/lib/python3.8/dist-packages/lightning/pytorch/trainer/trainer.py", line 1060, in _run_sanity_check
    val_loop.run()
  File "/usr/local/lib/python3.8/dist-packages/lightning/pytorch/loops/utilities.py", line 182, in _decorator
    return loop_run(self, *args, **kwargs)
  File "/usr/local/lib/python3.8/dist-packages/lightning/pytorch/loops/evaluation_loop.py", line 135, in run
    self._evaluation_step(batch, batch_idx, dataloader_idx, dataloader_iter)
  File "/usr/local/lib/python3.8/dist-packages/lightning/pytorch/loops/evaluation_loop.py", line 396, in _evaluation_step
    output = call._call_strategy_hook(trainer, hook_name, *step_args)
  File "/usr/local/lib/python3.8/dist-packages/lightning/pytorch/trainer/call.py", line 309, in _call_strategy_hook
    output = fn(*args, **kwargs)
  File "/usr/local/lib/python3.8/dist-packages/lightning/pytorch/strategies/strategy.py", line 412, in validation_step
    return self.lightning_module.validation_step(*args, **kwargs)
  File "videollava_finetune_original_100.py", line 435, in validation_step
    generated_ids = self.model.generate(
  File "/usr/local/lib/python3.8/dist-packages/peft/peft_model.py", line 647, in generate
    return self.get_base_model().generate(*args, **kwargs)
  File "/usr/local/lib/python3.8/dist-packages/torch/utils/_contextlib.py", line 115, in decorate_context
    return func(*args, **kwargs)
  File "/usr/local/lib/python3.8/dist-packages/transformers/generation/utils.py", line 1758, in generate
    result = self._sample(
  File "/usr/local/lib/python3.8/dist-packages/transformers/generation/utils.py", line 2397, in _sample
    outputs = self(
  File "/usr/local/lib/python3.8/dist-packages/torch/nn/modules/module.py", line 1518, in _wrapped_call_impl
    return self._call_impl(*args, **kwargs)
  File "/usr/local/lib/python3.8/dist-packages/torch/nn/modules/module.py", line 1527, in _call_impl
    return forward_call(*args, **kwargs)
  File "/usr/local/lib/python3.8/dist-packages/accelerate/hooks.py", line 166, in new_forward
    output = module._old_forward(*args, **kwargs)
  File "/usr/local/lib/python3.8/dist-packages/transformers/models/video_llava/modeling_video_llava.py", line 513, in forward
    image_outputs, video_outputs = self._get_vision_features(
  File "/usr/local/lib/python3.8/dist-packages/transformers/models/video_llava/modeling_video_llava.py", line 377, in _get_vision_features
    raise ValueError(f"Video pixel values should have exactly 8 frames but foung {num_frames}")
ValueError: Video pixel values should have exactly 8 frames but foung 30
Does this mean that if I really want to change 8 to 30, I need to fully retrain the model? If so, I suggest deleting that comment, since it is confusing.
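In other words, is there a supported way to load the model so that it accepts more than 8 frames per video? Something like the sketch below is what I was hoping the comment meant (purely a guess on my part; I don't know whether num_frames is actually an exposed config field):

```python
from transformers import VideoLlavaForConditionalGeneration

# HYPOTHETICAL: guessing that the 8-frame check in
# modeling_video_llava.py is driven by a config value that could be
# overridden at load time. `num_frames` may not be a real config field.
model = VideoLlavaForConditionalGeneration.from_pretrained(
    "LanguageBind/Video-LLaVA-7B-hf",
    num_frames=30,
)
```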
Another question: if I set it to more than about 50 frames, it causes this error:
OverflowError: There was an overflow with type <class 'list'>. Try to reduce writer_batch_size to have batches smaller than 2GB.
(offset overflow while concatenating arrays)
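The message mentions writer_batch_size, which I believe is a parameter of datasets.Dataset.map, so I assume it is pointing at something like the sketch below (preprocess_fn and the toy dataset are hypothetical stand-ins for whatever my script actually does):

```python
from datasets import Dataset

def preprocess_fn(batch):
    # ...decode videos and sample 50+ frames per clip here...
    return batch

dataset = Dataset.from_dict({"video_path": ["clip1.mp4", "clip2.mp4"]})
processed = dataset.map(
    preprocess_fn,
    batched=True,
    writer_batch_size=10,  # default is 1000; smaller keeps each Arrow write under 2GB
)
```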
How can I solve this if I really want to use more frames?
Thanks!