When I ran `bash stage_2_full_v7b_224_fps_1.sh`, training progressed to about 20% and then failed with the following error:
Traceback (most recent call last):
File "/home/kas/kas_workspace/shuozhang/LLaMA-VID/llamavid/train/train_mem.py", line 13, in <module>
train()
File "/home/kas/kas_workspace/shuozhang/LLaMA-VID/llamavid/train/train.py", line 1194, in train
trainer.train()
File "/home/kas/.conda/envs/llamavid/lib/python3.10/site-packages/transformers/trainer.py", line 1539, in train
return inner_training_loop(
File "/home/kas/.conda/envs/llamavid/lib/python3.10/site-packages/transformers/trainer.py", line 1787, in _inner_training_loop
for step, inputs in enumerate(epoch_iterator):
File "/home/kas/.conda/envs/llamavid/lib/python3.10/site-packages/accelerate/data_loader.py", line 394, in __iter__
raise exception
KeyError: Caught KeyError in DataLoader worker process 3.
Original Traceback (most recent call last):
File "/home/kas/.conda/envs/llamavid/lib/python3.10/site-packages/torch/utils/data/_utils/worker.py", line 308, in _worker_loop
data = fetcher.fetch(index)
File "/home/kas/.conda/envs/llamavid/lib/python3.10/site-packages/torch/utils/data/_utils/fetch.py", line 51, in fetch
data = [self.dataset[idx] for idx in possibly_batched_index]
File "/home/kas/.conda/envs/llamavid/lib/python3.10/site-packages/torch/utils/data/_utils/fetch.py", line 51, in <listcomp>
data = [self.dataset[idx] for idx in possibly_batched_index]
File "/home/kas/kas_workspace/shuozhang/LLaMA-VID/llamavid/train/train.py", line 925, in __getitem__
data_dict = preprocess(
File "/home/kas/kas_workspace/shuozhang/LLaMA-VID/llamavid/train/train.py", line 774, in preprocess
return preprocess_imgsp_v1(sources, tokenizer, has_image=has_image, refine_prompt=refine_prompt)
File "/home/kas/kas_workspace/shuozhang/LLaMA-VID/llamavid/train/train.py", line 544, in preprocess_imgsp_v1
if roles[source[0]["from"]] != conv.roles[0]:
KeyError: 0
Thank you for your outstanding contribution.
When I ran `bash stage_2_full_v7b_224_fps_1.sh`, training progressed to about 20% and then failed with the following error:
Traceback (most recent call last):
File "/home/kas/kas_workspace/shuozhang/LLaMA-VID/llamavid/train/train_mem.py", line 13, in <module>
train()
File "/home/kas/kas_workspace/shuozhang/LLaMA-VID/llamavid/train/train.py", line 1194, in train
trainer.train()
File "/home/kas/.conda/envs/llamavid/lib/python3.10/site-packages/transformers/trainer.py", line 1539, in train
return inner_training_loop(
File "/home/kas/.conda/envs/llamavid/lib/python3.10/site-packages/transformers/trainer.py", line 1787, in _inner_training_loop
for step, inputs in enumerate(epoch_iterator):
File "/home/kas/.conda/envs/llamavid/lib/python3.10/site-packages/accelerate/data_loader.py", line 394, in __iter__
raise exception
KeyError: Caught KeyError in DataLoader worker process 3.
Original Traceback (most recent call last):
File "/home/kas/.conda/envs/llamavid/lib/python3.10/site-packages/torch/utils/data/_utils/worker.py", line 308, in _worker_loop
data = fetcher.fetch(index)
File "/home/kas/.conda/envs/llamavid/lib/python3.10/site-packages/torch/utils/data/_utils/fetch.py", line 51, in fetch
data = [self.dataset[idx] for idx in possibly_batched_index]
File "/home/kas/.conda/envs/llamavid/lib/python3.10/site-packages/torch/utils/data/_utils/fetch.py", line 51, in <listcomp>
data = [self.dataset[idx] for idx in possibly_batched_index]
File "/home/kas/kas_workspace/shuozhang/LLaMA-VID/llamavid/train/train.py", line 925, in __getitem__
data_dict = preprocess(
File "/home/kas/kas_workspace/shuozhang/LLaMA-VID/llamavid/train/train.py", line 774, in preprocess
return preprocess_imgsp_v1(sources, tokenizer, has_image=has_image, refine_prompt=refine_prompt)
File "/home/kas/kas_workspace/shuozhang/LLaMA-VID/llamavid/train/train.py", line 544, in preprocess_imgsp_v1
if roles[source[0]["from"]] != conv.roles[0]:
KeyError: 0
What could be the problem?