I changed the code in 'MiniGPT4-video/minigpt4/models/mini_gpt4_llama_v2.py' so that MiniGPT4-video can run inference across multiple GPUs, but generation fails with the following error:
Traceback (most recent call last):
  File "/home///./.local/lib/python3.10/site-packages/gradio/routes.py", line 488, in run_predict
    output = await app.get_blocks().process_api(
  File "/home///./.local/lib/python3.10/site-packages/gradio/blocks.py", line 1431, in process_api
    result = await self.call_function(
  File "/home///./.local/lib/python3.10/site-packages/gradio/blocks.py", line 1103, in call_function
    prediction = await anyio.to_thread.run_sync(
  File "/home///./fixfolder001/Anaconda/python310/lib/python3.10/site-packages/anyio/to_thread.py", line 28, in run_sync
    return await get_asynclib().run_sync_in_worker_thread(func, *args, cancellable=cancellable,
  File "/home///./fixfolder001/Anaconda/python310/lib/python3.10/site-packages/anyio/_backends/_asyncio.py", line 818, in run_sync_in_worker_thread
    return await future
  File "/home///./fixfolder001/Anaconda/python310/lib/python3.10/site-packages/anyio/_backends/_asyncio.py", line 754, in run
    result = context.run(func, *args)
  File "/home//*****/./.local/lib/python3.10/site-packages/gradio/utils.py", line 707, in wrapper
    response = f(*args, **kwargs)
  File "/home///./dnn/test/MiniGPT4-video/minigpt4_video_demo.py", line 229, in gradio_demo_local
    pred=run(video_path,instruction,model,vis_processor,gen_subtitles=has_sub)
  File "/home///./dnn/test/MiniGPT4-video/minigpt4_video_demo.py", line 147, in run
    answers = model.generate(prepared_images, prompt, max_new_tokens=args.max_new_tokens, do_sample=True, lengths=[length],num_beams=2)
  File "/home//*****/./.local/lib/python3.10/site-packages/torch/utils/_contextlib.py", line 115, in decorate_context
    return func(*args, **kwargs)
  File "/home///./dnn/test/MiniGPT4-video/minigpt4/models/mini_gpt4_llama_v2.py", line 553, in generate
    outputs = self.llama_model.generate(
  File "/home///./.local/lib/python3.10/site-packages/peft/peft_model.py", line 580, in generate
    return self.base_model.generate(**kwargs)
  File "/home//***/./.local/lib/python3.10/site-packages/torch/utils/_contextlib.py", line 115, in decorate_context
    return func(*args, **kwargs)
  File "/home///./.local/lib/python3.10/site-packages/transformers/generation/utils.py", line 1590, in generate
    return self.beam_sample(
  File "/home///./.local/lib/python3.10/site-packages/transformers/generation/utils.py", line 3271, in beam_sample
    outputs = self(
  File "/home//*****/./.local/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1501, in _call_impl
    return forward_call(*args, **kwargs)
  File "/home//*****/./.local/lib/python3.10/site-packages/accelerate/hooks.py", line 165, in new_forward
    output = module._old_forward(*args, **kwargs)
  File "/home///./dnn/test/MiniGPT4-video/minigpt4/models/modeling_llama_v2.py", line 65, in forward
    outputs = self.model(
  File "/home///./.local/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1501, in _call_impl
    return forward_call(*args, **kwargs)
  File "/home//**/./.local/lib/python3.10/site-packages/transformers/models/llama/modeling_llama.py", line 1070, in forward
    layer_outputs = decoder_layer(
  File "/home///./.local/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1501, in _call_impl
    return forward_call(*args, **kwargs)
  File "/home//***/./.local/lib/python3.10/site-packages/accelerate/hooks.py", line 165, in new_forward
    output = module._old_forward(*args, **kwargs)
  File "/home///./.local/lib/python3.10/site-packages/transformers/models/llama/modeling_llama.py", line 795, in forward
    hidden_states = self.input_layernorm(hidden_states)
  File "/home///./.local/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1501, in _call_impl
    return forward_call(*args, **kwargs)
  File "/home//***/./.local/lib/python3.10/site-packages/accelerate/hooks.py", line 165, in new_forward
    output = module._old_forward(*args, **kwargs)
  File "/home/***/.local/lib/python3.10/site-packages/transformers/models/llama/modeling_llama.py", line 117, in forward
    return self.weight * hidden_states.to(input_dtype)
RuntimeError: Expected all tensors to be on the same device, but found at least two devices, cuda:0 and cuda:1!