[{'role': 'user', 'content': [{'type': 'image', 'image': '/vlmeval/images/DocVQA_TEST/57348.jpg', 'min_pixels': 1003520, 'max_pixels': 12845056}, {'type': 'text', 'text': "What is the % of 'Providers of Capital' in the year 2010 based on 'Distribution of Value-Added' graph?\nPlease try to answer the question with short words or phrases if possible."}]}]
0%| | 0/649 [00:04<?, ?it/s]
Traceback (most recent call last):
File "/VLMEvalKit/run.py", line 226, in <module>
main()
File "/VLMEvalKit/run.py", line 140, in main
model = infer_data_job(
File "/VLMEvalKit/vlmeval/inference.py", line 164, in infer_data_job
model = infer_data(
File "/VLMEvalKit/vlmeval/inference.py", line 129, in infer_data
response = model.generate(message=struct, dataset=dataset_name)
File "/VLMEvalKit/vlmeval/vlm/base.py", line 115, in generate
return self.generate_inner(message, dataset)
File "/VLMEvalKit/vlmeval/vlm/qwen2_vl/model.py", line 100, in generate_inner
generated_ids = self.model.generate(
File "/opt/conda/lib/python3.10/site-packages/torch/utils/_contextlib.py", line 115, in decorate_context
return func(*args, **kwargs)
File "/opt/conda/lib/python3.10/site-packages/transformers/generation/utils.py", line 2053, in generate
result = self._sample(
File "/opt/conda/lib/python3.10/site-packages/transformers/generation/utils.py", line 3003, in _sample
outputs = self(**model_inputs, return_dict=True)
File "/opt/conda/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1518, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "/opt/conda/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1527, in _call_impl
return forward_call(*args, **kwargs)
File "/opt/conda/lib/python3.10/site-packages/accelerate/hooks.py", line 165, in new_forward
output = module._old_forward(*args, **kwargs)
File "/opt/conda/lib/python3.10/site-packages/transformers/models/qwen2_vl/modeling_qwen2_vl.py", line 1686, in forward
inputs_embeds = inputs_embeds.masked_scatter(image_mask, image_embeds)
RuntimeError: Expected all tensors to be on the same device, but found at least two devices, cuda:3 and cuda:0! (when checking argument for argument mask in method wrapper_CUDA__masked_scatter_)
当我运行 run.py 时,出现了上面的错误。
已有的 issue 似乎无法解决我的需求。我的模型足够小,因此希望使用"八卡八模型"的方式:每张卡上各加载一个模型,每张卡只推理一部分文件。根据 README,这似乎是通过 `torchrun --nproc-per-node=8 run.py` 来实现的。
我参考了现有的 issue #244,但 qwen2vl 的 model.py 中,模型加载(load)部分没有问题。
该问题与 issue #224 中的情况类似。