model = AutoModelForCausalLM.from_pretrained(ckpt_path, revision='master',
torch_dtype=torch.float16, trust_remote_code=True,device_map="auto")
model = model.eval()
from modelscope import snapshot_download
if __name__ == '__main__':
    text = '任务:你需要理解图片内容,理解图片中哪些文字是标题,并根据标题、相应的分类、其他重要信息对图片的内容以结构化的方式详细描述。 要求:1.忽略水印信息。2.只总结在图片上出现过的信息,不做发散。3.不要遗漏其他重要信息。'
    response, history = model.chat(query=text, image='体检有结节会癌变吗?一文读懂三种常见结节_1_蚂蚁保-重疾险_来自小红书网页版.jpg', tokenizer= tokenizer,history=[])
    result_json.append({'picture_path':file_local_path, 'predict_result':response})
    with open('/content-insight/234678/code/internlm_test/picture_analyze_0522_qwen_result_1752.txt', 'w') as file:
        json.dump(result_json, file, indent=4, ensure_ascii=False)
The Error Detail:
Traceback (most recent call last):
  File "/content-insight/234678/code/internlm_test/infer.py", line 33, in <module>
    response, history = model.chat(query=text, image=file_local_path, tokenizer= tokenizer,history=[])
  File "/opt/conda/lib/python3.8/site-packages/torch/utils/_contextlib.py", line 115, in decorate_context
    return func(*args, **kwargs)
  File "/root/.cache/huggingface/modules/transformers_modules/internlm-xcomposer2-vl-7b/modeling_internlm_xcomposer2.py", line 500, in chat
    image = self.encode_img(image)
  File "/root/.cache/huggingface/modules/transformers_modules/internlm-xcomposer2-vl-7b/modeling_internlm_xcomposer2.py", line 118, in encode_img
    img_embeds, atts_img, img_target = self.img2emb(image)
  File "/root/.cache/huggingface/modules/transformers_modules/internlm-xcomposer2-vl-7b/modeling_internlm_xcomposer2.py", line 122, in img2emb
    img_embeds = self.vision_proj(self.vit(image.to(self.device)))
  File "/opt/conda/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1518, in _wrapped_call_impl
    return self._call_impl(*args, **kwargs)
  File "/opt/conda/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1527, in _call_impl
    return forward_call(*args, **kwargs)
  File "/opt/conda/lib/python3.8/site-packages/torch/nn/modules/container.py", line 215, in forward
    input = module(input)
  File "/opt/conda/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1518, in _wrapped_call_impl
    return self._call_impl(*args, **kwargs)
  File "/opt/conda/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1527, in _call_impl
    return forward_call(*args, **kwargs)
  File "/opt/conda/lib/python3.8/site-packages/torch/nn/modules/linear.py", line 114, in forward
    return F.linear(input, self.weight, self.bias)
RuntimeError: mat1 and mat2 must have the same dtype, but got Float and Half
Base Model : InternLM-xcomposer2-vl-7b

My Code :

import os
os.environ['CUDA_VISIBLE_DEVICES'] = '0'
import torch
import json
from tqdm import tqdm
from modelscope import AutoTokenizer, AutoModelForCausalLM

ckpt_path = "/content-insight/base_model/internlm-xcomposer2-vl-7b"
tokenizer = AutoTokenizer.from_pretrained(ckpt_path, revision='master', trust_remote_code=True)
# torch_dtype=torch.float16 可以令模型以 float16 精度加载,否则 transformers 会将模型加载为 float32,导致显存不足
model = AutoModelForCausalLM.from_pretrained(ckpt_path, revision='master', torch_dtype=torch.float16, trust_remote_code=True,device_map="auto")
model = model.eval()
from modelscope import snapshot_download
self.vision_tower_name = snapshot_download("AI-ModelScope/clip-vit-large-patch14-336")
model.tokenizer = tokenizer
if __name__ == '__main__':
    text = '任务:你需要理解图片内容,理解图片中哪些文字是标题,并根据标题、相应的分类、其他重要信息对图片的内容以结构化的方式详细描述。 要求:1.忽略水印信息。2.只总结在图片上出现过的信息,不做发散。3.不要遗漏其他重要信息。'
    response, history = model.chat(query=text, image='体检有结节会癌变吗?一文读懂三种常见结节_1_蚂蚁保-重疾险_来自小红书网页版.jpg', tokenizer= tokenizer,history=[])
    result_json.append({'picture_path':file_local_path, 'predict_result':response})
The Error Detail:
Traceback (most recent call last):
  File "/content-insight/234678/code/internlm_test/infer.py", line 33, in <module>
    response, history = model.chat(query=text, image=file_local_path, tokenizer= tokenizer,history=[])
  File "/opt/conda/lib/python3.8/site-packages/torch/utils/_contextlib.py", line 115, in decorate_context
    return func(*args, **kwargs)
  File "/root/.cache/huggingface/modules/transformers_modules/internlm-xcomposer2-vl-7b/modeling_internlm_xcomposer2.py", line 500, in chat
    image = self.encode_img(image)
  File "/root/.cache/huggingface/modules/transformers_modules/internlm-xcomposer2-vl-7b/modeling_internlm_xcomposer2.py", line 118, in encode_img
    img_embeds, atts_img, img_target = self.img2emb(image)
  File "/root/.cache/huggingface/modules/transformers_modules/internlm-xcomposer2-vl-7b/modeling_internlm_xcomposer2.py", line 122, in img2emb
    img_embeds = self.vision_proj(self.vit(image.to(self.device)))
  File "/opt/conda/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1518, in _wrapped_call_impl
    return self._call_impl(*args, **kwargs)
  File "/opt/conda/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1527, in _call_impl
    return forward_call(*args, **kwargs)
  File "/opt/conda/lib/python3.8/site-packages/torch/nn/modules/container.py", line 215, in forward
    input = module(input)
  File "/opt/conda/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1518, in _wrapped_call_impl
    return self._call_impl(*args, **kwargs)
  File "/opt/conda/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1527, in _call_impl
    return forward_call(*args, **kwargs)
  File "/opt/conda/lib/python3.8/site-packages/torch/nn/modules/linear.py", line 114, in forward
    return F.linear(input, self.weight, self.bias)
RuntimeError: mat1 and mat2 must have the same dtype, but got Float and Half
我基于 modelscope 的代码进行了一点输入图片和文本的修改,然后遇到了这个问题,请问应该怎么解决?