Open qwebug opened 4 days ago
Very interesting. What driver version do you have?
npu_win_32.0.100.2540
I can replicate the error, I'll take a look
I also found another error when testing MiniCPM-Llama3-V-2_5 on this library.
# test.py
# Reproduction script: load MiniCPM-Llama3-V-2.5, compile it for the Intel NPU
# at int4, then run one blocking chat call and one streaming chat call.
import torch
from PIL import Image
from transformers import AutoModel, AutoTokenizer
import intel_npu_acceleration_library
from intel_npu_acceleration_library import NPUModelForCausalLM, int4

model_id = 'openbmb/MiniCPM-Llama3-V-2_5'

# Load in fp16, then lower through the NPU acceleration library.
model = AutoModel.from_pretrained(model_id, trust_remote_code=True, torch_dtype=torch.float16)
model = intel_npu_acceleration_library.compile(model, dtype=int4)
tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True)

image = Image.open('australia.jpg').convert('RGB')
question = 'What is in the image?'
messages = [{'role': 'user', 'content': question}]

# Blocking generation: returns the full answer as a string.
answer = model.chat(
    image=image,
    msgs=messages,
    tokenizer=tokenizer,
    sampling=True,  # if sampling=False, beam_search will be used by default
    temperature=0.7,
    # system_prompt='' # pass system_prompt if needed
)
print(answer)

## if you want to use streaming, please make sure sampling=True and stream=True
## the model.chat will return a generator
stream = model.chat(
    image=image,
    msgs=messages,
    tokenizer=tokenizer,
    sampling=True,
    temperature=0.7,
    stream=True,
)

collected = ""
for chunk in stream:
    collected += chunk
    print(chunk, flush=True, end='')
>python miniCPM.py
Loading checkpoint shards: 100%|██████████████████████████████████████████████████████████████████████████████| 7/7 [00:14<00:00, 2.09s/it]
Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
Traceback (most recent call last):
File "D:\Desktop\workspace\NPU\intel-npu\intel-npu-acceleration-library\examples\miniCPM.py", line 19, in <module>
res = model.chat(
File "C:\Users\xxx\.cache\huggingface\modules\transformers_modules\openbmb\MiniCPM-Llama3-V-2_5\45387f99a455e11801b78a0b24811856688e0c8b\modeling_minicpmv.py", line 454, in chat
res, vision_hidden_states = self.generate(
File "C:\Users\xxx\.cache\huggingface\modules\transformers_modules\openbmb\MiniCPM-Llama3-V-2_5\45387f99a455e11801b78a0b24811856688e0c8b\modeling_minicpmv.py", line 354, in generate
) = self.get_vllm_embedding(model_inputs)
File "C:\Users\xxx\.cache\huggingface\modules\transformers_modules\openbmb\MiniCPM-Llama3-V-2_5\45387f99a455e11801b78a0b24811856688e0c8b\modeling_minicpmv.py", line 99, in get_vllm_embedding
vision_embedding = self.vpm(all_pixel_values.type(dtype), patch_attention_mask=patch_attn_mask).last_hidden_state
File "D:\Anaconda\envs\intel-npu-pure\lib\site-packages\torch\nn\modules\module.py", line 1532, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "D:\Anaconda\envs\intel-npu-pure\lib\site-packages\torch\nn\modules\module.py", line 1541, in _call_impl
return forward_call(*args, **kwargs)
File "D:\Anaconda\envs\intel-npu-pure\lib\site-packages\transformers\models\idefics2\modeling_idefics2.py", line 715, in forward
hidden_states = self.embeddings(pixel_values=pixel_values, patch_attention_mask=patch_attention_mask)
File "D:\Anaconda\envs\intel-npu-pure\lib\site-packages\torch\nn\modules\module.py", line 1532, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "D:\Anaconda\envs\intel-npu-pure\lib\site-packages\torch\nn\modules\module.py", line 1541, in _call_impl
return forward_call(*args, **kwargs)
File "D:\Anaconda\envs\intel-npu-pure\lib\site-packages\transformers\models\idefics2\modeling_idefics2.py", line 167, in forward
patch_embeds = self.patch_embedding(pixel_values)
File "D:\Anaconda\envs\intel-npu-pure\lib\site-packages\torch\nn\modules\module.py", line 1532, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "D:\Anaconda\envs\intel-npu-pure\lib\site-packages\torch\nn\modules\module.py", line 1541, in _call_impl
return forward_call(*args, **kwargs)
File "D:\Anaconda\envs\intel-npu-pure\lib\site-packages\intel_npu_acceleration_library\nn\conv.py", line 112, in forward
inp_unf = torch.nn.functional.unfold(
File "D:\Anaconda\envs\intel-npu-pure\lib\site-packages\torch\nn\functional.py", line 4814, in unfold
return handle_torch_function(
File "D:\Anaconda\envs\intel-npu-pure\lib\site-packages\torch\overrides.py", line 1619, in handle_torch_function
result = mode.__torch_function__(public_api, types, args, kwargs)
File "D:\Anaconda\envs\intel-npu-pure\lib\site-packages\intel_npu_acceleration_library\device.py", line 66, in __torch_function__
return super_fn(*args, **kwargs or {})
File "D:\Anaconda\envs\intel-npu-pure\lib\site-packages\intel_npu_acceleration_library\device.py", line 60, in super_fn
return func(*args, **kwargs)
File "D:\Anaconda\envs\intel-npu-pure\lib\site-packages\torch\nn\functional.py", line 4817, in unfold
return torch._C._nn.im2col(input, _pair(kernel_size), _pair(dilation), _pair(padding), _pair(stride))
TypeError: im2col(): argument 'padding' (position 4) must be tuple of ints, but found element of type str at pos 0
When I tested Qwen2-7B on this library, it reported some errors.