unum-cloud / uform

Pocket-Sized Multimodal AI for content understanding and generation across multilingual texts, images, and 🔜 video, up to 5x faster than OpenAI CLIP and LLaVA 🖼️ & 🖋️
https://unum-cloud.github.io/uform/
Apache License 2.0

Multi-GPU Parallelism: ERROR: get_image_embedding #93

bupianlizhugui opened 1 month ago

```python
from typing import List
from PIL import Image
import torch
import numpy as np
from uform import get_model, Modality
import torch.nn as nn


def get_image_embedding(images: List[Image.Image]):
    preprocessed = processor_image(images)
    embedding = model_image_parallel.forward(preprocessed)
    return embedding.detach().cpu().numpy()


def get_text_embedding(texts: List[str]):
    preprocessed = processor_text(texts)
    embedding = model_text_parallel.forward(preprocessed)
    return embedding.detach().cpu().numpy()


from transformers.image_utils import load_image

image1 = load_image("http://img.ltwebstatic.com/images2_pi/2019/03/29/15538517971773429449.jpg")
image2 = load_image("http://img.ltwebstatic.com/images3_pi/2024/04/16/21/17132411401deebc2f549346ffcb2bd626c3afc9b7.jpg")
```
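
Note: the snippet above never defines `processor_image`, `processor_text`, `model_image_parallel`, or `model_text_parallel`. Assuming they come from UForm's `get_model` with each encoder wrapped in `torch.nn.DataParallel`, the missing setup would look roughly like this (the checkpoint name is an assumption, not taken from the report):

```python
# Hypothetical setup for the undefined names above; not part of the original report.
# "unum-cloud/uform3-image-text-english-small" is an assumed checkpoint.
processors, models = get_model(
    "unum-cloud/uform3-image-text-english-small",
    modalities=[Modality.IMAGE_ENCODER, Modality.TEXT_ENCODER],
)

processor_image = processors[Modality.IMAGE_ENCODER]
processor_text = processors[Modality.TEXT_ENCODER]

# Wrap each encoder for multi-GPU data parallelism; DataParallel splits the
# batch across all visible GPUs and gathers the outputs on the default device.
model_image_parallel = nn.DataParallel(models[Modality.IMAGE_ENCODER].cuda())
model_text_parallel = nn.DataParallel(models[Modality.TEXT_ENCODER].cuda())
```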

ERROR:

```
TypeError                                 Traceback (most recent call last)
Cell In[56], line 1
----> 1 get_image_embedding(image2)

Cell In[50], line 8, in get_image_embedding(images)
      7 def get_image_embedding(images: List[Image.Image]):
----> 8     preprocessed = processor_image(images)
      9     embedding = model_image_parallel.forward(preprocessed)
     10     return embedding.detach().cpu().numpy()

File /opt/conda/envs/uform/lib/python3.10/site-packages/torch/nn/modules/module.py:1553, in Module._wrapped_call_impl(self, *args, **kwargs)
   1551     return self._compiled_call_impl(*args, **kwargs)  # type: ignore[misc]
   1552 else:
-> 1553     return self._call_impl(*args, **kwargs)

File /opt/conda/envs/uform/lib/python3.10/site-packages/torch/nn/modules/module.py:1562, in Module._call_impl(self, *args, **kwargs)
   1557 # If we don't have any hooks, we want to skip the rest of the logic in
   1558 # this function, and just call forward.
   1559 if not (self._backward_hooks or self._backward_pre_hooks or self._forward_hooks or self._forward_pre_hooks
   1560         or _global_backward_pre_hooks or _global_backward_hooks
   1561         or _global_forward_hooks or _global_forward_pre_hooks):
-> 1562     return forward_call(*args, **kwargs)
   1564 try:
   1565     result = None

File /opt/conda/envs/uform/lib/python3.10/site-packages/uform/torch_encoders.py:387, in ImageEncoder.forward(self, x, return_features)
    384 if _is_on_gpu(self) and not x.is_cuda:
    385     x = x.cuda()
--> 387 features = self.forward_features(x)
    388 embeddings = self.forward_embedding(features)
    389 return_features = return_features if return_features is not None else self.return_features

File /opt/conda/envs/uform/lib/python3.10/site-packages/uform/torch_encoders.py:360, in ImageEncoder.forward_features(self, x)
    359 def forward_features(self, x: Union[Tensor, dict]) -> Tensor:
--> 360     x = self.patch_embed(x).flatten(start_dim=2).transpose(2, 1)
    361     x = x + self.pos_embed
    362     special_tokens = [self.cls_token.expand(x.shape[0], -1, -1)]

File /opt/conda/envs/uform/lib/python3.10/site-packages/torch/nn/modules/module.py:1553, in Module._wrapped_call_impl(self, *args, **kwargs)
   1551     return self._compiled_call_impl(*args, **kwargs)  # type: ignore[misc]
   1552 else:
-> 1553     return self._call_impl(*args, **kwargs)

File /opt/conda/envs/uform/lib/python3.10/site-packages/torch/nn/modules/module.py:1562, in Module._call_impl(self, *args, **kwargs)
   1557 # If we don't have any hooks, we want to skip the rest of the logic in
   1558 # this function, and just call forward.
   1559 if not (self._backward_hooks or self._backward_pre_hooks or self._forward_hooks or self._forward_pre_hooks
   1560         or _global_backward_pre_hooks or _global_backward_hooks
   1561         or _global_forward_hooks or _global_forward_pre_hooks):
-> 1562     return forward_call(*args, **kwargs)
   1564 try:
   1565     result = None

File /opt/conda/envs/uform/lib/python3.10/site-packages/torch/nn/modules/conv.py:458, in Conv2d.forward(self, input)
    457 def forward(self, input: Tensor) -> Tensor:
--> 458     return self._conv_forward(input, self.weight, self.bias)

File /opt/conda/envs/uform/lib/python3.10/site-packages/torch/nn/modules/conv.py:454, in Conv2d._conv_forward(self, input, weight, bias)
    450 if self.padding_mode != 'zeros':
    451     return F.conv2d(F.pad(input, self._reversed_padding_repeated_twice, mode=self.padding_mode),
    452                     weight, bias, self.stride,
    453                     _pair(0), self.dilation, self.groups)
--> 454 return F.conv2d(input, weight, bias, self.stride,
    455                 self.padding, self.dilation, self.groups)

TypeError: conv2d() received an invalid combination of arguments - got (Image, Parameter, Parameter, tuple, tuple, tuple, int), but expected one of:
```
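
For context on what the traceback shows: the failing line is `preprocessed = processor_image(images)`, yet the call dispatches straight into `Module._wrapped_call_impl` and then `ImageEncoder.forward`, which suggests `processor_image` is bound to the image *encoder* (an `nn.Module`) rather than to the preprocessor. Consistently, `conv2d()` complains that its first argument is an `Image`: a raw PIL image reached the patch-embedding convolution without ever being converted to a tensor. Note also that `get_image_embedding(image2)` passes a bare image where the signature expects `List[Image.Image]`. A minimal sketch of the intended flow, under the same assumed setup as above, might look like:

```python
# Hypothetical usage sketch; assumes the setup sketched earlier and that the
# processor accepts a list of PIL images, as the List[Image.Image] hint implies.
images = [image1, image2]                  # pass a list, not a bare Image
preprocessed = processor_image(images)     # PIL images -> batched tensor
with torch.inference_mode():
    # DataParallel splits the batch across GPUs and gathers the result.
    embeddings = model_image_parallel(preprocessed.cuda())
print(embeddings.detach().cpu().numpy().shape)
```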