Pocket-Sized Multimodal AI for content understanding and generation across multilingual texts, images, and 🔜 video, up to 5x faster than OpenAI CLIP and LLaVA 🖼️ & 🖋️
```python
from typing import List
from PIL import Image
import torch
import numpy as np
from uform import get_model, Modality
import torch.nn as nn


def get_image_embedding(images: List[Image.Image]):
    preprocessed = processor_image(images)
    embedding = model_image_parallel.forward(preprocessed)
    return embedding.detach().cpu().numpy()
```
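Neither `processor_image` nor `model_image_parallel` is defined in the snippet above. For context, here is a minimal sketch of the presumed setup, following the uform v3 `get_model` / `Modality` API; the model name and the `nn.DataParallel` wrapping behind the `_parallel` suffix are assumptions, not part of the original report:

```python
# Presumed setup (hypothetical; model name and DataParallel use are assumptions).
import torch.nn as nn
from uform import get_model, Modality

processors, models = get_model("unum-cloud/uform3-image-text-english-small")

# Preprocessors turn raw PIL images / strings into model-ready tensors.
processor_image = processors[Modality.IMAGE_ENCODER]
processor_text = processors[Modality.TEXT_ENCODER]

# The `_parallel` suffix suggests the encoders are wrapped for multi-GPU inference.
model_image_parallel = nn.DataParallel(models[Modality.IMAGE_ENCODER])
model_text_parallel = nn.DataParallel(models[Modality.TEXT_ENCODER])
```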
```python
def get_text_embedding(texts: List[str]):
    preprocessed = processor_text(texts)
    embedding = model_text_parallel.forward(preprocessed)
    return embedding.detach().cpu().numpy()
```
```python
from transformers.image_utils import load_image

image1 = load_image("http://img.ltwebstatic.com/images2_pi/2019/03/29/15538517971773429449.jpg")
image2 = load_image("http://img.ltwebstatic.com/images3_pi/2024/04/16/21/17132411401deebc2f549346ffcb2bd626c3afc9b7.jpg")
```
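Note that both helpers are typed to take lists, while the failing call in the traceback below passes a bare `PIL.Image` (`get_image_embedding(image2)`). A sketch of the call pattern the signatures declare; the caption string is made up for illustration:

```python
# Wrap single images/strings in lists to match the List[...] type hints.
image_embeddings = get_image_embedding([image1, image2])
text_embeddings = get_text_embedding(["a photo of a summer dress"])  # hypothetical caption
```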
ERROR:
```
TypeError                                 Traceback (most recent call last)
Cell In[56], line 1
----> 1 get_image_embedding(image2)

Cell In[50], line 8, in get_image_embedding(images)
      7 def get_image_embedding(images: List[Image.Image]):
----> 8     preprocessed = processor_image(images)
      9     embedding = model_image_parallel.forward(preprocessed)
     10     return embedding.detach().cpu().numpy()

File /opt/conda/envs/uform/lib/python3.10/site-packages/torch/nn/modules/module.py:1553, in Module._wrapped_call_impl(self, *args, **kwargs)
   1551     return self._compiled_call_impl(*args, **kwargs)  # type: ignore[misc]
   1552 else:
-> 1553     return self._call_impl(*args, **kwargs)

File /opt/conda/envs/uform/lib/python3.10/site-packages/torch/nn/modules/module.py:1562, in Module._call_impl(self, *args, **kwargs)
   1557 # If we don't have any hooks, we want to skip the rest of the logic in
   1558 # this function, and just call forward.
   1559 if not (self._backward_hooks or self._backward_pre_hooks or self._forward_hooks or self._forward_pre_hooks
   1560         or _global_backward_pre_hooks or _global_backward_hooks
   1561         or _global_forward_hooks or _global_forward_pre_hooks):
-> 1562     return forward_call(*args, **kwargs)
   1564 try:
   1565     result = None

File /opt/conda/envs/uform/lib/python3.10/site-packages/uform/torch_encoders.py:387, in ImageEncoder.forward(self, x, return_features)
    384 if _is_on_gpu(self) and not x.is_cuda:
    385     x = x.cuda()
--> 387 features = self.forward_features(x)
    388 embeddings = self.forward_embedding(features)
    389 return_features = return_features if return_features is not None else self.return_features

File /opt/conda/envs/uform/lib/python3.10/site-packages/uform/torch_encoders.py:360, in ImageEncoder.forward_features(self, x)
    359 def forward_features(self, x: Union[Tensor, dict]) -> Tensor:
--> 360     x = self.patch_embed(x).flatten(start_dim=2).transpose(2, 1)
    361     x = x + self.pos_embed
    362     special_tokens = [self.cls_token.expand(x.shape[0], -1, -1)]

File /opt/conda/envs/uform/lib/python3.10/site-packages/torch/nn/modules/module.py:1553, in Module._wrapped_call_impl(self, *args, **kwargs)
   1551     return self._compiled_call_impl(*args, **kwargs)  # type: ignore[misc]
   1552 else:
-> 1553     return self._call_impl(*args, **kwargs)

File /opt/conda/envs/uform/lib/python3.10/site-packages/torch/nn/modules/module.py:1562, in Module._call_impl(self, *args, **kwargs)
   1557 # If we don't have any hooks, we want to skip the rest of the logic in
   1558 # this function, and just call forward.
   1559 if not (self._backward_hooks or self._backward_pre_hooks or self._forward_hooks or self._forward_pre_hooks
   1560         or _global_backward_pre_hooks or _global_backward_hooks
   1561         or _global_forward_hooks or _global_forward_pre_hooks):
-> 1562     return forward_call(*args, **kwargs)
   1564 try:
   1565     result = None

File /opt/conda/envs/uform/lib/python3.10/site-packages/torch/nn/modules/conv.py:458, in Conv2d.forward(self, input)
    457 def forward(self, input: Tensor) -> Tensor:
--> 458     return self._conv_forward(input, self.weight, self.bias)

File /opt/conda/envs/uform/lib/python3.10/site-packages/torch/nn/modules/conv.py:454, in Conv2d._conv_forward(self, input, weight, bias)
    450 if self.padding_mode != 'zeros':
    451     return F.conv2d(F.pad(input, self._reversed_padding_repeated_twice, mode=self.padding_mode),
    452                     weight, bias, self.stride,
    453                     _pair(0), self.dilation, self.groups)
--> 454 return F.conv2d(input, weight, bias, self.stride,
    455                 self.padding, self.dilation, self.groups)

TypeError: conv2d() received an invalid combination of arguments - got (Image, Parameter, Parameter, tuple, tuple, tuple, int), but expected one of:
 * (Tensor input, Tensor weight, Tensor bias = None, tuple of ints stride = 1, tuple of ints padding = 0, tuple of ints dilation = 1, int groups = 1)
      didn't match because some of the arguments have invalid types: (!Image!, !Parameter!, !Parameter!, !tuple of (int, int)!, !tuple of (int, int)!, !tuple of (int, int)!, !int!)
 * (Tensor input, Tensor weight, Tensor bias = None, tuple of ints stride = 1, str padding = "valid", tuple of ints dilation = 1, int groups = 1)
      didn't match because some of the arguments have invalid types: (!Image!, !Parameter!, !Parameter!, !tuple of (int, int)!, !tuple of (int, int)!, !tuple of (int, int)!, !int!)
```
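What the traceback shows: `conv2d()` received a raw `PIL.Image` where a `Tensor` was expected, and the frames go straight from the `processor_image(images)` call into `ImageEncoder.forward` with no preprocessing in between. That suggests `processor_image` is bound to the image encoder model itself rather than to the preprocessor, so the image is never converted to a tensor. A minimal fix sketch, assuming the uform v3 `get_model` API and the model name used above:

```python
# Fix sketch (hypothetical; assumes the setup sketched earlier).
processors, models = get_model("unum-cloud/uform3-image-text-english-small")
processor_image = processors[Modality.IMAGE_ENCODER]  # the preprocessor, not the model
model_image = models[Modality.IMAGE_ENCODER]

with torch.inference_mode():
    preprocessed = processor_image([image2])  # a list, matching the type hint
    embedding = model_image(preprocessed)     # now receives a Tensor, not a PIL Image

print(embedding.detach().cpu().numpy().shape)
```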