facebookresearch / dinov2

PyTorch code and models for the DINOv2 self-supervised learning method.
Apache License 2.0

I want to implement instance retrieval with dinov2 #93

Open abdelkareemkobo opened 1 year ago

abdelkareemkobo commented 1 year ago

I read through the issues to learn how to run inference; this is my code:

import torch
import numpy as np
import matplotlib.pyplot as plt
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms, utils
from torchvision.datasets import ImageFolder
import warnings
warnings.filterwarnings("ignore")

from PIL import Image
import torchvision.transforms as T
import hubconf

# Load the pretrained ViT-B/14 backbone from torch.hub
dino = torch.hub.load('facebookresearch/dinov2', 'dinov2_vitb14')

test_img = Image.open('/kaggle/input/cat-and-dog/training_set/training_set/dogs/dog.1015.jpg')

# Standard ImageNet preprocessing: resize, center-crop to 224x224, normalize
image_transforms = T.Compose([
    T.Resize(256, interpolation=T.InterpolationMode.BICUBIC),
    T.CenterCrop(224),
    T.ToTensor(),
    T.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)),
])
t_img = image_transforms(test_img)
print(t_img.shape)          # torch.Size([3, 224, 224])
t_img = t_img.unsqueeze(0)  # add a batch dimension
print(t_img.shape)          # torch.Size([1, 3, 224, 224])
dino_emb = dino(t_img)      # input is [1, 3, 224, 224]

This is the error message I get:

Using cache found in /root/.cache/torch/hub/facebookresearch_dinov2_main
torch.Size([3, 224, 224])
torch.Size([1, 3, 224, 224])

NotImplementedError                       Traceback (most recent call last)
Cell In[33], line 31
     29 t_img = t_img.unsqueeze(0)
     30 print(t_img.shape)
---> 31 dino_emb = dino(t_img)

File /opt/conda/lib/python3.10/site-packages/torch/nn/modules/module.py:1501, in Module._call_impl(self, *args, **kwargs)
-> 1501 return forward_call(*args, **kwargs)

File ~/.cache/torch/hub/facebookresearch_dinov2_main/dinov2/models/vision_transformer.py:291, in DinoVisionTransformer.forward(self, *args, is_training=False, **kwargs)
--> 291 ret = self.forward_features(*args, **kwargs)

File ~/.cache/torch/hub/facebookresearch_dinov2_main/dinov2/models/vision_transformer.py:228, in DinoVisionTransformer.forward_features(self, x, masks)
--> 228 x = blk(x)

File /opt/conda/lib/python3.10/site-packages/torch/nn/modules/module.py:1501, in Module._call_impl(self, *args, **kwargs)
-> 1501 return forward_call(*args, **kwargs)

File ~/.cache/torch/hub/facebookresearch_dinov2_main/dinov2/layers/block.py:247, in NestedTensorBlock.forward(self, x_or_x_list)
--> 247 return super().forward(x_or_x_list)

File ~/.cache/torch/hub/facebookresearch_dinov2_main/dinov2/layers/block.py:105, in Block.forward(self, x)
--> 105 x = x + attn_residual_func(x)

File ~/.cache/torch/hub/facebookresearch_dinov2_main/dinov2/layers/block.py:84, in Block.forward.<locals>.attn_residual_func(x)
---> 84 return self.ls1(self.attn(self.norm1(x)))

File /opt/conda/lib/python3.10/site-packages/torch/nn/modules/module.py:1501, in Module._call_impl(self, *args, **kwargs)
-> 1501 return forward_call(*args, **kwargs)

File ~/.cache/torch/hub/facebookresearch_dinov2_main/dinov2/layers/attention.py:76, in MemEffAttention.forward(self, x, attn_bias)
---> 76 x = memory_efficient_attention(q, k, v, attn_bias=attn_bias)

File /opt/conda/lib/python3.10/site-packages/xformers/ops/fmha/__init__.py:196, in memory_efficient_attention(query, key, value, attn_bias, p, scale, op)
--> 196 return _memory_efficient_attention(Inputs(query=query, key=key, value=value, p=p, attn_bias=attn_bias, scale=scale), op=op)

File /opt/conda/lib/python3.10/site-packages/xformers/ops/fmha/__init__.py:299, in _memory_efficient_attention(inp, op)
--> 299 return _fMHA.apply(op, inp.query, inp.key, inp.value, inp.attn_bias, inp.p, inp.scale).reshape(output_shape)

File /opt/conda/lib/python3.10/site-packages/torch/autograd/function.py:506, in Function.apply(cls, *args, **kwargs)
--> 506 return super().apply(*args, **kwargs)  # type: ignore[misc]

File /opt/conda/lib/python3.10/site-packages/xformers/ops/fmha/__init__.py:41, in _fMHA.forward(ctx, op, *args)
---> 41 out, op_ctx = _memory_efficient_attention_forward_requires_grad(inp=inp, op=op_fw)

File /opt/conda/lib/python3.10/site-packages/xformers/ops/fmha/__init__.py:324, in _memory_efficient_attention_forward_requires_grad(inp, op)
--> 324 op = _dispatch_fw(inp)

File /opt/conda/lib/python3.10/site-packages/xformers/ops/fmha/dispatch.py:98, in _dispatch_fw(inp)
---> 98 return _run_priority_list("memory_efficient_attention_forward", priority_list_ops, inp)

File /opt/conda/lib/python3.10/site-packages/xformers/ops/fmha/dispatch.py:73, in _run_priority_list(name, priority_list, inp)
---> 73 raise NotImplementedError(msg)

NotImplementedError: No operator found for `memory_efficient_attention_forward` with inputs:
     query     : shape=(1, 257, 12, 64) (torch.float32)
     key       : shape=(1, 257, 12, 64) (torch.float32)
     value     : shape=(1, 257, 12, 64) (torch.float32)
     attn_bias : <class 'NoneType'>
     p         : 0.0
`cutlassF` is not supported because:
    device=cpu (supported: {'cuda'})
    operator wasn't built - see `python -m xformers.info` for more info
`flshattF` is not supported because:
    device=cpu (supported: {'cuda'})
    dtype=torch.float32 (supported: {torch.float16, torch.bfloat16})
    operator wasn't built - see `python -m xformers.info` for more info
`tritonflashattF` is not supported because:
    device=cpu (supported: {'cuda'})
    dtype=torch.float32 (supported: {torch.float16, torch.bfloat16})
`smallkF` is not supported because:
    max(query.shape[-1] != value.shape[-1]) > 32
    operator wasn't built - see `python -m xformers.info` for more info
    unsupported embed per head: 64

abdelkareemkobo commented 1 year ago

The problem was with the torch version and xformers. I solved it by building a new conda env.
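
For anyone else hitting this on CPU: the CUDA kernels listed in the error (cutlassF, flshattF, tritonflashattF) all reject device=cpu, and smallkF rejects the 64-dim heads, so the xformers dispatch has nothing it can run for this float32 CPU tensor. A minimal sketch of a workaround, assuming a CUDA-capable machine and reusing dino and t_img from the snippet above:

import torch

# Move the model and the input to the GPU so the CUDA-only xformers kernels can dispatch.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
dino = dino.to(device).eval()

with torch.no_grad():
    dino_emb = dino(t_img.to(device))  # CLS embedding, [1, 768] for dinov2_vitb14

print(dino_emb.shape)

If no GPU is available, uninstalling xformers may also work; judging from the XFORMERS_AVAILABLE assert in the NestedTensorBlock frame above, the hub code seems to fall back to plain PyTorch attention when xFormers is not importable, but I have not verified that path.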

xcp2022beGood commented 1 year ago

The problem was with the torch version and xformers. I solved it by building a new conda env.

Hi friend: I did the same thing as you; dino_emb is a tensor of shape [1, 1024], right? How can I use this tensor to see the result of my image after it has been processed by the model?

abdelkareemkobo commented 1 year ago

@xcp2022beGood see this kaggle_demo
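
For completeness, here is a minimal sketch of the retrieval step itself, reusing dino and image_transforms from the first snippet. The gallery paths and the cosine-similarity ranking are illustrative assumptions, not the code from the linked kaggle demo: embed the query and every gallery image the same way, L2-normalize the [1, D] outputs (D = 768 for vitb14, 1024 for vitl14), and rank the gallery by dot product with the query.

import torch
import torch.nn.functional as F
from PIL import Image

# Hypothetical gallery: any list of image paths you want to search over.
gallery_paths = ["img_0001.jpg", "img_0002.jpg", "img_0003.jpg"]

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
dino = dino.to(device).eval()

@torch.no_grad()
def embed(path, model, transform):
    """Return an L2-normalized DINOv2 CLS embedding for one image."""
    img = transform(Image.open(path).convert("RGB")).unsqueeze(0).to(device)
    return F.normalize(model(img), dim=-1)  # [1, D]

gallery = torch.cat([embed(p, dino, image_transforms) for p in gallery_paths])  # [N, D]
query = embed('/kaggle/input/cat-and-dog/training_set/training_set/dogs/dog.1015.jpg',
              dino, image_transforms)                                           # [1, D]

# Cosine similarity is the dot product of normalized embeddings; higher = more similar.
scores = (query @ gallery.T).squeeze(0)  # [N]
for idx in scores.argsort(descending=True):
    print(gallery_paths[int(idx)], float(scores[idx]))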