jeonsworld / ViT-pytorch

PyTorch reimplementation of the Vision Transformer (An Image is Worth 16x16 Words: Transformers for Image Recognition at Scale)
MIT License
1.95k stars 374 forks source link

Hybrid ViT fails in the constructor for image size = 200 #15

Open pgagarinov opened 3 years ago

pgagarinov commented 3 years ago

vit = VisionTransformer(CONFIGS['R50-ViT-B_16'], zero_head=False, img_size=200)

This call raises a ZeroDivisionError ("float division by zero"):


ZeroDivisionError                         Traceback (most recent call last)
in
----> 1 vit = VisionTransformer(CONFIGS['R50-ViT-B_16'], zero_head=False, img_size=200)

ViT-pytorch/models/modeling.py in __init__(self, config, img_size, num_classes, zero_head, vis)
    267 self.classifier = config.classifier
    268
--> 269 self.transformer = Transformer(config, img_size, vis)
    270 self.head = Linear(config.hidden_size, num_classes)
    271

ViT-pytorch/models/modeling.py in __init__(self, config, img_size, vis)
    251 def __init__(self, config, img_size, vis):
    252 super(Transformer, self).__init__()
--> 253 self.embeddings = Embeddings(config, img_size=img_size)
    254 self.encoder = Encoder(config, vis)
    255

ViT-pytorch/models/modeling.py in __init__(self, config, img_size, in_channels)
    144 width_factor=config.resnet.width_factor)
    145 in_channels = self.hybrid_model.width * 16
--> 146 self.patch_embeddings = Conv2d(in_channels=in_channels,
    147 out_channels=config.hidden_size,
    148 kernel_size=patch_size,

~/.conda/envs/ml-devenv2/lib/python3.8/site-packages/torch/nn/modules/conv.py in __init__(self, in_channels, out_channels, kernel_size, stride, padding, dilation, groups, bias, padding_mode)
    408 padding = _pair(padding)
    409 dilation = _pair(dilation)
--> 410 super(Conv2d, self).__init__(
    411 in_channels, out_channels, kernel_size, stride, padding, dilation,
    412 False, _pair(0), groups, bias, padding_mode)

~/.conda/envs/ml-devenv2/lib/python3.8/site-packages/torch/nn/modules/conv.py in __init__(self, in_channels, out_channels, kernel_size, stride, padding, dilation, transposed, output_padding, groups, bias, padding_mode)
     81 else:
     82 self.register_parameter('bias', None)
---> 83 self.reset_parameters()
     84
     85 def reset_parameters(self) -> None:

~/.conda/envs/ml-devenv2/lib/python3.8/site-packages/torch/nn/modules/conv.py in reset_parameters(self)
     84
     85 def reset_parameters(self) -> None:
---> 86 init.kaiming_uniform_(self.weight, a=math.sqrt(5))
     87 if self.bias is not None:
     88 fan_in, _ = init._calculate_fan_in_and_fan_out(self.weight)

~/.conda/envs/ml-devenv2/lib/python3.8/site-packages/torch/nn/init.py in kaiming_uniform_(tensor, a, mode, nonlinearity)
    379 fan = _calculate_correct_fan(tensor, mode)
    380 gain = calculate_gain(nonlinearity, a)
--> 381 std = gain / math.sqrt(fan)
    382 bound = math.sqrt(3.0) * std # Calculate uniform bounds from standard deviation
    383 with torch.no_grad():

ZeroDivisionError: float division by zero