[ ] DINO/SWAV MLP
("The projection head consists of a 3-layer multi-layer perceptron (MLP) with hidden
dimension 2048 followed by ℓ2 normalization and a weight normalized fully connected
layer [61] with K dimensions, which is similar to the design from SwAV")
https://github.com/facebookresearch/dino
class DINOHead(nn.Module):
    """Projection head: an MLP, L2 normalisation, then a weight-normalised linear layer.

    The MLP maps ``in_dim`` features to ``bottleneck_dim``. Its output is
    L2-normalised along the last dimension and passed through a bias-free,
    weight-normalised linear layer that produces ``out_dim`` values.

    Args:
        in_dim: Size of the last dimension of the input features.
        out_dim: Size of the last dimension of the output.
        use_bn: If true, insert ``nn.BatchNorm1d`` after every hidden linear
            layer (never after the final bottleneck projection).
        norm_last_layer: If true, freeze the magnitude (``weight_g``) of the
            last layer at 1 so that only the weight direction is trained.
        nlayers: Number of linear layers in the MLP; values below 1 are
            treated as 1, in which case the MLP is a single linear layer.
        hidden_dim: Width of the hidden layers (unused when ``nlayers`` is 1).
        bottleneck_dim: Width of the MLP output fed to the last layer.

    Note:
        ``trunc_normal_`` is not defined in this class; it must be available
        in the enclosing module's namespace.
    """

    def __init__(self, in_dim: int, out_dim: int, use_bn: bool = False, norm_last_layer: bool = True,
                 nlayers: int = 3, hidden_dim: int = 2048, bottleneck_dim: int = 256) -> None:
        super().__init__()
        nlayers = max(nlayers, 1)
        if nlayers == 1:
            self.mlp = nn.Linear(in_dim, bottleneck_dim)
        else:
            # nlayers - 1 hidden blocks (Linear [-> BatchNorm1d] -> GELU); the
            # first one consumes in_dim, the rest consume hidden_dim.
            layers = []
            fan_in = in_dim
            for _ in range(nlayers - 1):
                layers.append(nn.Linear(fan_in, hidden_dim))
                if use_bn:
                    layers.append(nn.BatchNorm1d(hidden_dim))
                layers.append(nn.GELU())
                fan_in = hidden_dim
            # Final projection to the bottleneck has no norm or activation.
            layers.append(nn.Linear(hidden_dim, bottleneck_dim))
            self.mlp = nn.Sequential(*layers)

        # Applied before last_layer is created, so the custom initialisation
        # covers the MLP only and last_layer keeps nn.Linear's default init.
        self.apply(self._init_weights)

        # NOTE(review): newer PyTorch releases deprecate nn.utils.weight_norm in
        # favour of nn.utils.parametrizations.weight_norm. It is kept here
        # because the replacement renames the weight_g / weight_v parameters
        # accessed below - confirm before migrating.
        self.last_layer = nn.utils.weight_norm(nn.Linear(bottleneck_dim, out_dim, bias=False))
        self.last_layer.weight_g.data.fill_(1)
        if norm_last_layer:
            self.last_layer.weight_g.requires_grad = False

    def _init_weights(self, m):
        """Initialise ``nn.Linear`` modules: truncated-normal weights, zero bias.

        Modules of any other type are left untouched.
        """
        if not isinstance(m, nn.Linear):
            return
        trunc_normal_(m.weight, std=.02)
        if m.bias is not None:
            nn.init.constant_(m.bias, 0)

    def forward(self, x):
        """Project ``x`` through the MLP, L2-normalise it, and apply the last layer."""
        x = self.mlp(x)
        # Unit L2 norm along the feature (last) dimension.
        x = nn.functional.normalize(x, dim=-1, p=2)
        x = self.last_layer(x)
        return x
[ ] Benchmarks
[ ] SWAV
[ ] Implementation
[ ] Benchmarks
[ ] Documentation
[ ] core
[ ] introduction/overview
[ ] model descriptions
[ ] Tests
[x] Load pretrained model for individual component (only ViT for example)
[ ] DINO
[ ] Centering Layer
[ ] DINO/SWAV MLP ("The projection head consists of a 3-layer multi-layer perceptron (MLP) with hidden dimension 2048 followed by ℓ2 normalization and a weight normalized fully connected layer [61] with K dimensions, which is similar to the design from SwAV") https://github.com/facebookresearch/dino