This NN got a bit butchered: layer4 was ripped out, so it's probably completely wrong. The code above could be a better approach.
# We need custom ResNet blocks, so adapt ResNet50; the output es.shape: torch.Size([1, 512, 1, 1])
# N.B. EmoPortraits reduced this from 512 -> 128 dims - these are the feature maps / identity fingerprint of the image
import torch.nn as nn
import torch.nn.functional as F
from torchvision import models

FEATURE_SIZE_AVG_POOL = 1  # assumed here (may already be defined elsewhere); 1 yields the es.shape [1, 512, 1, 1] noted above

class CustomResNet50(nn.Module):
    def __init__(self, *args, **kwargs):
        super().__init__()
        resnet = models.resnet50(*args, **kwargs)
        self.conv1 = resnet.conv1
        self.bn1 = resnet.bn1
        # self.relu = resnet.relu
        self.maxpool = resnet.maxpool
        self.layer1 = resnet.layer1
        self.layer2 = resnet.layer2
        self.layer3 = resnet.layer3
        # The last residual block (layer4) is removed
        # self.layer4 = resnet.layer4
        # Adaptive average pooling to a fixed spatial size
        self.adaptive_avg_pool = nn.AdaptiveAvgPool2d(FEATURE_SIZE_AVG_POOL)
        # 1x1 convolution to reduce the 1024 channels from layer3 down to 512
        self.conv_reduce = nn.Conv2d(1024, 512, kernel_size=1)

    def forward(self, x):
        x = self.conv1(x)
        x = self.bn1(x)
        x = F.relu(x)
        x = self.maxpool(x)
        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        # layer4 is skipped (removed above)
        # x = self.layer4(x)
        # Pool to the fixed spatial size, then reduce channels with the 1x1 conv
        x = self.adaptive_avg_pool(x)
        x = self.conv_reduce(x)
        return x
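For a quick sanity check, here is a minimal sketch of a dummy forward pass. It assumes FEATURE_SIZE_AVG_POOL = 1 (as implied by the es.shape note above) and a hypothetical 256x256 input resolution:

import torch

encoder = CustomResNet50(weights=None)  # weights="IMAGENET1K_V2" would load pretrained layers instead
dummy = torch.randn(1, 3, 256, 256)     # hypothetical input size, not taken from the original notes
es = encoder(dummy)
print(es.shape)  # layer3 gives [1, 1024, 16, 16] -> avg pool -> 1x1 conv -> torch.Size([1, 512, 1, 1])

And if you want the EmoPortraits-style 512 -> 128 reduction mentioned in the comment above, one hedged option (a sketch, not their actual implementation) is to swap the 1x1 conv for a 128-channel one:

# Hypothetical variant mirroring the EmoPortraits note, not their exact architecture
class CustomResNet50_128(CustomResNet50):
    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.conv_reduce = nn.Conv2d(1024, 128, kernel_size=1)  # es becomes [1, 128, 1, 1]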
https://github.com/search?q=repo%3AAlessandroRuzzi%2FGazeNeRF%20resnet50&type=code
https://github.com/AlessandroRuzzi/GazeNeRF/blob/eb0a5f1f625bcb40809139628e4bcf31a9b30e55/gaze_estimation/gaze_estimator_resnet.py#L11