Closed deepskydetail closed 8 months ago
This is how I would do it:
library(reticulate)
library(tensorflow)
library(keras)
# Python reference implementation of ResNetBlock, quoted inside an R raw
# string (r"{ ... }") so that the surrounding script stays valid R. The string
# is only piped into invisible(), so its value is not printed.
r"{class ResNetBlock(Layer):
def __init__(self, out_channels, first_stride=1):
super().__init__()
first_padding = 'same'
if first_stride != 1:
first_padding = 'valid'
self.conv_sequence = Sequential([
Conv2D(out_channels, 3, first_stride, padding=first_padding),
BatchNormalization(),
ReLU(),
Conv2D(out_channels, 3, 1, padding='same'),
BatchNormalization(),
ReLU()
])
def call(self, inputs):
x = self.conv_sequence(inputs)
if x.shape == inputs.shape:
x = x + inputs # Skip connection
return x
}" |> invisible()
library(keras)
# Custom Keras layer: two (conv -> batch norm -> ReLU) stages, with the input
# added back onto the output whenever both tensors have the same shape.
ResNetBlock(keras$layers$Layer) %py_class% {

  # out_channels: number of filters used by both convolutions.
  # first_stride: stride of the first convolution; any value other than 1
  #   switches that convolution from "same" to "valid" padding.
  initialize <- function(out_channels, first_stride = 1) {
    super$initialize()

    # Kept on the instance so get_config() can report them.
    self$out_channels <- out_channels
    self$first_stride <- first_stride

    padding_mode <- if (self$first_stride != 1) "valid" else "same"

    self$conv_sequence <- keras_model_sequential() %>%
      layer_conv_2d(
        filters = self$out_channels,
        kernel_size = c(4, 4),
        strides = self$first_stride,
        padding = padding_mode
      ) %>%
      layer_batch_normalization() %>%
      layer_activation(activation = "relu") %>%
      layer_conv_2d(
        filters = self$out_channels,
        kernel_size = c(4, 4),
        padding = "same"
      ) %>%
      layer_batch_normalization() %>%
      layer_activation(activation = "relu")
  }

  # Forward pass. Unlike the other methods, `self` is declared explicitly here.
  call <- function(self, inputs) {
    convolved <- self$conv_sequence(inputs)

    # Skip connection: only applied when the shapes line up.
    if (convolved$shape == inputs$shape) {
      convolved + inputs
    } else {
      convolved
    }
  }

  # Constructor arguments, returned as a named list.
  get_config <- function() {
    list(
      out_channels = self$out_channels,
      first_stride = self$first_stride
    )
  }
}
# Wrap the ResNetBlock class with keras::create_layer_wrapper() to obtain a
# `layer_*`-style function; this is the form used with %>% when the ResNet
# model's resnet_chains are assembled.
layer_resnet_block <- keras::create_layer_wrapper(ResNetBlock)
# Python reference implementation of the ResNet model, quoted inside an R raw
# string (r"{ ... }") so that the surrounding script stays valid R. The string
# is only piped into invisible(), so its value is not printed.
r"{class ResNet(Model):
def __init__(self):
super(ResNet, self).__init__()
self.conv_1 = Sequential([Conv2D(64, 7, 2),
ReLU(),
MaxPooling2D(3, 2)
])
self.resnet_chains = Sequential([ResNetBlock(64), ResNetBlock(64)] +
[ResNetBlock(128, 2), ResNetBlock(128)] +
[ResNetBlock(256, 2), ResNetBlock(256)] +
[ResNetBlock(512, 2), ResNetBlock(512)])
self.out = Sequential([GlobalAveragePooling2D(),
Dense(1, activation='sigmoid')])
def call(self, x):
x = self.conv_1(x)
x = self.resnet_chains(x)
x = self.out(x)
return x
model = ResNet()
print(model)
}" |> invisible()
# Subclassed Keras model: a convolutional stem, a chain of eight ResNetBlock
# layers, and a pooled single-unit sigmoid head.
ResNet(keras$Model) %py_class% {

  initialize <- function() {
    super$initialize()

    # Stem: 7x7 convolution with stride 2, ReLU, then 3x3 max pooling with
    # stride 2.
    self$conv_1 <- keras_model_sequential() %>%
      layer_conv_2d(filters = 64, kernel_size = 7, strides = 2) %>%
      layer_activation_relu() %>%
      layer_max_pooling_2d(pool_size = 3, strides = 2)

    # Four pairs of blocks; from the second pair on, the first block of each
    # pair is created with first_stride = 2.
    self$resnet_chains <- keras_model_sequential() %>%
      layer_resnet_block(64) %>%
      layer_resnet_block(64) %>%
      layer_resnet_block(128, 2) %>%
      layer_resnet_block(128) %>%
      layer_resnet_block(256, 2) %>%
      layer_resnet_block(256) %>%
      layer_resnet_block(512, 2) %>%
      layer_resnet_block(512)

    # Head: global average pooling followed by one sigmoid unit.
    self$out <- keras_model_sequential() %>%
      layer_global_average_pooling_2d() %>%
      layer_dense(units = 1, activation = "sigmoid")
  }

  # Forward pass: stem -> residual chain -> head.
  call <- function(x) {
    stem_out <- self$conv_1(x)
    chain_out <- self$resnet_chains(stem_out)
    self$out(chain_out)
  }
}
model <- ResNet()

# Run one forward pass on a dummy all-ones tensor of shape (1, 224, 224, 3)
# so the model gets built before it is printed.
x <- tf$ones(c(1L, 224L, 224L, 3L))
invisible(model(x))

model
#> Model: "res_net"
#> ┏━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━┳━━━━━━┓
#> ┃ ┃ ┃ Param ┃ ┃
#> ┃ Layer (type) ┃ Output Shape ┃ # ┃ Tra… ┃
#> ┡━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━╇━━━━━━┩
#> │ sequential (Sequential) │ ? │ 9,472 │ Y │
#> ├──────────────────────────┼────────────────────────┼────────┼──────┤
#> │ sequential_9 │ ? │ 19,54… │ Y │
#> │ (Sequential) │ │ │ │
#> ├──────────────────────────┼────────────────────────┼────────┼──────┤
#> │ sequential_10 │ ? │ 513 │ Y │
#> │ (Sequential) │ │ │ │
#> └──────────────────────────┴────────────────────────┴────────┴──────┘
#> Total params: 19,558,913 (74.61 MB)
#> Trainable params: 19,551,233 (74.58 MB)
#> Non-trainable params: 7,680 (30.00 KB)
Created on 2023-11-01 with reprex v2.0.2
Thank you so much! This is super helpful, and nicely laid out :)
I'm guessing that the main problem was that I didn't use keras::create_layer_wrapper() when using pipes. It says so in the instructions, but it sort of went over my head that you need it when using %py_class%.
Thanks again! 👍
I am trying to create a custom ResNet layer, but keep getting the following error: Error: RuntimeError: unused argument ()
I am basing it on Python code I found here: https://colab.research.google.com/drive/1a1GsJDHwgAl4EAcNd-_T3mZC61nmmKbg
The Python code is this:
I am trying to translate it to R based on the instructions on the TensorFlow for R website. This is what I have:
I have tried it in other ways too, including putting the conv_sequence lines in the build argument. Any help would be appreciated!