rstudio / tensorflow.rstudio.com

https://tensorflow.rstudio.com

Creating a Custom ResNet layer #68

Closed: deepskydetail closed this issue 8 months ago

deepskydetail commented 10 months ago

I am trying to create a custom ResNet layer, but I keep getting the following error:

Error: RuntimeError: unused argument ()

I am basing it on Python code I found here: https://colab.research.google.com/drive/1a1GsJDHwgAl4EAcNd-_T3mZC61nmmKbg

The Python code is this:

from tensorflow.keras import Sequential
from tensorflow.keras.layers import Layer, Conv2D, BatchNormalization, ReLU

class ResNetBlock(Layer):

  def __init__(self, out_channels, first_stride=1):
    super().__init__()

    first_padding = 'same'
    if first_stride != 1:
      first_padding = 'valid'

    self.conv_sequence = Sequential([
        Conv2D(out_channels, 3, first_stride, padding=first_padding),
        BatchNormalization(),
        ReLU(),

        Conv2D(out_channels, 3, 1, padding='same'),
        BatchNormalization(),
        ReLU()
    ])

  def call(self, inputs):
    x = self.conv_sequence(inputs)

    if x.shape == inputs.shape:
      x = x + inputs # Skip connection

    return x

I am trying to translate it to R based on the instructions on the TensorFlow for R website. This is what I have:

ResNetBlock(keras$layers$Layer) %py_class% {
  initialize <- function(out_channels, first_stride) {
    super$initialize()
    self$out_channels <- out_channels
    self$first_stride <- first_stride

    first_padding = 'same'
    if(self$first_stride != 1){
      first_padding = 'valid'
    }

    self$conv_sequence <- keras_model_sequential()
    self$conv_sequence <- layer_conv_2d(self$conv_sequence, 
                                        filters = self$out_channels, kernel_size = c(4, 4),
                                        strides = self$first_stride, padding = first_padding)
    self$conv_sequence <- layer_batch_normalization(self$conv_sequence)
    self$conv_sequence <- layer_activation(self$conv_sequence, activation = 'relu')
    self$conv_sequence <- layer_conv_2d(self$conv_sequence, filters = self$out_channels, 
                                        kernel_size = c(4, 4), padding = 'same')
    self$conv_sequence <- layer_batch_normalization(self$conv_sequence)
    self$conv_sequence <- layer_activation(self$conv_sequence, activation = 'relu')
  }

  call <- function(self, inputs) {
    x <- self$conv_sequence(inputs)

    if(x$shape == inputs$shape){
      x <- x + inputs
    }

    return(x)

  }

  get_config <- function() {
    list(out_channels = self$out_channels,
         first_stride = self$first_stride)
  }
} 

I have tried it in other ways too, including moving the conv_sequence lines into the build() method (sketched below). Any help would be appreciated!
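
For reference, the build() variant I mean looks roughly like this (a sketch from memory, so it may not match exactly what I ran):

ResNetBlock(keras$layers$Layer) %py_class% {
  initialize <- function(out_channels, first_stride = 1) {
    super$initialize()
    self$out_channels <- out_channels
    self$first_stride <- first_stride
  }

  # sub-layers created lazily in build() instead of initialize()
  build <- function(input_shape) {
    first_padding <- if (self$first_stride != 1) 'valid' else 'same'

    self$conv_sequence <- keras_model_sequential() %>%
      layer_conv_2d(filters = self$out_channels, kernel_size = c(4, 4),
                    strides = self$first_stride, padding = first_padding) %>%
      layer_batch_normalization() %>%
      layer_activation(activation = 'relu') %>%
      layer_conv_2d(filters = self$out_channels, kernel_size = c(4, 4),
                    padding = 'same') %>%
      layer_batch_normalization() %>%
      layer_activation(activation = 'relu')
  }

  call <- function(self, inputs) {
    x <- self$conv_sequence(inputs)
    if (x$shape == inputs$shape) {
      x <- x + inputs
    }
    x
  }
}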

t-kalinowski commented 10 months ago

This is how I would do it:

library(reticulate)
library(tensorflow)
library(keras)

r"{class ResNetBlock(Layer):

  def __init__(self, out_channels, first_stride=1):
    super().__init__()

    first_padding = 'same'
    if first_stride != 1:
      first_padding = 'valid'

    self.conv_sequence = Sequential([
        Conv2D(out_channels, 3, first_stride, padding=first_padding),
        BatchNormalization(),
        ReLU(),

        Conv2D(out_channels, 3, 1, padding='same'),
        BatchNormalization(),
        ReLU()
    ])

  def call(self, inputs):
    x = self.conv_sequence(inputs)

    if x.shape == inputs.shape:
      x = x + inputs # Skip connection

    return x
}" |> invisible()

library(keras)

ResNetBlock(keras$layers$Layer) %py_class% {

  initialize <- function(out_channels, first_stride = 1) {
    super$initialize()
    self$out_channels <- out_channels
    self$first_stride <- first_stride

    first_padding <- 'same'
    if(self$first_stride != 1) 
      first_padding <- 'valid'

    self$conv_sequence <- keras_model_sequential() %>%
      layer_conv_2d(filters = self$out_channels, kernel_size = c(4, 4),
                    strides = self$first_stride, padding = first_padding) %>%
      layer_batch_normalization() %>%
      layer_activation(activation = 'relu') %>%
      layer_conv_2d(filters = self$out_channels, kernel_size = c(4, 4), 
                    padding = 'same') %>%
      layer_batch_normalization() %>%
      layer_activation(activation = 'relu') 
  }

  call <- function(self, inputs) {
    x <- self$conv_sequence(inputs)

    if(x$shape == inputs$shape){
      x <- x + inputs
    }
    x

  }

  get_config <- function() {
    list(out_channels = self$out_channels,
         first_stride = self$first_stride)
  }
}

layer_resnet_block <- keras::create_layer_wrapper(ResNetBlock)
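# create_layer_wrapper() turns the %py_class% class into a pipe-friendly
# layer_*() function, so it composes with %>% like the built-in layers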

r"{class ResNet(Model):
  def __init__(self):
    super(ResNet, self).__init__()

    self.conv_1 = Sequential([Conv2D(64, 7, 2),
                              ReLU(),
                              MaxPooling2D(3, 2)
    ])

    self.resnet_chains = Sequential([ResNetBlock(64), ResNetBlock(64)] +
                                    [ResNetBlock(128, 2), ResNetBlock(128)] +
                                    [ResNetBlock(256, 2), ResNetBlock(256)] +
                                    [ResNetBlock(512, 2), ResNetBlock(512)])

    self.out = Sequential([GlobalAveragePooling2D(),
                           Dense(1, activation='sigmoid')])

  def call(self, x):
    x = self.conv_1(x)
    x = self.resnet_chains(x)
    x = self.out(x)
    return x

model = ResNet()
print(model)
}" |> invisible()

ResNet(keras$Model) %py_class% {

  initialize <- function() {
    super$initialize()

    self$conv_1 <- keras_model_sequential() %>% 
      layer_conv_2d(64, 7, 2) %>% 
      layer_activation_relu() %>% 
      layer_max_pooling_2d(3, 2)

    self$resnet_chains <- keras_model_sequential() %>% 
      layer_resnet_block(64) %>% 
      layer_resnet_block(64) %>% 
      layer_resnet_block(128, 2) %>% 
      layer_resnet_block(128) %>% 
      layer_resnet_block(256, 2) %>% 
      layer_resnet_block(256) %>% 
      layer_resnet_block(512, 2) %>% 
      layer_resnet_block(512) 

    self$out <- keras_model_sequential() %>% 
      layer_global_average_pooling_2d() %>% 
      layer_dense(1, activation = "sigmoid")
  }

  call <- function(x) {
    x %>% 
      self$conv_1() %>% 
      self$resnet_chains() %>% 
      self$out()
  }

}

model <- ResNet()

# call it once to build the model
x <- tf$ones(as.integer(c(1, 224, 224, 3)))
invisible(model(x))

model
#> Model: "res_net"
#> ┏━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━┳━━━━━━┓
#> ┃                          ┃                        ┃  Param ┃      ┃
#> ┃ Layer (type)             ┃ Output Shape           ┃      # ┃ Tra… ┃
#> ┡━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━╇━━━━━━┩
#> │ sequential (Sequential)  │ ?                      │  9,472 │  Y   │
#> ├──────────────────────────┼────────────────────────┼────────┼──────┤
#> │ sequential_9             │ ?                      │ 19,54… │  Y   │
#> │ (Sequential)             │                        │        │      │
#> ├──────────────────────────┼────────────────────────┼────────┼──────┤
#> │ sequential_10            │ ?                      │    513 │  Y   │
#> │ (Sequential)             │                        │        │      │
#> └──────────────────────────┴────────────────────────┴────────┴──────┘
#>  Total params: 19,558,913 (74.61 MB)
#>  Trainable params: 19,551,233 (74.58 MB)
#>  Non-trainable params: 7,680 (30.00 KB)

Created on 2023-11-01 with reprex v2.0.2
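
In case it's useful, once the model is built you can compile and fit it as usual. A minimal sketch with dummy data (the binary labels and the 224x224x3 input size are just assumptions for illustration):

model %>% compile(
  optimizer = "adam",
  loss = "binary_crossentropy",  # matches the single sigmoid output
  metrics = "accuracy"
)

# dummy tensors just to exercise the shapes; substitute real images/labels
x_train <- tf$random$uniform(as.integer(c(8, 224, 224, 3)))
y_train <- tf$round(tf$random$uniform(as.integer(c(8, 1))))

model %>% fit(x_train, y_train, epochs = 1, batch_size = 2)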

deepskydetail commented 10 months ago

Thank you so much! This is super helpful, and nicely laid out :)

I'm guessing the main problem was that I didn't use keras::create_layer_wrapper() when using pipes. It's mentioned in the instructions, but it went over my head that you need it when using %py_class%.
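
For anyone else who lands here, my understanding of the missing step is roughly this (a sketch, not taken verbatim from the docs):

# ResNetBlock defined with %py_class% as above

# calling the class directly creates a single layer instance:
block <- ResNetBlock(out_channels = 64L)

# but to compose it with %>% like the built-in layer_* functions,
# it has to be wrapped first (the step I had skipped):
layer_resnet_block <- keras::create_layer_wrapper(ResNetBlock)

model <- keras_model_sequential() %>%
  layer_resnet_block(64) %>%
  layer_resnet_block(64)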

Thanks again! 👍