I set up a model builder function for tuning the following hyperparameters:
batch normalization: yes vs. no
dropout rate: I would like to try [0.1, 0.3, 0.5]
optimizer: ADAM vs. RMSprop
The weird thing is that when I print the hyperparams list with tuner.search_space_summary() I get values which are different from the ones I would expect. For instance, pooling is tuned even though I don't want; similarly, the values of dropout are from 0.0 to 0.5 with step 0.1 while I set a step of 0.2; also, the learning rate is tuned even though I don't want...etc...
Any help would be appreciated :)
Here is the code to reproduce my issue (python 3.5 and tensorflow 2.3.0):
import tensorflow as tf
import kerastuner as kt
def dice_coeff(y_true, y_pred, smooth=1.):
"""This function computes the soft dice coefficient between the predicted mask and the ground truth mask
Args:
y_true (tf.Tensor): ground truth mask
y_pred (tf.Tensor): predicted mask
smooth (float): value added for numerical stability (avoid division by 0)
Returns:
dice_coefficient (tf.Tensor): dice coefficient
"""
# flatten vectors and cast to float32
y_true_f = tf.cast(tf.reshape(y_true, [-1]), tf.float32)
y_pred_f = tf.cast(tf.reshape(y_pred, [-1]), tf.float32)
intersection = tf.reduce_sum(y_true_f * y_pred_f)
union = tf.reduce_sum(tf.square(y_pred_f)) + tf.reduce_sum(tf.square(y_true_f))
dice_coefficient = (2. * intersection + smooth) / (union + smooth)
return dice_coefficient
def dice_loss(y_true, y_pred):
"""This function computes the dice loss as 1-dsc_coeff
Args:
y_true (tf.Tensor): ground truth mask
y_pred (tf.Tensor): predicted mask
Returns:
dsc_loss (tf.Tensor): dice loss
"""
dsc_loss = 1 - dice_coeff(y_true, y_pred)
return dsc_loss
def bce_dice_loss(loss_lambda):
"""This function combines the binary cross-entropy loss with the Dice loss into one unique hybrid loss
Args:
loss_lambda (float): value to balance/weight the two terms of the loss
Returns:
loss (function)
"""
def loss(y_true, y_pred):
"""This function computes the actual hybrid loss
Args:
y_true (tf.Tensor): label volume
y_pred (tf.Tensor): prediction volume
Returns:
hybrid_loss (tf.Tensor): sum of the two losses
"""
bce = tf.keras.losses.binary_crossentropy(y_true, y_pred) # compute binary cross entropy
bce_loss = tf.reduce_mean(bce) # reduce the result to get the final loss
hybrid_loss = (1 - loss_lambda) * bce_loss + loss_lambda * dice_loss(y_true, y_pred) # sum the two losses
return hybrid_loss
return loss
def build_model(hp):
train_patch_side = 32
lr = 1e-04
lambda_loss = 0.5
inputs = tf.keras.Input(shape=(train_patch_side, train_patch_side, train_patch_side, 1), name='TOF_patch')
hp_batch_norm = hp.Choice('batch_norm', values=[True, False])
# DOWNWARD PATH
conv1 = tf.keras.layers.Conv3D(16, 3, activation='relu', padding='same', data_format="channels_last")(inputs)
if hp_batch_norm:
conv1 = tf.keras.layers.BatchNormalization(axis=-1)(conv1)
conv1 = tf.keras.layers.Conv3D(16, 3, activation='relu', padding='same')(conv1)
if hp_batch_norm:
conv1 = tf.keras.layers.BatchNormalization(axis=-1)(conv1)
drop1 = tf.keras.layers.Dropout(hp.Float('dropout', min_value=0.1, max_value=0.5, step=0.2, default=0.5))(conv1)
pool1 = tf.keras.layers.MaxPooling3D(pool_size=(2, 2, 2))(drop1)
conv2 = tf.keras.layers.Conv3D(32, 3, activation='relu', padding='same')(pool1)
if hp_batch_norm:
conv2 = tf.keras.layers.BatchNormalization(axis=-1)(conv2)
conv2 = tf.keras.layers.Conv3D(32, 3, activation='relu', padding='same')(conv2)
if hp_batch_norm:
conv2 = tf.keras.layers.BatchNormalization(axis=-1)(conv2)
drop2 = tf.keras.layers.Dropout(hp.Float('dropout', min_value=0.1, max_value=0.5, step=0.2, default=0.5))(conv2)
pool2 = tf.keras.layers.MaxPooling3D(pool_size=(2, 2, 2))(drop2)
conv3 = tf.keras.layers.Conv3D(64, 3, activation='relu', padding='same')(pool2)
if hp_batch_norm:
conv3 = tf.keras.layers.BatchNormalization(axis=-1)(conv3)
conv3 = tf.keras.layers.Conv3D(64, 3, activation='relu', padding='same')(conv3)
if hp_batch_norm:
conv3 = tf.keras.layers.BatchNormalization(axis=-1)(conv3)
drop3 = tf.keras.layers.Dropout(hp.Float('dropout', min_value=0.1, max_value=0.5, step=0.2, default=0.5))(conv3)
pool3 = tf.keras.layers.MaxPooling3D(pool_size=(2, 2, 2))(drop3)
conv4 = tf.keras.layers.Conv3D(128, 3, activation='relu', padding='same')(pool3)
if hp_batch_norm:
conv4 = tf.keras.layers.BatchNormalization(axis=-1)(conv4)
conv4 = tf.keras.layers.Conv3D(128, 3, activation='relu', padding='same')(conv4)
if hp_batch_norm:
conv4 = tf.keras.layers.BatchNormalization(axis=-1)(conv4)
drop4 = tf.keras.layers.Dropout(hp.Float('dropout', min_value=0.1, max_value=0.5, step=0.2, default=0.5))(conv4)
# UPWARD PATH
up5 = tf.keras.layers.Conv3D(64, 2, activation='relu', padding='same')(tf.keras.layers.UpSampling3D(size=(2, 2, 2))(drop4))
merge5 = tf.keras.layers.Concatenate(axis=-1)([drop3, up5])
conv5 = tf.keras.layers.Conv3D(64, 3, activation='relu', padding='same')(merge5)
if hp_batch_norm:
conv5 = tf.keras.layers.BatchNormalization(axis=-1)(conv5)
conv5 = tf.keras.layers.Conv3D(64, 3, activation='relu', padding='same')(conv5)
if hp_batch_norm:
conv5 = tf.keras.layers.BatchNormalization(axis=-1)(conv5)
up6 = tf.keras.layers.Conv3D(32, 2, activation='relu', padding='same')(tf.keras.layers.UpSampling3D(size=(2, 2, 2))(conv5))
merge6 = tf.keras.layers.Concatenate(axis=-1)([conv2, up6])
conv6 = tf.keras.layers.Conv3D(32, 3, activation='relu', padding='same')(merge6)
if hp_batch_norm:
conv6 = tf.keras.layers.BatchNormalization(axis=-1)(conv6)
conv6 = tf.keras.layers.Conv3D(32, 3, activation='relu', padding='same')(conv6)
if hp_batch_norm:
conv6 = tf.keras.layers.BatchNormalization(axis=-1)(conv6)
up7 = tf.keras.layers.Conv3D(16, 2, activation='relu', padding='same')(tf.keras.layers.UpSampling3D(size=(2, 2, 2))(conv6))
merge7 = tf.keras.layers.Concatenate(axis=-1)([conv1, up7])
conv7 = tf.keras.layers.Conv3D(16, 3, activation='relu', padding='same')(merge7)
if hp_batch_norm:
conv7 = tf.keras.layers.BatchNormalization(axis=-1)(conv7)
conv7 = tf.keras.layers.Conv3D(16, 3, activation='relu', padding='same')(conv7)
if hp_batch_norm:
conv7 = tf.keras.layers.BatchNormalization(axis=-1)(conv7)
outputs = tf.keras.layers.Conv3D(1, 1, activation='sigmoid')(conv7)
model = tf.keras.Model(inputs=inputs, outputs=outputs)
hp_optimizer = hp.Choice('optimizer', [True, False])
if hp_optimizer:
optimizer = tf.keras.optimizers.Adam(learning_rate=lr)
else:
optimizer = tf.keras.optimizers.RMSprop(learning_rate=lr)
model.compile(optimizer=optimizer, loss=bce_dice_loss(lambda_loss), metrics=[dice_coeff, "binary_crossentropy"])
return model
def main():
# create TUNER
objective_object = kt.Objective("val_loss", direction="min")
tuner = kt.RandomSearch(build_model,
objective=objective_object,
max_trials=4)
tuner.search_space_summary()
if __name__ == '__main__':
main()
I set up a model builder function for tuning the following hyperparameters:
The weird thing is that when I print the hyperparams list with
tuner.search_space_summary()
I get values which are different from the ones I would expect. For instance,pooling
is tuned even though I don't want; similarly, the values of dropout are from 0.0 to 0.5 with step 0.1 while I set a step of 0.2; also, the learning rate is tuned even though I don't want...etc...Any help would be appreciated :)
Here is the code to reproduce my issue (python 3.5 and tensorflow 2.3.0):