KerasTuner best practices

This tutorial demonstrates hyperparameter tuning on the MNIST dataset with kerastuneR.


library(keras)
library(tensorflow)
library(kerastuneR)

# Enable memory growth so TensorFlow allocates GPU memory on demand
# instead of claiming it all up front
if(tensorflow::tf_gpu_configured()) {
  physical_devices = tf$config$list_physical_devices('GPU')
  tf$config$experimental$set_memory_growth(physical_devices[[1]], TRUE)
}


# The data, shuffled and split between train and test sets
mnist <- dataset_mnist()
x_train <- mnist$train$x
y_train <- mnist$train$y
x_test <- mnist$test$x
y_test <- mnist$test$y

# Add the channel dimension expected by the convolutional model below
x_train <- array_reshape(x_train, c(nrow(x_train), 28, 28, 1))
x_test <- array_reshape(x_test, c(nrow(x_test), 28, 28, 1))

augment_images = function(x, hp) {
  use_rotation = hp$Boolean('use_rotation')
  if(use_rotation) {
    x = tf$keras$layers$experimental$preprocessing$RandomRotation(
      hp$Float('rotation_factor', min_value=0.05, max_value=0.2)
    )(x)
  }
  use_zoom = hp$Boolean('use_zoom')
  if(use_zoom) {
    x = tf$keras$layers$experimental$preprocessing$RandomZoom(
      hp$Float('zoom_factor', min_value=0.05, max_value=0.2)
    )(x)
  }
  x
}
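
Note that rotation_factor and zoom_factor are conditional hyperparameters: keras-tuner only registers them on trials where the corresponding Boolean is TRUE, which keeps the search space small when a branch is inactive. A quick smoke test of the function, assuming kerastuneR exports the HyperParameters() constructor (every hp call on a fresh container returns its default value, FALSE for Booleans):

# Sketch: run the augmentation branch on a dummy batch with default hyperparameters
hp = HyperParameters()
dummy_batch = tf$zeros(c(1L, 64L, 64L, 1L))
augment_images(dummy_batch, hp)  # defaults disable both branches, so the input passes through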

make_model = function(hp) {
  inputs = layer_input(shape=c(28, 28, 1))
  x = tf$keras$layers$experimental$preprocessing$Rescaling(1. / 255)(inputs)
  x = tf$keras$layers$experimental$preprocessing$Resizing(64L, 64L)(x)
  x = augment_images(x, hp)
  num_block = hp$Int('num_block', min_value=2, max_value=5, step=1)
  num_filters = hp$Int('num_filters', min_value=32, max_value=128, step=32)
  for (i in 1:num_block) {
    x = x %>% layer_conv_2d(
      num_filters,
      kernel_size=3,
      activation='relu',
      padding='same'
    ) %>% 
      layer_conv_2d(
        num_filters,
        kernel_size=3,
        activation='relu',
        padding='same'
      ) %>% layer_max_pooling_2d(2)
  }
  reduction_type = hp$Choice('reduction_type', c('flatten', 'avg'))
  
  if(reduction_type == 'flatten') {
    x = x %>% layer_flatten()
  } else {
    x = x %>% layer_global_average_pooling_2d()
  }
  
  x = x %>% layer_dense(
    units=hp$Int('num_dense_units', min_value=32, max_value=512, step=32),
    activation='relu'
  ) %>% layer_dropout(
    hp$Float('dense_dropout', min_value = 0., max_value = 0.7)
  )

  outputs = x %>% layer_dense(10)
  model = keras_model(inputs, outputs)
  learning_rate = hp$Float('learning_rate', min_value = 3e-4, max_value = 3e-3)
  # Pass the tuned learning rate to the optimizer rather than a fixed value
  optimizer = tf$keras$optimizers$Adam(learning_rate = learning_rate)
  model %>% compile(loss = tf$keras$losses$SparseCategoricalCrossentropy(from_logits = TRUE),
                optimizer = optimizer,
                metrics = tf$keras$metrics$SparseCategoricalAccuracy(name='acc'))
  model %>% summary()
  return(model)
}
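
Before handing make_model to a tuner, it is worth confirming that it builds at all. A minimal sketch under the same HyperParameters() assumption as above; with a fresh container every hp call falls back to its default (the minimum of a range, the first Choice entry), so exactly one concrete model is built. Note also that num_filters is sampled once and shared by every block; to tune blocks independently you could register per-block names such as hp$Int(paste0('filters_', i), ...).

# Sketch: build one model from the default hyperparameter values
hp = HyperParameters()
test_model = make_model(hp)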


tuner = RandomSearch(
  make_model,
  objective='val_acc',
  max_trials=2,
  overwrite=TRUE)
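
Before searching, it helps to review the assembled search space. The underlying keras-tuner object exposes search_space_summary(), reached here through reticulate (recent kerastuneR versions also wrap it as search_summary()):

# Print every hyperparameter with its type, range, and default
tuner$search_space_summary()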


early_stopping = callback_early_stopping(monitor = 'val_acc', mode = 'max',
                                         patience = 3, baseline = 0.9)
tuner %>% fit_tuner(x_train, y_train, validation_split = 0.2,
                    callbacks = list(early_stopping), verbose = 1, epochs = 2)
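
Once the search finishes, the trials can be ranked by the objective via the underlying results_summary() method (also available through kerastuneR's results_summary() wrapper in recent versions):

# Show the completed trials and their hyperparameter values, sorted by val_acc
tuner$results_summary()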

Extract the best model and retrain it:

best_model = (tuner %>% get_best_models(1))[[1]]
history = best_model %>% fit(x_train, y_train, validation_split = 0.2, epochs = 2)
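
get_best_models() returns the model with whatever weights it had at the end of its short search run. An alternative is to fetch the winning hyperparameter set and rebuild from scratch; get_best_hyperparameters() is the underlying keras-tuner method, again reached through reticulate:

# Rebuild a fresh model from the best trial's hyperparameters and retrain it
best_hp = tuner$get_best_hyperparameters(num_trials = 1L)[[1]]
fresh_model = make_model(best_hp)
history = fresh_model %>% fit(x_train, y_train, validation_split = 0.2, epochs = 2)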