--- title: "Bayesian Optimisation" output: rmarkdown::html_vignette vignette: > %\VignetteIndexEntry{Bayesian Optimisation} %\VignetteEngine{knitr::rmarkdown} %\VignetteEncoding{UTF-8} --- ```{r setup, include=FALSE} knitr::opts_chunk$set(echo = TRUE, eval = FALSE) ``` ## Intro The [fastai](https://github.com/fastai/fastai) library simplifies training fast and accurate neural nets using modern best practices. See the fastai website to get started. The library is based on research into deep learning best practices undertaken at ```fast.ai```, and includes "out of the box" support for ```vision```, ```text```, ```tabular```, and ```collab``` (collaborative filtering) models. ## Bayesian Optimisation The dataset can be downloaded from [Kaggle](https://www.kaggle.com/c/santander-customer-transaction-prediction): ```{r} library(rBayesianOptimization) library(magrittr) library(fastai) df = data.table::fread('train.csv') df$ID_code <- NULL df$target <- as.character(df$target) procs = list(FillMissing(),Categorify(),Normalize()) pct_80 = round(nrow(df) * .8) dep_var = 'target' cont_names = setdiff(names(df), dep_var) dls = TabularDataTable(df, procs, NULL, cont_names, y_names = dep_var, splits = list(c(1:pct_80),c(c(pct_80+1):nrow(df)) )) %>% dataloaders(bs = 100) fastai_fit = function(layer_1, layer_2, layer_3, lr, wd, emb_p) { model <- dls %>% tabular_learner(layers = c(layer_1, layer_2, layer_3), wd = wd, config = tabular_config(embed_p = emb_p, use_bn = TRUE), metrics=list(RocAucBinary(),accuracy()), cbs = list(EarlyStoppingCallback(monitor='valid_loss', patience = 2)) ) result_ <- model %>% fit_one_cycle(10,lr) score_ <- list(Score = unlist(tail(result_$roc_auc_score,1)), Pred = 0) rm(model) return(score_) } search_bound_fastai <- list(layer_1 = c(20,200), layer_2 = c(20,200), layer_3 = c(20,200), lr = c(0, 0.1), wd = c(0, 0.1), emb_p = c(0,1) ) set.seed(123) search_grid_fastai <- data.frame(layer_1 = runif(30, 20, 200), layer_2 = runif(30, 20, 200), layer_3 = runif(30, 20, 200), lr = runif(30, 0, 0.1), wd = runif(30, 0, 0.1), emb_p = runif(30, 0, 1) ) head(search_grid_fastai) set.seed(123) bayes_fastai <- BayesianOptimization(FUN = fastai_fit, bounds = search_bound_fastai, init_points = 2, init_grid_dt = search_grid_fastai, n_iter = 5, acq = "ucb") bayes_fastai$Best_Par ```