So far I have been using random search for hyperparameter tuning. Now I want to use Bayesian optimization for a more sophisticated search. I am having trouble implementing the code in R. Here is what I have so far. As you can see, I also use cross-validation: `fold_idx` splits the train set into 10 folds, and once all folds have been processed, the function calculates and returns the average AUC score across all folds. (The problem is only in the Bayesian optimization part; the rest of the code works well.)
library(rBayesianOptimization)
trainModel = function(data = NULL,
trainIndex = NULL,
iteration_num = NULL,
add.genes = TRUE,
case = c(),
use.scale = TRUE,
using.Gene = FALSE,
algo = c()
) {
train_set <- data[trainIndex, ]
test_set <- data[-trainIndex, ]
if (case != 'Response'){
train_set = train_set[!is.na(train_set$outcome),]
test_set = test_set[!is.na(test_set$outcome),]
}
train_set$outcome = as.factor(train_set$outcome)
test_set$outcome = as.factor(test_set$outcome)
outcome_idx = grep("outcome", colnames(train_set))
train_x = data.matrix(train_set[, -outcome_idx])
train_y = train_set[, outcome_idx]
train_y = as.factor(train_y)
test_x = data.matrix(test_set[, -outcome_idx])
test_y = test_set[, outcome_idx]
test_y = as.factor(test_y)
train_x_filtered = train_x
test_x_filtered = test_x
k = 10
folds = createFolds(train_y, k = k, list = TRUE, returnTrain = FALSE)
bounds = list(
max_depth = c(3L, 12L),
num_leaves = c(3L, 65L),
min_data_in_leaf = c(3L, 50L),
feature_fraction = c(0.1, 0.9),
bagging_fraction = c(0.1, 0.9),
bagging_freq = c(0L, 10L),
lambda_l1 = c(1L, 25L),
lambda_l2 = c(1L, 40L),
learning_rate = c(0.005, 0.1),
min_split_gain = c(0.5, 20),
nrounds = c(50L, 1400L))
auc_score_lightgbm_bayes = function(max_depth, num_leaves, min_data_in_leaf, feature_fraction, bagging_fraction, bagging_freq, lambda_l1, lambda_l2, learning_rate, min_split_gain, nrounds) {
max_depth = round(max_depth)
num_leaves = round(num_leaves)
min_data_in_leaf = round(min_data_in_leaf)
bagging_freq = round(bagging_freq)
nrounds = round(nrounds)
params = list(
max_depth = max_depth,
num_leaves = num_leaves,
min_data_in_leaf = min_data_in_leaf,
feature_fraction = feature_fraction,
bagging_fraction = bagging_fraction,
bagging_freq = bagging_freq,
lambda_l1 = lambda_l1,
lambda_l2 = lambda_l2,
learning_rate = learning_rate,
min_split_gain = min_split_gain,
nrounds = nrounds
)
auc_cv <- rep(0, k)
for (j in 1:k) {
fold_idx <- folds[[j]]
dtrain <- lightgbm::lgb.Dataset(data = train_x_filtered[-fold_idx,], label = train_y[-fold_idx])
dtest <- lightgbm::lgb.Dataset(data = train_x_filtered[fold_idx,], label = train_y[fold_idx])
best_model <- lgb.train(
data = dtrain,
params = list(
objective = 'binary',
metric = 'auc',
learning_rate = learning_rate,
num_leaves = num_leaves,
max_depth = max_depth,
min_data_in_leaf = min_data_in_leaf,
feature_fraction = feature_fraction,
bagging_fraction = bagging_fraction,
bagging_freq = bagging_freq,
lambda_l1 = lambda_l1,
lambda_l2 = lambda_l2,
min_split_gain = min_split_gain,
num_threads = 7
),
valids = list(val = dtest),
nrounds = nrounds,
early_stopping_rounds = 100,
verbose = -1
)
View(best_model$best_score)
auc_cv[j] = best_model$best_score
}
return(list(Score = mean(auc_cv)))
}
optimization_result = BayesianOptimization(
FUN = auc_score_lightgbm_bayes,
bounds = bounds,
init_points = 20,
n_iter = 50,
acq = "ucb",
kappa = 2.576,
verbose = -1
)
best_params = optimization_result$Best_Par
print(best_params)
dtrain = lgb.Dataset(data = train_x_filtered, label = train_y)
dtest = lgb.Dataset(data = test_x_filtered, label = test_y)
best_model = best_model_lightgbm(dtrain,dtest,best_params)
train_pred = predict(best_model, train_x_filtered)
train_roc = roc(train_y, train_pred)
train_auc = auc(train_roc)
cat("Train AUC:", train_auc, "\n")
test_pred = predict(best_model, test_x_filtered)
test_roc = roc(test_y, test_pred)
test_auc = auc(test_roc)
cat("Test AUC:", test_auc, "\n")
df = data.frame(row.names = rownames(test_x_filtered), pred = test_pred)
return(list(df = df, best_params = best_params, auc_scores = auc_scores, train_x_filtered = train_x_filtered, test_x_filtered = test_x_filtered ,test_auc = test_auc))
}
I don't really understand the error I'm getting, it seems it's unable to find values for the hyperparameters or something like that:
Error in GP_deviance(beta = row, X = X, Y = Y, nug_thres = nug_thres, :
Infinite values of the Deviance Function,
unable to find optimum parameters
9.
stop("Infinite values of the Deviance Function, \n unable to find optimum parameters \n")
8.
GP_deviance(beta = row, X = X, Y = Y, nug_thres = nug_thres,
corr = corr)
7.
FUN(newX[, i], ...)
6.
apply(X = param_init_ps, MARGIN = 1L, FUN = function(row) GP_deviance(beta = row,
X = X, Y = Y, nug_thres = nug_thres, corr = corr))
5.
GPfit::GP_fit(X = Par_Mat[Rounds_Unique, ], Y = Value_Vec[Rounds_Unique],
corr = kernel, ...)
4.
withVisible(...elt(i))
3.
utils::capture.output({
GP <- GPfit::GP_fit(X = Par_Mat[Rounds_Unique, ], Y = Value_Vec[Rounds_Unique],
corr = kernel, ...)
})
2.
BayesianOptimization(FUN = auc_score_lightgbm_bayes, bounds = bounds,
init_points = 20, n_iter = 50, acq = "ucb", kappa = 2.576,
verbose = -1) at Functions_2.R#1499
1.
trainModel(data = data_newPD, trainIndex = trainIndex, iteration_num = i,
add.genes = add_genes[i], case = "new_PD",
using.Gene = FALSE, algo = "ALGO1")
Another weird thing: as you can see, I inspect the best score for each iteration with View(best_model$best_score), and best_model$best_score is 1 in every iteration — that doesn't make sense, does it? Maybe that is what is causing the error?
How to fix it?
The best_score AUC of 1 is fishy, and perhaps it is what leads to the error. I don't see the problem, though...