So far I have been using random search for hyperparameter tuning. Now I want to use Bayesian optimization for a more sophisticated search. I am having trouble implementing the code in R. Here is what I have so far. As you can see, I also use cross-validation: `fold_idx` splits the train set into 10 folds, and once all folds have been processed, the function calculates and returns the average AUC score across all folds. (The problem is only in the Bayesian optimization part; the rest of the code works well.)
library(rBayesianOptimization)
trainModel = function(data = NULL,
trainIndex = NULL,
iteration_num = NULL,
add.genes = TRUE,
case = c(),
use.scale = TRUE,
using.Gene = FALSE,
algo = c()
) {
train_set <- data[trainIndex, ]
test_set <- data[-trainIndex, ]
if (case != 'Response'){
train_set = train_set[!is.na(train_set$outcome),]
test_set = test_set[!is.na(test_set$outcome),]
}
train_set$outcome = as.factor(train_set$outcome)
test_set$outcome = as.factor(test_set$outcome)
outcome_idx = grep("outcome", colnames(train_set))
train_x = data.matrix(train_set[, -outcome_idx])
train_y = train_set[, outcome_idx]
train_y = as.factor(train_y)
test_x = data.matrix(test_set[, -outcome_idx])
test_y = test_set[, outcome_idx]
test_y = as.factor(test_y)
train_x_filtered = train_x
test_x_filtered = test_x
k = 10
folds = createFolds(train_y, k = k, list = TRUE, returnTrain = FALSE)
bounds = list(
max_depth = c(3L, 12L),
num_leaves = c(3L, 65L),
min_data_in_leaf = c(3L, 50L),
feature_fraction = c(0.1, 0.9),
bagging_fraction = c(0.1, 0.9),
bagging_freq = c(0L, 10L),
lambda_l1 = c(1L, 25L),
lambda_l2 = c(1L, 40L),
learning_rate = c(0.005, 0.1),
min_split_gain = c(0.5, 20),
nrounds = c(50L, 1400L))
auc_score_lightgbm_bayes = function(max_depth, num_leaves, min_data_in_leaf, feature_fraction, bagging_fraction, bagging_freq, lambda_l1, lambda_l2, learning_rate, min_split_gain, nrounds) {
max_depth = round(max_depth)
num_leaves = round(num_leaves)
min_data_in_leaf = round(min_data_in_leaf)
bagging_freq = round(bagging_freq)
nrounds = round(nrounds)
params = list(
max_depth = max_depth,
num_leaves = num_leaves,
min_data_in_leaf = min_data_in_leaf,
feature_fraction = feature_fraction,
bagging_fraction = bagging_fraction,
bagging_freq = bagging_freq,
lambda_l1 = lambda_l1,
lambda_l2 = lambda_l2,
learning_rate = learning_rate,
min_split_gain = min_split_gain,
nrounds = nrounds
)
auc_cv <- rep(0, k)
for (j in 1:k) {
fold_idx <- folds[[j]]
dtrain <- lightgbm::lgb.Dataset(data = train_x_filtered[-fold_idx,], label = train_y[-fold_idx])
dtest <- lightgbm::lgb.Dataset(data = train_x_filtered[fold_idx,], label = train_y[fold_idx])
best_model <- lgb.train(
data = dtrain,
params = list(
objective = 'binary',
metric = 'auc',
learning_rate = learning_rate,
num_leaves = num_leaves,
max_depth = max_depth,
min_data_in_leaf = min_data_in_leaf,
feature_fraction = feature_fraction,
bagging_fraction = bagging_fraction,
bagging_freq = bagging_freq,
lambda_l1 = lambda_l1,
lambda_l2 = lambda_l2,
min_split_gain = min_split_gain,
num_threads = 7
),
valids = list(val = dtest),
nrounds = nrounds,
early_stopping_rounds = 100,
verbose = -1
)
View(best_model$best_score)
auc_cv[j] = best_model$best_score
}
return(list(Score = mean(auc_cv)))
}
optimization_result = BayesianOptimization(
FUN = auc_score_lightgbm_bayes,
bounds = bounds,
init_points = 20,
n_iter = 50,
acq = "ucb",
kappa = 2.576,
verbose = -1
)
best_params = optimization_result$Best_Par
print(best_params)
dtrain = lgb.Dataset(data = train_x_filtered, label = train_y)
dtest = lgb.Dataset(data = test_x_filtered, label = test_y)
best_model = best_model_lightgbm(dtrain,dtest,best_params)
train_pred = predict(best_model, train_x_filtered)
train_roc = roc(train_y, train_pred)
train_auc = auc(train_roc)
cat("Train AUC:", train_auc, "\n")
test_pred = predict(best_model, test_x_filtered)
test_roc = roc(test_y, test_pred)
test_auc = auc(test_roc)
cat("Test AUC:", test_auc, "\n")
df = data.frame(row.names = rownames(test_x_filtered), pred = test_pred)
return(list(df = df, best_params = best_params, auc_scores = auc_scores, train_x_filtered = train_x_filtered, test_x_filtered = test_x_filtered ,test_auc = test_auc))
}
I don't really understand the error I'm getting, it seems it's unable to find values for the hyperparameters or something like that:
Error in GP_deviance(beta = row, X = X, Y = Y, nug_thres = nug_thres, :
Infinite values of the Deviance Function,
unable to find optimum parameters
9.
stop("Infinite values of the Deviance Function, \n unable to find optimum parameters \n")
8.
GP_deviance(beta = row, X = X, Y = Y, nug_thres = nug_thres,
corr = corr)
7.
FUN(newX[, i], ...)
6.
apply(X = param_init_ps, MARGIN = 1L, FUN = function(row) GP_deviance(beta = row,
X = X, Y = Y, nug_thres = nug_thres, corr = corr))
5.
GPfit::GP_fit(X = Par_Mat[Rounds_Unique, ], Y = Value_Vec[Rounds_Unique],
corr = kernel, ...)
4.
withVisible(...elt(i))
3.
utils::capture.output({
GP <- GPfit::GP_fit(X = Par_Mat[Rounds_Unique, ], Y = Value_Vec[Rounds_Unique],
corr = kernel, ...)
})
2.
BayesianOptimization(FUN = auc_score_lightgbm_bayes, bounds = bounds,
init_points = 20, n_iter = 50, acq = "ucb", kappa = 2.576,
verbose = -1) at Functions_2.R#1499
1.
trainModel(data = data_newPD, trainIndex = trainIndex, iteration_num = i,
add.genes = add_genes[i], case = "new_PD",
using.Gene = FALSE, algo = "ALGO1")
Another weird thing: as you can see, I inspect the best score for each iteration with View(best_model$best_score), and best_model$best_score is 1 in every iteration — that doesn't make sense, does it? Maybe that is what is causing the error?
How to fix it?
The best_score AUC of 1 is fishy, and perhaps it is what leads to the error. I don't see the problem, though...