As the reprex below shows, I run into problems when I tune model arguments and recipe arguments (from both recipes and recipeselectors) together by merging the two grids.
I have tried numerous ways, but I always get this error message:

preprocessor 3/3: Error: You cannot prep() a tuneable recipe. Argument(s) with tune(): 'top_p'. Do you want to use a tuning function such as tune_grid()?
If I tune all the model and recipe arguments except top_p, everything works fine. How should I understand this issue?
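For reference, the variant that does work simply fixes top_p to a constant instead of tuning it (the value 5 here is arbitrary, just to illustrate):

step_select_roc(all_predictors(), outcome = "Exited", top_p = 5)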
#### LIBS
suppressPackageStartupMessages(library(tidymodels))
suppressPackageStartupMessages(library(tidyverse))
suppressPackageStartupMessages(library(data.table))
suppressPackageStartupMessages(library(themis))
suppressPackageStartupMessages(library(doParallel))
suppressPackageStartupMessages(library(recipeselectors))
#### DATA
df <- fread("Churn_Modelling.csv") # source: https://www.kaggle.com/shrutimechlearn/churn-modelling
set.seed(31)
split <- initial_split(df, prop = 0.8)
train <- training(split)
test <- testing(split)
k_folds_data <- vfold_cv(train, v = 10)
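# quick check of the class balance of the outcome; this imbalance is what
# the step_upsample() call in the recipe below addresses
train %>% count(Exited)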
#### FEATURES
# Preprocessing recipe (includes up-sampling and the tuned filter steps)
rec <- recipe(Exited ~ ., data = train) %>%
  step_rm(one_of("RowNumber", "Surname")) %>%
  update_role(CustomerId, new_role = "Helper") %>%
  step_num2factor(all_outcomes(),
                  levels = c("No", "Yes"),
                  transform = function(x) x + 1) %>%
  step_normalize(all_numeric(), -has_role(match = "Helper")) %>%
  step_dummy(all_nominal(), -all_outcomes()) %>%
  step_nzv(all_predictors()) %>%
  themis::step_upsample(Exited) %>%
  step_other(all_nominal(), threshold = tune("cat_thresh")) %>%
  step_corr(all_predictors(), threshold = tune("thresh_cor")) %>%
  # step_pca(all_numeric(), -all_outcomes(), num_comp = tune())
  step_select_roc(all_predictors(), outcome = "Exited", top_p = tune())  # from recipeselectors
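As a sanity check, the tune() placeholders declared in the recipe can be listed like this (if I read the API correctly, this should show cat_thresh, thresh_cor, and top_p):

parameters(rec)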
#### MODEL
model_metrics <- metric_set(roc_auc)
# xgboost model
xgb_spec <- boost_tree(
  trees = tune(),
  tree_depth = tune(),
  min_n = tune(),
  loss_reduction = tune(),
  sample_size = tune(),
  mtry = tune(),
  learn_rate = tune(),
  stop_iter = tune()
) %>%
  set_engine("xgboost") %>%
  set_mode("classification")
# grid
xgb_grid <- grid_latin_hypercube(
  trees(),
  tree_depth(),
  min_n(),
  loss_reduction(),
  sample_size = sample_prop(),
  finalize(mtry(), train),
  learn_rate(),
  stop_iter(range = c(5L, 50L)),
  size = 10
)
rec_grid <- grid_latin_hypercube(
  parameters(rec) %>%
    update(top_p = top_p(c(1, 11))),
  size = 10
)
# no shared columns, so merge() returns the full cross join (10 x 10 = 100 rows)
comp_grid <- merge(xgb_grid, rec_grid)
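An alternative sketch (not what I ran above, and assuming parameters() on a workflow collects the tune() ids from both the recipe and the model spec): draw a single grid from the combined parameter set, so every id is covered in one call.

wf <- workflow() %>%
  add_recipe(rec) %>%
  add_model(xgb_spec)
# one grid over all tuning ids; ranges updated to match the grids above
wf_grid <- grid_latin_hypercube(
  parameters(wf) %>%
    update(top_p = top_p(c(1, 11)),
           mtry = finalize(mtry(), train),
           sample_size = sample_prop(),
           stop_iter = stop_iter(range = c(5L, 50L))),
  size = 10
)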
# tune
cores <- parallel::detectCores(logical = FALSE)
cl <- makePSOCKcluster(cores)
registerDoParallel(cl)

set.seed(234)
model_res <- tune_grid(
  xgb_spec,
  preprocessor = rec,
  resamples = k_folds_data,
  grid = comp_grid,
  metrics = model_metrics
)
stopCluster(cl)
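For debugging, I also compare the grid's column names against the full set of tuning ids, since my understanding is that every id needs exactly one matching grid column (wf is the workflow sketched above):

all_ids <- parameters(wf)$id
setdiff(all_ids, names(comp_grid))  # ids missing from the grid; expect character(0)
setdiff(names(comp_grid), all_ids)  # grid columns with no matching id; expect character(0)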