Generate a set of workflow objects from preprocessing and model objects

workflow_set(preproc, models, cross = TRUE)

Arguments

preproc

A list (preferably named) with preprocessing objects: formulas, recipes, or workflows::workflow_variables().

models

A list (preferably named) of parsnip model specifications.

cross

A logical: should all combinations of the preprocessors and models be used to create the workflows? If FALSE, the length of preproc and models should be equal.

Value

A tibble with extra class 'workflow_set'. A new set includes four columns (but others can be added):

  • wflow_id contains character strings identifying the preprocessor/model combination (e.g., "none_knn"). These can be changed but must be unique.

  • info is a list column with tibbles containing more specific information, including any comments added using comment_add(). This tibble also contains the workflow object (which can be easily retrieved using pull_workflow()).

  • option is a list column that will include a list of optional arguments passed to the functions from the tune package. They can be added manually via option_add() or automatically when options are passed to workflow_map().

  • result is a list column that will contain any objects produced when workflow_map() is used.

Details

The preprocessors that can be combined with the model objects can be one or more of:

  • A formula.

  • A recipe.

  • A set of variable selectors created by workflows::workflow_variables().

Since preproc is a (preferably named) list, any combination of these can be used in that argument (i.e., preproc can contain mixed types).

See also

Examples

#>
#> Attaching package: ‘recipes’
#> The following object is masked from ‘package:stats’:
#>
#>     step
#> For binary classification, the first factor level is assumed to be the event.
#> Use the argument `event_level = "second"` to alter this as needed.
# ------------------------------------------------------------------------------
# Data: load `cells`, drop the `case` column, and create a validation split.
# The seed is set first so the split is reproducible.

data(cells)
cells <- cells %>% dplyr::select(-case)

set.seed(1)
val_set <- validation_split(cells)

# ------------------------------------------------------------------------------
# Preprocessors: three recipes that share a common base. The PCA recipe leaves
# `num_comp` as a `tune()` placeholder.

basic_recipe <-
  recipe(class ~ ., data = cells) %>%
  step_YeoJohnson(all_predictors()) %>%
  step_normalize(all_predictors())

pca_recipe <-
  basic_recipe %>%
  step_pca(all_predictors(), num_comp = tune())

ss_recipe <-
  basic_recipe %>%
  step_spatialsign(all_predictors())

# ------------------------------------------------------------------------------
# Models: a nearest-neighbor classifier (with `tune()` placeholders) and a
# logistic regression.

knn_mod <-
  nearest_neighbor(neighbors = tune(), weight_func = tune()) %>%
  set_engine("kknn") %>%
  set_mode("classification")

lr_mod <-
  logistic_reg() %>%
  set_engine("glm")

# ------------------------------------------------------------------------------
# Cross every preprocessor with every model (3 x 2 = 6 workflows). The list
# names are used to build the `wflow_id` values.

preproc <- list(none = basic_recipe, pca = pca_recipe, sp_sign = ss_recipe)
models <- list(knn = knn_mod, logistic = lr_mod)

cell_set <- workflow_set(preproc, models, cross = TRUE)
cell_set
#> # A workflow set/tibble: 6 x 4
#>   wflow_id         info             option    result
#>   <chr>            <list>           <list>    <list>
#> 1 none_knn         <tibble [1 × 4]> <opts[0]> <list [0]>
#> 2 none_logistic    <tibble [1 × 4]> <opts[0]> <list [0]>
#> 3 pca_knn          <tibble [1 × 4]> <opts[0]> <list [0]>
#> 4 pca_logistic     <tibble [1 × 4]> <opts[0]> <list [0]>
#> 5 sp_sign_knn      <tibble [1 × 4]> <opts[0]> <list [0]>
#> 6 sp_sign_logistic <tibble [1 × 4]> <opts[0]> <list [0]>
# ------------------------------------------------------------------------------
# Using variables and formulas

# Select predictors by their names: one `workflow_variables()` object per
# channel prefix, each with `class` as the outcome. `!!.x` injects the current
# prefix string into the `contains()` selector (see the printed quosures).
channels <- paste0("ch_", 1:4)
preproc <- purrr::map(channels, ~ workflow_variables(class, c(contains(!!.x))))
names(preproc) <- channels

# Add a plain formula alongside the variable selectors; `preproc` can hold
# mixed preprocessor types.
preproc$everything <- class ~ .
preproc
#> $ch_1 #> $outcomes #> <quosure> #> expr: ^class #> env: 0x560c844f17c8 #> #> $predictors #> <quosure> #> expr: ^c(contains("ch_1")) #> env: 0x560c844f17c8 #> #> attr(,"class") #> [1] "workflow_variables" #> #> $ch_2 #> $outcomes #> <quosure> #> expr: ^class #> env: 0x560c844ed800 #> #> $predictors #> <quosure> #> expr: ^c(contains("ch_2")) #> env: 0x560c844ed800 #> #> attr(,"class") #> [1] "workflow_variables" #> #> $ch_3 #> $outcomes #> <quosure> #> expr: ^class #> env: 0x560c819d93b8 #> #> $predictors #> <quosure> #> expr: ^c(contains("ch_3")) #> env: 0x560c819d93b8 #> #> attr(,"class") #> [1] "workflow_variables" #> #> $ch_4 #> $outcomes #> <quosure> #> expr: ^class #> env: 0x560c819d7a80 #> #> $predictors #> <quosure> #> expr: ^c(contains("ch_4")) #> env: 0x560c819d7a80 #> #> attr(,"class") #> [1] "workflow_variables" #> #> $everything #> class ~ . #> <environment: 0x560c7fa36e90> #>
# Combine each preprocessor in `preproc` with only the logistic regression
# model. `models["logistic"]` (single bracket) keeps a named list of length one.
cell_set_by_group <- workflow_set(preproc, models["logistic"])
cell_set_by_group
#> # A workflow set/tibble: 5 x 4
#>   wflow_id            info             option    result
#>   <chr>               <list>           <list>    <list>
#> 1 ch_1_logistic       <tibble [1 × 4]> <opts[0]> <list [0]>
#> 2 ch_2_logistic       <tibble [1 × 4]> <opts[0]> <list [0]>
#> 3 ch_3_logistic       <tibble [1 × 4]> <opts[0]> <list [0]>
#> 4 ch_4_logistic       <tibble [1 × 4]> <opts[0]> <list [0]>
#> 5 everything_logistic <tibble [1 × 4]> <opts[0]> <list [0]>