## ----ex_setup, include=FALSE--------------------------------------------------
# Chunk-wide knitr defaults for everything that follows.
knitr::opts_chunk$set(
  message = FALSE,
  digits = 3,
  collapse = TRUE,
  comment = "#>",
  # Evaluate chunks only when both packages can be loaded; requireNamespace()
  # returns FALSE (rather than erroring) when a package is unavailable.
  eval = requireNamespace("modeldata", quietly = TRUE) &&
    requireNamespace("rsample", quietly = TRUE)
)
# Session-wide: print numbers with three significant digits.
options(digits = 3)
## ----data---------------------------------------------------------------------
# Attach the packages in the original order so the search path is unchanged.
library(recipes)
library(rsample)
library(modeldata)

# Load the `credit_data` data set into the workspace.
data("credit_data")

# Seed the RNG immediately before splitting so the partition is reproducible.
set.seed(55)
train_test_split <- credit_data %>% initial_split()

# Pull the two partitions out of the split object.
credit_train <- train_test_split %>% training()
credit_test <- train_test_split %>% testing()
## ----missing------------------------------------------------------------------
# Share of non-missing values in each training column
# (a value of 1 means the column contains no NA).
vapply(
  X = credit_train,
  FUN = function(column) mean(!is.na(column)),
  FUN.VALUE = numeric(1)
)
## ----first_rec----------------------------------------------------------------
# Start a recipe from the training data: `Status` on the left-hand side of the
# formula, every other column (`.`) on the right-hand side.
rec_obj <- recipe(Status ~ ., data = credit_train)
# Auto-print the (still step-less) recipe.
rec_obj
## ----step_code, eval = FALSE--------------------------------------------------
# Not-run template (the chunk is marked eval = FALSE): two alternative ways of
# adding a step to a recipe, where {X} is a placeholder for the step name.
# rec_obj <- step_{X}(rec_obj, arguments) ## or
# rec_obj <- rec_obj %>% step_{X}(arguments)
## ----imp-steps----------------------------------------------------------------
# Names attached from recipes that contain "impute_".
ls("package:recipes", pattern = "impute_")
## ----dummy--------------------------------------------------------------------
# NOTE: despite the chunk label, this chunk adds the imputation step. The label
# is left untouched in case it is referenced elsewhere.
# Add a K-nearest-neighbour imputation step covering all predictors.
imputed <- step_impute_knn(rec_obj, all_predictors())
imputed
## ----imputing-----------------------------------------------------------------
# NOTE: despite the chunk label, this chunk adds the dummy-variable step.
# Add a dummy-variable step for every nominal predictor.
ind_vars <- step_dummy(imputed, all_nominal_predictors())
ind_vars
## ----center_scale-------------------------------------------------------------
# Add the centering step first, then the scaling step, both on all numeric
# predictors; the step order matches the original pipe chain.
standardized <- step_center(ind_vars, all_numeric_predictors())
standardized <- step_scale(standardized, all_numeric_predictors())
standardized
## ----trained------------------------------------------------------------------
# Estimate every step of the recipe using the training data.
trained_rec <- prep(standardized, training = credit_train)
trained_rec
## ----apply--------------------------------------------------------------------
# Apply the trained recipe to the training and the test partitions.
train_data <- trained_rec %>% bake(new_data = credit_train)
test_data <- trained_rec %>% bake(new_data = credit_test)
## ----tibbles------------------------------------------------------------------
# Inspect the class and contents of the processed test set.
class(test_data)
test_data
# Share of non-missing values in each processed test-set column.
vapply(
  X = test_data,
  FUN = function(column) mean(!is.na(column)),
  FUN.VALUE = numeric(1)
)
## ----step_list, echo = FALSE--------------------------------------------------
# Names attached from recipes that start with "step_".
ls("package:recipes", pattern = "^step_")
## ----check, eval = FALSE------------------------------------------------------
# Not-run example (the chunk is marked eval = FALSE): pipes the trained recipe
# into check_missing() for the columns selected by contains("Marital").
# trained_rec <- trained_rec %>%
#   check_missing(contains("Marital"))
## ----check_list, echo = FALSE-------------------------------------------------
# Names attached from recipes that start with "check_".
grep("^check_", ls("package:recipes"), value = TRUE)