1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83
|
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/recipe.R
\name{bake}
\alias{bake}
\alias{bake.recipe}
\title{Apply a trained preprocessing recipe}
\usage{
bake(object, ...)
\method{bake}{recipe}(object, new_data, ..., composition = "tibble")
}
\arguments{
\item{object}{A trained object such as a \code{\link[=recipe]{recipe()}} with at least
one preprocessing operation.}
\item{...}{One or more selector functions to choose which variables will be
returned by the function. See \code{\link[=selections]{selections()}} for more details.
If no selectors are given, the default is to use
\code{\link[=everything]{everything()}}.}
\item{new_data}{A data frame or tibble to which the preprocessing will be
applied. If \code{NULL} is given to \code{new_data}, the preprocessed \emph{training
data} will be returned (assuming that \code{prep(retain = TRUE)} was used).}
\item{composition}{Either "tibble", "matrix", "data.frame", or
"dgCMatrix" for the format of the processed data set. Note that
all computations during the baking process are done in a
non-sparse format. Also, note that this argument should be
specified \strong{after} any selectors and the selectors should only
resolve to numeric columns (otherwise an error is thrown).}
}
\value{
A tibble, matrix, data frame, or sparse matrix that may have different
columns than the original columns in \code{new_data}.
}
\description{
For a recipe with at least one preprocessing operation that has been trained by
\code{\link[=prep]{prep()}}, apply the computations to new data.
}
\details{
\code{\link[=bake]{bake()}} takes a trained recipe and applies its operations to a
data set to create a design matrix. If you are using a recipe as a
preprocessor for modeling, we \strong{highly recommend} that you use a \code{workflow()}
instead of manually applying a recipe (see the example in \code{\link[=recipe]{recipe()}}).
If the data set is not too large, time can be saved by using the
\code{retain = TRUE} option of \code{\link[=prep]{prep()}}. This stores the processed version of the
training set. With this option set, \code{bake(object, new_data = NULL)}
will return the stored result without recomputing any steps.
Also, any steps with \code{skip = TRUE} will not be applied to the
data when \code{\link[=bake]{bake()}} is invoked with a data set in \code{new_data}.
\code{bake(object, new_data = NULL)} will always have all of the steps applied.
}
\examples{
\dontshow{if (rlang::is_installed("modeldata")) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf}
data(ames, package = "modeldata")
ames <- mutate(ames, Sale_Price = log10(Sale_Price))
ames_rec <-
recipe(Sale_Price ~ ., data = ames[-(1:6), ]) \%>\%
step_other(Neighborhood, threshold = 0.05) \%>\%
step_dummy(all_nominal()) \%>\%
step_interact(~ starts_with("Central_Air"):Year_Built) \%>\%
step_ns(Longitude, Latitude, deg_free = 2) \%>\%
step_zv(all_predictors()) \%>\%
prep()
# return the training set (already embedded in ames_rec)
bake(ames_rec, new_data = NULL)
# apply processing to other data:
bake(ames_rec, new_data = head(ames))
# only return selected variables:
bake(ames_rec, new_data = head(ames), all_numeric_predictors())
bake(ames_rec, new_data = head(ames), starts_with(c("Longitude", "Latitude")))
\dontshow{\}) # examplesIf}
}
\seealso{
\code{\link[=recipe]{recipe()}}, \code{\link[=prep]{prep()}}
}
|