File: step_mutate.Rd

package info (click to toggle)
r-cran-recipes 0.1.15%2Bdfsg-1
links: PTS, VCS
area: main
in suites: bullseye
size: 2,496 kB
sloc: sh: 37; makefile: 2
file content (123 lines) | stat: -rw-r--r-- 3,441 bytes
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/mutate.R, R/mutate_at.R
\name{step_mutate}
\alias{step_mutate}
\alias{tidy.step_mutate}
\alias{tidy.step_mutate_at}
\title{Add new variables using \code{mutate}}
\usage{
step_mutate(
  recipe,
  ...,
  role = "predictor",
  trained = FALSE,
  inputs = NULL,
  skip = FALSE,
  id = rand_id("mutate")
)

\method{tidy}{step_mutate}(x, ...)

\method{tidy}{step_mutate_at}(x, ...)
}
\arguments{
\item{recipe}{A recipe object. The step will be added to the
sequence of operations for this recipe.}

\item{...}{Name-value pairs of expressions. See \code{\link[dplyr:mutate]{dplyr::mutate()}}.
If the argument is not named, the expression is converted to
a column name.}

\item{role}{For model terms created by this step, what analysis
role should they be assigned? By default, the function assumes
that the new columns created from the original variables
will be used as predictors in a model.}

\item{trained}{A logical to indicate if the quantities for
preprocessing have been estimated.}

\item{inputs}{Quosure(s) of \code{...}.}

\item{skip}{A logical. Should the step be skipped when the
recipe is baked by \code{\link[=bake.recipe]{bake.recipe()}}? While all operations are baked
when \code{\link[=prep.recipe]{prep.recipe()}} is run, some operations may not be able to be
conducted on new data (e.g. processing the outcome variable(s)).
Care should be taken when using \code{skip = TRUE} as it may affect
the computations for subsequent operations.}

\item{id}{A character string that is unique to this step to identify it.}

\item{x}{A \code{step_mutate} object}
}
\value{
An updated version of \code{recipe} with the new step
added to the sequence of existing steps (if any). For the
\code{tidy} method, a tibble with a column \code{values} that
contains the \code{mutate} expressions as character strings
(these strings are not reparsable).
}
\description{
\code{step_mutate} creates a \emph{specification} of a recipe step
that will add variables using \code{\link[dplyr:mutate]{dplyr::mutate()}}.
}
\details{
When an object in the user's global environment is
referenced in the expression defining the new variable(s),
it is a good idea to use quasiquotation (e.g. \verb{!!}) to embed
the value of the object in the expression (to be portable
between sessions). See the examples.
}
\examples{
# Define a recipe on iris that adds two columns computed from
# existing ones.
rec <-
  recipe( ~ ., data = iris) \%>\%
  step_mutate(
    dbl_width = Sepal.Width * 2,
    half_length = Sepal.Length / 2
  )

# Prepare the recipe using rows 1 to 75 of iris as the training set.
prepped <- prep(rec, training = iris \%>\% slice(1:75))

library(dplyr)

# Build the same two columns directly with dplyr on the same 75 rows,
# as a reference result.
dplyr_train <-
  iris \%>\%
  as_tibble() \%>\%
  slice(1:75) \%>\%
  mutate(
    dbl_width = Sepal.Width * 2,
    half_length = Sepal.Length / 2
  )

# Bake with new_data = NULL and compare against the dplyr reference
# built from the training rows.
rec_train <- bake(prepped, new_data = NULL)
all.equal(dplyr_train, rec_train)

# Repeat the comparison on rows 76 to 150, which were not passed to prep().
dplyr_test <-
  iris \%>\%
  as_tibble() \%>\%
  slice(76:150) \%>\%
  mutate(
    dbl_width = Sepal.Width * 2,
    half_length = Sepal.Length / 2
  )
rec_test <- bake(prepped, iris \%>\% slice(76:150))
all.equal(dplyr_test, rec_test)

# Embedding objects:
const <- 1.414

# bad_approach refers to const by name, while best_approach applies !!
# to const inside the expression.
qq_rec <-
  recipe( ~ ., data = iris) \%>\%
  step_mutate(
    bad_approach = Sepal.Width * const,
    best_approach = Sepal.Width * !!const
  ) \%>\%
  prep(training = iris)

# Show the first four rows of the columns whose names contain "appro".
bake(qq_rec, new_data = NULL, contains("appro")) \%>\% slice(1:4)

# The difference:
# tidy() on step number 1 lists the expressions stored for each new column.
tidy(qq_rec, number = 1)
}
\concept{preprocessing}
\concept{transformation_methods}
\keyword{datagen}