1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69
|
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/extract.R
\name{extract}
\alias{extract}
\title{Extract a character column into multiple columns using regular
expression groups}
\usage{
extract(
data,
col,
into,
regex = "([[:alnum:]]+)",
remove = TRUE,
convert = FALSE,
...
)
}
\arguments{
\item{data}{A data frame.}
\item{col}{<\code{\link[=tidyr_tidy_select]{tidy-select}}> Column to expand.}
\item{into}{Names of new variables to create as a character vector.
Use \code{NA} to omit the variable in the output.}
\item{regex}{A string representing a regular expression used to extract the
desired values. There should be one group (defined by \verb{()}) for each
element of \code{into}.}
\item{remove}{If \code{TRUE}, remove input column from output data frame.}
\item{convert}{If \code{TRUE}, will run \code{\link[=type.convert]{type.convert()}} with
\code{as.is = TRUE} on new columns. This is useful if the component
columns are integer, numeric or logical.
NB: this will cause string \code{"NA"}s to be converted to \code{NA}s.}
\item{...}{Additional arguments passed on to methods.}
}
\description{
\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#superseded}{\figure{lifecycle-superseded.svg}{options: alt='[Superseded]'}}}{\strong{[Superseded]}}
\code{extract()} has been superseded in favour of \code{\link[=separate_wider_regex]{separate_wider_regex()}},
which has a more polished API and better handling of problems.
Superseded functions will not go away, but will only receive critical bug
fixes.
Given a regular expression with capturing groups, \code{extract()} turns
each group into a new column. If the groups don't match, or the input
is \code{NA}, the output will be \code{NA}.
}
\examples{
df <- tibble(x = c(NA, "a-b", "a-d", "b-c", "d-e"))
df \%>\% extract(x, "A")
df \%>\% extract(x, c("A", "B"), "([[:alnum:]]+)-([[:alnum:]]+)")
# Now recommended
df \%>\%
separate_wider_regex(
x,
patterns = c(A = "[[:alnum:]]+", "-", B = "[[:alnum:]]+")
)
# If no match, NA:
df \%>\% extract(x, c("A", "B"), "([a-d]+)-([a-d]+)")
}
\seealso{
\code{\link[=separate]{separate()}} to split up by a separator.
}
|