1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214
|
#' Select variables that match a pattern
#'
#' @description
#'
#' These [selection helpers][language] match variables according
#' to a given pattern.
#'
#' * [starts_with()]: Starts with an exact prefix.
#' * [ends_with()]: Ends with an exact suffix.
#' * [contains()]: Contains a literal string.
#' * [matches()]: Matches a regular expression.
#' * [num_range()]: Matches a numerical range like x01, x02, x03.
#'
#' @param match A character vector. If length > 1, the union of the
#' matches is taken.
#'
#' For `starts_with()`, `ends_with()`, and `contains()` this is an exact
#' match. For `matches()` this is a regular expression, and can be a
#' stringr pattern.
#' @param ignore.case If `TRUE`, the default, ignores case when matching
#' names.
#' @param vars A character vector of variable names. If not supplied,
#' the variables are taken from the current selection context (as
#' established by functions like `select()` or `pivot_longer()`).
#'
#' @section Examples:
#'
#' ```{r, child = "man/rmd/setup.Rmd"}
#' ```
#'
#' Selection helpers can be used in functions like `dplyr::select()`
#' or `tidyr::pivot_longer()`. Let's first attach the tidyverse:
#'
#' ```{r, comment = "#>", collapse = TRUE}
#' library(tidyverse)
#'
#' # For better printing
#' iris <- as_tibble(iris)
#' ```
#'
#' `starts_with()` selects all variables matching a prefix and
#' `ends_with()` matches a suffix:
#'
#' ```{r, comment = "#>", collapse = TRUE}
#' iris %>% select(starts_with("Sepal"))
#'
#' iris %>% select(ends_with("Width"))
#' ```
#'
#' You can supply multiple prefixes or suffixes. Note how the order of
#' variables depends on the order of the suffixes and prefixes:
#'
#' ```{r, comment = "#>", collapse = TRUE}
#' iris %>% select(starts_with(c("Petal", "Sepal")))
#'
#' iris %>% select(ends_with(c("Width", "Length")))
#' ```
#'
#' `contains()` selects columns whose names contain a word:
#'
#' ```{r, comment = "#>", collapse = TRUE}
#' iris %>% select(contains("al"))
#' ```
#'
#' `starts_with()`, `ends_with()`, and `contains()` do not use regular expressions. To select with a
#' regexp use `matches()`:
#'
#' ```{r, comment = "#>", collapse = TRUE}
#' # [pt] is matched literally:
#' iris %>% select(contains("[pt]al"))
#'
#' # [pt] is interpreted as a regular expression
#' iris %>% select(matches("[pt]al"))
#' ```
#'
#' `starts_with()` selects all variables starting with a prefix. To
#' select a range, use `num_range()`. Compare:
#'
#' ```{r, comment = "#>", collapse = TRUE}
#' billboard %>% select(starts_with("wk"))
#'
#' billboard %>% select(num_range("wk", 10:15))
#' ```
#'
#' @seealso `r rd_helpers_seealso()`
#' @export
starts_with <- function(match,
ignore.case = TRUE,
vars = NULL) {
check_match(match)
vars <- vars %||% peek_vars(fn = "starts_with")
if (ignore.case) {
vars <- tolower(vars)
match <- tolower(match)
}
flat_map_int(match, starts_with_impl, vars)
}
starts_with_impl <- function(x, vars) {
n <- nchar(x)
which_vars(x, substr(vars, 1, n))
}
#' @rdname starts_with
#' @export
ends_with <- function(match,
ignore.case = TRUE,
vars = NULL) {
check_match(match)
vars <- vars %||% peek_vars(fn = "ends_with")
if (ignore.case) {
vars <- tolower(vars)
match <- tolower(match)
}
length <- nchar(vars)
flat_map_int(match, ends_with_impl, vars, length)
}
ends_with_impl <- function(x, vars, length) {
n <- nchar(x)
which_vars(x, substr(vars, pmax(1, length - n + 1), length))
}
#' @rdname starts_with
#' @export
contains <- function(match,
ignore.case = TRUE,
vars = NULL) {
check_match(match)
vars <- vars %||% peek_vars(fn = "contains")
if (ignore.case) {
vars <- tolower(vars)
match <- tolower(match)
}
flat_map_int(match, grep_vars, vars, fixed = TRUE)
}
#' @rdname starts_with
#' @param perl Should Perl-compatible regexps be used?
#' @export
matches <- function(match,
ignore.case = TRUE,
perl = FALSE,
vars = NULL) {
check_match(match)
vars <- vars %||% peek_vars(fn = "matches")
if (inherits(match, "pattern") || inherits(match, "stringr_pattern")) {
check_installed("stringr")
if (!missing(ignore.case)) {
cli::cli_abort("{.arg ignore.case} not supported when {.arg match} is a {.pkg stringr} pattern.")
}
if (!missing(perl)) {
cli::cli_abort("{.arg perl} not supported when {.arg match} is a {.pkg stringr} pattern.")
}
# no [ or [[ methods for pattern objects
if (length(match) > 1) {
cli::cli_abort("{.pkg stringr} patterns must be length 1.")
}
stringr::str_which(vars, match)
} else {
flat_map_int(match, grep_vars, vars, ignore.case = ignore.case, perl = perl)
}
}
#' @rdname starts_with
#' @param prefix,suffix A prefix/suffix added before/after the numeric range.
#' @param range A sequence of integers, like `1:5`.
#' @param width Optionally, the "width" of the numeric range. For example,
#' a range of 2 gives "01", a range of three "001", etc.
#' @export
num_range <- function(prefix,
range,
suffix = "",
width = NULL,
vars = NULL) {
vars <- vars %||% peek_vars(fn = "num_range")
if (!is_null(width)) {
range <- sprintf(paste0("%0", width, "d"), range)
}
match_vars(paste0(prefix, range, suffix), vars)
}
check_match <- function(match) {
if (!is_character(match) || !all(nzchar(match))) {
cli::cli_abort("{.arg match} must be a character vector of non empty strings.")
}
}
match_vars <- function(needle, haystack) {
if (vctrs::vec_duplicate_any(haystack)) {
x <- map(needle, ~ which(. == haystack))
x <- vctrs::vec_c(!!!x)
} else {
x <- match(needle, haystack)
x[!is.na(x)]
}
}
grep_vars <- function(needle, haystack, ...) {
grep(needle, haystack, ...)
}
which_vars <- function(needle, haystack) {
which(needle == haystack)
}
|