1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166
|
#' Split up a string into pieces
#'
#' @description
#' This family of functions provides various ways of splitting a string up
#' into pieces. These two functions return a character vector:
#'
#' * `str_split_1()` takes a single string and splits it into pieces,
#' returning a single character vector.
#' * `str_split_i()` splits each string in a character vector into pieces and
#' extracts the `i`th value, returning a character vector.
#'
#' These two functions return a more complex object:
#'
#' * `str_split()` splits each string in a character vector into a varying
#' number of pieces, returning a list of character vectors.
#' * `str_split_fixed()` splits each string in a character vector into a
#' fixed number of pieces, returning a character matrix.
#'
#' @inheritParams str_extract
#' @param n Maximum number of pieces to return. Default (Inf) uses all
#' possible split positions.
#'
#' For `str_split()`, this determines the maximum length of each element
#' of the output. For `str_split_fixed()`, this determines the number of
#' columns in the output; if an input is too short, the result will be padded
#' with `""`.
#' @return
#' * `str_split_1()`: a character vector.
#' * `str_split()`: a list the same length as `string`/`pattern` containing
#' character vectors.
#' * `str_split_fixed()`: a character matrix with `n` columns and the same
#' number of rows as the length of `string`/`pattern`.
#' * `str_split_i()`: a character vector the same length as `string`/`pattern`.
#' @seealso [stringi::stri_split()] for the underlying implementation.
#' @export
#' @examples
#' fruits <- c(
#' "apples and oranges and pears and bananas",
#' "pineapples and mangos and guavas"
#' )
#'
#' str_split(fruits, " and ")
#' str_split(fruits, " and ", simplify = TRUE)
#'
#' # If you want to split a single string, use `str_split_1`
#' str_split_1(fruits[[1]], " and ")
#'
#' # Specify n to restrict the number of possible matches
#' str_split(fruits, " and ", n = 3)
#' str_split(fruits, " and ", n = 2)
#' # If n is greater than the number of pieces, no padding occurs
#' str_split(fruits, " and ", n = 5)
#'
#' # Use str_split_fixed() to return a character matrix
#' str_split_fixed(fruits, " and ", 3)
#' str_split_fixed(fruits, " and ", 4)
#'
#' # str_split_i extracts only a single piece from a string
#' str_split_i(fruits, " and ", 1)
#' str_split_i(fruits, " and ", 4)
#' # use a negative number to select from the end
#' str_split_i(fruits, " and ", -1)
str_split <- function(string, pattern, n = Inf, simplify = FALSE) {
  # Validate inputs before touching the splitting backend. `simplify` may be
  # NA as well as TRUE/FALSE (str_split_i() relies on that).
  check_lengths(string, pattern)
  check_positive_integer(n)
  check_bool(simplify, allow_na = TRUE)

  # The stri_split_*() functions receive -1L in place of the Inf default
  # ("use all possible split positions").
  max_pieces <- if (identical(n, Inf)) -1L else n

  # Dispatch on the kind of pattern; each kind has its own splitter and its
  # own options argument.
  pieces <- switch(
    type(pattern),
    # Empty and boundary patterns are both split on text boundaries, so the
    # `empty` case falls through to `bound`.
    empty = ,
    bound = stri_split_boundaries(
      string,
      n = max_pieces,
      simplify = simplify,
      opts_brkiter = opts(pattern)
    ),
    fixed = stri_split_fixed(
      string,
      pattern,
      n = max_pieces,
      simplify = simplify,
      opts_fixed = opts(pattern)
    ),
    regex = stri_split_regex(
      string,
      pattern,
      n = max_pieces,
      simplify = simplify,
      opts_regex = opts(pattern)
    ),
    coll = stri_split_coll(
      string,
      pattern,
      n = max_pieces,
      simplify = simplify,
      opts_collator = opts(pattern)
    )
  )

  preserve_names_if_possible(string, pattern, pieces)
}
#' @export
#' @rdname str_split
str_split_1 <- function(string, pattern) {
  # `string` is validated with check_string() before splitting.
  check_string(string)

  # str_split() returns a list; only its first element is returned here.
  pieces <- str_split(string, pattern)
  pieces[[1]]
}
#' @export
#' @rdname str_split
str_split_fixed <- function(string, pattern, n) {
  # Validate up front; str_split() repeats these checks on the same values.
  check_lengths(string, pattern)
  check_positive_integer(n)

  # Delegate to str_split() with `simplify = TRUE`.
  str_split(string = string, pattern = pattern, n = n, simplify = TRUE)
}
#' @export
#' @rdname str_split
#' @param i Element to return. Use a negative value to count from the
#' right hand side.
str_split_i <- function(string, pattern, i) {
  check_number_whole(i)

  # Positive index: count pieces from the left.
  if (i > 0) {
    # Ask for one piece more than needed, presumably so that piece `i` is a
    # real piece rather than the unsplit remainder of the string.
    # `simplify = NA` yields a matrix, and column `i` holds the result.
    mat <- str_split(string, pattern, simplify = NA, n = i + 1)
    picked <- mat[, i]
    if (keep_names(string, pattern)) {
      return(copy_names(string, picked))
    }
    return(picked)
  }

  # Neither positive nor negative: zero is not a valid position.
  if (!(i < 0)) {
    cli::cli_abort(tr_("{.arg i} must not be 0."))
  }

  # Negative index: count pieces from the right.
  from_end <- abs(i)
  pieces <- str_split(string, pattern)
  pick_from_end <- function(x) {
    len <- length(x)
    # Too few pieces to reach that far back gives a missing value.
    if (from_end > len) NA_character_ else x[[len + 1 - from_end]]
  }
  out <- map_chr(pieces, pick_from_end)
  preserve_names_if_possible(string, pattern, out)
}
# Validate a "number of pieces" argument: `Inf` is accepted as-is, anything
# else is handed to check_number_whole() with a minimum of 1.
# `arg` and `call` are forwarded unchanged to check_number_whole().
check_positive_integer <- function(
  x,
  arg = caller_arg(x),
  call = caller_env()
) {
  # Inf is accepted without further checks.
  if (identical(x, Inf)) {
    return(invisible(NULL))
  }

  check_number_whole(x, min = 1, arg = arg, call = call)
}
|