File: locate.R

package info (click to toggle)
r-cran-stringr 1.5.1-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 1,032 kB
  • sloc: javascript: 11; sh: 9; makefile: 2
file content (85 lines) | stat: -rw-r--r-- 3,050 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
#' Find location of match
#'
#' @description
#' `str_locate()` gives the `start` and `end` position of the first match in
#' each string; `str_locate_all()` gives the `start` and `end` position of
#' every match.
#'
#' Both `start` and `end` are inclusive positions, so a zero-length match
#' (e.g. `$`, `^`, `\\b`) is reported with an `end` that is smaller than its
#' `start`.
#'
#' @inheritParams str_detect
#' @returns
#' * `str_locate()` returns an integer matrix with two columns and
#'   one row for each element of `string`. The `start` column holds the
#'   position where the match begins and the `end` column the position
#'   where it ends.
#'
#' * `str_locate_all()` returns a list of integer matrices with the same
#'   length as `string`/`pattern`. Each matrix has the `start` and `end`
#'   columns described above and one row per match.
#' @seealso
#'   [str_extract()] for a convenient way of extracting matches,
#'   [stringi::stri_locate()] for the underlying implementation.
#' @export
#' @examples
#' fruit <- c("apple", "banana", "pear", "pineapple")
#' str_locate(fruit, "$")
#' str_locate(fruit, "a")
#' str_locate(fruit, "e")
#' str_locate(fruit, c("a", "b", "p", "p"))
#'
#' str_locate_all(fruit, "a")
#' str_locate_all(fruit, "e")
#' str_locate_all(fruit, c("a", "b", "p", "p"))
#'
#' # Find location of every character
#' str_locate_all(fruit, "")
str_locate <- function(string, pattern) {
  check_lengths(string, pattern)

  # Select the stringi "locate first" backend that corresponds to the kind
  # of pattern supplied.
  engine <- type(pattern)

  switch(engine,
    # An empty pattern falls through to the boundary implementation.
    empty = ,
    bound = stri_locate_first_boundaries(
      string,
      opts_brkiter = opts(pattern)
    ),
    fixed = stri_locate_first_fixed(
      string,
      pattern,
      opts_fixed = opts(pattern)
    ),
    coll = stri_locate_first_coll(
      string,
      pattern,
      opts_collator = opts(pattern)
    ),
    regex = stri_locate_first_regex(
      string,
      pattern,
      opts_regex = opts(pattern)
    )
  )
}

#' @rdname str_locate
#' @export
str_locate_all <- function(string, pattern) {
  check_lengths(string, pattern)

  # Resolve the pattern's options once; whichever backend is selected below
  # receives this same object.
  pattern_opts <- opts(pattern)
  engine <- type(pattern)

  # Every backend is invoked with `omit_no_match = TRUE`.
  switch(engine,
    # An empty pattern falls through to the boundary implementation.
    empty = ,
    bound = stri_locate_all_boundaries(
      string,
      omit_no_match = TRUE,
      opts_brkiter = pattern_opts
    ),
    fixed = stri_locate_all_fixed(
      string,
      pattern,
      omit_no_match = TRUE,
      opts_fixed = pattern_opts
    ),
    coll = stri_locate_all_coll(
      string,
      pattern,
      omit_no_match = TRUE,
      opts_collator = pattern_opts
    ),
    regex = stri_locate_all_regex(
      string,
      pattern,
      omit_no_match = TRUE,
      opts_regex = pattern_opts
    )
  )
}


#' Switch location of matches to location of non-matches
#'
#' Invert a matrix of match locations so that it describes the stretches
#' between (and around) the matches instead of the matches themselves.
#'
#' @param loc matrix of match locations with columns named `start` and
#'   `end`, as from [str_locate_all()]
#' @return A matrix with columns `start` and `end` giving the locations of
#'   the non-matches. It has one more row than `loc`: the first row starts
#'   at `0` and the last row ends at `-1`. The result carries no row names.
#' @export
#' @examples
#' numbers <- "1 and 2 and 4 and 456"
#' num_loc <- str_locate_all(numbers, "[0-9]+")[[1]]
#' str_sub(numbers, num_loc[, "start"], num_loc[, "end"])
#'
#' text_loc <- invert_match(num_loc)
#' str_sub(numbers, text_loc[, "start"], text_loc[, "end"])
invert_match <- function(loc) {
  # Extracting a column from a one-row matrix yields a length-one vector
  # that is named after the column; left in place, that name would surface
  # as a stray row name in the result. Strip names up front.
  match_end <- unname(loc[, "end"])
  match_start <- unname(loc[, "start"])

  # Each non-match begins right after a match ends (the first one at 0) and
  # ends right before the next match begins (the last one at -1).
  cbind(
    start = c(0L, match_end + 1L),
    end = c(match_start - 1L, -1L)
  )
}