File: extract.r

package info (click to toggle)
r-cran-stringr 0.6.2-2
  • links: PTS, VCS
  • area: main
  • in suites: jessie, jessie-kfreebsd
  • size: 308 kB
  • sloc: makefile: 3
file content (49 lines) | stat: -rw-r--r-- 1,706 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
#' Extract the first piece of each string that matches a pattern.
#'
#' Vectorised over \code{string}.  \code{pattern} should be a single pattern,
#' i.e. a character vector of length one.
#'
#' The first match is found with \code{\link{str_locate}} and the matched
#' span is then cut out of the input with \code{\link{str_sub}}.
#'
#' @inheritParams str_detect
#' @return character vector holding the first matching piece of each input.
#' @keywords character
#' @seealso \code{\link{str_extract_all}} to extract all matches
#' @export
#' @examples
#' shopping_list <- c("apples x4", "flour", "sugar", "milk x2")
#' str_extract(shopping_list, "\\d")
#' str_extract(shopping_list, "[a-z]+")
#' str_extract(shopping_list, "[a-z]{1,4}")
#' str_extract(shopping_list, "\\b[a-z]{1,4}\\b")
str_extract <- function(string, pattern) {
  # Validate the inputs; the pattern is checked against the string.
  string <- check_string(string)
  pattern <- check_pattern(pattern, string)

  # One row per input string, with "start" and "end" columns.
  loc <- str_locate(string, pattern)
  first <- loc[, "start"]
  last <- loc[, "end"]

  str_sub(string, first, last)
}

#' Extract every piece of each string that matches a pattern.
#'
#' Vectorised over \code{string}.  \code{pattern} should be a single pattern,
#' i.e. a character vector of length one.
#'
#' Matches are found with \code{\link{str_locate_all}}; each input string is
#' then cut up with \code{\link{str_sub}} using the located start and end
#' positions.
#'
#' @inheritParams str_detect
#' @return list of character vectors, one list element per input string.
#' @keywords character
#' @seealso \code{\link{str_extract}} to extract the first match
#' @export
#' @examples
#' shopping_list <- c("apples x4", "bag of flour", "bag of sugar", "milk x2")
#' str_extract_all(shopping_list, "[a-z]+")
#' str_extract_all(shopping_list, "\\b[a-z]+\\b")
#' str_extract_all(shopping_list, "\\d")
str_extract_all <- function(string, pattern) {
  # Validate the inputs; the pattern is checked against the string.
  string <- check_string(string)
  pattern <- check_pattern(pattern, string)

  # One entry per input string, each with "start" and "end" columns.
  all_locs <- str_locate_all(string, pattern)

  # Pull every located span out of the idx-th input string.
  extract_at <- function(idx) {
    loc <- all_locs[[idx]]
    str_sub(string[idx], loc[, "start"], loc[, "end"])
  }

  lapply(seq_along(string), extract_at)
}