File: modifiers.Rd

package info (click to toggle)
r-cran-stringr 1.5.1-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 1,032 kB
  • sloc: javascript: 11; sh: 9; makefile: 2
file content (114 lines) | stat: -rw-r--r-- 3,708 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/modifiers.R
\name{modifiers}
\alias{modifiers}
\alias{fixed}
\alias{coll}
\alias{regex}
\alias{boundary}
\title{Control matching behaviour with modifier functions}
\usage{
fixed(pattern, ignore_case = FALSE)

coll(pattern, ignore_case = FALSE, locale = "en", ...)

regex(
  pattern,
  ignore_case = FALSE,
  multiline = FALSE,
  comments = FALSE,
  dotall = FALSE,
  ...
)

boundary(
  type = c("character", "line_break", "sentence", "word"),
  skip_word_none = NA,
  ...
)
}
\arguments{
\item{pattern}{Pattern whose matching behaviour should be modified.}

\item{ignore_case}{Should case differences be ignored in the match?
For \code{fixed()}, this uses a simple algorithm which assumes a
one-to-one mapping between upper and lower case letters.}

\item{locale}{Locale to use for comparisons. See
\code{\link[stringi:stri_locale_list]{stringi::stri_locale_list()}} for all possible options.
Defaults to "en" (English) to ensure that default behaviour is
consistent across platforms.}

\item{...}{Other less frequently used arguments passed on to
\code{\link[stringi:stri_opts_collator]{stringi::stri_opts_collator()}},
\code{\link[stringi:stri_opts_regex]{stringi::stri_opts_regex()}}, or
\code{\link[stringi:stri_opts_brkiter]{stringi::stri_opts_brkiter()}}.}

\item{multiline}{If \code{TRUE}, \code{^} and \code{$} match
the beginning and end of each line. If \code{FALSE}, the
default, they only match the start and end of the input.}

\item{comments}{If \code{TRUE}, white space and comments beginning with
\verb{#} are ignored. Escape literal spaces with \verb{\\\\ }.}

\item{dotall}{If \code{TRUE}, \code{.} will also match line terminators.}

\item{type}{Boundary type to detect.
\describe{
\item{\code{character}}{Every character is a boundary.}
\item{\code{line_break}}{Boundaries are places where it is acceptable to have
a line break in the current locale.}
\item{\code{sentence}}{The beginnings and ends of sentences are boundaries,
using intelligent rules to avoid counting abbreviations
(\href{https://www.unicode.org/reports/tr29/#Sentence_Boundaries}{details}).}
\item{\code{word}}{The beginnings and ends of words are boundaries.}
}}

\item{skip_word_none}{Ignore "words" that don't contain any letters
or numbers - i.e. punctuation. Default \code{NA} will skip such "words"
only when splitting on \code{word} boundaries.}
}
\value{
A stringr modifier object, i.e. a character vector with
parent S3 class \code{stringr_pattern}.
}
\description{
Modifier functions control the meaning of the \code{pattern} argument to
stringr functions:
\itemize{
\item \code{boundary()}: Match boundaries between things.
\item \code{coll()}: Compare strings using standard Unicode collation rules.
\item \code{fixed()}: Compare literal bytes.
\item \code{regex()} (the default): Uses ICU regular expressions.
}
}
\examples{
pattern <- "a.b"
strings <- c("abb", "a.b")
str_detect(strings, pattern)
str_detect(strings, fixed(pattern))
str_detect(strings, coll(pattern))

# coll() is useful for locale-aware case-insensitive matching
i <- c("I", "\u0130", "i")
i
str_detect(i, fixed("i", TRUE))
str_detect(i, coll("i", TRUE))
str_detect(i, coll("i", TRUE, locale = "tr"))

# Word boundaries
words <- c("These are   some words.")
str_count(words, boundary("word"))
str_split(words, " ")[[1]]
str_split(words, boundary("word"))[[1]]

# Regular expression variations
str_extract_all("The Cat in the Hat", "[a-z]+")
str_extract_all("The Cat in the Hat", regex("[a-z]+", TRUE))

str_extract_all("a\nb\nc", "^.")
str_extract_all("a\nb\nc", regex("^.", multiline = TRUE))

str_extract_all("a\nb\nc", "a.")
str_extract_all("a\nb\nc", regex("a.", dotall = TRUE))
}