% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/modifiers.R
\name{modifiers}
\alias{modifiers}
\alias{fixed}
\alias{coll}
\alias{regex}
\alias{boundary}
\title{Control matching behaviour with modifier functions}
\usage{
fixed(pattern, ignore_case = FALSE)
coll(pattern, ignore_case = FALSE, locale = "en", ...)
regex(
pattern,
ignore_case = FALSE,
multiline = FALSE,
comments = FALSE,
dotall = FALSE,
...
)
boundary(
type = c("character", "line_break", "sentence", "word"),
skip_word_none = NA,
...
)
}
\arguments{
\item{pattern}{Pattern whose matching behaviour should be modified.}
\item{ignore_case}{Should case differences be ignored in the match?
For \code{fixed()}, this uses a simple algorithm which assumes a
one-to-one mapping between upper and lower case letters.}
\item{locale}{Locale to use for comparisons. See
\code{\link[stringi:stri_locale_list]{stringi::stri_locale_list()}} for all possible options.
Defaults to "en" (English) to ensure that default behaviour is
consistent across platforms.}
\item{...}{Other less frequently used arguments passed on to
\code{\link[stringi:stri_opts_collator]{stringi::stri_opts_collator()}} (for \code{coll()}),
\code{\link[stringi:stri_opts_regex]{stringi::stri_opts_regex()}} (for \code{regex()}), or
\code{\link[stringi:stri_opts_brkiter]{stringi::stri_opts_brkiter()}} (for \code{boundary()}).}
\item{multiline}{If \code{TRUE}, \code{^} and \code{$} match
the beginning and end of each line. If \code{FALSE}, the
default, they only match the start and end of the input.}
\item{comments}{If \code{TRUE}, white space and comments beginning with
\verb{#} are ignored. Escape literal spaces with \verb{\\\\ }.}
\item{dotall}{If \code{TRUE}, \code{.} will also match line terminators.}
\item{type}{Boundary type to detect.
\describe{
\item{\code{character}}{Every character is a boundary.}
\item{\code{line_break}}{Boundaries are places where it is acceptable to have
a line break in the current locale.}
\item{\code{sentence}}{The beginnings and ends of sentences are boundaries,
using intelligent rules to avoid counting abbreviations
(\href{https://www.unicode.org/reports/tr29/#Sentence_Boundaries}{details}).}
\item{\code{word}}{The beginnings and ends of words are boundaries.}
}}
\item{skip_word_none}{Ignore "words" that don't contain any letters
or numbers - i.e. punctuation. Default \code{NA} will skip such "words"
only when splitting on \code{word} boundaries.}
}
\value{
A stringr modifier object, i.e. a character vector with
parent S3 class \code{stringr_pattern}.
}
\description{
Modifier functions control the meaning of the \code{pattern} argument to
stringr functions:
\itemize{
\item \code{boundary()}: Match boundaries between things.
\item \code{coll()}: Compare strings using standard Unicode collation rules.
\item \code{fixed()}: Compare literal bytes.
\item \code{regex()} (the default): Uses ICU regular expressions.
}
}
\examples{
pattern <- "a.b"
strings <- c("abb", "a.b")
str_detect(strings, pattern)
str_detect(strings, fixed(pattern))
str_detect(strings, coll(pattern))
# coll() is useful for locale-aware case-insensitive matching
i <- c("I", "\u0130", "i")
i
str_detect(i, fixed("i", TRUE))
str_detect(i, coll("i", TRUE))
str_detect(i, coll("i", TRUE, locale = "tr"))
# Word boundaries
words <- c("These are some words.")
str_count(words, boundary("word"))
str_split(words, " ")[[1]]
str_split(words, boundary("word"))[[1]]
# Regular expression variations
str_extract_all("The Cat in the Hat", "[a-z]+")
str_extract_all("The Cat in the Hat", regex("[a-z]+", TRUE))
str_extract_all("a\nb\nc", "^.")
str_extract_all("a\nb\nc", regex("^.", multiline = TRUE))
str_extract_all("a\nb\nc", "a.")
str_extract_all("a\nb\nc", regex("a.", dotall = TRUE))
}