File: stri_opts_regex.Rd

package info (click to toggle)
r-cran-stringi 1.7.12-1
  • links: PTS, VCS
  • area: main
  • in suites: bookworm
  • size: 39,772 kB
  • sloc: cpp: 482,349; ansic: 51,900; perl: 471; makefile: 9; sh: 1
file content (113 lines) | stat: -rw-r--r-- 4,277 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/opts.R
\name{stri_opts_regex}
\alias{stri_opts_regex}
\title{Generate a List with Regex Matcher Settings}
\usage{
stri_opts_regex(
  case_insensitive,
  comments,
  dotall,
  dot_all = dotall,
  literal,
  multiline,
  multi_line = multiline,
  unix_lines,
  uword,
  error_on_unknown_escapes,
  time_limit = 0L,
  stack_limit = 0L,
  ...
)
}
\arguments{
\item{case_insensitive}{logical; enables case insensitive matching [regex flag \code{(?i)}]}

\item{comments}{logical; allows white space and comments within patterns [regex flag \code{(?x)}]}

\item{dotall}{logical; if set, `\code{.}` matches line terminators;
otherwise, matching of `\code{.}` stops at a line end [regex flag \code{(?s)}]}

\item{dot_all}{alias of \code{dotall}}

\item{literal}{logical; if set, treat the entire pattern as a literal string:
metacharacters or escape sequences in the input sequence will be given no special meaning;
note that in most cases you would rather use the \link{stringi-search-fixed}
facilities instead}

\item{multiline}{logical; controls the behavior of `\code{$}` and `\code{^}`.
If set, they recognize line terminators within a string; otherwise,
they match only at the start and end of the input string [regex flag \code{(?m)}]}

\item{multi_line}{alias of \code{multiline}}

\item{unix_lines}{logical; Unix-only line endings;
when enabled, only \code{U+000a} is recognized as a
line ending by `\code{.}`, `\code{$}`, and `\code{^}`.}

\item{uword}{logical; Unicode word boundaries;
if set, uses the Unicode TR 29 definition of word boundaries;
warning: Unicode word boundaries are quite different from traditional
regex word boundaries [regex flag \code{(?w)}];
see \url{https://unicode.org/reports/tr29/#Word_Boundaries}}

\item{error_on_unknown_escapes}{logical;
whether to generate an error on unrecognized backslash escapes;
if set, fail with an error on patterns that contain backslash-escaped ASCII
letters without a known special meaning;
otherwise, these escaped letters represent themselves}

\item{time_limit}{integer; processing time limit for match operations,
approximately in milliseconds (the exact duration depends on the CPU speed);
setting a limit is desirable if poorly written regexes are expected on input;
0 for no limit}

\item{stack_limit}{integer; maximal size, in bytes, of the heap storage available
for the match backtracking stack; setting a limit is desirable if poorly
written regexes are expected on input; 0 for no limit}

\item{...}{[DEPRECATED] any other arguments passed to this function
generate a warning; this argument will be removed in the future}
}
\value{
Returns a named list object; missing settings are left with default values.
}
\description{
A convenience function to tune the \pkg{ICU} regular expression
matcher's behavior, e.g., in \code{\link{stri_count_regex}}
and other \link{stringi-search-regex} functions.
}
\details{
Note that some regex settings may also be changed using ICU regex flags
inside regexes. For example, \code{'(?i)pattern'} performs
a case-insensitive match of a given pattern;
see the \pkg{ICU} User Guide entry on Regular Expressions
in the References section or \link{stringi-search-regex}.
}
\examples{
stri_detect_regex('ala', 'ALA') # case-sensitive by default
stri_detect_regex('ala', 'ALA', opts_regex=stri_opts_regex(case_insensitive=TRUE))
stri_detect_regex('ala', 'ALA', case_insensitive=TRUE) # equivalent
stri_detect_regex('ala', '(?i)ALA') # equivalent
}
\references{
\emph{\code{enum URegexpFlag}: Constants for Regular Expression Match Modes}
-- ICU4C API Documentation,
\url{https://unicode-org.github.io/icu-docs/apidoc/dev/icu4c/uregex_8h.html}

\emph{Regular Expressions} -- ICU User Guide,
\url{https://unicode-org.github.io/icu/userguide/strings/regexp.html}
}
\seealso{
The official online manual of \pkg{stringi} at \url{https://stringi.gagolewski.com/}

Gagolewski M., \pkg{stringi}: Fast and portable character string processing in R, \emph{Journal of Statistical Software} 103(2), 2022, 1-59, \doi{10.18637/jss.v103.i02}

Other search_regex: 
\code{\link{about_search_regex}},
\code{\link{about_search}}
}
\concept{search_regex}
\author{
\href{https://www.gagolewski.com/}{Marek Gagolewski} and other contributors
}