File: stri_match.Rd

package info (click to toggle)
r-cran-stringi 1.7.12-1
  • links: PTS, VCS
  • area: main
  • in suites: bookworm
  • size: 39,772 kB
  • sloc: cpp: 482,349; ansic: 51,900; perl: 471; makefile: 9; sh: 1
file content (148 lines) | stat: -rw-r--r-- 4,983 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/search_match_4.R
\name{stri_match_all}
\alias{stri_match_all}
\alias{stri_match_first}
\alias{stri_match_last}
\alias{stri_match}
\alias{stri_match_all_regex}
\alias{stri_match_first_regex}
\alias{stri_match_last_regex}
\title{Extract Regex Pattern Matches, Together with Capture Groups}
\usage{
stri_match_all(str, ..., regex)

stri_match_first(str, ..., regex)

stri_match_last(str, ..., regex)

stri_match(str, ..., regex, mode = c("first", "all", "last"))

stri_match_all_regex(
  str,
  pattern,
  omit_no_match = FALSE,
  cg_missing = NA_character_,
  ...,
  opts_regex = NULL
)

stri_match_first_regex(
  str,
  pattern,
  cg_missing = NA_character_,
  ...,
  opts_regex = NULL
)

stri_match_last_regex(
  str,
  pattern,
  cg_missing = NA_character_,
  ...,
  opts_regex = NULL
)
}
\arguments{
\item{str}{character vector; strings to search in}

\item{...}{supplementary arguments passed to the underlying functions,
including additional settings for \code{opts_regex}}

\item{mode}{single string;
one of: \code{'first'} (the default), \code{'all'}, \code{'last'}}

\item{pattern, regex}{character vector;
search patterns; for more details refer to \link{stringi-search}}

\item{omit_no_match}{single logical value; if \code{FALSE},
then a row with missing values will indicate that there was no match;
\code{stri_match_all_*} only}

\item{cg_missing}{single string to be used if a capture group match
is unavailable}

\item{opts_regex}{a named list with \pkg{ICU} Regex settings,
see \code{\link{stri_opts_regex}}; \code{NULL}
for default settings}
}
\value{
For \code{stri_match_all*},
a list of character matrices is returned. Each list element
represents the results of a different search scenario.

For \code{stri_match_first*} and \code{stri_match_last*},
a character matrix is returned.
Each row corresponds to a different search result.

The first matrix column gives the whole match. The second one corresponds to
the first capture group, the third -- the second capture group, and so on.

If regular expressions feature a named capture group,
the matrix columns will be named accordingly.
However, for \code{stri_match_first*} and \code{stri_match_last*}
this will only be the case if there is a single pattern.
}
\description{
These functions extract substrings in \code{str} that
match a given regex \code{pattern}. Additionally, they extract matches
to every \emph{capture group}, i.e., to all the sub-patterns given
in round parentheses.
}
\details{
Vectorized over \code{str} and \code{pattern} (with recycling
of the elements in the shorter vector if necessary). This makes it possible to,
for instance, search for one pattern in each given string,
search for each pattern in one given string,
and search for the i-th pattern within the i-th string.

If no pattern match is detected and \code{omit_no_match=FALSE},
then \code{NA}s are included in the resulting matrix (matrices), see Examples.

\code{stri_match}, \code{stri_match_all}, \code{stri_match_first},
and \code{stri_match_last} are convenience functions.
They merely call \code{stri_match_*_regex} and are
provided for consistency with other string searching functions' wrappers,
see, among others, \code{\link{stri_extract}}.
}
\examples{
stri_match_all_regex('breakfast=eggs, lunch=pizza, dessert=icecream',
   '(\\\\w+)=(\\\\w+)')
stri_match_all_regex(c('breakfast=eggs', 'lunch=pizza', 'no food here'),
   '(\\\\w+)=(\\\\w+)')
stri_match_all_regex(c('breakfast=eggs;lunch=pizza',
   'breakfast=bacon;lunch=spaghetti', 'no food here'),
   '(\\\\w+)=(\\\\w+)')
stri_match_all_regex(c('breakfast=eggs;lunch=pizza',
   'breakfast=bacon;lunch=spaghetti', 'no food here'),
   '(?<when>\\\\w+)=(?<what>\\\\w+)')  # named capture groups
stri_match_first_regex(c('breakfast=eggs;lunch=pizza',
   'breakfast=bacon;lunch=spaghetti', 'no food here'),
   '(\\\\w+)=(\\\\w+)')
stri_match_last_regex(c('breakfast=eggs;lunch=pizza',
   'breakfast=bacon;lunch=spaghetti', 'no food here'),
   '(\\\\w+)=(\\\\w+)')

stri_match_first_regex(c('abcd', ':abcd', ':abcd:'), '^(:)?([^:]*)(:)?$')
stri_match_first_regex(c('abcd', ':abcd', ':abcd:'), '^(:)?([^:]*)(:)?$', cg_missing='')

# Match all the patterns of the form XYX, including overlapping matches:
stri_match_all_regex('ACAGAGACTTTAGATAGAGAAGA', '(?=(([ACGT])[ACGT]\\\\2))')[[1]][,2]
# Compare the above to:
stri_extract_all_regex('ACAGAGACTTTAGATAGAGAAGA', '([ACGT])[ACGT]\\\\1')

}
\seealso{
The official online manual of \pkg{stringi} at \url{https://stringi.gagolewski.com/}

Gagolewski M., \pkg{stringi}: Fast and portable character string processing in R, \emph{Journal of Statistical Software} 103(2), 2022, 1-59, \doi{10.18637/jss.v103.i02}

Other search_extract: 
\code{\link{about_search}},
\code{\link{stri_extract_all_boundaries}()},
\code{\link{stri_extract_all}()}
}
\concept{search_extract}
\author{
\href{https://www.gagolewski.com/}{Marek Gagolewski} and other contributors
}