File: stri_extract_boundaries.Rd

package info (click to toggle)
r-cran-stringi 1.8.4-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 30,632 kB
  • sloc: cpp: 301,844; perl: 471; makefile: 9; sh: 1
file content (140 lines) | stat: -rw-r--r-- 4,490 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/search_extract_bound.R
\name{stri_extract_all_boundaries}
\alias{stri_extract_all_boundaries}
\alias{stri_extract_last_boundaries}
\alias{stri_extract_first_boundaries}
\alias{stri_extract_all_words}
\alias{stri_extract_first_words}
\alias{stri_extract_last_words}
\title{Extract Data Between Text Boundaries}
\usage{
stri_extract_all_boundaries(
  str,
  simplify = FALSE,
  omit_no_match = FALSE,
  ...,
  opts_brkiter = NULL
)

stri_extract_last_boundaries(str, ..., opts_brkiter = NULL)

stri_extract_first_boundaries(str, ..., opts_brkiter = NULL)

stri_extract_all_words(
  str,
  simplify = FALSE,
  omit_no_match = FALSE,
  locale = NULL
)

stri_extract_first_words(str, locale = NULL)

stri_extract_last_words(str, locale = NULL)
}
\arguments{
\item{str}{character vector or an object coercible to}

\item{simplify}{single logical value;
if \code{TRUE} or \code{NA}, then a character matrix is returned;
otherwise (the default), a list of character vectors is given, see Value}

\item{omit_no_match}{single logical value; if \code{FALSE},
then a missing value will indicate that there are no words}

\item{...}{additional settings for \code{opts_brkiter}}

\item{opts_brkiter}{a named list with \pkg{ICU} BreakIterator's settings,
see \code{\link{stri_opts_brkiter}};
\code{NULL} for the default break iterator, i.e., \code{line_break}}

\item{locale}{\code{NULL} or \code{''} for text boundary analysis following
the conventions of the default locale, or a single string with
locale identifier, see \link{stringi-locale}}
}
\value{
For \code{stri_extract_all_*},
if \code{simplify=FALSE} (the default), then a
 list of character vectors is returned. Each string consists of
a separate word. In case of \code{omit_no_match=FALSE} and
if there are no words or if a string is missing,
a single \code{NA} is provided on output.

Otherwise, \code{\link{stri_list2matrix}} with \code{byrow=TRUE} argument
is called on the resulting object.
In such a case, a character matrix with \code{length(str)} rows
is returned. Note that \code{\link{stri_list2matrix}}'s \code{fill} argument
is set to an empty string and \code{NA},
for \code{simplify} \code{TRUE} and \code{NA}, respectively.

For \code{stri_extract_first_*} and \code{stri_extract_last_*},
a character vector is returned.
A \code{NA} element indicates a no-match.
}
\description{
These functions extract data between text boundaries.
}
\details{
Vectorized over \code{str}.

For more information on text boundary analysis
performed by \pkg{ICU}'s \code{BreakIterator}, see
\link{stringi-search-boundaries}.

In case of \code{stri_extract_*_words},
just like in \code{\link{stri_count_words}},
\pkg{ICU}'s word \code{BreakIterator} iterator is used
to locate the word boundaries, and all non-word characters
(\code{UBRK_WORD_NONE} rule status) are ignored.
}
\examples{
stri_extract_all_words('stringi: THE string processing package 123.48...')

}
\seealso{
The official online manual of \pkg{stringi} at \url{https://stringi.gagolewski.com/}

Gagolewski M., \pkg{stringi}: Fast and portable character string processing in R, \emph{Journal of Statistical Software} 103(2), 2022, 1-59, \doi{10.18637/jss.v103.i02}

Other search_extract: 
\code{\link{about_search}},
\code{\link{stri_extract_all}()},
\code{\link{stri_match_all}()}

Other locale_sensitive: 
\code{\link{\%s<\%}()},
\code{\link{about_locale}},
\code{\link{about_search_boundaries}},
\code{\link{about_search_coll}},
\code{\link{stri_compare}()},
\code{\link{stri_count_boundaries}()},
\code{\link{stri_duplicated}()},
\code{\link{stri_enc_detect2}()},
\code{\link{stri_locate_all_boundaries}()},
\code{\link{stri_opts_collator}()},
\code{\link{stri_order}()},
\code{\link{stri_rank}()},
\code{\link{stri_sort_key}()},
\code{\link{stri_sort}()},
\code{\link{stri_split_boundaries}()},
\code{\link{stri_trans_tolower}()},
\code{\link{stri_unique}()},
\code{\link{stri_wrap}()}

Other text_boundaries: 
\code{\link{about_search_boundaries}},
\code{\link{about_search}},
\code{\link{stri_count_boundaries}()},
\code{\link{stri_locate_all_boundaries}()},
\code{\link{stri_opts_brkiter}()},
\code{\link{stri_split_boundaries}()},
\code{\link{stri_split_lines}()},
\code{\link{stri_trans_tolower}()},
\code{\link{stri_wrap}()}
}
\concept{locale_sensitive}
\concept{search_extract}
\concept{text_boundaries}
\author{
\href{https://www.gagolewski.com/}{Marek Gagolewski} and other contributors
}