1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140
|
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/search_extract_bound.R
\name{stri_extract_all_boundaries}
\alias{stri_extract_all_boundaries}
\alias{stri_extract_last_boundaries}
\alias{stri_extract_first_boundaries}
\alias{stri_extract_all_words}
\alias{stri_extract_first_words}
\alias{stri_extract_last_words}
\title{Extract Data Between Text Boundaries}
\usage{
stri_extract_all_boundaries(
str,
simplify = FALSE,
omit_no_match = FALSE,
...,
opts_brkiter = NULL
)
stri_extract_last_boundaries(str, ..., opts_brkiter = NULL)
stri_extract_first_boundaries(str, ..., opts_brkiter = NULL)
stri_extract_all_words(
str,
simplify = FALSE,
omit_no_match = FALSE,
locale = NULL
)
stri_extract_first_words(str, locale = NULL)
stri_extract_last_words(str, locale = NULL)
}
\arguments{
\item{str}{character vector or an object coercible to one}
\item{simplify}{single logical value;
if \code{TRUE} or \code{NA}, then a character matrix is returned;
otherwise (the default), a list of character vectors is given, see Value}
\item{omit_no_match}{single logical value; if \code{FALSE},
then a missing value will indicate that there are no words}
\item{...}{additional settings for \code{opts_brkiter}}
\item{opts_brkiter}{a named list with \pkg{ICU} BreakIterator's settings,
see \code{\link{stri_opts_brkiter}};
\code{NULL} for the default break iterator, i.e., \code{line_break}}
\item{locale}{\code{NULL} or \code{''} for text boundary analysis following
the conventions of the default locale, or a single string with
locale identifier, see \link{stringi-locale}}
}
\value{
For \code{stri_extract_all_*},
if \code{simplify=FALSE} (the default), then a
list of character vectors is returned. Each string consists of
a separate word. In case of \code{omit_no_match=FALSE} and
if there are no words or if a string is missing,
a single \code{NA} is provided on output.
Otherwise, \code{\link{stri_list2matrix}} with \code{byrow=TRUE} argument
is called on the resulting object.
In such a case, a character matrix with \code{length(str)} rows
is returned. Note that \code{\link{stri_list2matrix}}'s \code{fill} argument
is set to an empty string when \code{simplify} is \code{TRUE}
and to \code{NA} when \code{simplify} is \code{NA}.
For \code{stri_extract_first_*} and \code{stri_extract_last_*},
a character vector is returned.
An \code{NA} element indicates that there was no match.
}
\description{
These functions extract data between text boundaries.
}
\details{
Vectorized over \code{str}.
For more information on text boundary analysis
performed by \pkg{ICU}'s \code{BreakIterator}, see
\link{stringi-search-boundaries}.
In case of \code{stri_extract_*_words},
just like in \code{\link{stri_count_words}},
\pkg{ICU}'s word \code{BreakIterator} is used
to locate the word boundaries, and all non-word characters
(\code{UBRK_WORD_NONE} rule status) are ignored.
}
\examples{
stri_extract_all_words('stringi: THE string processing package 123.48...')
}
\seealso{
The official online manual of \pkg{stringi} at \url{https://stringi.gagolewski.com/}
Gagolewski M., \pkg{stringi}: Fast and portable character string processing in R, \emph{Journal of Statistical Software} 103(2), 2022, 1-59, \doi{10.18637/jss.v103.i02}
Other search_extract:
\code{\link{about_search}},
\code{\link{stri_extract_all}()},
\code{\link{stri_match_all}()}
Other locale_sensitive:
\code{\link{\%s<\%}()},
\code{\link{about_locale}},
\code{\link{about_search_boundaries}},
\code{\link{about_search_coll}},
\code{\link{stri_compare}()},
\code{\link{stri_count_boundaries}()},
\code{\link{stri_duplicated}()},
\code{\link{stri_enc_detect2}()},
\code{\link{stri_locate_all_boundaries}()},
\code{\link{stri_opts_collator}()},
\code{\link{stri_order}()},
\code{\link{stri_rank}()},
\code{\link{stri_sort_key}()},
\code{\link{stri_sort}()},
\code{\link{stri_split_boundaries}()},
\code{\link{stri_trans_tolower}()},
\code{\link{stri_unique}()},
\code{\link{stri_wrap}()}
Other text_boundaries:
\code{\link{about_search_boundaries}},
\code{\link{about_search}},
\code{\link{stri_count_boundaries}()},
\code{\link{stri_locate_all_boundaries}()},
\code{\link{stri_opts_brkiter}()},
\code{\link{stri_split_boundaries}()},
\code{\link{stri_split_lines}()},
\code{\link{stri_trans_tolower}()},
\code{\link{stri_wrap}()}
}
\concept{locale_sensitive}
\concept{search_extract}
\concept{text_boundaries}
\author{
\href{https://www.gagolewski.com/}{Marek Gagolewski} and other contributors
}
|