1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148
|
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/search_locate_bound.R
\name{stri_locate_all_boundaries}
\alias{stri_locate_all_boundaries}
\alias{stri_locate_last_boundaries}
\alias{stri_locate_first_boundaries}
\alias{stri_locate_all_words}
\alias{stri_locate_last_words}
\alias{stri_locate_first_words}
\title{Locate Text Boundaries}
\usage{
stri_locate_all_boundaries(
str,
omit_no_match = FALSE,
get_length = FALSE,
...,
opts_brkiter = NULL
)
stri_locate_last_boundaries(str, get_length = FALSE, ..., opts_brkiter = NULL)
stri_locate_first_boundaries(str, get_length = FALSE, ..., opts_brkiter = NULL)
stri_locate_all_words(
str,
omit_no_match = FALSE,
locale = NULL,
get_length = FALSE
)
stri_locate_last_words(str, locale = NULL, get_length = FALSE)
stri_locate_first_words(str, locale = NULL, get_length = FALSE)
}
\arguments{
\item{str}{character vector or an object coercible to one}
\item{omit_no_match}{single logical value; if \code{TRUE},
a no-match will be indicated by a matrix with 0 rows;
\code{stri_locate_all_*} only}
\item{get_length}{single logical value; if \code{FALSE} (default),
generate \emph{from-to} matrices; otherwise, output
\emph{from-length} ones}
\item{...}{additional settings for \code{opts_brkiter}}
\item{opts_brkiter}{named list with \pkg{ICU} BreakIterator's settings,
see \code{\link{stri_opts_brkiter}};
\code{NULL} for default break iterator, i.e., \code{line_break}}
\item{locale}{\code{NULL} or \code{''} for text boundary analysis following
the conventions of the default locale, or a single string with
locale identifier, see \link{stringi-locale}}
}
\value{
\code{stri_locate_all_*} yields a list of \code{length(str)}
integer matrices.
\code{stri_locate_first_*} and \code{stri_locate_last_*}
return an integer matrix.
See \code{\link{stri_locate}} for more details.
}
\description{
These functions locate text boundaries
(like character, word, line, or sentence boundaries).
Use \code{stri_locate_all_*} to locate all the matches.
\code{stri_locate_first_*} and \code{stri_locate_last_*}
give the first or the last matches, respectively.
}
\details{
Vectorized over \code{str}.
For more information on text boundary analysis
performed by \pkg{ICU}'s \code{BreakIterator}, see
\link{stringi-search-boundaries}.
For \code{stri_locate_*_words},
just like in \code{\link{stri_extract_all_words}} and \code{\link{stri_count_words}},
\pkg{ICU}'s word \code{BreakIterator} is used
to locate the word boundaries, and all non-word characters
(\code{UBRK_WORD_NONE} rule status) are ignored.
Each of these functions is equivalent to a call to
\code{stri_locate_*_boundaries(str, type='word', skip_word_none=TRUE, locale=locale)}.
}
\examples{
test <- 'The\u00a0above-mentioned features are very useful. Spam, spam, eggs, bacon, and spam.'
stri_locate_all_words(test)
stri_locate_all_boundaries(
'Mr. Jones and Mrs. Brown are very happy. So am I, Prof. Smith.',
type='sentence',
locale='en_US@ss=standard' # ICU >= 56 only
)
}
\seealso{
The official online manual of \pkg{stringi} at \url{https://stringi.gagolewski.com/}
Gagolewski M., \pkg{stringi}: Fast and portable character string processing in R, \emph{Journal of Statistical Software} 103(2), 2022, 1-59, \doi{10.18637/jss.v103.i02}
Other search_locate:
\code{\link{about_search}},
\code{\link{stri_locate_all}()}
Other indexing:
\code{\link{stri_locate_all}()},
\code{\link{stri_sub_all}()},
\code{\link{stri_sub}()}
Other locale_sensitive:
\code{\link{\%s<\%}()},
\code{\link{about_locale}},
\code{\link{about_search_boundaries}},
\code{\link{about_search_coll}},
\code{\link{stri_compare}()},
\code{\link{stri_count_boundaries}()},
\code{\link{stri_duplicated}()},
\code{\link{stri_enc_detect2}()},
\code{\link{stri_extract_all_boundaries}()},
\code{\link{stri_opts_collator}()},
\code{\link{stri_order}()},
\code{\link{stri_rank}()},
\code{\link{stri_sort_key}()},
\code{\link{stri_sort}()},
\code{\link{stri_split_boundaries}()},
\code{\link{stri_trans_tolower}()},
\code{\link{stri_unique}()},
\code{\link{stri_wrap}()}
Other text_boundaries:
\code{\link{about_search_boundaries}},
\code{\link{about_search}},
\code{\link{stri_count_boundaries}()},
\code{\link{stri_extract_all_boundaries}()},
\code{\link{stri_opts_brkiter}()},
\code{\link{stri_split_boundaries}()},
\code{\link{stri_split_lines}()},
\code{\link{stri_trans_tolower}()},
\code{\link{stri_wrap}()}
}
\concept{indexing}
\concept{locale_sensitive}
\concept{search_locate}
\concept{text_boundaries}
\author{
\href{https://www.gagolewski.com/}{Marek Gagolewski} and other contributors
}
|