% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/opts.R
\name{stri_opts_brkiter}
\alias{stri_opts_brkiter}
\title{Generate a List with BreakIterator Settings}
\usage{
stri_opts_brkiter(
type,
locale,
skip_word_none,
skip_word_number,
skip_word_letter,
skip_word_kana,
skip_word_ideo,
skip_line_soft,
skip_line_hard,
skip_sentence_term,
skip_sentence_sep
)
}
\arguments{
\item{type}{single string; either the break iterator type, one of \code{character},
\code{line_break}, \code{sentence}, \code{word},
or a custom set of ICU break iteration rules;
see \link{stringi-search-boundaries}}
\item{locale}{single string, \code{NULL} or \code{''} for default locale}
\item{skip_word_none}{logical; perform no action for 'words' that
do not fit into any other category (e.g., spaces and most punctuation)}
\item{skip_word_number}{logical; perform no action for words that
appear to be numbers}
\item{skip_word_letter}{logical; perform no action for words that
contain letters, excluding hiragana, katakana, or ideographic characters}
\item{skip_word_kana}{logical; perform no action for words
containing kana characters}
\item{skip_word_ideo}{logical; perform no action for words
containing ideographic characters}
\item{skip_line_soft}{logical; perform no action for soft line breaks,
i.e., positions where a line break is acceptable but not required}
\item{skip_line_hard}{logical; perform no action for hard,
or mandatory, line breaks}
\item{skip_sentence_term}{logical; perform no action for sentences
ending with a sentence terminator ('\code{.}', '\code{?}',
'\code{!}', etc.), possibly followed by a hard separator
(\code{CR}, \code{LF}, \code{PS}, etc.)}
\item{skip_sentence_sep}{logical; perform no action for sentences
that do not contain an ending sentence terminator, but are ended
by a hard separator or end of input}
}
\value{
Returns a named list object.
Omitted \code{skip_*} values act as if they had been set to \code{FALSE}.
}
\description{
A convenience function to tune the \pkg{ICU} \code{BreakIterator}'s behavior
in some text boundary analysis functions; see
\link{stringi-search-boundaries}.
}
\details{
The \code{skip_*} family of settings may be used to prevent performing
any special actions on particular types of text boundaries, e.g.,
in the case of the \code{\link{stri_locate_all_boundaries}} and
\code{\link{stri_split_boundaries}} functions.
Note that custom break iterator rules (advanced users only)
should be specified as a single string.
For a detailed description of the syntax of rule-based break iterator
(RBBI) rules, please refer to the ICU User Guide on Boundary Analysis.
}
\references{
\emph{\code{ubrk.h} File Reference} -- ICU4C API Documentation,
\url{https://unicode-org.github.io/icu-docs/apidoc/dev/icu4c/ubrk_8h.html}

\emph{Boundary Analysis} -- ICU User Guide,
\url{https://unicode-org.github.io/icu/userguide/boundaryanalysis/}
}
\seealso{
The official online manual of \pkg{stringi} at \url{https://stringi.gagolewski.com/}

Gagolewski M., \pkg{stringi}: Fast and portable character string processing in R, \emph{Journal of Statistical Software} 103(2), 2022, 1--59, \doi{10.18637/jss.v103.i02}

Other text_boundaries:
\code{\link{about_search_boundaries}},
\code{\link{about_search}},
\code{\link{stri_count_boundaries}()},
\code{\link{stri_extract_all_boundaries}()},
\code{\link{stri_locate_all_boundaries}()},
\code{\link{stri_split_boundaries}()},
\code{\link{stri_split_lines}()},
\code{\link{stri_trans_tolower}()},
\code{\link{stri_wrap}()}
}
\concept{text_boundaries}
\author{
\href{https://www.gagolewski.com/}{Marek Gagolewski} and other contributors
}