File: stri_opts_brkiter.Rd

package info (click to toggle)
r-cran-stringi 1.8.4-1
links: PTS, VCS
area: main
in suites: forky, sid, trixie
size: 30,632 kB
sloc: cpp: 301,844; perl: 471; makefile: 9; sh: 1
file content (105 lines) | stat: -rw-r--r-- 3,614 bytes
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/opts.R
\name{stri_opts_brkiter}
\alias{stri_opts_brkiter}
\title{Generate a List with BreakIterator Settings}
\usage{
stri_opts_brkiter(
  type,
  locale,
  skip_word_none,
  skip_word_number,
  skip_word_letter,
  skip_word_kana,
  skip_word_ideo,
  skip_line_soft,
  skip_line_hard,
  skip_sentence_term,
  skip_sentence_sep
)
}
\arguments{
\item{type}{single string; either the break iterator type, one of \code{character},
\code{line_break}, \code{sentence}, \code{word},
or a custom set of ICU break iteration rules;
see \link{stringi-search-boundaries}}

\item{locale}{single string, \code{NULL} or \code{''} for default locale}

\item{skip_word_none}{logical; perform no action for 'words' that
do not fit into any other category}

\item{skip_word_number}{logical; perform no action for words that
appear to be numbers}

\item{skip_word_letter}{logical; perform no action for words that
contain letters, excluding hiragana, katakana, or ideographic characters}

\item{skip_word_kana}{logical; perform no action for words
containing kana characters}

\item{skip_word_ideo}{logical; perform no action for words
containing ideographic characters}

\item{skip_line_soft}{logical; perform no action for soft line breaks,
i.e., positions where a line break is acceptable but not required}

\item{skip_line_hard}{logical; perform no action for hard,
or mandatory, line breaks}

\item{skip_sentence_term}{logical; perform no action for sentences
ending with a sentence terminator ('\code{.}', '\code{?}',
'\code{!}', etc.), possibly followed by a hard separator
(\code{CR}, \code{LF}, \code{PS}, etc.)}

\item{skip_sentence_sep}{logical; perform no action for sentences
that do not contain an ending sentence terminator, but are ended
by a hard separator or end of input}
}
\value{
Returns a named list object.
Omitted \code{skip_*} values act as if they had been set to \code{FALSE}.
}
\description{
A convenience function to tune the \pkg{ICU} \code{BreakIterator}'s behavior
in some text boundary analysis functions; see
\link{stringi-search-boundaries}.
}
\details{
The \code{skip_*} family of settings may be used to prevent performing
any special actions on particular types of text boundaries, e.g.,
in case of the \code{\link{stri_locate_all_boundaries}} and
\code{\link{stri_split_boundaries}} functions.

Note that custom break iterator rules (advanced users only)
should be specified as a single string.
For a detailed description of the syntax of RBBI rules, please refer
to the ICU User Guide on Boundary Analysis.
}
\references{
\emph{\code{ubrk.h} File Reference} -- ICU4C API Documentation,
\url{https://unicode-org.github.io/icu-docs/apidoc/dev/icu4c/ubrk_8h.html}

\emph{Boundary Analysis} -- ICU User Guide,
\url{https://unicode-org.github.io/icu/userguide/boundaryanalysis/}
}
\seealso{
The official online manual of \pkg{stringi} at \url{https://stringi.gagolewski.com/}

Gagolewski M., \pkg{stringi}: Fast and portable character string processing in R, \emph{Journal of Statistical Software} 103(2), 2022, 1-59, \doi{10.18637/jss.v103.i02}

Other text_boundaries: 
\code{\link{about_search_boundaries}},
\code{\link{about_search}},
\code{\link{stri_count_boundaries}()},
\code{\link{stri_extract_all_boundaries}()},
\code{\link{stri_locate_all_boundaries}()},
\code{\link{stri_split_boundaries}()},
\code{\link{stri_split_lines}()},
\code{\link{stri_trans_tolower}()},
\code{\link{stri_wrap}()}
}
\concept{text_boundaries}
\author{
\href{https://www.gagolewski.com/}{Marek Gagolewski} and other contributors
}