% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/search_count_bound.R
\name{stri_count_boundaries}
\alias{stri_count_boundaries}
\alias{stri_count_words}
\title{Count the Number of Text Boundaries}
\usage{
stri_count_boundaries(str, ..., opts_brkiter = NULL)
stri_count_words(str, locale = NULL)
}
\arguments{
\item{str}{character vector or an object coercible to one}
\item{...}{additional settings for \code{opts_brkiter}}
\item{opts_brkiter}{a named list with \pkg{ICU} BreakIterator's settings,
see \code{\link{stri_opts_brkiter}};
\code{NULL} for the default break iterator, i.e., \code{line_break}}
\item{locale}{\code{NULL} or \code{''} for text boundary analysis following
the conventions of the default locale, or a single string with
a locale identifier, see \link{stringi-locale}}
}
\value{
Both functions return an integer vector.
}
\description{
These functions determine the number of text boundaries
(like character, word, line, or sentence boundaries) in a string.
}
\details{
Vectorized over \code{str}.
For more information on text boundary analysis
performed by \pkg{ICU}'s \code{BreakIterator}, see
\link{stringi-search-boundaries}.
In the case of \code{stri_count_words},
just like in \code{\link{stri_extract_all_words}} and
\code{\link{stri_locate_all_words}},
\pkg{ICU}'s word \code{BreakIterator} is used
to locate the word boundaries, and all non-word characters
(\code{UBRK_WORD_NONE} rule status) are ignored.
This function is equivalent to a call to
\code{\link{stri_count_boundaries}(str, type='word', skip_word_none=TRUE, locale=locale)}.
Note that a \code{BreakIterator} of type \code{character}
may be used to count the number of \emph{Unicode characters} in a string.
The \code{\link{stri_length}} function,
which aims to count the number of \emph{Unicode code points},
might report different results.
Moreover, a \code{BreakIterator} of type \code{sentence}
may be used to count the number of sentences in a piece of text.
}
\examples{
test <- 'The\u00a0above-mentioned features are very useful. Spam, spam, eggs, bacon, and spam.'
stri_count_boundaries(test, type='word')
stri_count_boundaries(test, type='sentence')
stri_count_boundaries(test, type='character')
stri_count_words(test)
test2 <- stri_trans_nfkd('\u03c0\u0153\u0119\u00a9\u00df\u2190\u2193\u2192')
stri_count_boundaries(test2, type='character')
stri_length(test2)
stri_numbytes(test2)
}
\seealso{
The official online manual of \pkg{stringi} at \url{https://stringi.gagolewski.com/}
Gagolewski M., \pkg{stringi}: Fast and portable character string processing in R, \emph{Journal of Statistical Software} 103(2), 2022, 1-59, \doi{10.18637/jss.v103.i02}
Other search_count:
\code{\link{about_search}},
\code{\link{stri_count}()}
Other locale_sensitive:
\code{\link{\%s<\%}()},
\code{\link{about_locale}},
\code{\link{about_search_boundaries}},
\code{\link{about_search_coll}},
\code{\link{stri_compare}()},
\code{\link{stri_duplicated}()},
\code{\link{stri_enc_detect2}()},
\code{\link{stri_extract_all_boundaries}()},
\code{\link{stri_locate_all_boundaries}()},
\code{\link{stri_opts_collator}()},
\code{\link{stri_order}()},
\code{\link{stri_rank}()},
\code{\link{stri_sort_key}()},
\code{\link{stri_sort}()},
\code{\link{stri_split_boundaries}()},
\code{\link{stri_trans_tolower}()},
\code{\link{stri_unique}()},
\code{\link{stri_wrap}()}
Other text_boundaries:
\code{\link{about_search_boundaries}},
\code{\link{about_search}},
\code{\link{stri_extract_all_boundaries}()},
\code{\link{stri_locate_all_boundaries}()},
\code{\link{stri_opts_brkiter}()},
\code{\link{stri_split_boundaries}()},
\code{\link{stri_split_lines}()},
\code{\link{stri_trans_tolower}()},
\code{\link{stri_wrap}()}
}
\concept{locale_sensitive}
\concept{search_count}
\concept{text_boundaries}
\author{
\href{https://www.gagolewski.com/}{Marek Gagolewski} and other contributors
}