% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/opts.R
\name{stri_opts_collator}
\alias{stri_opts_collator}
\alias{stri_coll}
\title{Generate a List with Collator Settings}
\usage{
stri_opts_collator(
locale = NULL,
strength = 3L,
alternate_shifted = FALSE,
french = FALSE,
uppercase_first = NA,
case_level = FALSE,
normalization = FALSE,
normalisation = normalization,
numeric = FALSE,
...
)
stri_coll(
locale = NULL,
strength = 3L,
alternate_shifted = FALSE,
french = FALSE,
uppercase_first = NA,
case_level = FALSE,
normalization = FALSE,
normalisation = normalization,
numeric = FALSE,
...
)
}
\arguments{
\item{locale}{single string, \code{NULL} or
\code{''} for default locale}
\item{strength}{single integer in \{1,2,3,4\}, which defines collation strength;
\code{1} for the most permissive collation rules, \code{4} for the strictest
ones}
\item{alternate_shifted}{single logical value; \code{FALSE}
treats all the code points with non-ignorable primary weights in the same way;
\code{TRUE} causes code points with primary weights that are equal to or below
the variable top value to be ignored on the primary level and moved to the quaternary level}
\item{french}{single logical value; used in Canadian French;
\code{TRUE} results in secondary weights being considered backwards}
\item{uppercase_first}{single logical value; \code{NA}
orders upper and lower case letters in accordance with their tertiary weights,
\code{TRUE} forces upper case letters to sort before lower case letters,
\code{FALSE} does the opposite}
\item{case_level}{single logical value;
controls whether an extra case level (positioned before the third level) is generated or not}
\item{normalization}{single logical value; if \code{TRUE},
then an incremental check is performed to see whether the input data is in
the FCD form. If the data is not in the FCD form, incremental NFD
normalization is performed}
\item{normalisation}{alias of \code{normalization}}
\item{numeric}{single logical value;
when turned on, this attribute generates a collation key for
the numeric value of substrings of digits;
this is a way to get '100' to sort AFTER '2';
note that negative numbers will not be ordered properly}
\item{...}{[DEPRECATED] any other arguments passed to this function
generate a warning; this argument will be removed in the future}
}
\value{
Returns a named list object; missing settings are left with default values.
}
\description{
A convenience function to tune the \pkg{ICU} Collator's behavior,
e.g., in \code{\link{stri_compare}}, \code{\link{stri_order}},
\code{\link{stri_unique}}, \code{\link{stri_duplicated}},
as well as \code{\link{stri_detect_coll}}
and other \link{stringi-search-coll} functions.
}
\details{
\pkg{ICU}'s \emph{collator} performs a locale-aware, natural-language-like
string comparison.
This is a more reliable way of establishing relationships between
strings than the one provided by base \R, and definitely
one that is more complex and appropriate than ordinary bytewise
comparison.
}
\examples{
stri_cmp('number100', 'number2')
stri_cmp('number100', 'number2', opts_collator=stri_opts_collator(numeric=TRUE))
stri_cmp('number100', 'number2', numeric=TRUE) # equivalent
stri_cmp('above mentioned', 'above-mentioned')
stri_cmp('above mentioned', 'above-mentioned', alternate_shifted=TRUE)
}
\references{
\emph{Collation} -- ICU User Guide,
\url{https://unicode-org.github.io/icu/userguide/collation/}

\emph{ICU Collation Service Architecture} -- ICU User Guide,
\url{https://unicode-org.github.io/icu/userguide/collation/architecture.html}

\emph{\code{icu::Collator} Class Reference} -- ICU4C API Documentation,
\url{https://unicode-org.github.io/icu-docs/apidoc/dev/icu4c/classicu_1_1Collator.html}
}
\seealso{
The official online manual of \pkg{stringi} at \url{https://stringi.gagolewski.com/}.

Gagolewski M., \pkg{stringi}: Fast and portable character string processing in R, \emph{Journal of Statistical Software} 103(2), 2022, 1-59, \doi{10.18637/jss.v103.i02}.

Other locale_sensitive:
\code{\link{\%s<\%}()},
\code{\link{about_locale}},
\code{\link{about_search_boundaries}},
\code{\link{about_search_coll}},
\code{\link{stri_compare}()},
\code{\link{stri_count_boundaries}()},
\code{\link{stri_duplicated}()},
\code{\link{stri_enc_detect2}()},
\code{\link{stri_extract_all_boundaries}()},
\code{\link{stri_locate_all_boundaries}()},
\code{\link{stri_order}()},
\code{\link{stri_rank}()},
\code{\link{stri_sort_key}()},
\code{\link{stri_sort}()},
\code{\link{stri_split_boundaries}()},
\code{\link{stri_trans_tolower}()},
\code{\link{stri_unique}()},
\code{\link{stri_wrap}()}
Other search_coll:
\code{\link{about_search_coll}},
\code{\link{about_search}}
}
\concept{locale_sensitive}
\concept{search_coll}
\author{
\href{https://www.gagolewski.com/}{Marek Gagolewski} and other contributors
}