File: stri_opts_collator.Rd

package info (click to toggle)
r-cran-stringi 1.7.12-1
  • links: PTS, VCS
  • area: main
  • in suites: bookworm
  • size: 39,772 kB
  • sloc: cpp: 482,349; ansic: 51,900; perl: 471; makefile: 9; sh: 1
file content (142 lines) | stat: -rw-r--r-- 4,894 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/opts.R
\name{stri_opts_collator}
\alias{stri_opts_collator}
\alias{stri_coll}
\title{Generate a List with Collator Settings}
\usage{
stri_opts_collator(
  locale = NULL,
  strength = 3L,
  alternate_shifted = FALSE,
  french = FALSE,
  uppercase_first = NA,
  case_level = FALSE,
  normalization = FALSE,
  normalisation = normalization,
  numeric = FALSE,
  ...
)

stri_coll(
  locale = NULL,
  strength = 3L,
  alternate_shifted = FALSE,
  french = FALSE,
  uppercase_first = NA,
  case_level = FALSE,
  normalization = FALSE,
  normalisation = normalization,
  numeric = FALSE,
  ...
)
}
\arguments{
\item{locale}{single string, \code{NULL} or
\code{''} for default locale}

\item{strength}{single integer in \{1,2,3,4\}, which defines collation strength;
\code{1} for the most permissive collation rules, \code{4} for the strictest
ones}

\item{alternate_shifted}{single logical value; \code{FALSE}
treats all the code points with non-ignorable primary weights in the same way,
\code{TRUE} causes code points with primary weights that are equal to or below
the variable top value to be ignored on the primary level and moved to the quaternary level}

\item{french}{single logical value; used in Canadian French;
\code{TRUE} results in secondary weights being considered backwards}

\item{uppercase_first}{single logical value; \code{NA}
orders upper and lower case letters in accordance with their tertiary weights,
\code{TRUE} forces upper case letters to sort before lower case letters,
\code{FALSE} does the opposite}

\item{case_level}{single logical value;
controls whether an extra case level (positioned before the third level) is generated or not}

\item{normalization}{single logical value; if \code{TRUE},
then an incremental check is performed to see whether the input data is in
the FCD form. If the data is not in the FCD form, incremental NFD
normalization is performed}

\item{normalisation}{alias of \code{normalization}}

\item{numeric}{single logical value;
when turned on, this attribute generates a collation key for
the numeric value of substrings of digits;
this is a way to get '100' to sort AFTER '2';
note that negative numbers will not be ordered properly}

\item{...}{[DEPRECATED] any other arguments passed to this function
generate a warning; this argument will be removed in the future}
}
\value{
Returns a named list object; missing settings are left with default values.
}
\description{
A convenience function to tune the \pkg{ICU} Collator's behavior,
e.g., in \code{\link{stri_compare}}, \code{\link{stri_order}},
\code{\link{stri_unique}}, \code{\link{stri_duplicated}},
as well as \code{\link{stri_detect_coll}}
and other \link{stringi-search-coll} functions.
}
\details{
\pkg{ICU}'s \emph{collator} performs a locale-aware, natural-language-like
string comparison.
This is a more reliable way of establishing relationships between
strings than the one provided by base \R, and definitely
one that is more complex and appropriate than ordinary bytewise
comparison.
}
\examples{
stri_cmp('number100', 'number2')
stri_cmp('number100', 'number2', opts_collator=stri_opts_collator(numeric=TRUE))
stri_cmp('number100', 'number2', numeric=TRUE) # equivalent
stri_cmp('above mentioned', 'above-mentioned')
stri_cmp('above mentioned', 'above-mentioned', alternate_shifted=TRUE)
}
\references{
\emph{Collation} -- ICU User Guide,
\url{https://unicode-org.github.io/icu/userguide/collation/}

\emph{ICU Collation Service Architecture} -- ICU User Guide,
\url{https://unicode-org.github.io/icu/userguide/collation/architecture.html}

\emph{\code{icu::Collator} Class Reference} -- ICU4C API Documentation,
\url{https://unicode-org.github.io/icu-docs/apidoc/dev/icu4c/classicu_1_1Collator.html}
}
\seealso{
The official online manual of \pkg{stringi} at \url{https://stringi.gagolewski.com/}

Gagolewski M., \pkg{stringi}: Fast and portable character string processing in R, \emph{Journal of Statistical Software} 103(2), 2022, 1-59, \doi{10.18637/jss.v103.i02}

Other locale_sensitive: 
\code{\link{\%s<\%}()},
\code{\link{about_locale}},
\code{\link{about_search_boundaries}},
\code{\link{about_search_coll}},
\code{\link{stri_compare}()},
\code{\link{stri_count_boundaries}()},
\code{\link{stri_duplicated}()},
\code{\link{stri_enc_detect2}()},
\code{\link{stri_extract_all_boundaries}()},
\code{\link{stri_locate_all_boundaries}()},
\code{\link{stri_order}()},
\code{\link{stri_rank}()},
\code{\link{stri_sort_key}()},
\code{\link{stri_sort}()},
\code{\link{stri_split_boundaries}()},
\code{\link{stri_trans_tolower}()},
\code{\link{stri_unique}()},
\code{\link{stri_wrap}()}

Other search_coll: 
\code{\link{about_search_coll}},
\code{\link{about_search}}
}
\concept{locale_sensitive}
\concept{search_coll}
\author{
\href{https://www.gagolewski.com/}{Marek Gagolewski} and other contributors
}