% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/sort.R
\name{stri_unique}
\alias{stri_unique}
\title{Extract Unique Elements}
\usage{
stri_unique(str, ..., opts_collator = NULL)
}
\arguments{
\item{str}{a character vector}
\item{...}{additional settings for \code{opts_collator}}
\item{opts_collator}{a named list with \pkg{ICU} Collator's options,
see \code{\link{stri_opts_collator}}; \code{NULL}
for the default collation options}
}
\value{
Returns a character vector.
}
\description{
This function returns a character vector like \code{str},
but with duplicate elements removed.
}
\details{
As usual in \pkg{stringi}, no attributes are copied.
Unlike \code{\link{unique}}, this function
tests for canonical equivalence of strings (and not
whether the strings are just bytewise equal). Such an operation
is locale-dependent. Hence, \code{stri_unique} is significantly
slower (but much better suited for natural language processing)
than its base R counterpart.

See also \code{\link{stri_duplicated}} for indicating non-unique elements.
}
\examples{
# precomposed and NFKD-decomposed forms of the same character:
stri_unique(c('\u0105', stri_trans_nfkd('\u0105')))
unique(c('\u0105', stri_trans_nfkd('\u0105')))
stri_unique(c('gro\u00df', 'GROSS', 'Gro\u00df', 'Gross'), strength=1)
}
\references{
\emph{Collation} - ICU User Guide,
\url{https://unicode-org.github.io/icu/userguide/collation/}
}
\seealso{
The official online manual of \pkg{stringi} at \url{https://stringi.gagolewski.com/}

Gagolewski M., \pkg{stringi}: Fast and portable character string processing in R, \emph{Journal of Statistical Software} 103(2), 2022, 1-59, \doi{10.18637/jss.v103.i02}

Other locale_sensitive:
\code{\link{\%s<\%}()},
\code{\link{about_locale}},
\code{\link{about_search_boundaries}},
\code{\link{about_search_coll}},
\code{\link{stri_compare}()},
\code{\link{stri_count_boundaries}()},
\code{\link{stri_duplicated}()},
\code{\link{stri_enc_detect2}()},
\code{\link{stri_extract_all_boundaries}()},
\code{\link{stri_locate_all_boundaries}()},
\code{\link{stri_opts_collator}()},
\code{\link{stri_order}()},
\code{\link{stri_rank}()},
\code{\link{stri_sort_key}()},
\code{\link{stri_sort}()},
\code{\link{stri_split_boundaries}()},
\code{\link{stri_trans_tolower}()},
\code{\link{stri_wrap}()}
}
\concept{locale_sensitive}
\author{
\href{https://www.gagolewski.com/}{Marek Gagolewski} and other contributors
}