% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/utf8.R
\name{utf8_normalize}
\alias{utf8_normalize}
\title{Text Normalization}
\usage{
utf8_normalize(
x,
...,
map_case = FALSE,
map_compat = FALSE,
map_quote = FALSE,
remove_ignorable = FALSE
)
}
\arguments{
\item{x}{a character object.}
\item{...}{These dots are for future extensions and must be empty.}
\item{map_case}{a logical value indicating whether to apply Unicode case
mapping to the text. For most languages, this transformation changes
uppercase characters to their lowercase equivalents.}
\item{map_compat}{a logical value indicating whether to apply Unicode
compatibility mappings to the characters, i.e. the mappings required for
the NFKC and NFKD normal forms.}
\item{map_quote}{a logical value indicating whether to replace curly single
quotes and Unicode apostrophe characters with the ASCII apostrophe (U+0027).}
\item{remove_ignorable}{a logical value indicating whether to remove Unicode
"default ignorable" characters like zero-width spaces and soft hyphens.}
}
\value{
The result is a character object with the same attributes as
\code{x} but with \code{Encoding} set to \code{"UTF-8"}.
}
\description{
Transform text to normalized form, optionally mapping to lowercase and
applying compatibility maps.
}
\details{
\code{utf8_normalize()} converts the elements of a character object to Unicode
normalized composed form (NFC) while applying the character maps specified
by the \code{map_case}, \code{map_compat}, \code{map_quote}, and
\code{remove_ignorable} arguments.
}
\examples{
angstrom <- c("\u00c5", "\u0041\u030a", "\u212b")
utf8_normalize(angstrom) == "\u00c5"
}
\seealso{
\code{\link[=as_utf8]{as_utf8()}}.
}