File: stri_trans_casemap.Rd

package info (click to toggle)
r-cran-stringi 1.7.12-1
  • links: PTS, VCS
  • area: main
  • in suites: bookworm
  • size: 39,772 kB
  • sloc: cpp: 482,349; ansic: 51,900; perl: 471; makefile: 9; sh: 1
file content (128 lines) | stat: -rw-r--r-- 4,486 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/trans_casemap.R
\name{stri_trans_tolower}
\alias{stri_trans_tolower}
\alias{stri_trans_toupper}
\alias{stri_trans_casefold}
\alias{stri_trans_totitle}
\title{Transform Strings with Case Mapping or Folding}
\usage{
stri_trans_tolower(str, locale = NULL)

stri_trans_toupper(str, locale = NULL)

stri_trans_casefold(str)

stri_trans_totitle(str, ..., opts_brkiter = NULL)
}
\arguments{
\item{str}{character vector}

\item{locale}{\code{NULL} or \code{''} for case mapping following
the conventions of the default locale, or a single string with
locale identifier, see \link{stringi-locale}.}

\item{...}{additional settings for \code{opts_brkiter}}

\item{opts_brkiter}{a named list with \pkg{ICU} BreakIterator's settings,
see \code{\link{stri_opts_brkiter}};
\code{NULL} for default break iterator, i.e., \code{word};
\code{stri_trans_totitle} only}
}
\value{
Each function returns a character vector.
}
\description{
These functions transform strings to lower case,
UPPER CASE, or Title Case, or perform case folding.
}
\details{
Vectorized over \code{str}.

\pkg{ICU} implements full Unicode string case mappings. It is
worth noting that, generally, case mapping:
\itemize{
\item can change the number of code points and/or code units
   of a string,
\item is language-sensitive (results may differ depending on the locale), and
\item is context-sensitive (a character in the input string may map
   differently depending on surrounding characters).
}

With \code{stri_trans_totitle}, if \code{word} \code{BreakIterator}
is used (the default), then the first letter of each word will be capitalized
and the rest will be transformed to lower case.
With the break iterator of type \code{sentence}, only the first letter
of each sentence will be capitalized.
Note that according to the \pkg{ICU} User Guide,
the string \code{'one. two. three.'} consists of one sentence.

Case folding, on the other hand, is locale-independent.
Its purpose is to make two pieces of text that differ only in case identical.
This may come in handy when comparing strings.

For more general (but not locale-dependent)
text transforms, refer to \code{\link{stri_trans_general}}.
}
\examples{
stri_trans_toupper('\u00DF', 'de_DE') # small German Eszett / scharfes S
stri_cmp_eq(stri_trans_toupper('i', 'en_US'), stri_trans_toupper('i', 'tr_TR'))
stri_trans_toupper(c('abc', '123', '\u0105\u0104'))
stri_trans_tolower(c('AbC', '123', '\u0105\u0104'))
stri_trans_totitle(c('AbC', '123', '\u0105\u0104'))
stri_trans_casefold(c('AbC', '123', '\u0105\u0104'))
stri_trans_totitle('stringi is a FREE R pAcKaGe. WItH NO StrinGS attached.') # word boundary
stri_trans_totitle('stringi is a FREE R pAcKaGe. WItH NO StrinGS attached.', type='sentence')
}
\references{
\emph{Case Mappings} -- ICU User Guide,
\url{https://unicode-org.github.io/icu/userguide/transforms/casemappings.html}
}
\seealso{
The official online manual of \pkg{stringi} at \url{https://stringi.gagolewski.com/}

Gagolewski M., \pkg{stringi}: Fast and portable character string processing in R, \emph{Journal of Statistical Software} 103(2), 2022, 1-59, \doi{10.18637/jss.v103.i02}

Other locale_sensitive: 
\code{\link{\%s<\%}()},
\code{\link{about_locale}},
\code{\link{about_search_boundaries}},
\code{\link{about_search_coll}},
\code{\link{stri_compare}()},
\code{\link{stri_count_boundaries}()},
\code{\link{stri_duplicated}()},
\code{\link{stri_enc_detect2}()},
\code{\link{stri_extract_all_boundaries}()},
\code{\link{stri_locate_all_boundaries}()},
\code{\link{stri_opts_collator}()},
\code{\link{stri_order}()},
\code{\link{stri_rank}()},
\code{\link{stri_sort_key}()},
\code{\link{stri_sort}()},
\code{\link{stri_split_boundaries}()},
\code{\link{stri_unique}()},
\code{\link{stri_wrap}()}

Other transform: 
\code{\link{stri_trans_char}()},
\code{\link{stri_trans_general}()},
\code{\link{stri_trans_list}()},
\code{\link{stri_trans_nfc}()}

Other text_boundaries: 
\code{\link{about_search_boundaries}},
\code{\link{about_search}},
\code{\link{stri_count_boundaries}()},
\code{\link{stri_extract_all_boundaries}()},
\code{\link{stri_locate_all_boundaries}()},
\code{\link{stri_opts_brkiter}()},
\code{\link{stri_split_boundaries}()},
\code{\link{stri_split_lines}()},
\code{\link{stri_wrap}()}
}
\concept{locale_sensitive}
\concept{text_boundaries}
\concept{transform}
\author{
\href{https://www.gagolewski.com/}{Marek Gagolewski} and other contributors
}