File: about_stringi.Rd

package info (click to toggle)
r-cran-stringi 1.8.4-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 30,632 kB
  • sloc: cpp: 301,844; perl: 471; makefile: 9; sh: 1
file content (158 lines) | stat: -rw-r--r-- 6,103 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/stringi-package.R
\docType{package}
\name{about_stringi}
\alias{about_stringi}
\alias{stringi}
\alias{stringi-package}
\title{Fast and Portable Character String Processing in R}
\description{
\pkg{stringi} is THE R package for fast, correct, consistent,
and convenient string/text manipulation.
It gives predictable results on every platform, in each locale,
and under any native character encoding.

\bold{Keywords}: R, text processing, character strings,
internationalization, localization, ICU, ICU4C, i18n, l10n, Unicode.

\bold{Homepage}: \url{https://stringi.gagolewski.com/}

\bold{License}: The BSD-3-clause license for the package code,
the ICU license for the accompanying ICU4C distribution,
and the UCD license for the Unicode Character Database.
See the COPYRIGHTS and LICENSE files for more details.
}
\details{
Manual pages on general topics:
\itemize{
\item \link{about_encoding} -- character encoding issues, including
   information on encoding management in \pkg{stringi}, as well as
   on encoding detection and conversion.

\item \link{about_locale} -- locale issues, including locale
   management and specification in \pkg{stringi}, and the list of
   locale-sensitive operations. In particular, see
   \code{\link{stri_opts_collator}} for a description of the string
   collation algorithm, which is used for string comparing, ordering,
   ranking, sorting, case-folding, and searching.

\item \link{about_arguments} -- information on how \pkg{stringi}
   handles the arguments passed to its functions.
}
}
\section{Facilities available}{


Refer to the following:
\itemize{
\item \link{about_search} for string searching facilities;
these include pattern searching, matching, string splitting, and so on.
The following independent search engines are provided:
\itemize{
\item \link{about_search_regex} -- with ICU (Java-like) regular expressions,
\item \link{about_search_fixed} -- fast, locale-independent, byte-wise pattern
   matching,
\item \link{about_search_coll} -- locale-aware pattern matching
   for natural language processing tasks,
\item \link{about_search_charclass} -- seeking elements of
   particular character classes, like ``all white spaces'' or ``all digits'',
\item \link{about_search_boundaries} -- text boundary analysis.
}

\item \code{\link{stri_datetime_format}} for date/time formatting
and parsing. Also refer to the links therein for other operations
related to dates, times, and time zones.

\item \code{\link{stri_stats_general}} and \code{\link{stri_stats_latex}}
for gathering some fancy statistics on a character vector's contents.

\item \code{\link{stri_join}}, \code{\link{stri_dup}}, \code{\link{\%s+\%}},
and \code{\link{stri_flatten}} for concatenation-based operations.

\item \code{\link{stri_sub}} for extracting and replacing substrings,
and \code{\link{stri_reverse}} for a joyful function
to reverse all code points in a string.

\item \code{\link{stri_length}} (among others) for determining the number
of code points in a string. See also \code{\link{stri_count_boundaries}}
for counting the number of Unicode characters
and \code{\link{stri_width}} for approximating the width of a string.

\item \code{\link{stri_trim}} (among others) for
trimming characters from the beginning and/or end of a string,
see also \link{about_search_charclass}, and \code{\link{stri_pad}}
for padding strings so that they are of the same width.
Additionally, \code{\link{stri_wrap}} wraps text into lines.

\item \code{\link{stri_trans_tolower}} (among others) for case mapping,
i.e., conversion to lower, UPPER, or Title Case,
\code{\link{stri_trans_nfc}} (among others) for Unicode normalization,
\code{\link{stri_trans_char}} for translating individual code points,
and \code{\link{stri_trans_general}} for other universal
text transforms, including transliteration.

\item \code{\link{stri_cmp}}, \code{\link{\%s<\%}}, \code{\link{stri_order}},
\code{\link{stri_sort}}, \code{\link{stri_rank}}, \code{\link{stri_unique}},
and \code{\link{stri_duplicated}} for collation-based,
locale-aware operations, see also \link{about_locale}.

\item \code{\link{stri_split_lines}} (among others)
to split a string into text lines.

\item \code{\link{stri_escape_unicode}} (among others) for escaping
some code points.

\item \code{\link{stri_rand_strings}}, \code{\link{stri_rand_shuffle}},
and \code{\link{stri_rand_lipsum}} for generating (pseudo)random strings.

\item \code{\link{stri_read_raw}},
\code{\link{stri_read_lines}}, and \code{\link{stri_write_lines}}
for reading and writing text files.
}

Note that each man page provides many further links to other
interesting facilities and topics.
}

\references{
\emph{\pkg{stringi} Package Homepage},
\url{https://stringi.gagolewski.com/}

Gagolewski M., \pkg{stringi}: Fast and portable character string
processing in R, \emph{Journal of Statistical Software} 103(2), 2022, 1-59,
\doi{10.18637/jss.v103.i02}

\emph{ICU -- International Components for Unicode},
\url{https://icu.unicode.org/}

\emph{ICU4C API Documentation},
\url{https://unicode-org.github.io/icu-docs/apidoc/dev/icu4c/}

\emph{The Unicode Consortium},
\url{https://home.unicode.org/}

\emph{UTF-8, A Transformation Format of ISO 10646} -- RFC 3629,
\url{https://www.rfc-editor.org/rfc/rfc3629}
}
\seealso{
The official online manual of \pkg{stringi} at \url{https://stringi.gagolewski.com/}

Gagolewski M., \pkg{stringi}: Fast and portable character string processing in R, \emph{Journal of Statistical Software} 103(2), 2022, 1-59, \doi{10.18637/jss.v103.i02}

Other stringi_general_topics: 
\code{\link{about_arguments}},
\code{\link{about_encoding}},
\code{\link{about_locale}},
\code{\link{about_search_boundaries}},
\code{\link{about_search_charclass}},
\code{\link{about_search_coll}},
\code{\link{about_search_fixed}},
\code{\link{about_search_regex}},
\code{\link{about_search}}
}
\author{
Marek Gagolewski,
with contributions from Bartek Tartanus and many others.
ICU4C was developed by IBM, Unicode, Inc., and others.
}
\concept{stringi_general_topics}