File: lazutf16.xml

package info (click to toggle)
lazarus 2.2.6%2Bdfsg2-2
  • links: PTS, VCS
  • area: main
  • in suites: bookworm
  • size: 219,980 kB
  • sloc: pascal: 1,944,919; xml: 357,634; makefile: 270,608; cpp: 57,115; sh: 3,249; java: 609; perl: 297; sql: 222; ansic: 137
file content (319 lines) | stat: -rw-r--r-- 9,661 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
<?xml version="1.0" encoding="UTF-8"?>
<fpdoc-descriptions>
<package name="lazutils">

<!--
  ====================================================================
    lazutf16
  ====================================================================
-->

<module name="lazutf16">
<short>Contains routines used for UTF-16 character and string operations.</short>
<descr>
<p>
<file>lazutf16.pas</file> includes string routines which are based on UTF-16
implementations, although it might also include routines for other encodings.
</p>
<p>
A UTF-16 based implementation for LowerCase, for example, is faster in
WideString and UnicodeString than the default UTF-8 implementation.
</p>
<p>
Currently this unit includes only UTF8LowerCaseViaTables which is based on
a UTF-16 table, but it might be extended to include various UTF-16 routines.
</p>
<p>
This file is part of the <file>LazUtils</file> package.
</p>
</descr>

<!-- function Visibility: default -->
<element name="UTF16CharacterLength">
<short>Gets the length of the UTF-16 character in the specified PWideChar value.</short>
<descr>
Uses the endian-ness for the platform. Returns 0, 1, or 2.
</descr>
</element>
<!-- function result Visibility: default -->
<element name="UTF16CharacterLength.Result">
<short>Length of the UTF-16 character in the value, or 0 when Nil.</short>
</element>
<!-- argument Visibility: default -->
<element name="UTF16CharacterLength.p">
<short>PWideChar value examined in the routine.</short>
</element>

<!-- function Visibility: default -->
<element name="UTF16Length">
<short>Gets the length for the specified value in UTF-16 characters.</short>
<descr/>
<seealso/>
</element>
<!-- function result Visibility: default -->
<element name="UTF16Length.Result">
<short></short>
</element>
<!-- argument Visibility: default -->
<element name="UTF16Length.s">
<short></short>
</element>
<!-- argument Visibility: default -->
<element name="UTF16Length.p">
<short></short>
</element>
<!-- argument Visibility: default -->
<element name="UTF16Length.WordCount">
<short></short>
</element>

<!-- function Visibility: default -->
<element name="UTF16Copy">
<short>
Copies a number of UTF-16 characters at the given character position in the specified value.
</short>
<descr/>
<seealso/>
</element>
<element name="UTF16Copy.Result">
<short>UnicodeString with the values copied in the routine.</short>
</element>
<element name="UTF16Copy.s">
<short>UnicodeString with the values examined in the routine.</short>
</element>
<element name="UTF16Copy.StartCharIndex">
<short>1-based starting character (code point) position in the Unicode string.</short>
</element>
<element name="UTF16Copy.CharCount">
<short>Number of characters (code points) copied in the routine.</short>
</element>

<element name="UTF16CharStart">
<short/>
<descr/>
<seealso/>
</element>
<element name="UTF16CharStart.Result">
<short/>
</element>
<element name="UTF16CharStart.P">
<short>PWideChar value with the values examined in the routine.</short>
</element>
<element name="UTF16CharStart.Len">
<short>Len is the length in words of P.</short>
</element>
<element name="UTF16CharStart.CharIndex">
<short>CharIndex is the position of the desired UnicodeChar (starting at 0).</short>
</element>

<element name="UTF16Pos">
<short>Pos implemented for UTF-16-encoded values.</short>
<descr>
<p>
<var>UTF16Pos</var> is a <var>PtrInt</var> function used to get the character
index in SearchInText where the value in SearchForText is located. StartPos
allows the search to begin at a specific character (code point).
</p>
<p>
The return value is the 1-based UTF-16 character index where the SearchForText
starts in SearchInText, or 0 when not found.
</p>
</descr>
<seealso/>
</element>
<element name="UTF16Pos.Result">
<short>
Character index where the SearchForText starts in SearchInText, or 0 when not found.
</short>
</element>
<element name="UTF16Pos.SearchForText">
<short>UTF-16-encoded value to locate in SearchInText.</short>
</element>
<element name="UTF16Pos.SearchInText">
<short>UTF-16-encoded value searched in the routine.</short>
</element>
<element name="UTF16Pos.StartPos">
<short>Optional starting position (in UTF-16 code points, not in words).</short>
</element>

<element name="UTF16CharacterToUnicode">
<short>Converts ordinal values for UTF-16 code points in p to their Unicode equivalent.</short>
<descr>
<p>
UTF16CharacterToUnicode converts 16-bit values in p to the equivalent Unicode value.
</p>
<p>
Unpaired surrogates are invalid in any UTFs. These include any value in the range
$D800..$DBFF not followed by a value in the range $DC00..$DFFF, or any value in
the range $DC00..$DFFF not preceded by a value in the range $D800..$DBFF.
</p>
<p>
UTF16CharacterToUnicode ensures that ordinal value(s) in the reserved range(s)
are converted to the correct Unicode value. CharLen is updated to reflect whether
the values in p are a character represented by a single UTF-16 code point (1), or
requires 2 code points for the surrogate pair (2). It is set to 0 when p contains an
invalid UTF-16 code point.
</p>
<p>
The return value contains the Cardinal value for the Unicode code point, or 0 when p
contains an invalid UTF-16 code point.
</p>
</descr>
<seealso/>
</element>
<!-- function result Visibility: default -->
<element name="UTF16CharacterToUnicode.Result">
<short>Unicode code point for the values in p.</short>
</element>
<!-- argument Visibility: default -->
<element name="UTF16CharacterToUnicode.p">
<short>UTF-16 code points examined and converted in the routine.</short>
</element>
<!-- argument Visibility: default -->
<element name="UTF16CharacterToUnicode.CharLen">
<short>Number of UTF-16 code points for the converted character.</short>
</element>

<!-- function Visibility: default -->
<element name="UnicodeToUTF16">
<short>Converts a Unicode character value to its UTF-16 equivalent as a WideString value.</short>
<descr>
<p>
Cardinal values below $10000 result in a single WideChar code value for the
code point. Other cardinal values result in 2 WideChar values in the result to
represent the UTF-16 code point.
</p>
</descr>
<seealso/>
</element>
<!-- function result Visibility: default -->
<element name="UnicodeToUTF16.Result">
<short>WideString value with the UTF-16 code point for the Unicode character.</short>
</element>
<!-- argument Visibility: default -->
<element name="UnicodeToUTF16.u">
<short>Unicode character value converted in the routine.</short>
</element>

<element name="IsUTF16CharValid">
<short></short>
<descr>
<p>
Based on the specification defined by the Unicode consortium, at:
</p>
<p>
<url href="http://unicode.org/faq/utf_bom.html#utf16-7">
http://unicode.org/faq/utf_bom.html#utf16-7
</url>
</p>
<p>
Q: Are there any 16-bit values that are invalid?
</p>
<p>
A: Unpaired surrogates are invalid in UTFs. These include any value in the
range D800 to DBFF not followed by a value in the range DC00 to DFFF, or
any value in the range DC00 to DFFF not preceded by a value in the range
D800 to DBFF. [AF]
</p>
<p>
Use ANextChar = #0 to indicate that there is no next char.
</p>
</descr>
<seealso/>
</element>
<element name="IsUTF16CharValid.Result">
<short/>
</element>
<element name="IsUTF16CharValid.AChar">
<short/>
</element>
<element name="IsUTF16CharValid.ANextChar">
<short/>
</element>

<element name="IsUTF16StringValid">
<short>Determines if the specified WideString contains valid UTF-16 code points.</short>
<descr>
<p>
Examines the content in AWideStr for valid UTF-16 characters. Calls
IsUTF16CharValid for consecutive code point pairs.
</p>
</descr>
<seealso/>
</element>
<element name="IsUTF16StringValid.Result">
<short>True if the specified WideString contains valid UTF-16 code points.</short>
</element>
<element name="IsUTF16StringValid.AWideStr">
<short>WideString examined in the routine.</short>
</element>

<element name="Utf16StringReplace">
<short/>
<descr>
<p>
Same as <var>SysUtils.StringReplace</var> but for WideStrings and UnicodeStrings,
since it's not available in FPC yet.
</p>
</descr>
<seealso/>
</element>
<element name="Utf16StringReplace.Result">
<short/>
</element>
<element name="Utf16StringReplace.S">
<short/>
</element>
<element name="Utf16StringReplace.OldPattern">
<short/>
</element>
<element name="Utf16StringReplace.NewPattern">
<short/>
</element>
<element name="Utf16StringReplace.Flags">
<short/>
</element>
<element name="Utf16StringReplace.Count">
<short/>
</element>

<!-- function Visibility: default -->
<element name="UnicodeLowercase">
<short>Converts a Unicode character value to its lowercase equivalent.</short>
<descr>
<p>
Uses internal tables to map Unicode character ranges common to both UTF-16 and UTF-32.
</p>
</descr>
<seealso/>
</element>
<!-- function result Visibility: default -->
<element name="UnicodeLowercase.Result">
<short>Cardinal value for the lowercase equivalent of u.</short>
</element>
<!-- argument Visibility: default -->
<element name="UnicodeLowercase.u">
<short>Unicode character value converted to lowercase in the routine.</short>
</element>

<!-- function Visibility: default -->
<element name="UTF8LowerCaseViaTables">
<short>
Converts a UTF-8-encoded string to lowercase Unicode values using internal case tables.
</short>
<descr/>
<seealso/>
</element>
<!-- function result Visibility: default -->
<element name="UTF8LowerCaseViaTables.Result">
<short>String with the lowercase Unicode values for s.</short>
</element>
<!-- argument Visibility: default -->
<element name="UTF8LowerCaseViaTables.s">
<short>String with UTF-8 values converted to lowercase Unicode in the routine.</short>
</element>

</module>
<!-- lazutf16 -->
</package>
</fpdoc-descriptions>