/* SPDX-License-Identifier: GPL-2.0+ */
/*
* charset conversion utils
*
* Copyright (c) 2017 Rob Clark
*/
#ifndef __CHARSET_H_
#define __CHARSET_H_
#include <linux/kernel.h>
#include <linux/types.h>
/*
* MAX_UTF8_PER_UTF16 - maximum number of utf8 characters that a single
* utf16 character can generate, see utf16_to_utf8()
*/
#define MAX_UTF8_PER_UTF16 3
/*
* codepage_437 - Unicode to codepage 437 translation table
*
* The table holds 160 u16 entries; its element type matches the @codepage
* parameter of utf_to_cp().
*/
extern const u16 codepage_437[160];
/**
* console_read_unicode() - read Unicode code point from console
*
* @code: pointer to the s32 that receives the Unicode code point
* Return: 0 on success
* NOTE(review): the value returned on failure is not stated
* here - confirm against the implementation.
*/
int console_read_unicode(s32 *code);
/**
* utf8_get() - get next UTF-8 code point from buffer
*
* @src: address of the pointer to the current byte; the pointer is
* updated to point to the next byte
* Return: code point, or 0 for end of string, or -1 if no legal
* code point is found. In case of an error *src points to
* the incorrect byte.
*/
s32 utf8_get(const char **src);
/**
* utf8_put() - write UTF-8 code point to buffer
*
* @code: code point
* @dst: address of the pointer into the destination buffer; the
* pointer is updated to the next position
* Return: -1 if the input parameters are invalid
* NOTE(review): the value returned on success is not stated
* here (presumably 0) - confirm against the implementation.
*/
int utf8_put(s32 code, char **dst);
/**
* utf8_utf16_strnlen() - length of a truncated utf-8 string after conversion
* to utf-16
*
* @src: utf-8 string
* @count: maximum number of code points to convert
* Return: number of u16 words needed after conversion to utf-16, not
* counting the trailing \0. If an invalid UTF-8 sequence is
* hit one u16 will be reserved for a replacement character.
*/
size_t utf8_utf16_strnlen(const char *src, size_t count);
/**
* utf8_utf16_strlen() - length of a utf-8 string after conversion to utf-16
*
* Expands to utf8_utf16_strnlen() with the code point limit set to SIZE_MAX.
*
* @a: utf-8 string
* Return: length in u16 after conversion to utf-16 without the
* trailing \0. If an invalid UTF-8 sequence is hit one
* u16 will be reserved for a replacement character.
*/
#define utf8_utf16_strlen(a) utf8_utf16_strnlen((a), SIZE_MAX)
/**
* utf8_utf16_strncpy() - copy utf-8 string to utf-16 string
*
* NOTE(review): @dst is passed as u16 **; presumably *dst is advanced past
* the words written, as documented for utf16_put() - confirm against the
* implementation.
*
* @dst: address of the pointer to the destination buffer
* @src: source buffer
* @count: maximum number of code points to copy
* Return: -1 if the input parameters are invalid
*/
int utf8_utf16_strncpy(u16 **dst, const char *src, size_t count);
/**
* utf8_utf16_strcpy() - copy utf-8 string to utf-16 string
*
* Expands to utf8_utf16_strncpy() with the code point limit set to SIZE_MAX.
*
* @d: destination buffer, passed on as @dst of utf8_utf16_strncpy()
* @s: source buffer
* Return: -1 if the input parameters are invalid
*/
#define utf8_utf16_strcpy(d, s) utf8_utf16_strncpy((d), (s), SIZE_MAX)
/**
* utf16_get() - get next UTF-16 code point from buffer
*
* @src: address of the pointer to the current word; the pointer is
* updated to point to the next word
* Return: code point, or 0 for end of string, or -1 if no legal
* code point is found. In case of an error *src points to
* the incorrect word.
*/
s32 utf16_get(const u16 **src);
/**
* utf16_put() - write UTF-16 code point to buffer
*
* @code: code point
* @dst: address of the pointer into the destination buffer; the
* pointer is updated to the next position
* Return: -1 if the input parameters are invalid
* NOTE(review): the value returned on success is not stated
* here (presumably 0) - confirm against the implementation.
*/
int utf16_put(s32 code, u16 **dst);
/**
* utf16_strnlen() - length of a truncated utf-16 string
*
* @src: utf-16 string
* @count: maximum number of code points to count
* Return: length in code points. If an invalid UTF-16 sequence is
* hit one position will be reserved for a replacement
* character.
*/
size_t utf16_strnlen(const u16 *src, size_t count);
/**
* utf16_utf8_strnlen() - length of a truncated utf-16 string after conversion
* to utf-8
*
* @src: utf-16 string
* @count: maximum number of code points to convert
* Return: number of bytes needed after conversion to utf-8, not
* counting the trailing \0. If an invalid UTF-16 sequence is
* hit one byte will be reserved for a replacement character.
*/
size_t utf16_utf8_strnlen(const u16 *src, size_t count);
/**
* utf16_utf8_strlen() - length of a utf-16 string after conversion to utf-8
*
* Expands to utf16_utf8_strnlen() with the code point limit set to SIZE_MAX.
*
* @a: utf-16 string
* Return: length in bytes after conversion to utf-8 without the
* trailing \0. If an invalid UTF-16 sequence is hit one
* byte will be reserved for a replacement character.
*/
#define utf16_utf8_strlen(a) utf16_utf8_strnlen((a), SIZE_MAX)
/**
* utf16_utf8_strncpy() - copy utf-16 string to utf-8 string
*
* NOTE(review): @dst is passed as char **; presumably *dst is advanced past
* the bytes written, as documented for utf8_put() - confirm against the
* implementation.
*
* @dst: address of the pointer to the destination buffer
* @src: source buffer
* @count: maximum number of code points to copy
* Return: -1 if the input parameters are invalid
*/
int utf16_utf8_strncpy(char **dst, const u16 *src, size_t count);
/**
* utf16_utf8_strcpy() - copy utf-16 string to utf-8 string
*
* Expands to utf16_utf8_strncpy() with the code point limit set to SIZE_MAX.
*
* @d: destination buffer, passed on as @dst of utf16_utf8_strncpy()
* @s: source buffer
* Return: -1 if the input parameters are invalid
*/
#define utf16_utf8_strcpy(d, s) utf16_utf8_strncpy((d), (s), SIZE_MAX)
/**
* utf_to_lower() - convert a Unicode letter to lower case
*
* @code: Unicode code point of the letter to convert
* Return: lower case letter, or @code unchanged if it is not converted
*/
s32 utf_to_lower(const s32 code);
/**
* utf_to_upper() - convert a Unicode letter to upper case
*
* @code: Unicode code point of the letter to convert
* Return: upper case letter, or @code unchanged if it is not converted
*/
s32 utf_to_upper(const s32 code);
/**
* u16_strcasecmp() - compare two u16 strings case insensitively
*
* Unlike u16_strncmp() this function takes no length limit.
*
* @s1: first string to compare
* @s2: second string to compare
* Return: 0 if s1 and s2 are the same ignoring case
* < 0 if the first different u16 in s1 is less than the
* corresponding u16 in s2
* > 0 if the first different u16 in s1 is greater than the
* corresponding u16 in s2
*/
int u16_strcasecmp(const u16 *s1, const u16 *s2);
/**
* u16_strncmp() - compare two u16 strings
*
* @s1: first string to compare
* @s2: second string to compare
* @n: maximum number of u16 to compare
* Return: 0 if the first n u16 are the same in s1 and s2
* < 0 if the first different u16 in s1 is less than the
* corresponding u16 in s2
* > 0 if the first different u16 in s1 is greater than the
* corresponding u16 in s2
*/
int u16_strncmp(const u16 *s1, const u16 *s2, size_t n);
/**
* u16_strcmp() - compare two u16 strings
*
* Expands to u16_strncmp() with the length limit set to SIZE_MAX.
*
* @s1: first string to compare
* @s2: second string to compare
* Return: 0 if s1 and s2 are the same
* < 0 if the first different u16 in s1 is less than the
* corresponding u16 in s2
* > 0 if the first different u16 in s1 is greater than the
* corresponding u16 in s2
*/
#define u16_strcmp(s1, s2) u16_strncmp((s1), (s2), SIZE_MAX)
/**
* u16_strsize() - count size of u16 string in bytes including the null
* character
*
* Counts the number of bytes occupied by a u16 string
*
* @in: null terminated u16 string (passed as const void *)
* Return: size of the u16 string in bytes, including the null
* character
*/
size_t u16_strsize(const void *in);
/**
* u16_strnlen() - count non-zero words
*
* This function matches wcsnlen_s() if the -fshort-wchar compiler flag is set.
* In the EFI context we explicitly need a function handling u16 strings.
*
* @in: null terminated u16 string
* @count: maximum number of words to count
* Return: number of non-zero words.
* This is not the number of utf-16 letters!
*/
size_t u16_strnlen(const u16 *in, size_t count);
/**
* u16_strlen() - count non-zero words
*
* This function matches wcslen() if the -fshort-wchar compiler flag is set.
* In the EFI context we explicitly need a function handling u16 strings.
*
* A call written as u16_strlen(x) is expanded by the macro below to
* u16_strnlen((x), SIZE_MAX), so the argument must be usable as the
* const u16 * parameter of u16_strnlen(). The prototype only applies where
* the name is used without a following '('.
* NOTE(review): whether a function definition matching the prototype still
* exists is not visible in this header - confirm.
*
* @in: null terminated u16 string
* Return: number of non-zero words.
* This is not the number of utf-16 letters!
*/
size_t u16_strlen(const void *in);
/* The argument is parenthesized like in the other wrapper macros here */
#define u16_strlen(in) u16_strnlen((in), SIZE_MAX)
/**
* u16_strcpy() - copy u16 string
*
* Copy the u16 string pointed to by src, including the terminating null
* word, to the buffer pointed to by dest.
* NOTE(review): no size limit is passed, so dest presumably has to be large
* enough for the whole string - confirm against the implementation.
*
* @dest: destination buffer
* @src: source buffer (null terminated)
* Return: 'dest' address
*/
u16 *u16_strcpy(u16 *dest, const u16 *src);
/**
* u16_strdup() - duplicate u16 string
*
* Copy the u16 string pointed to by src, including the terminating null
* word, to a newly allocated buffer.
* NOTE(review): the caller presumably owns the returned buffer and has to
* free it - confirm against the implementation.
*
* @src: source buffer (null terminated, passed as const void *)
* Return: allocated new buffer on success, NULL on failure
*/
u16 *u16_strdup(const void *src);
/**
* u16_strlcat() - Append a length-limited, %NUL-terminated string to another
*
* Append the source string @src to the destination string @dest, overwriting
* the null word at the end of @dest and adding a terminating null word.
*
* @dest: zero terminated u16 destination string
* @src: zero terminated u16 source string
* @count: size of buffer in u16 words including trailing 0x0000
* Return: required size including trailing 0x0000 in u16 words
* If return value >= count, truncation occurred.
*/
size_t u16_strlcat(u16 *dest, const u16 *src, size_t count);
/**
* utf16_to_utf8() - Convert an utf16 string to utf8
*
* Converts 'size' characters of the utf16 string 'src' to utf8
* written to the 'dest' buffer.
*
* NOTE that a single utf16 character can generate up to 3 utf8
* characters. See MAX_UTF8_PER_UTF16.
* NOTE(review): 'dest' therefore presumably needs room for
* size * MAX_UTF8_PER_UTF16 bytes; whether a terminating '\0' is written is
* not stated here - confirm against the implementation.
*
* @dest: the destination buffer to write the utf8 characters
* @src: the source utf16 string
* @size: the number of utf16 characters to convert
* Return: the pointer to the first unwritten byte in 'dest'
*/
uint8_t *utf16_to_utf8(uint8_t *dest, const uint16_t *src, size_t size);
/**
* utf_to_cp() - translate Unicode code point to 8bit codepage
*
* Codepoints that do not exist in the codepage are rendered as question mark.
* NOTE(review): the result is presumably written back through @c -
* confirm against the implementation.
*
* @c: pointer to Unicode code point to be translated
* @codepage: Unicode to codepage translation table
* Return: 0 on success, -ENOENT if codepoint cannot be translated
*/
int utf_to_cp(s32 *c, const u16 *codepage);
/**
* utf8_to_cp437_stream() - convert UTF-8 stream to codepage 437
*
* NOTE(review): @buffer presumably follows the same contract as the buffer
* of utf8_to_utf32_stream() (temporary storage, buffer[0] set to '\0' before
* the first call) - confirm against the implementation.
*
* @c: next UTF-8 character to convert
* @buffer: buffer, at least 5 characters
* Return: next codepage 437 character or 0
*/
int utf8_to_cp437_stream(u8 c, char *buffer);
/**
* utf8_to_utf32_stream() - convert UTF-8 byte stream to Unicode code points
*
* The function is called for each byte @c in a UTF-8 stream. The byte is
* appended to the temporary storage @buffer until the UTF-8 stream in
* @buffer describes a Unicode code point.
*
* When a new code point has been decoded it is returned and buffer[0] is
* set to '\0', otherwise the return value is 0.
*
* The buffer must be at least 5 characters long. Before the first function
* invocation buffer[0] must be set to '\0'.
*
* @c: next UTF-8 character to convert
* @buffer: buffer, at least 5 characters
* Return: Unicode code point or 0
*/
int utf8_to_utf32_stream(u8 c, char *buffer);
#endif /* __CHARSET_H_ */