1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147
|
/*
* Copyright 2004-2005 Timo Hirvonen
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License as
* published by the Free Software Foundation; either version 2 of the
* License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
* 02111-1307, USA.
*/
#ifndef _UCHAR_H
#define _UCHAR_H
/*
 * Type used throughout this header to carry one unicode character.
 * Invalid bytes are carried in the same type, marked with U_INVALID_MASK.
 */
typedef unsigned int uchar;
/*
 * 16-entry character table, defined in another translation unit.
 * NOTE(review): presumably the hexadecimal digits used when showing
 * invalid bytes as <xx> -- confirm against the definition.
 */
extern const char hex_tab[16];
/*
 * Invalid bytes are OR'ed with this mask,
 * for example 0xff -> 0x100000ff
 */
#define U_INVALID_MASK 0x10000000U
/*
 * @uch candidate unicode character
 *
 * Returns 1 when @uch is not larger than 0x10ffff, 0 for anything above it.
 */
static inline int u_is_unicode(uchar uch)
{
	const uchar highest = 0x0010ffffU;

	if (uch > highest) {
		return 0;
	}
	return 1;
}
/*
 * Returns the size of @uch in bytes (1 to 4).
 * Values above 0x10ffff are reported as a single byte.
 */
static inline int u_char_size(uchar uch)
{
	/* upper_bound[n - 1] is the largest value that fits in n bytes */
	static const uchar upper_bound[4] = {
		0x0000007fU,
		0x000007ffU,
		0x0000ffffU,
		0x0010ffffU
	};
	int size;

	for (size = 1; size <= 4; size++) {
		if (uch <= upper_bound[size - 1]) {
			return size;
		}
	}
	/* out of range: counted as one byte */
	return 1;
}
/*
 * @uch character to measure
 *
 * Returns width of @uch (normally 1 or 2, 4 for invalid chars (<xx>)).
 * NOTE(review): the unit is presumably display columns -- confirm against
 * the implementation.
 */
extern int u_char_width(uchar uch);
/*
 * @str any null-terminated string
 *
 * Returns 1 if @str is a valid UTF-8 string, 0 otherwise.
 */
extern int u_is_valid(const char *str);
/*
 * @str null-terminated UTF-8 string
 *
 * Returns length of @str in UTF-8 characters.
 */
extern int u_strlen(const char *str);
/*
 * @str null-terminated UTF-8 string
 *
 * Returns width of @str.
 */
extern int u_str_width(const char *str);
/*
 * @str null-terminated UTF-8 string
 * @len number of characters to measure
 *
 * Returns width of the first @len characters in @str.
 */
extern int u_str_nwidth(const char *str, int len);
/*
 * NOTE(review): this function had no documentation in the header. Judging
 * by its name and the non-const @idx, it presumably moves *@idx back to the
 * start of the previous character in @str -- confirm against the
 * implementation.
 */
extern void u_prev_char_pos(const char *str, int *idx);
/*
 * @str null-terminated UTF-8 string
 * @idx pointer to byte index in @str (not UTF-8 character index!)
 * @uch pointer to returned unicode character
 *
 * NOTE(review): *@idx is presumably advanced past the character that was
 * read -- confirm against the implementation.
 */
extern void u_get_char(const char *str, int *idx, uchar *uch);
/*
 * @str destination buffer
 * @idx pointer to byte index in @str (not UTF-8 character index!)
 * @uch unicode character
 *
 * u_set_char_raw() and u_set_char() share this signature.
 * NOTE(review): how the two variants differ is not visible from this
 * header -- check the implementation before choosing one.
 */
extern void u_set_char_raw(char *str, int *idx, uchar uch);
extern void u_set_char(char *str, int *idx, uchar uch);
/*
 * @dst destination buffer
 * @src null-terminated UTF-8 string
 * @width in: how much to copy, out: actual width of copied characters
 *
 * Copies characters from @src to @dst up to the given @width, less if
 * null byte was hit.
 * Null byte is _never_ copied.
 * Actual width of copied characters is stored to @width.
 *
 * Returns number of _bytes_ copied.
 *
 * NOTE(review): this comment used to refer to a @count parameter that the
 * prototype does not have; the limit is assumed to be *@width -- confirm
 * against the implementation.
 */
extern int u_copy_chars(char *dst, const char *src, int *width);
/*
 * @str null-terminated UTF-8 string, must be long enough
 * @width in: how much to skip, out: total width of skipped characters
 *
 * Skips UTF-8 characters in @str up to the given @width.
 * Total width of skipped characters is stored to @width.
 * Returned @width can be the given @width + 1 if the last skipped
 * character was double width.
 *
 * Returns number of _bytes_ skipped.
 *
 * NOTE(review): this comment used to refer to a @count parameter that the
 * prototype does not have; the limit is assumed to be *@width -- confirm
 * against the implementation.
 */
extern int u_skip_chars(const char *str, int *width);
/*
 * NOTE(review): these three functions had no documentation in the header.
 * Judging by their names they are presumably case-insensitive counterparts
 * of strcmp(), strncmp() and strstr() for UTF-8 strings. Whether @len counts
 * bytes or characters is not visible from here -- confirm against the
 * implementation.
 */
extern int u_strcasecmp(const char *a, const char *b);
extern int u_strncasecmp(const char *a, const char *b, int len);
extern char *u_strcasestr(const char *haystack, const char *needle);
/*
 * Filename flavour of u_strcasestr().
 *
 * Passes @haystack and @needle to u_strcasestr() unchanged and returns
 * whatever it returns.
 */
static inline char *u_strcasestr_filename(const char *haystack, const char *needle)
{
	char *match = u_strcasestr(haystack, needle);

	return match;
}
#endif
|