1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163
|
/* $OpenLDAP$ */
/* This work is part of OpenLDAP Software <http://www.openldap.org/>.
*
* Copyright 1998-2024 The OpenLDAP Foundation.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted only as authorized by the OpenLDAP
* Public License.
*
* A copy of this license is available in file LICENSE in the
* top-level directory of the distribution or, alternatively, at
* <http://www.OpenLDAP.org/license.html>.
*/
/*
* ldap_pvt_uc.h - Header for Unicode functions.
* These are meant to be used by the OpenLDAP distribution only.
* These should be named ldap_pvt_....()
*/
#ifndef _LDAP_PVT_UC_H
#define _LDAP_PVT_UC_H 1
#include <lber.h> /* get ber_slen_t */
#include <ac/bytes.h>
#include "../libraries/liblunicode/ucdata/ucdata.h"
LDAP_BEGIN_DECL
/*
* UTF-8 (in utf-8.c)
*/
/* Character type taken by the uc*() string routines declared in this header.
 * UCDATA uses UCS-2 passed in a 4 byte unsigned int, hence ac_uint4.
 */
typedef ac_uint4 ldap_unicode_t;
/* Convert a string with csize octets per character to UTF-8.
 *   ucs   - berval holding the fixed-width input string
 *   csize - number of octets occupied by each input character
 *   utf8s - berval for the UTF-8 form
 * Returns an int status.
 * NOTE(review): the status values and who owns/frees utf8s->bv_val are not
 * visible from this header -- confirm in utf-8.c.
 */
LDAP_F( int ) ldap_ucs_to_utf8s LDAP_P((
struct berval *ucs, int csize, struct berval *utf8s ));
/* Returns the number of bytes in the UTF-8 string.
 * NOTE(review): no length is passed, so the string is presumably
 * NUL-terminated -- confirm in utf-8.c.
 */
LDAP_F (ber_len_t) ldap_utf8_bytes( const char * );
/* Returns the number of UTF-8 characters (as opposed to bytes) in the
 * string.
 */
LDAP_F (ber_len_t) ldap_utf8_chars( const char * );
/* Returns the length (in bytes) of the UTF-8 character.
 * LDAP_UTF8_OFFSET() is the macro form; it yields 1 for an ASCII byte
 * without calling this function.
 */
LDAP_F (int) ldap_utf8_offset( const char * );
/* Returns the length (in bytes) indicated by the UTF-8 character.
 * LDAP_UTF8_CHARLEN() is the table-driven macro form.
 */
LDAP_F (int) ldap_utf8_charlen( const char * );
/* Returns the length (in bytes) indicated by the UTF-8 character and
 * also checks that the shortest possible encoding was used.
 * NOTE(review): the macro form LDAP_UTF8_CHARLEN2() yields 0 when that
 * check fails; presumably this function does the same -- confirm.
 */
LDAP_F (int) ldap_utf8_charlen2( const char * );
/* Copies one UTF-8 character and returns the number of bytes copied.
 * LDAP_UTF8_COPY(d,s) forwards its arguments in the order (d, s) and its
 * ASCII shortcut stores *(s) into *(d), i.e. the first argument is the
 * destination and the second the source.
 */
LDAP_F (int) ldap_utf8_copy( char *, const char *);
/* Returns a pointer to the next UTF-8 character in the string.
 * The result is a plain char * even though the argument is const.
 */
LDAP_F (char*) ldap_utf8_next( const char * );
/* Returns a pointer to the previous UTF-8 character in the string.
 * The result is a plain char * even though the argument is const.
 */
LDAP_F (char*) ldap_utf8_prev( const char * );
/* Primitive ctype routines -- not aware of non-ASCII characters.
 * Each one takes a pointer to the character to classify (not the
 * character value itself) and returns an int.
 * NOTE(review): presumably non-zero means "is a member of the class", as
 * with <ctype.h> -- confirm in utf-8.c.
 */
LDAP_F (int) ldap_utf8_isascii( const char * );
LDAP_F (int) ldap_utf8_isalpha( const char * );
LDAP_F (int) ldap_utf8_isalnum( const char * );
LDAP_F (int) ldap_utf8_isdigit( const char * );
LDAP_F (int) ldap_utf8_isxdigit( const char * );
LDAP_F (int) ldap_utf8_isspace( const char * );
/* Spans the leading characters of str that are NOT in set and returns the
 * number of bytes (not characters) spanned.
 */
LDAP_F (ber_len_t) ldap_utf8_strcspn( const char* str, const char *set);
/* Spans the leading characters of str that ARE in set and returns the
 * number of bytes (not characters) spanned.
 */
LDAP_F (ber_len_t) ldap_utf8_strspn( const char* str, const char *set);
/* Returns the first occurrence of a character in str.
 * The character to look for is passed by pointer (chr), not by value.
 * NOTE(review): presumably NULL is returned when there is no match --
 * confirm in utf-8.c.
 */
LDAP_F (char *) ldap_utf8_strchr( const char* str, const char *chr);
/* Returns the first character in str that is a member of set.
 * NOTE(review): presumably NULL is returned when there is no match --
 * confirm in utf-8.c.
 */
LDAP_F (char *) ldap_utf8_strpbrk( const char* str, const char *set);
/* Reentrant tokenizer.
 *   sp   - string to tokenize (non-const)
 *   sep  - separator characters
 *   last - caller-supplied char * used to carry state between calls
 * NOTE(review): presumably follows the strtok_r() calling convention
 * (NULL sp on continuation calls, input modified in place) -- confirm in
 * utf-8.c.
 */
LDAP_F (char*) ldap_utf8_strtok( char* sp, const char* sep, char **last);
/* Optimizations: lookup tables behind the LDAP_UTF8_* macros.
 *
 * ldap_utf8_lentab is read by LDAP_UTF8_CHARLEN(), indexed with
 * (first byte ^ 0x80); for a non-ASCII first byte that index is 0..127,
 * and the entry is the length reported for the character.
 *
 * ldap_utf8_mintab is read by LDAP_UTF8_CHARLEN2(), indexed with
 * (first byte & 0x1f); the entry is a mask that is ANDed with the second
 * byte to check for the shortest possible encoding.
 */
LDAP_V (const char) ldap_utf8_lentab[128];
LDAP_V (const char) ldap_utf8_mintab[32];
/* Evaluates to 1 when the byte at p has its high bit (0x80) clear, i.e.
 * the byte is in the ASCII range, and to 0 otherwise.  The byte is read
 * as unsigned char so the result does not depend on the signedness of
 * plain char.
 */
#define LDAP_UTF8_ISASCII(p) \
	( ( *(const unsigned char *)(p) & 0x80 ) == 0 )
/* Length in bytes of the character at p, judged from its first byte only:
 * 1 for an ASCII byte, otherwise the ldap_utf8_lentab entry selected by
 * (first byte ^ 0x80).  p is evaluated more than once.
 */
#define LDAP_UTF8_CHARLEN(p) \
	( !LDAP_UTF8_ISASCII(p) \
		? ldap_utf8_lentab[*(const unsigned char *)(p) ^ 0x80] \
		: 1 )
/* Like LDAP_UTF8_CHARLEN, with one extra validation: the character must
 * have been written in its shortest possible encoding.
 *
 * 'l' must be an assignable variable; it receives the LDAP_UTF8_CHARLEN
 * result as a side effect.  The macro evaluates to 0 when that length is
 * 3 or more and the ldap_utf8_mintab mask for the first byte has no bit
 * in common with the second byte; in every other case it evaluates to l.
 * Lengths below 3 are never looked up in ldap_utf8_mintab, and the second
 * byte is not read for them.  p is evaluated more than once.
 */
#define LDAP_UTF8_CHARLEN2(p, l) \
	( ( ( (l) = LDAP_UTF8_CHARLEN( p ) ) >= 3 && \
	    !( ldap_utf8_mintab[*(const unsigned char *)(p) & 0x1f] & (p)[1] ) ) \
		? 0 : (l) )
/* Macro front end for ldap_utf8_offset(): an ASCII byte at p yields 1
 * directly; anything else is handed to the function.  p is evaluated
 * more than once.
 */
#define LDAP_UTF8_OFFSET(p) \
	( !LDAP_UTF8_ISASCII(p) \
		? ldap_utf8_offset( (p) ) \
		: 1 )
/* Macro front end for ldap_utf8_copy(): copies the character at s to d.
 * When the byte at s is ASCII it is stored into *d and the macro
 * evaluates to 1; otherwise the copy, and the resulting value, are left
 * to ldap_utf8_copy( d, s ).  Both arguments are evaluated more than
 * once.
 */
#define LDAP_UTF8_COPY(d,s) \
	( !LDAP_UTF8_ISASCII(s) \
		? ldap_utf8_copy( (d), (s) ) \
		: ( *(d) = *(s), 1 ) )
/* Macro front end for ldap_utf8_next(): for an ASCII byte the result is
 * simply p + 1 (as a char *); anything else is handed to the function.
 * p is evaluated more than once.
 */
#define LDAP_UTF8_NEXT(p) \
	( !LDAP_UTF8_ISASCII(p) \
		? ldap_utf8_next( (p) ) \
		: (char *)(p) + 1 )
/* Advance the pointer variable p in place to the next character. */
#define LDAP_UTF8_INCR(p) \
	( (p) = LDAP_UTF8_NEXT(p) )
/* For symmetry with LDAP_UTF8_NEXT / LDAP_UTF8_INCR.  There is no ASCII
 * shortcut here: both macros always call ldap_utf8_prev().
 */
#define LDAP_UTF8_PREV(p) \
	( ldap_utf8_prev( (p) ) )
/* Move the pointer variable p in place back to the previous character. */
#define LDAP_UTF8_DECR(p) \
	( (p) = LDAP_UTF8_PREV(p) )
/* these probably should be renamed */
/* Comparison of two ldap_unicode_t strings, returning an int.
 * NOTE(review): by name this mirrors strncmp(), with the ber_len_t being
 * the maximum number of characters compared -- confirm against the
 * liblunicode implementation.
 */
LDAP_LUNICODE_F(int) ucstrncmp(
const ldap_unicode_t *,
const ldap_unicode_t *,
ber_len_t );
/* Same signature as ucstrncmp().
 * NOTE(review): by name this is the case-insensitive variant (compare
 * strncasecmp()) -- confirm against the liblunicode implementation.
 */
LDAP_LUNICODE_F(int) ucstrncasecmp(
const ldap_unicode_t *,
const ldap_unicode_t *,
ber_len_t );
/* Takes an ldap_unicode_t string, a ber_len_t and a single
 * ldap_unicode_t, and returns a (non-const) ldap_unicode_t pointer.
 * NOTE(review): by name this searches at most that many characters of
 * the string for the given character, strchr()-style, presumably
 * returning NULL on no match -- confirm against the liblunicode
 * implementation.
 */
LDAP_LUNICODE_F(ldap_unicode_t *) ucstrnchr(
const ldap_unicode_t *,
ber_len_t,
ldap_unicode_t );
/* Same signature as ucstrnchr().
 * NOTE(review): by name this is the case-insensitive variant -- confirm
 * against the liblunicode implementation.
 */
LDAP_LUNICODE_F(ldap_unicode_t *) ucstrncasechr(
const ldap_unicode_t *,
ber_len_t,
ldap_unicode_t );
/* Takes a writable ldap_unicode_t string and a ber_len_t; returns
 * nothing.
 * NOTE(review): by name this upper-cases the string in place, with the
 * ber_len_t being the number of characters to process -- confirm against
 * the liblunicode implementation.
 */
LDAP_LUNICODE_F(void) ucstr2upper(
ldap_unicode_t *,
ber_len_t );
/* Option flags, all of type unsigned int.  LDAP_UTF8_NOCASEFOLD is zero;
 * each of the others is a distinct single bit, so they can be OR-ed
 * together in one unsigned value.
 * NOTE(review): presumably these are what the `unsigned' argument of
 * UTF8bvnormalize() and UTF8bvnormcmp() expects -- confirm.
 */
#define LDAP_UTF8_NOCASEFOLD	0U
#define LDAP_UTF8_CASEFOLD	( 1U << 0 )
#define LDAP_UTF8_ARG1NFC	( 1U << 1 )
#define LDAP_UTF8_ARG2NFC	( 1U << 2 )
#define LDAP_UTF8_APPROX	( 1U << 3 )
/* Takes two bervals, an unsigned value and a memory context pointer
 * (memctx), and returns a berval pointer.
 * NOTE(review): by name this normalizes a UTF-8 berval; presumably the
 * first berval is the input, the second receives the result, the
 * unsigned carries LDAP_UTF8_* flags and memctx selects the allocator --
 * none of that is visible in this header, confirm against the
 * liblunicode implementation (including what is returned on failure).
 */
LDAP_LUNICODE_F(struct berval *) UTF8bvnormalize(
struct berval *,
struct berval *,
unsigned,
void *memctx );
/* Takes two bervals, an unsigned value and a memory context pointer
 * (memctx), and returns an int.
 * NOTE(review): by name this compares the two bervals after
 * normalization, strcmp()-style; presumably the unsigned carries
 * LDAP_UTF8_* flags (LDAP_UTF8_ARG1NFC / LDAP_UTF8_ARG2NFC look like
 * per-argument hints) and memctx selects the allocator for temporaries
 * -- confirm against the liblunicode implementation.
 */
LDAP_LUNICODE_F(int) UTF8bvnormcmp(
struct berval *,
struct berval *,
unsigned,
void *memctx );
LDAP_END_DECL
#endif
|