1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168
|
/*
* Copyright (c) 2020 Proofpoint, Inc. and its suppliers.
* All rights reserved.
*
* By using this file, you agree to the terms and conditions set
* forth in the LICENSE file which can be found at the top level of
* the sendmail distribution.
*
*/
#include <sm/gen.h>
#include <sm/sendmail.h>
#include <ctype.h>
#include <sm/string.h>
#include <sm/heap.h>
#if USE_EAI
# include <sm/ixlen.h>
# include <unicode/ucasemap.h>
# include <unicode/ustring.h>
# include <unicode/uchar.h>
/*
**  ASCIISTR -- check whether a string is printable ASCII
**
**	Parameters:
**		str -- NUL-terminated string (may be NULL)
**
**	Returns:
**		true iff every byte of str is printable ASCII,
**		i.e., in the range 32..126.
**		NULL and the empty string are considered ASCII.
*/

bool
asciistr(str)
	const char *str;
{
	unsigned char ch;

	if (str == NULL)
		return true;

	/* advance until the terminator or the first byte outside 32..126 */
	while ((ch = (unsigned char) *str) != '\0' && ch >= 32 && ch < 127)
		str++;

	/* stopping on the terminator means no offending byte was found */
	return ch == '\0';
}
#endif /* USE_EAI */
/*
**  MAKELOWER -- Translate a line into lower case
**
**	Parameters:
**		p -- string to translate (modified in place if possible). [A]
**
**	Returns:
**		lower cased string.
**		For NULL input, NULL is returned.
**		For ASCII input this is p itself, translated in place.
**		With USE_EAI, a string that is not printable ASCII is left
**		untouched and the result of sm_lowercase() is returned
**		instead; that is a different buffer and can be NULL
**		if the conversion failed.
**
**	Side Effects:
**		String p is translated to lower case if possible.
*/

char *
makelower(p)
	char *p;
{
	unsigned char c;
	char *orig;

	if (p == NULL)
		return p;
	orig = p;
#if USE_EAI
	if (!asciistr(p))
		return (char *)sm_lowercase(p);
#endif

	/*
	**  Only bytes below 0x80 are candidates; this replaces the
	**  non-ISO isascii() and guarantees that the <ctype.h>
	**  functions only see values representable as unsigned char.
	*/

	for (; (c = (unsigned char) *p) != '\0'; p++)
	{
		if (c < 0x80 && isupper(c))
			*p = (char) tolower(c);
	}
	return orig;
}
#if USE_EAI
/*
** SM_LOWERCASE -- lower case a UTF-8 string
** Note: this should ONLY be applied to a UTF-8 string,
** i.e., the caller should check first if it isn't an ASCII string.
**
** Parameters:
** str -- original string
**
** Returns:
** lower case version of string [S]
**
** How to return an error description due to failed unicode calls?
** However, is that even relevant?
*/
char *
sm_lowercase(str)
const char *str;
{
int olen, ilen;
UErrorCode error;
ssize_t req;
int n;
static UCaseMap *csm = NULL;
static char *out = NULL;
static int outlen = 0;
# if SM_CHECK_REQUIRE
if (sm_debug_active(&SmExpensiveRequire, 3))
SM_REQUIRE(!asciistr(str));
# endif
/* an empty string is always ASCII */
SM_REQUIRE(NULL != str && '\0' != *str);
if (NULL == csm)
{
error = U_ZERO_ERROR;
csm = ucasemap_open("en_US", U_FOLD_CASE_DEFAULT, &error);
if (U_SUCCESS(error) == 0)
{
/* syserr("ucasemap_open error: %s", u_errorName(error)); */
return NULL;
}
}
ilen = strlen(str);
olen = ilen + 1;
if (olen > outlen)
{
outlen = olen;
out = sm_realloc_x(out, outlen);
}
for (n = 0; n < 3; n++)
{
error = U_ZERO_ERROR;
req = ucasemap_utf8FoldCase(csm, out, olen, str, ilen, &error);
if (U_SUCCESS(error))
{
if (req >= olen)
{
outlen = req + 1;
out = sm_realloc_x(out, outlen);
out[req] = '\0';
}
break;
}
else if (error == U_BUFFER_OVERFLOW_ERROR)
{
outlen = req + 1;
out = sm_realloc_x(out, outlen);
olen = outlen;
}
else
{
/* syserr("conversion error for \"%s\": %s", str, u_errorName(error)); */
return NULL;
}
}
return out;
}
#endif /* USE_EAI */
|