1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175
|
/*
tre_regcomp.c - TRE POSIX compatible regex compilation functions.
This software is released under a BSD-style license.
See the file LICENSE for details and copyright.
*/
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif /* HAVE_CONFIG_H */
#include <string.h>
#include <errno.h>
#include <stdlib.h>
#include "tre-internal.h"
#include "xmalloc.h"
/*
   Compile the first `n` bytes of `regex` into `preg`.

   With TRE_WCHAR enabled the bytes are first widened into a temporary
   tre_char_t buffer: one element per byte when TRE_MULTIBYTE is off or
   TRE_MB_CUR_MAX is 1, otherwise by decoding with tre_mbrtowc().  The
   buffer is passed to tre_compile() and released before returning.
   Without TRE_WCHAR the bytes are passed to tre_compile() unchanged.

   Returns the value of tre_compile(), or
     REG_ESPACE  if the temporary buffer cannot be allocated or its size
                 would not fit in a size_t,
     REG_BADPAT  if tre_mbrtowc() returns -1, or returns 0 for a byte that
                 is not '\0'.
*/
int
tre_regncomp(regex_t *preg, const char *regex, size_t n, int cflags)
{
  int ret;
#if TRE_WCHAR
  tre_char_t *wregex;
  size_t wlen;

  /* (n + 1) * sizeof(tre_char_t) must not wrap around, otherwise the
     buffer would be smaller than the data copied into it. */
  if (n >= (size_t)-1 / sizeof(tre_char_t))
    return REG_ESPACE;

  wregex = xmalloc(sizeof(tre_char_t) * (n + 1));
  if (wregex == NULL)
    return REG_ESPACE;

  /* If the current locale uses the standard single byte encoding of
     characters, we don't do a multibyte string conversion.  If we did,
     many applications which use the default locale would break since
     the default "C" locale uses the 7-bit ASCII character set, and
     all characters with the eighth bit set would be considered invalid. */
#if TRE_MULTIBYTE
  if (TRE_MB_CUR_MAX == 1)
#endif /* TRE_MULTIBYTE */
    {
      /* The index has the same type as `n` so that it cannot wrap before
         reaching the end of a very long pattern. */
      size_t i;
      const unsigned char *str = (const unsigned char *)regex;

      for (i = 0; i < n; i++)
        wregex[i] = str[i];
      wlen = n;
    }
#if TRE_MULTIBYTE
  else
    {
      int consumed;
      tre_char_t *wcptr = wregex;
#ifdef HAVE_MBSTATE_T
      mbstate_t state;
      memset(&state, '\0', sizeof(state));
#endif /* HAVE_MBSTATE_T */
      while (n > 0)
        {
          consumed = tre_mbrtowc(wcptr, regex, n, &state);

          switch (consumed)
            {
            case 0:
              /* A zero return is accepted only for an embedded '\0',
                 which is then treated as occupying one byte. */
              if (*regex == '\0')
                consumed = 1;
              else
                {
                  xfree(wregex);
                  return REG_BADPAT;
                }
              break;
            case -1:
              DPRINT(("mbrtowc: error %d: %s.\n", errno, strerror(errno)));
              xfree(wregex);
              return REG_BADPAT;
            case -2:
              /* The last character wasn't complete.  Let's not call it a
                 fatal error. */
              /* NOTE(review): wcptr is still advanced below, so the slot
                 for the incomplete character is counted in wlen.  If
                 tre_mbrtowc() behaves like mbrtowc() nothing was stored
                 in that slot -- TODO confirm and decide whether it
                 should be dropped instead. */
              consumed = n;
              break;
            default:
              /* Positive return: that many bytes formed one character. */
              break;
            }
          regex += consumed;
          n -= consumed;
          wcptr++;
        }
      wlen = (size_t)(wcptr - wregex);
    }
#endif /* TRE_MULTIBYTE */

  wregex[wlen] = L'\0';
  ret = tre_compile(preg, wregex, wlen, cflags);
  xfree(wregex);
#else /* !TRE_WCHAR */
  ret = tre_compile(preg, (const tre_char_t *)regex, n, cflags);
#endif /* !TRE_WCHAR */

  return ret;
}
/*
   This version takes bytes literally, to be used with raw vectors.

   Compiles the first `n` bytes of `regex` into `preg` with REG_USEBYTES
   added to `cflags`.  No multibyte decoding is performed: with TRE_WCHAR
   each byte is widened, as an unsigned value, into one tre_char_t of a
   temporary buffer; without TRE_WCHAR the bytes are passed through as is.

   Returns the value of tre_compile(), or REG_ESPACE if the temporary
   buffer cannot be allocated or its size would not fit in a size_t.
*/
int
tre_regncompb(regex_t *preg, const char *regex, size_t n, int cflags)
{
  int ret;
#if TRE_WCHAR /* wide chars = we need to convert it all to the wide format */
  tre_char_t *wregex;
  size_t i;

  /* (n + 1) * sizeof(tre_char_t) must not wrap around. */
  if (n >= (size_t)-1 / sizeof(tre_char_t))
    return REG_ESPACE;

  /* One extra element is allocated for a terminator, as the other
     compile entry points in this file do.  It also means an empty
     pattern never asks for zero bytes; xmalloc is assumed to be
     malloc-like, where such a request may yield NULL and would be
     misreported as REG_ESPACE. */
  wregex = xmalloc(sizeof(tre_char_t) * (n + 1));
  if (wregex == NULL)
    return REG_ESPACE;

  for (i = 0; i < n; i++)
    wregex[i] = (tre_char_t) ((unsigned char) regex[i]);
  wregex[n] = L'\0';

  ret = tre_compile(preg, wregex, n, cflags | REG_USEBYTES);
  xfree(wregex);
#else /* !TRE_WCHAR */
  ret = tre_compile(preg, (const tre_char_t *)regex, n, cflags | REG_USEBYTES);
#endif /* !TRE_WCHAR */

  return ret;
}
/*
   Compile the NUL-terminated pattern `regex` into `preg`.

   The length is measured with strlen() and the work is delegated to
   tre_regncomp(); a NULL `regex` is forwarded with a length of 0.
   Returns whatever tre_regncomp() returns.
*/
int
tre_regcomp(regex_t *preg, const char *regex, int cflags)
{
  size_t len = 0;

  if (regex != NULL)
    len = strlen(regex);

  return tre_regncomp(preg, regex, len, cflags);
}
/*
   Byte-literal variant of tre_regcomp(): compiles the NUL-terminated
   pattern `regex` into `preg` with REG_USEBYTES added to `cflags`.

   Each byte is copied, as an unsigned value, into one tre_char_t of a
   temporary buffer which is terminated, passed to tre_compile() and then
   released.  A NULL `regex` is treated as an empty pattern, matching
   tre_regcomp().

   Returns the value of tre_compile(), or REG_ESPACE if the temporary
   buffer cannot be allocated or its size would not fit in a size_t.
*/
int
tre_regcompb(regex_t *preg, const char *regex, int cflags)
{
  int ret;
  tre_char_t *wregex;
  /* Same type as `n`, so the index cannot wrap on very long patterns. */
  size_t i;
  size_t n = regex ? strlen(regex) : 0;
  const unsigned char *str = (const unsigned char *)regex;

  /* (n + 1) * sizeof(tre_char_t) must not wrap around. */
  if (n >= (size_t)-1 / sizeof(tre_char_t))
    return REG_ESPACE;

  wregex = xmalloc(sizeof(tre_char_t) * (n + 1));
  if (wregex == NULL)
    return REG_ESPACE;

  /* `str` is only dereferenced when n > 0, i.e. never for NULL input. */
  for (i = 0; i < n; i++)
    wregex[i] = str[i];
  wregex[n] = L'\0';

  ret = tre_compile(preg, wregex, n, cflags | REG_USEBYTES);
  xfree(wregex);
  return ret;
}
#ifdef TRE_WCHAR
/*
   Compile the first `n` wide characters of `regex` into `preg`.

   The arguments are passed to tre_compile() unchanged and its result is
   returned.
*/
int
tre_regwncomp(regex_t *preg, const wchar_t *regex, size_t n, int cflags)
{
  int status = tre_compile(preg, regex, n, cflags);

  return status;
}
/*
   Compile the NUL-terminated wide-character pattern `regex` into `preg`.

   The length is measured with wcslen(); a NULL `regex` is forwarded to
   tre_compile() with a length of 0.  Returns whatever tre_compile()
   returns.
*/
int
tre_regwcomp(regex_t *preg, const wchar_t *regex, int cflags)
{
  size_t len = 0;

  if (regex != NULL)
    len = wcslen(regex);

  return tre_compile(preg, regex, len, cflags);
}
#endif /* TRE_WCHAR */
/*
   Public wrapper around tre_free(): passes `preg` straight through and
   returns nothing.  Presumably this releases whatever the compile
   functions stored in `preg` -- see tre_free() for the exact contract.
*/
void
tre_regfree(regex_t *preg)
{
tre_free(preg);
}
/* EOF */
|