1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99
|
/****************************************************************
* *
* Copyright (c) 2001-2018 Fidelity National Information *
* Services, Inc. and/or its subsidiaries. All rights reserved. *
* *
* This source code contains the intellectual property *
* of its copyright holder(s), and is made available *
* under a license. If you do not know the terms of *
* the license, please stop and do not read further. *
* *
****************************************************************/
#include "mdef.h"
#include "mvalconv.h"
#include "op.h"
#ifdef UTF8_SUPPORTED
#include "gtm_utf8.h"
GBLREF boolean_t badchar_inhibit;
GBLREF boolean_t gtm_utf8_mode;
/* Computes the code point of the character located at a 1-based character position of a string.
 *
 *	num - 1-based position of the wanted character
 *	in  - the string to examine
 *	out - receives the code point as an integer, or -1 when no code point is produced
 *
 * -1 is produced when the position lies outside the string, and also for an invalid byte or sequence
 * when gtm_utf8_mode is off or badchar_inhibit is set. In the remaining cases an invalid byte or
 * sequence is handed to UTF8_BADCHAR (defined elsewhere; presumably it raises the BADCHAR error).
 */
void op_fnascii(int4 num, mval* in, mval* out)
{
	int		char_bytes;
	unsigned int	result;
	char		*cursor, *limit;
	DCL_THREADGBL_ACCESS;

	SETUP_THREADGBL_ACCESS;
	/* At compile time a string containing bad characters must never reach this routine, so
	 * badchar_inhibit is never consulted for such a string while compiling.
	 */
	assert(!TREF(compile_time) || valid_utf_string(&in->str));
	result = (unsigned int)-1;	/* stays -1 unless a character is found at the requested position */
	num--;				/* switch to a 0-based position */
	if (!gtm_utf8_mode || MV_IS_SINGLEBYTE(in))
	{	/* Fast path: every character is one byte, so the position is also the byte offset */
		assert(MV_IS_STRING(in));	/* MV_UTF_LEN must subsume MV_STR */
		if ((0 <= num) && (in->str.len > num))
		{
			result = *(unsigned char *)(in->str.addr + num);
			if (ASCII_MAX < result)
			{	/* A lone byte above ASCII_MAX is not a legal character */
				if (gtm_utf8_mode && !badchar_inhibit)
				{
					UTF8_BADCHAR(1, in->str.addr + num, NULL, 0, NULL);
				} else
				{
					result = (unsigned int)-1;
				}
			}
		}
	} else
	{	/* General path: step over "num" characters that may span several bytes each */
		MV_FORCE_STR(in);
		cursor = in->str.addr;
		limit = in->str.addr + in->str.len;
		if (badchar_inhibit)
		{	/* Bad characters are tolerated, so skip without validating (it is an open question
			 * what code an invalid character should get here).
			 */
			while ((0 < num) && (cursor < limit))
			{
				cursor = (char *)UTF8_MBNEXT(cursor, limit);
				--num;
			}
		} else
		{	/* Validate each skipped character and report the first bad one */
			while ((0 < num) && (cursor < limit))
			{
				if (!UTF8_VALID(cursor, limit, char_bytes))
				{
					UTF8_BADCHAR(0, cursor, limit, 0, NULL);
				}
				cursor += char_bytes;
				--num;
			}
		}
		if ((0 == num) && (cursor < limit))
		{	/* Positioned on the wanted character: decode it */
			UTF8_MBTOWC(cursor, limit, result);
			if (WEOF == result)
			{	/* The bytes at this position do not decode to a character */
				if (badchar_inhibit)
				{
					result = (unsigned int)-1;
				} else
				{
					UTF8_BADCHAR(0, cursor, limit, 0, NULL);
				}
			}
		}
		/* Otherwise the position is beyond the character length (or below 1): result remains -1 */
	}
	MV_FORCE_MVAL(out, (int)result);
}
#endif /* UTF8_SUPPORTED */
/* Byte-oriented counterpart of op_fnascii: looks at a single byte and does no multi-byte decoding.
 *
 *	num - 1-based byte position
 *	in  - the string to examine (forced to string form first)
 *	out - receives the unsigned value of the byte at that position, or -1 when the position
 *	      is outside the string
 */
void op_fnzascii(int4 num, mval* in, mval* out)
{
	int	byte_value;

	MV_FORCE_STR(in);
	num--;	/* switch to a 0-based offset */
	byte_value = ((0 <= num) && (num < in->str.len)) ? *(unsigned char *)(in->str.addr + num) : -1;
	MV_FORCE_MVAL(out, byte_value);
}
|