1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205
|
/****************************************************************
* *
* Copyright (c) 2006-2018 Fidelity National Information *
* Services, Inc. and/or its subsidiaries. All rights reserved. *
* *
* This source code contains the intellectual property *
* of its copyright holder(s), and is made available *
* under a license. If you do not know the terms of *
* the license, please stop and do not read further. *
* *
****************************************************************/
/*
* -----------------------------------------------
* op_fnp1 $PIECE function (the piecemaker) for UTF-8 mode
* Special case of a 1 character delimiter and 1 piece (reference)
*
* Arguments:
* src - pointer to Source mval
* delim - delimiter char (a single, possibly multi-byte, UTF-8 character held in an int) to use looking for a piece
* trgpcidx - index of piece to extract from source string
* dst - pointer to Destination mval to save the piece in
*
* Return:
* none
*
* Side effects:
* dst structure gets filled with the result
* -----------------------------------------------
*/
#include "mdef.h"
#include "gtm_string.h"
#include "fnpc.h"
#include "gtm_stdio.h"
#include "min_max.h"
#include "op.h"
#include "gtm_utf8.h"
GBLREF boolean_t gtm_utf8_mode; /* We are indeed doing the UTF8 thang */
GBLREF boolean_t badchar_inhibit; /* No BADCHAR errors should be signaled */
/* op_fnp1 - return one piece of "src" delimited by a single UTF-8 character.
 *
 * Parameters:
 *   src      - source mval; converted to a string with MV_FORCE_STR. When a new piece cache
 *              entry is taken for it, src->fnpc_indx is updated to the (1-based) entry index.
 *   delim    - delimiter character; copied into a delimfmt and validated as UTF-8.
 *   trgpcidx - 1-based index of the piece wanted. A value <= 0 yields an empty string.
 *   dst      - mval that receives the resulting piece (it may be the same mval as src, which
 *              is why the result is built in a local mval and copied out just before return).
 *
 * Returns: nothing; *dst is filled in. The result string points into the source string's
 * bytes (nothing is copied). If the source has fewer than trgpcidx pieces the result is empty.
 *
 * Errors: unless badchar_inhibit is set, an invalid UTF-8 delimiter or an invalid UTF-8
 * character met while scanning the source is reported through UTF8_BADCHAR/utf8_badchar.
 *
 * Side effects: reads and updates the piece cache kept in TREF(fnpca): per-string arrays of
 * piece start offsets which let later calls on the same string/delimiter skip rescanning.
 */
void op_fnp1(mval *src, int delim, int trgpcidx, mval *dst)
{
	unsigned char	*first, *last, *start, *end;
	unsigned int	*pcoff, *pcoffmax, fnpc_indx, slen;
	int		cpcidx, spcidx, mblen, dlmlen;
	boolean_t	valid_char;
	mval		ldst;		/* Local copy since &dst == &src .. move to dst at return */
	fnpc		*cfnpc;
	delimfmt	ldelim;
	DCL_THREADGBL_ACCESS;

	SETUP_THREADGBL_ACCESS;
	assert(gtm_utf8_mode);
	MV_FORCE_STR(src);
	ldelim.unichar_val = delim;
	/* Validate the delimiter bytes; UTF8_VALID also hands back the delimiter's byte length in dlmlen */
	if (!UTF8_VALID(ldelim.unibytes_val, ldelim.unibytes_val + SIZEOF(ldelim.unibytes_val), dlmlen) &&
	    !badchar_inhibit)
	{	/* The delimiter is a bad character so error out if badchar not inhibited */
		UTF8_BADCHAR(0, ldelim.unibytes_val, ldelim.unibytes_val + SIZEOF(ldelim.unibytes_val), 0, NULL);
	}
	ldst.mvtype = MV_STR;
	start = first = last = (unsigned char *)src->str.addr;
	slen = src->str.len;
	end = start + slen;
	/* Detect annoyance cases and deal with quickly so we don't muck up the
	 * logic below trying to handle it properly.
	 */
	if (0 >= trgpcidx || 0 == slen)
	{
		ldst.str.addr = (char *)start;
		ldst.str.len = 0;
		*dst = ldst;
		return;
	}
	/* Test mval for valid cache: index ok, mval addr same, delim same. One additional test
	 * is if the cache entry is byte_oriented, then this cache entry was created by $ZPIECE
	 * (using bytes) and since its results are not same as $PIECE(), we must ignore the cache
	 * and rebuild it for this mval.
	 */
	fnpc_indx = src->fnpc_indx - 1;		/* Wraps to a huge unsigned value if the mval has no cache index (0) */
	/* Only form the cache entry address once the index is known to be in range. Computing
	 * &fnpcs[fnpc_indx] with an out-of-range index is undefined behavior even if never dereferenced.
	 */
	cfnpc = (FNPC_MAX > fnpc_indx) ? &(TREF(fnpca)).fnpcs[fnpc_indx] : NULL;
	if ((NULL != cfnpc) && cfnpc->last_str.addr == (char *)first &&
	    cfnpc->last_str.len == slen && cfnpc->delim == ldelim.unichar_val &&
	    !cfnpc->byte_oriented)	/* cannot use the cache created by an earlier $ZPIECE() */
	{
		/* Have valid cache. See if piece we want already in cache */
		COUNT_EVENT(hit);
		INCR_COUNT(pskip, cfnpc->npcs);
		pcoffmax = &cfnpc->pstart[FNPC_ELEM_MAX];	/* Local end of array value */
		if (trgpcidx <= cfnpc->npcs)
		{
			/* Piece is totally in cache no scan needed. Each pstart entry is the offset of a piece
			 * start, so the difference of consecutive entries is piece length plus delimiter length.
			 */
			ldst.str.addr = (char *)first + cfnpc->pstart[trgpcidx - 1];
			ldst.str.len = cfnpc->pstart[trgpcidx] - cfnpc->pstart[trgpcidx - 1] - dlmlen;
			assert(ldst.str.len >= 0 && ldst.str.len <= src->str.len);
			*dst = ldst;
			return;
		} else
		{
			/* Not in cache but pick up scan where we left off */
			cpcidx = cfnpc->npcs;
			first = last = start + cfnpc->pstart[cpcidx];	/* First byte of next pc */
			pcoff = &cfnpc->pstart[cpcidx];
			if (pcoff == pcoffmax)
				++pcoff;		/* No further updates to pstart array */
			++cpcidx;			/* Now past last piece and on to next one */
			COUNT_EVENT(parscan);
		}
	} else
	{
		/* The piece cache index or mval validation was incorrect.
		 * Start from the beginning
		 */
		COUNT_EVENT(miss);
		/* Need to steal a new piece cache, get "least recently reused" */
		cfnpc = (TREF(fnpca)).fnpcsteal;		/* Get next element to steal */
		if ((TREF(fnpca)).fnpcmax < cfnpc)
			cfnpc = &(TREF(fnpca)).fnpcs[0];	/* Ran off the end of the cache array so wrap to first entry */
		(TREF(fnpca)).fnpcsteal = cfnpc + 1;		/* -> next element to steal */
		cfnpc->last_str = src->str;			/* Save validation info */
		cfnpc->delim = ldelim.unichar_val;
		cfnpc->npcs = 0;
		cfnpc->byte_oriented = FALSE;
		src->fnpc_indx = cfnpc->indx + 1;		/* Save where we are putting this element
								 * (1 based index in mval so 0 isn't so common)
								 */
		pcoff = &cfnpc->pstart[0];
		pcoffmax = &cfnpc->pstart[FNPC_ELEM_MAX];	/* Local end of array value */
		cpcidx = 1;					/* current piece index */
	}
	/* Do scan filling in offsets of pieces if they fit in the cache */
	spcidx = cpcidx;					/* Starting value for search */
	while ((cpcidx <= trgpcidx) && (last < end))
	{
		/* Once through for each piece we pass, last time through to find length of piece we want */
		first = last;					/* First char of current piece */
		while (last < end)
		{
			valid_char = UTF8_VALID(last, end, mblen);	/* Length of next char */
			if (!valid_char)
			{	/* Next character is not valid UTF8. If badchar error is not inhibited,
				 * signal it now. If it is inhibited, just treat the character as a single
				 * character and continue.
				 */
				if (!badchar_inhibit)
					utf8_badchar(0, last, end, 0, NULL);
				assert(1 == mblen);
			}
			/* Getting mblen first allows us to do quick length compare before the
			 * heavier weight memcmp call.
			 */
			assert(0 < mblen);
			if (mblen == dlmlen)
			{
				if (1 == dlmlen)
				{
					if (*last == ldelim.unibytes_val[0])	/* Shortcut - test single byte */
						break;
				} else if (0 == memcmp(last, ldelim.unibytes_val, dlmlen))	/* Longcut - for multibyte chk */
					break;
			}
			last += mblen;				/* Find delim signaling end of piece */
		}
		last += dlmlen;					/* Bump past delim to first byte of next piece. The length of
								 * the delimiter is assumed in the pcoff array and is removed
								 * when piece length is calculated so even if we hit the end of
								 * the scanned source, we bump the pointer so this extra length
								 * is reflected in the pcoff array consistently.
								 */
		++cpcidx;					/* Next piece */
		if (pcoff < pcoffmax)
			*pcoff++ = (unsigned int)(first - start);	/* Offset to this piece */
		if (pcoff == pcoffmax)
			*pcoff++ = (unsigned int)(last - start);	/* Store start of first piece beyond what is in cache */
	}
	ldst.str.addr = (char *)first;
	/* If we scanned some chars, adjust end pointer and save end of final piece */
	if (spcidx != cpcidx)
	{
		if (pcoff < pcoffmax)
			*pcoff = (unsigned int)(last - start);	/* If not at end of cache, save start of "next" piece */
		last -= dlmlen;					/* Undo bump past last delim (existing or not)
								 * of piece for accurate string len
								 */
		/* Update count of pieces in cache */
		cfnpc->npcs = MIN((cfnpc->npcs + cpcidx - spcidx), FNPC_ELEM_MAX);
		assert(cfnpc->npcs <= FNPC_ELEM_MAX);
		assert(cfnpc->npcs > 0);
		/* If the above loop ended prematurely because we ran out of text (i.e. the source has fewer
		 * pieces than requested), we return the null string.
		 */
		if (trgpcidx < cpcidx)
			ldst.str.len = INTCAST(last - first);	/* Length of piece we located */
		else
			ldst.str.len = 0;
		INCR_COUNT(pscan, cpcidx - spcidx);		/* Pieces scanned */
	} else
		ldst.str.len = 0;
	assert(0 < cfnpc->npcs);
	assert((0 <= ldst.str.len) && (ldst.str.len <= src->str.len));
	*dst = ldst;
	return;
}
|