File: op_fnp1.c

package info (click to toggle)
fis-gtm 6.3-014-3
  • links: PTS, VCS
  • area: main
  • in suites: bullseye
  • size: 36,680 kB
  • sloc: ansic: 333,039; asm: 5,180; csh: 4,956; sh: 1,924; awk: 291; makefile: 66; sed: 13
file content (205 lines) | stat: -rwxr-xr-x 7,624 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
/****************************************************************
 *								*
 * Copyright (c) 2006-2018 Fidelity National Information	*
 * Services, Inc. and/or its subsidiaries. All rights reserved.	*
 *								*
 *	This source code contains the intellectual property	*
 *	of its copyright holder(s), and is made available	*
 *	under a license.  If you do not know the terms of	*
 *	the license, please stop and do not read further.	*
 *								*
 ****************************************************************/

/*
 * -----------------------------------------------
 * op_fnp1 Piece function (the piecemaker) for UTF
 * Special case of 1 char delimiter and 1 piece (reference)
 *
 * Arguments:
 *	src		- pointer to Source mval
 *	del		- delimiter char to use looking for a piece
 *	trgpcidx	- index of piece to extract from source string
 *	dst		- pointer to Destination mval to save the piece in
 *
 * Return:
 *	none
 *
 * Side effects:
 *	dst structure gets filled with the result
 * -----------------------------------------------
 */

#include "mdef.h"

#include "gtm_string.h"

#include "fnpc.h"
#include "gtm_stdio.h"
#include "min_max.h"
#include "op.h"
#include "gtm_utf8.h"

GBLREF boolean_t	gtm_utf8_mode;		/* We are indeed doing the UTF8 thang */
GBLREF boolean_t	badchar_inhibit;	/* No BADCHAR errors should be signaled */

void op_fnp1(mval *src, int delim, int trgpcidx,  mval *dst)
{
	unsigned char	*first, *last, *start, *end;
	unsigned int	*pcoff, *pcoffmax, fnpc_indx, slen;
	int		trgpc, cpcidx, spcidx, mblen, dlmlen;
	boolean_t       valid_char;
	mval		ldst;		/* Local copy since &dst == &src .. move to dst at return */
	fnpc   		*cfnpc;
	delimfmt	ldelim;
	DCL_THREADGBL_ACCESS;

	SETUP_THREADGBL_ACCESS;
	assert(gtm_utf8_mode);
	MV_FORCE_STR(src);
	ldelim.unichar_val = delim;
	if (!UTF8_VALID(ldelim.unibytes_val, ldelim.unibytes_val + SIZEOF(ldelim.unibytes_val), dlmlen) &&
			!badchar_inhibit)
	{ /* The delimiter is a bad character so error out if badchar not inhibited */
		UTF8_BADCHAR(0, ldelim.unibytes_val, ldelim.unibytes_val + SIZEOF(ldelim.unibytes_val), 0, NULL);
	}
	ldst.mvtype = MV_STR;
	start = first = last = (unsigned char *)src->str.addr;
	slen = src->str.len;
	end = start + slen;
	/* Detect annoyance cases and deal with quickly so we don't muck up the
	 * logic below trying to handle it properly.
	 */
	if (0 >= trgpcidx || 0 == slen)
	{
		ldst.str.addr = (char *)start;
		ldst.str.len  = 0;
		*dst = ldst;
		return;
	}
	/* Test mval for valid cache: index ok, mval addr same, delim same. One additional test
	 * is if the cache entry is byte_oriented, then this cache entry was created by $ZPIECE
	 * (using bytes) and since its results are not same as $PIECE(), we must ignore the cache
	 * and rebuild it for this mval.
	 */
	fnpc_indx = src->fnpc_indx - 1;
	cfnpc = &(TREF(fnpca)).fnpcs[fnpc_indx];
	if (FNPC_MAX > fnpc_indx && cfnpc->last_str.addr == (char *)first &&
	    cfnpc->last_str.len == slen && cfnpc->delim == ldelim.unichar_val &&
	    !cfnpc->byte_oriented) /* cannot use the cache created by an earlier $ZPIECE() */
	{
		/* Have valid cache. See if piece we want already in cache */
		COUNT_EVENT(hit);
		INCR_COUNT(pskip, cfnpc->npcs);
		pcoffmax = &cfnpc->pstart[FNPC_ELEM_MAX];	/* Local end of array value */
		if (trgpcidx <= cfnpc->npcs)
		{
			/* Piece is totally in cache no scan needed */
			ldst.str.addr = (char *)first + cfnpc->pstart[trgpcidx - 1];
			ldst.str.len = cfnpc->pstart[trgpcidx] - cfnpc->pstart[trgpcidx - 1] - dlmlen;
			assert(ldst.str.len >= 0 && ldst.str.len <= src->str.len);
			*dst = ldst;
			return;
		} else
		{
			/* Not in cache but pick up scan where we left off */
			cpcidx = cfnpc->npcs;
			first = last = start + cfnpc->pstart[cpcidx];	/* First byte of next pc */
			pcoff = &cfnpc->pstart[cpcidx];
			if (pcoff == pcoffmax)
				++pcoff; 		/* No further updates to pstart array */
			++cpcidx;			/* Now past last piece and on to next one */
			COUNT_EVENT(parscan);
		}
	} else
	{
		/* The piece cache index or mval validation was incorrect.
		 * Start from the beginning
		 */
		COUNT_EVENT(miss);
		/* Need to steal a new piece cache, get "least recently reused" */
		cfnpc = (TREF(fnpca)).fnpcsteal;	/* Get next element to steal */
		if ((TREF(fnpca)).fnpcmax < cfnpc)
			cfnpc = &(TREF(fnpca)).fnpcs[0];
		(TREF(fnpca)).fnpcsteal = cfnpc + 1;	/* -> next element to steal */
		cfnpc->last_str = src->str;		/* Save validation info */
		cfnpc->delim = ldelim.unichar_val;
		cfnpc->npcs = 0;
		cfnpc->byte_oriented = FALSE;
		src->fnpc_indx = cfnpc->indx + 1;	/* Save where we are putting this element
							 * (1 based index in mval so 0 isn't so common)
							 */
		pcoff = &cfnpc->pstart[0];
		pcoffmax = &cfnpc->pstart[FNPC_ELEM_MAX];	/* Local end of array value */
		cpcidx = 1;				/* current piece index */
	}
	/* Do scan filling in offsets of pieces if they fit in the cache */
	spcidx = cpcidx;				/* Starting value for search */
	while ((cpcidx <= trgpcidx) && (last < end))
	{
		/* Once through for each piece we pass, last time through to find length of piece we want */
		first = last;				/* First char of current piece */
		while (last < end)
		{
			valid_char = UTF8_VALID(last, end, mblen);	/* Length of next char */
			if (!valid_char)
			{	/* Next character is not valid UTF8. If badchar error is not inhibited,
				 * signal it now. If it is inhibited, just treat the character as a single
				 * character and continue.
				 */
				if (!badchar_inhibit)
					utf8_badchar(0, last, end, 0, NULL);
				assert(1 == mblen);
			}
			/* Getting mblen first allows us to do quick length compare before the
			 * heavier weight memcmp call.
			 */
			assert(0 < mblen);
			if (mblen == dlmlen)
			{
				if (1 == dlmlen)
				{
					if (*last == ldelim.unibytes_val[0])			/* Shortcut - test single byte */
						break;
				} else if (0 == memcmp(last, ldelim.unibytes_val, dlmlen))	/* Longcut - for multibyte chk */
					break;
			}
			last += mblen;  		/* Find delim signaling end of piece */
		}
		last += dlmlen;				/* Bump past delim to first byte of next piece. The length of
							 * the delimiter is assumed in the pcoff array and is removed
							 * when piece length is calculated so even if we hit the end of
							 * the scanned source, we bump the pointer so this extra length
							 * is reflected in the pcoff array consistently.
							 */
		++cpcidx;				/* Next piece */
		if (pcoff < pcoffmax)
			*pcoff++ =(unsigned int)(first - start);	/* Offset to this piece */
		if (pcoff == pcoffmax)
			*pcoff++ = (unsigned int)(last - start);	/* Store start of first piece beyond what is in cache */
	}
	ldst.str.addr = (char *)first;
	/* If we scanned some chars, adjust end pointer and save end of final piece */
	if (spcidx != cpcidx)
	{
		if (pcoff < pcoffmax)
			*pcoff = (unsigned int)(last - start);		/* If not at end of cache, save start of "next" piece */
		last -= dlmlen;				/* Undo bump past last delim (existing or not)
							 * of piece for accurate string len
							 */
		/* Update count of pieces in cache */
		cfnpc->npcs = MIN((cfnpc->npcs + cpcidx - spcidx), FNPC_ELEM_MAX);
		assert(cfnpc->npcs <= FNPC_ELEM_MAX);
		assert(cfnpc->npcs > 0);
		/* If we the above loop ended prematurely because we ran out of text, we return null string */
		if (trgpcidx < cpcidx)
			ldst.str.len = INTCAST(last - first);	/* Length of piece we located */
		else
			ldst.str.len = 0;
		INCR_COUNT(pscan, cpcidx - spcidx);	/* Pieces scanned */
	} else
		ldst.str.len  = 0;
	assert(0 < cfnpc->npcs);
	assert((0 <= ldst.str.len) && (ldst.str.len <= src->str.len));
	*dst = ldst;
	return;
}