File: tagsrch.c

package info (click to toggle)
elvis 2.1i-3
  • links: PTS
  • area: non-free
  • in suites: hamm
  • size: 4,120 kB
  • ctags: 5,838
  • sloc: ansic: 53,854; sh: 811; makefile: 263
file content (710 lines) | stat: -rw-r--r-- 18,282 bytes parent folder | download | duplicates (4)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
/* tagsrch.c */

/* Elvis uses this file to scan a tags file, and build a list of the matching
 * tags, sorted by name and likelihood that they're the intended tag.
 *
 * Entry points are:
 *   void tsreset()			forget old restrictions
 *   void tsparse(text)			add new restrictions
 *   void tsadjust(tag, oper)		adjust likelihood heuristic data
 *   void tsfile(filename, maxlength)	scan a file for tags, add to taglist
 */

#include "elvis.h"

/* Weights used by the success/failure history heuristic.  A history entry's
 * weight is set to WEIGHT_SUCCESS or WEIGHT_FAIL when a value is recorded for
 * it, and WEIGHT_AGING is the amount subtracted each time the history is aged
 * and for each step further down an entry's list of remembered values.
 */
#define WEIGHT_SUCCESS	100	/* initial weight of an attribute from a succeeded search */
#define WEIGHT_FAIL	95	/* initial weight of an attribute from a failed search */
#define WEIGHT_AGING	10	/* weight lost per aging step / per older value */

/* These structures are used for storing a list of acceptable values for a
 * particular attribute.  The list is singly linked; new values are inserted
 * at the front, so the newest value comes first.  The value string is a
 * private copy made with safedup() and released with safefree().  An empty
 * string ("") is treated as matching any value by the full-length comparisons
 * in likelyhood() and chkrestrict().
 */
typedef struct value_s
{
	struct value_s	*next;		/* another possible value */
	char		*value;		/* attribute's possible value */
} value_t;


/* These structures are used for storing a list of restrictive names, and
 * their possible values.  The same structure is used for the restrictions
 * list and for the succeeded/failed history lists; only the meaning of the
 * weight field differs:
 *   - restrictions list: 0=optional, 1=required, 2=optional but counted
 *     toward the mandatory total when the tag has the attribute.
 *   - history lists: starts at WEIGHT_SUCCESS or WEIGHT_FAIL and is reduced
 *     by WEIGHT_AGING each time the list is aged.
 */
typedef struct name_s
{
	struct name_s	*next;		/* another restriction */
	char		*name;		/* attribute's name */
	long		weight;		/* 1=required, 0=optional */
	value_t		*values;	/* list of possible values */
} name_t;


/* Forward declarations of the file-local helpers, only emitted when the
 * compiler supports ANSI prototypes.
 */
#if USE_PROTOTYPES
static name_t *addrestrict(char *nametext, char *valuetext, _char_ oper);
static long likelyhood(TAG *tag, name_t *head, name_t *map[]);
static name_t *age(name_t *head);
static BOOLEAN chkrestrict(TAG *tag);

#endif /* USE_PROTOTYPES */

/* These are the heads of three lists: one for restrictions, one for
 * attributes of recent successful searches, and one for attributes of recent
 * failed searches.  Also, there are map tables that allow a given name to be
 * found quickly via its attribute index in the current tags file.
 *
 * A map slot has three states: NULL means "not looked up yet", NO_NAME means
 * "looked up, and the list has no entry for this attribute", and any other
 * value is the cached name_t pointer.  NO_NAME is only ever compared against;
 * it is never dereferenced.
 */
static name_t	*rhead, *rmap[MAXATTR];	/* restrictions */
static name_t	*shead, *smap[MAXATTR];	/* attributes from succeeded searches */
static name_t	*fhead, *fmap[MAXATTR];	/* attributes from failed searches */
static int	nmandatory;		/* number of mandatory restrictions */
#define NO_NAME	(((name_t *)0) + 1)


/* These store the first and last tagnames that we care about, sorted in
 * ASCII order like the tags file.  This helps us quickly skip tags that we
 * don't care about.  If there is no tagname restriction, these are NULL.
 * The longname variable stores the length of the longest name.
 *
 * firstname and lastname point into value strings owned by the "tagname"
 * entry's value list; they are not separate allocations.
 *
 * taglength and fulllength are chosen by tsfile() for each file scanned:
 * taglength is the number of leading tagname characters that are compared,
 * and fulllength is True when whole names are compared rather than only a
 * prefix.
 */
static char	*firstname, *lastname;
static int	longname;
static int	taglength;
static BOOLEAN	fulllength;


/* This function adds a name/value pair to one of the three lists, and
 * returns a pointer to the name_t record.  It chooses which list to update
 * based on the operator:
 *   '+'		the succeeded-search history (shead)
 *   '-'		the failed-search history (fhead)
 *   anything else	the restrictions list (rhead)
 *
 * The name record is created if it doesn't exist yet.  If valuetext is not
 * NULL, a copy of it is inserted at the front of the name's value list.
 *
 * For the history lists the weight is refreshed and the value list is trimmed
 * so that only values which would still carry a positive weight remain.  For
 * the restrictions list the weight records whether the attribute is optional
 * (0), required (1), or optional-but-counted (2).
 *
 * Whenever the restrictions list is touched, nmandatory is recounted and, if
 * the name is "tagname", firstname/lastname/longname are recomputed.  Both
 * are deliberately limited to the restrictions list: the history lists are
 * not restrictions, and their values can be freed by age() at any time, so
 * firstname/lastname must never point into them.
 */
static name_t *addrestrict(nametext, valuetext, oper)
	char	*nametext;	/* text form of restrictive name */
	char	*valuetext;	/* text form of possible value, or NULL */
	_char_	oper;		/* one of {= : / + -} from command line */
{
	name_t	**list;		/* the list to search */
	name_t	*name, *namelag;/* for scanning the names list */
	value_t	*value, *vlag;	/* for scanning the values list */
	long	i;
	int	len;

	/* choose a list */
	switch (oper)
	{
	  case '+':	list = &shead;	break;	/* the succeeded attributes */
	  case '-':	list = &fhead;	break;	/* the failed attributes */
	  default:	list = &rhead;	break;	/* the restrictions */
	}

	/* search for the name in the list.  Add it to the end if new */
	for (namelag = NULL, name = *list;
	     name && strcmp(nametext, name->name);
	     namelag = name, name = name->next)
	{
	}
	if (!name)
	{
		name = (name_t *)safealloc(1, sizeof(name_t));
		name->name = safedup(nametext);
		name->weight = 0;
		if (namelag)
			namelag->next = name;
		else
			*list = name;
	}

	/* insert the value at the front of the list of acceptable values */
	if (valuetext)
	{
		value = (value_t *)safealloc(1, sizeof(value_t));
		value->value = safedup(valuetext);
		value->next = name->values;
		name->values = value;
	}

	/* adjust the weight */
	switch (oper)
	{
	  case '+':
	  case '-':
		if (valuetext)
		{
			name->weight = (oper == '+') ? WEIGHT_SUCCESS : WEIGHT_FAIL;
		}

		/* Discard any older values which have no weight.  Each step
		 * down the list costs WEIGHT_AGING, so walk until the weight
		 * budget is used up and cut the list there.
		 */
		for (i = name->weight, vlag = NULL, value = name->values;
		     i > 0 && value;
		     i -= WEIGHT_AGING, vlag = value, value = value->next)
		{
		}
		if (vlag)
			vlag->next = NULL;
		else
			name->values = NULL;
		while (value)
		{
			vlag = value->next;
			safefree(value->value);
			safefree(value);
			value = vlag;
		}
		break;

	  case ':':
		name->weight = 0;	/* optional */
		break;

	  case '/':
		name->weight = 2;	/* optional, but counts against mandatory */
		break;

	  case '=':
		name->weight = 1;	/* required */
		break;
	}

	/* the remaining bookkeeping only concerns the restrictions list */
	if (list == &rhead)
	{
		/* Recount the mandatory restrictions.  This is done for every
		 * restriction operator, not just '=', because ':' and '/' can
		 * demote a name that used to be required.
		 */
		for (nmandatory = 0, namelag = rhead;
		     namelag;
		     namelag = namelag->next)
		{
			if (namelag->weight == 1)
				nmandatory++;
		}

		/* if adding to the tagname attribute, then update firstname
		 * and lastname variables.
		 */
		if (!strcmp(name->name, "tagname"))
		{
			firstname = lastname = NULL;
			longname = 0;
			for (value = name->values; value; value = value->next)
			{
				if (!firstname || strcmp(firstname, value->value) > 0)
					firstname = value->value;
				if (!lastname || strcmp(lastname, value->value) < 0)
					lastname = value->value;
				len = strlen(value->value);
				if (len > longname)
					longname = len;
			}
		}
	}

	/* return the name_t record of this item */
	return name;
}


/* Compute one half of a tag's likelihood score by comparing the tag's
 * attributes with one of the history lists (recent successes or recent
 * failures).  The caller obtains the tag's overall score by subtracting the
 * failure result from the success result.
 *
 * For every attribute the tag has, the matching history entry is located
 * (using and filling in the map[] cache), and if one of the entry's remembered
 * values matches the tag's value, the entry's weight -- reduced by
 * WEIGHT_AGING for each older position in the value list -- is added to the
 * result.  An empty remembered value matches anything.
 *
 * Side effect: map[] slots are filled in for every attribute index examined,
 * using NO_NAME to remember that the list has no entry for that attribute.
 */
static long likelyhood(tag, head, map)
	TAG	*tag;	/* a tag to check */
	name_t	*head;	/* either shead or fhead */
	name_t	*map[];	/* either smap[] or fmap[] */
{
	long	sum = 0;	/* accumulated weight */
	long	w;		/* weight of the value being examined */
	name_t	*entry;		/* history entry for the current attribute */
	value_t	*v;		/* remembered value being examined */
	int	idx;		/* attribute index */

	/* visit every known attribute, standard ones included */
	for (idx = 0; idx < MAXATTR && tagattrname[idx]; idx++)
	{
		/* nothing to score if the tag doesn't have this attribute */
		if (!tag->attr[idx])
			continue;

		/* use the cached entry, or look it up and cache the result */
		entry = map[idx];
		if (!entry)
		{
			entry = head;
			while (entry && strcmp(entry->name, tagattrname[idx]) != 0)
				entry = entry->next;
			if (!entry)
				entry = NO_NAME;
			map[idx] = entry;
		}

		/* the history knows nothing about this attribute */
		if (entry == NO_NAME)
			continue;

		/* walk the remembered values, newest first; the first one
		 * that is empty or equal to the tag's value decides the score
		 */
		w = entry->weight;
		for (v = entry->values; v; v = v->next)
		{
			if (!*v->value || strcmp(v->value, tag->attr[idx]) == 0)
			{
				sum += w;
				break;
			}
			w -= WEIGHT_AGING;
		}
	}

	return sum;
}


/* Age the successful/failed tag search data.  Every entry's weight is reduced
 * by WEIGHT_AGING, remembered values which would no longer carry a positive
 * weight are freed, and entries left with no values are removed from the list
 * and freed.  Returns the new head of the list (NULL if the list became
 * empty).  Doesn't depend on or update smap[] or fmap[].
 */
static name_t *age(head)
	name_t	*head;	/* either shead or fhead */
{
	name_t	**link;		/* the pointer which refers to the current entry */
	name_t	*scan;		/* the current entry */
	value_t	*value, *vlag;
	long	weight;

	/* For each named attribute...  Walking via a pointer-to-link lets us
	 * unlink any entry, including the head, without skipping whichever
	 * entry takes its place.
	 */
	for (link = &head; (scan = *link) != NULL; )
	{
		/* decrement its weight */
		scan->weight -= WEIGHT_AGING;

		/* Find the first value which would have zero (or negative)
		 * weight.  The weight drops by WEIGHT_AGING per position, the
		 * same step that likelyhood() and addrestrict() use.
		 */
		for (weight = scan->weight, value = scan->values, vlag = NULL;
		     value && weight > 0;
		     weight -= WEIGHT_AGING, vlag = value, value = value->next)
		{
		}

		/* cut the list there, and free the weightless tail */
		if (vlag)
			vlag->next = NULL;
		else
			scan->values = NULL;
		while (value)
		{
			vlag = value->next;
			safefree(value->value);
			safefree(value);
			value = vlag;
		}

		if (!scan->values)
		{
			/* no values left: unlink and free this name.  *link
			 * now refers to the following entry, so don't advance.
			 */
			*link = scan->next;
			safefree(scan->name);
			safefree(scan);
		}
		else
		{
			/* keep this name and move on to the next one */
			link = &scan->next;
		}
	}

	/* return the new head */
	return head;
}



/* Discard every restriction: free each name in the restrictions list along
 * with all of its values, then reset the state derived from that list
 * (rmap[], firstname, lastname, longname and nmandatory).  The succeeded and
 * failed history lists are left alone.
 */
void tsreset P_((void))
{
	name_t	*entry, *nextentry;
	value_t	*val, *nextval;

	/* release the whole restrictions list */
	for (entry = rhead; entry; entry = nextentry)
	{
		/* release this name's values first */
		for (val = entry->values; val; val = nextval)
		{
			nextval = val->next;
			safefree(val->value);
			safefree(val);
		}

		/* then the name itself */
		nextentry = entry->next;
		safefree(entry->name);
		safefree(entry);
	}
	rhead = NULL;

	/* the cached lookups referred to the records we just freed */
	memset(rmap, 0, sizeof rmap);

	/* no tagname restriction remains, so there is no name range */
	firstname = NULL;
	lastname = NULL;
	longname = 0;

	/* and there are no mandatory restrictions either */
	nmandatory = 0;
}


/* Parse a restrictions command line and record each restriction found via
 * addrestrict().  The line may hold any number of restrictions, and this
 * function may be called repeatedly to accumulate restrictions from several
 * lines; tsreset() is normally called before the first tsparse().
 *
 * A typical input: tsparse("mytag class:=DbItem,DbCustomer file:+myfile.cpp")
 *
 * A bare word (one followed by whitespace or the end of the text) is taken as
 * a mandatory value for the "tagname" attribute.  Otherwise a word followed by
 * an operator is an attribute name, and the words after it are its values:
 * 	name:value	Add a value for an optional attribute
 *	name:=value	Add a value for a mandatory attribute
 *	name:/value	Add a value for an optional attribute, but require the
 *			value to be a substring of the tagaddress value
 *	name:+value	Pretend name:value was part of a recent successful tag
 *	name:-value	Pretend name:value was part of a recent failed tag
 * A ',' repeats the previous operator, so "class:/DbItem,DbCustomer" and
 * "class:/DbItem:/DbCustomer" both have the same effect as
 * "class:/DbItem class:/DbCustomer".  A name with an operator but nothing
 * after it (at the very end of the text) is recorded with an empty value.
 *
 * A backslash makes the following character an ordinary part of the word;
 * the backslash itself is removed.
 *
 * THE TEXT IS CLOBBERED!  Words are NUL-terminated and unescaped in place.
 */
void tsparse(text)
	char	*text;	/* a "name:value name:value" string. */
{
	char	*attrname = NULL;	/* most recent attribute name seen */
	char	*word;			/* start of the current word */
	char	*dst;			/* write position while unescaping */
	char	curop = ' ';		/* operator which precedes this word */
	char	pending = ' ';		/* operator which follows this word */

	if (!text)
		return;

	while (*text)
	{
		/* step over whitespace between words */
		while (*text == ' ' || *text == '\t')
			text++;
		if (!*text)
			break;

		/* collect the word, dropping backslashes as we go */
		word = dst = text;
		while (*text && !strchr(":, \t", *text))
		{
			if (*text == '\\' && text[1])
				text++;
			*dst++ = *text++;
		}

		/* Work out the operator which FOLLOWS this word.  A comma
		 * leaves the pending operator as it was, so that the previous
		 * operator is repeated.
		 */
		if (*text != ',')
			pending = *text;

		/* terminate the word, both where the delimiter was and where
		 * the unescaped copy ends
		 */
		if (*text)
			*text++ = '\0';
		*dst = '\0';

		/* end-of-text and tab both count as plain whitespace */
		if (!pending || pending == '\t')
			pending = ' ';

		/* ':' may be followed by a modifier which is the real operator */
		if (pending == ':' && *text && strchr("=+-/", *text))
			pending = *text++;

		/* The operator which PRECEDED this word says what the word is */
		if (curop == ' ')
		{
			if (pending == ' ')
			{
				/* a bare word is a tagname */
				(void)addrestrict("tagname", word, '=');
			}
			else if (!*text)
			{
				/* "name<oper>" at the end of the text */
				(void)addrestrict(word, "", pending);
			}
			else
			{
				/* an attribute name; its values follow */
				attrname = word;
			}
		}
		else if (curop == '/')
		{
			/* a value for the named attribute... */
			(void)addrestrict(attrname, word, '/');

			/* ...which must also appear in the tag's address */
			(void)addrestrict("tagaddress", word, '=');
		}
		else
		{
			/* name:value name:=value name:+value name:-value */
			(void)addrestrict(attrname, word, curop);
		}

		curop = pending;
	}
}


/* Check a given tag against the restrictions.  Return True if it satisfies. */
static BOOLEAN chkrestrict(tag)
	TAG	*tag;
{
	int	mandcnt;/* number of mandatory restrictions which matched */
	name_t	*name;	/* a "name" struct for a restriction */
	value_t	*value;	/* a "value" struct from within "name"'s list */
	char	*scan;
	int	i;

	/* for each attribute... */
	for (i = mandcnt = 0; i < MAXATTR && tagattrname[i]; i++)
	{
		/* locate the name_t for this attribute.  If there is none,
		 * then ignore this attribute.
		 */
		if (rmap[i] == NO_NAME)
			continue;
		else if (rmap[i])
			name = rmap[i];
		else
		{
			for (name = rhead;
			     name && strcmp(name->name, tagattrname[i]);
			     name = name->next)
			{
			}
			if (!name)
			{
				rmap[i] = NO_NAME;
				continue;
			}
			rmap[i] = name;
		}

		/* skip the TAGADDR attribute until later */
		if (&tag->attr[i] == &tag->TAGADDR)
			continue;

		/* if required and tag doesn't have it, then reject */
		if (tag->attr[i])
			mandcnt += name->weight;
		else if (name->weight == 1)
			return False;

		/* if optional and tag doesn't have it, ignore it */
		if (!tag->attr[i])
			continue;

		/* check against all acceptable values */
		if (fulllength || tag->attr[i] != tag->TAGNAME)
		{
			for (value = name->values;
			     value && *value->value && strcmp(value->value, tag->attr[i]);
			     value = value->next)
			{
			}
		}
		else
		{
			for (value = name->values;
			     value && strncmp(value->value, tag->attr[i], (int)taglength);
			     value = value->next)
			{
			}
		}
		if (!value)
			return False;
	}

	/* As a special case, if there are tagaddress restrictions, then the
	 * tag's address field must contain one of them as a substring.
	 */
	name = rmap[2];
	if (name != NO_NAME && name && name->values && mandcnt < nmandatory)
	{
		/* for each possible address substring... */
		for (value = name->values; value; value = value->next)
		{
			/* search for the substring within the tag's address */
			i = strlen(value->value);
			if ((int)strlen(tag->TAGADDR) <= i + 2)
				continue;
			for (scan = tag->TAGADDR + 1;
			     scan[i] && 
				(isalnum(scan[-1])
					|| isalnum(scan[i])
					|| *scan != *value->value
			    		|| strncmp(scan, value->value, i));
			     scan++)
			{
			}

			/* if found, then stop looking */
			if (scan[i])
				break;
		}

		/* if no substring was found, then reject */
		if (!value)
			return False;

		/* If mandatory, count it */
		if (name->weight > 0)
			mandcnt++;
	}

	/* if some mandatory values were missing, reject it */
	if (mandcnt < nmandatory)
		return False;

	/* if nothing wrong with it, then it is acceptable */
	return True;
}




/* Record the outcome of a tag search in the history lists.  Both histories
 * are aged first, and then each non-standard attribute that the tag has is
 * added to the succeeded history (oper '+') or the failed history (oper '-').
 * Nothing is returned.  Doesn't depend on or update smap[] or fmap[].
 */
void tsadjust(tag, oper)
	TAG	*tag;	/* a tag that recently succeeded or failed */
	_char_	oper;	/* '+' if succeeded, '-' if failed */
{
	int	idx;	/* attribute index */

	/* older results count for less from now on */
	shead = age(shead);
	fhead = age(fhead);

	/* the first three attributes are the standard ones; skip them */
	idx = 3;
	while (idx < MAXATTR && tagattrname[idx])
	{
		/* remember only the attributes this tag actually has */
		if (tag->attr[idx])
			addrestrict(tagattrname[idx], tag->attr[idx], oper);
		idx++;
	}
}


/* Scan a file for tags which meet the restrictions, and add them to the list */
void tsfile(filename, maxlength)
	char	*filename;	/* name of a file to scan */
	long	maxlength;	/* maximum significant length, or 0 for all */
{
	CHAR	tagline[1000];	/* input buffer */
	BOOLEAN	allnext;	/* does tagline[] contain the whole next line?*/
	int	bytes;		/* number of bytes in tagline */
	CHAR	*src, *dst;	/* for manipulating tagline[] */
	TAG	*tag;		/* a tag parsed from tagline[] */
	int	i;

	/* clobber the rmap[], smap[], and fmap[] arrays */
	memset(rmap, 0, sizeof rmap);
	memset(smap, 0, sizeof smap);
	memset(fmap, 0, sizeof fmap);

	/* clobber the attribute names */
	tagnamereset();

	/* choose a significant length */
	if (maxlength == 0 || maxlength > longname)
		taglength = longname, fulllength = True;
	else
		taglength = maxlength, fulllength = False;

	/* open the file */
	if (!ioopen(filename, 'r', True, False, 't'))
	{
		return;
	}

	/* Make a local copy of the filename.  This is important because the
	 * value passed into this function is usually the static buffer used
	 * by the dirpath() function, and we want to use that buffer ourselves
	 * later in this function.
	 */
	filename = safedup(filename);

	/* Compare the tag of each line against the tagname */
	bytes = ioread(tagline, QTY(tagline) - 1);
	while (bytes > taglength
		&& (!lastname || CHARncmp(lastname, tagline, (size_t)taglength) >= 0))
	{
		/* disable firstname/lastname checks if tags file claims to
		 * be unsorted.
		 */
		if (lastname && *tagline == '!' && !CHARncmp(tagline, toCHAR("!_TAG_FILE_SORTED\t0\t"), 20))
			lastname = firstname = NULL;

		/* find the end of this line */
		for (src = tagline; src < &tagline[bytes] && *src != '\n'; src++)
		{
		}
		*src = '\0';

		/* if not obviously too early to be of interest, parse it and
		 * process it...
		 */
		if ((!firstname || CHARncmp(firstname, tagline, taglength) <= 0)
			&& (tag = tagparse(tochar8(tagline))) != NULL)
		{
			/* do we want to keep this tag? */
			if (*filename == '!')
			{
				/* Yes! compute its likelyhood factor */
				tag->match = likelyhood(tag, shead, smap)
						- likelyhood(tag, fhead, fmap);
				for (i = 3; i < MAXATTR; i++)
					if (tag->attr[i])
						tag->match++;

				/* save a copy of it */
				(void)tagadd(tagdup(tag));
			}
			else if (chkrestrict(tag))
			{
				/* Yes! compute its likelyhood factor */
				tag->match = likelyhood(tag, shead, smap)
						- likelyhood(tag, fhead, fmap);
				for (i = 3; i < MAXATTR; i++)
					if (tag->attr[i])
						tag->match++;

				/* replace the filename with full pathname */
				tag->TAGFILE = dirpath(dirdir(filename), tag->TAGFILE);
				/* save a copy of it */
				(void)tagadd(tagdup(tag));
			}
		}

		/* delete this line from tagline[] */
		for (dst = tagline, src++, allnext = False; src < &tagline[bytes]; )
		{
			if (*src == '\n')
				allnext = True;
			*dst++ = *src++;
		}
		bytes = (int)(dst - tagline);

		/* if the next line is incomplete, read some more text
		 * from the tags file.
		 */
		if (!allnext || bytes <= taglength)
		{
			bytes += ioread(dst, (int)QTY(tagline) - bytes - 1);
		}
	}
	safefree(filename);
	(void)ioclose();
}