File: wl_string.c

package info (click to toggle)
gwm 1.8d-2
  • links: PTS
  • area: main
  • in suites: potato, woody
  • size: 5,120 kB
  • ctags: 3,030
  • sloc: ansic: 19,617; makefile: 1,763; lisp: 437; sh: 321; ml: 21
file content (651 lines) | stat: -rw-r--r-- 13,881 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
/* Copyright 1989 GROUPE BULL -- See license conditions in file COPYRIGHT
 * Copyright 1989 Massachusetts Institute of Technology
 */
/***********************\
* 		        *
*  WOOL_OBJECT  String  *
*  BODY		        *
* 		        *
\***********************/

#include "EXTERN.h"
#include <stdio.h>
#include "wool.h"
#include "wl_number.h"
#include "wl_atom.h"
#include "wl_active.h"
#include "wl_pointer.h"
#include "wl_name.h"
#include "wl_list.h"
#include "INTERN.h"
#include "wl_string.h"

char * re_comp();

/*
 * Constructor:
 * WLString_make
 * argument 1: the string, which will be COPIED.
 */

WOOL_String 
WLString_make(s)
char           *s;		/* the string itself */
{
    WOOL_String     object;

    if (s[0] == '\0')
	return NIL_STRING;
    object = (WOOL_String)
	Malloc(sizeof(struct _WOOL_String) + strlen(s));
    zrt_put(object);
    object -> type = WLString;
    strcpy(object -> string, s);
    return object;
}

WOOL_String 
WLString_n_make(n)
int	n;
{
    WOOL_String     object = (WOOL_String)
    Malloc(sizeof(struct _WOOL_String) + n);

    zrt_put(object);
    object -> type = WLString;
    return object;
}

NIL_STRING_make()
{
    NIL_STRING = (WOOL_String) Malloc(sizeof(struct _WOOL_String));
    zrt_put(NIL_STRING);
    NIL_STRING -> type = WLString;
    NIL_STRING -> string[0] = '\0';
    increase_reference(NIL_STRING);
}

/*
 * WLString_print:
 * We print strings surrounded by double quotes.
 */

WOOL_OBJECT 
WLString_print(obj)
WOOL_String     obj;
{
    wool_puts(obj -> string);
    return (WOOL_OBJECT) obj;
}

/*
 * WLString_free
 * just frees the structure, so we use WLNumber_free.
 */

/* 
 * WLString_execute
 * just the same error as with atoms, let's use it.
 */

/*
 * WLString_equal
 * tests 2 strings for equality (returns it if true)
 */

WOOL_OBJECT
WLString_equal(s1, s2)
WOOL_String s1, s2;
{
    if (!is_a_string(s2) || strcmp(s1 -> string, s2 -> string))
	return NIL;
    else
	return (WOOL_OBJECT) s1;
}

/*
 * string manipulation routines
 */

static char *strings_temp_buffer;
static int strings_temp_buffer_size = 256;

WOOL_OBJECT
add_strings(argc,argv)
int             argc;
WOOL_String     argv[];
{
    int             required_length = 0, i;

    for (i = 0; i < argc; i++) {
	must_be_string(argv[i], i);
	if (argv[i] != (WOOL_String) NIL)
	    required_length += strlen(argv[i] -> string);
    }
    if (!strings_temp_buffer)
	strings_temp_buffer = (char *) Malloc(strings_temp_buffer_size);
    if (required_length >= strings_temp_buffer_size) {
	strings_temp_buffer_size = Max(2 * strings_temp_buffer_size,
				       required_length+1);
	strings_temp_buffer = (char *)
	    Realloc(strings_temp_buffer, strings_temp_buffer_size);
    }
    strings_temp_buffer[0] = '\0';
    for (i = 0; i < argc; i++)
	if (argv[i] != (WOOL_String) NIL)
	    strcat(strings_temp_buffer, argv[i] -> string);
    return (WOOL_OBJECT) WLString_make(strings_temp_buffer);
}

/*
 * To know if an object can be used as a string (atom, pointer or active)
 */

int 
must_be_string(obj, n)
WOOL_OBJECT	obj;
int		n;
{
    if (obj -> type != WLString
	&& obj -> type != WLAtom
	&& obj -> type != WLActive
	&& obj -> type != WLPointer
	&& obj -> type != WLName
	&& obj != NIL)
	bad_argument(obj, n, WOOL_TYPE_P_NAME(WLString));
}

int 
is_a_string(obj)
WOOL_OBJECT	obj;
{
    if (obj -> type != WLString
        && obj -> type != WLAtom
        && obj -> type != WLActive
        && obj -> type != WLPointer
        && obj -> type != WLName
        && obj != NIL)
 	return 0;
    else
	return 1;
 }


/**************************************************************************\
* 									   *
* the general  match package						   *
* (match regular-expression string [level])				   *
* returns the sub-string in the levelth enclosing \( and \) or NIL_STRING  *
* or string or NIL if no level given					   *
* 									   *
\**************************************************************************/

WOOL_String
WLString_match(argc, argv)
int	argc;
WOOL_String	argv[];
{
    int             result, i;
    char           *subst, *s, *comp_error;
    WOOL_String     wl_subst;
    WOOL_List	    wl_list;
    
    if (argc < 2)
      wool_error(BAD_NUMBER_OF_ARGS, argc);
    must_be_string(argv[0], 0);
    must_be_string(argv[1], 1);
    if ((comp_error = re_comp(argv[0] -> string)) ||
	((result = re_exec(argv[1] -> string)) == -1)) {
	if (comp_error)
	  wool_printf("%\n", comp_error);
	wool_error("match: Bad regular expression, %s", argv[0] -> string);
    }
    if (result) {
        switch (argc) {
	 case 2:
	    return argv[1];
	 case 3:
	    must_be_number(argv[2], 2);
	    if (result =
		re_subst(((WOOL_Number) argv[2]) -> number, &subst)) {
		wl_subst = WLString_n_make(result + 1);
		strncpy(wl_subst -> string, subst, result);
		wl_subst -> string[result] = '\0';
		return wl_subst;
	    } else {
		return NIL_STRING;
	    }
	 default:
	    wl_list = wool_list_make(argc - 2);
	    bzero(wl_list -> list, wl_list -> size * sizeof (WOOL_OBJECT));
	    for (i = 2; i< argc; i++) {
		must_be_number(argv[i], i);
		if (result =
		    re_subst(((WOOL_Number) argv[i]) -> number, &subst)) {
		    wl_subst = WLString_n_make(result + 1);
		    strncpy(wl_subst -> string, subst, result);
		    wl_subst -> string[result] = '\0';
		    increase_reference(wl_list -> list[i - 2] =
				       (WOOL_OBJECT) wl_subst);
		} else {
		    increase_reference(wl_list -> list[i - 2] =
				       (WOOL_OBJECT) NIL_STRING);
		}
	    }
	    return (WOOL_String) wl_list;
        }
    } else {
	return (argc == 2 ? (WOOL_String) NIL : NIL_STRING);
    }
}

/*
 * routines to do regular expression matching
 *
 * Entry points:
 *
 *	re_comp(s)
 *		char *s;
 *	 ... returns 0 if the string s was compiled successfully,
 *		     a pointer to an error message otherwise.
 *	     If passed 0 or a null string returns without changing
 *           the currently compiled re (see note 11 below).
 *
 *	re_exec(s)
 *		char *s;
 *	 ... returns 1 if the string s matches the last compiled regular
 *		       expression, 
 *		     0 if the string s failed to match the last compiled
 *		       regular expression, and
 *		    -1 if the compiled regular expression was invalid 
 *		       (indicating an internal error).
 *
 *	re_subst(n, &p)
 *		int n;
 *		char *p;
 *	 ... returns in p the string matching the nth \(, returns the
 *		    number of chars matching
 *
 * The strings passed to both re_comp and re_exec may have trailing or
 * embedded newline characters; they are terminated by nulls.
 *
 * The regular expressions recognized are described below. This description
 * is essentially the same as that for ed.
 *
 *	A regular expression specifies a set of strings of characters.
 *	A member of this set of strings is said to be matched by
 *	the regular expression.  In the following specification for
 *	regular expressions the word `character' means any character but NUL.
 *
 *	1.  Any character except a special character matches itself.
 *	    Special characters are the regular expression delimiter plus
 *	    \ [ . and sometimes ^ * $.
 *	2.  A . matches any character.
 *	3.  A \ followed by any character except a digit or ( )
 *	    matches that character.
 *	4.  A nonempty string s bracketed [s] (or [^s]) matches any
 *	    character in (or not in) s. In s, \ has no special meaning,
 *	    and ] may only appear as the first letter. A substring 
 *	    a-b, with a and b in ascending ASCII order, stands for
 *	    the inclusive range of ASCII characters.
 *	5.  A regular expression of form 1-4 followed by * matches a
 *	    sequence of 0 or more matches of the regular expression.
 *	6.  A regular expression, x, of form 1-8, bracketed \(x\)
 *	    matches what x matches.
 *	7.  A \ followed by a digit n matches a copy of the string that the
 *	    bracketed regular expression beginning with the nth \( matched.
 *	8.  A regular expression of form 1-8, x, followed by a regular
 *	    expression of form 1-7, y matches a match for x followed by
 *	    a match for y, with the x match being as long as possible
 *	    while still permitting a y match.
 *	9.  A regular expression of form 1-8 preceded by ^ (or followed
 *	    by $), is constrained to matches that begin at the left
 *	    (or end at the right) end of a line.
 *	10. A regular expression of form 1-9 picks out the longest among
 *	    the leftmost matches in a line.
 *	11. An empty regular expression stands for a copy of the last
 *	    regular expression encountered.
 */

/*
 * constants for re's
 */
#define	CBRA	1
#define	CCHR	2
#define	CDOT	4
#define	CCL	6
#define	NCCL	8
#define	CDOL	10
#define	CEOF	11
#define	CKET	12
#define	CBACK	18

#define	CSTAR	01

#define	ESIZE	512
#define	NBRA	9
#define	comerr(msg) {expbuf[0] = 0; numbra = 0; return(msg); }

static	char	expbuf[ESIZE], *braslist[NBRA], *braelist[NBRA];
static	char	circf;
static 	int	numbra;
static  int	advance();

/*
 * compile the regular expression argument into a dfa
 */

char *
re_comp(sp)
char	*sp;
{
    int    c;
    char  *ep = expbuf;
    int             cclcnt;
    char           *lastep = 0;
    char            bracket[NBRA];
    char           *bracketp = &bracket[0];
    static char    *retoolong = "Regular expression too long";

    numbra = 0;
    if (sp == 0 || *sp == '\0') {
	if (*ep == 0)
	    return ("No previous regular expression");
	return (0);
    }
    if (*sp == '^') {
	circf = 1;
	sp++;
    } else
	circf = 0;
    for (;;) {
	if (ep >= &expbuf[ESIZE])
	    comerr(retoolong);
	if ((c = *sp++) == '\0') {
	    if (bracketp != bracket)
		comerr("unmatched \\(");
	    *ep++ = CEOF;
	    *ep++ = 0;
	    return (0);
	}
	if (c != '*')
	    lastep = ep;
	switch (c) {

	case '.':
	    *ep++ = CDOT;
	    continue;

	case '*':
	    if (lastep == 0 || *lastep == CBRA || *lastep == CKET)
		goto defchar;
	    *lastep |= CSTAR;
	    continue;

	case '$':
	    if (*sp != '\0')
		goto defchar;
	    *ep++ = CDOL;
	    continue;

	case '[':
	    *ep++ = CCL;
	    *ep++ = 0;
	    cclcnt = 1;
	    if ((c = *sp++) == '^') {
		c = *sp++;
		ep[-2] = NCCL;
	    }
	    do {
		if (c == '\0')
		    comerr("missing ]");
		if (c == '-' && ep[-1] != 0) {
		    if ((c = *sp++) == ']') {
			*ep++ = '-';
			cclcnt++;
			break;
		    }
		    while (ep[-1] < c) {
			*ep = ep[-1] + 1;
			ep++;
			cclcnt++;
			if (ep >= &expbuf[ESIZE])
			    comerr(retoolong);
		    }
		}
		*ep++ = c;
		cclcnt++;
		if (ep >= &expbuf[ESIZE])
		    comerr(retoolong);
	    } while ((c = *sp++) != ']');
	    lastep[1] = cclcnt;
	    continue;

	case '\\':
	    if ((c = *sp++) == '(') {
		if (numbra >= NBRA)
		    comerr("too many \\(\\) pairs");
		*bracketp++ = numbra;
		*ep++ = CBRA;
		*ep++ = numbra++;
		continue;
	    }
	    if (c == ')') {
		if (bracketp <= bracket)
		    comerr("unmatched \\)");
		*ep++ = CKET;
		*ep++ = *--bracketp;
		continue;
	    }
	    if (c >= '1' && c < ('1' + NBRA)) {
		*ep++ = CBACK;
		*ep++ = c - '1';
		continue;
	    }
	    *ep++ = CCHR;
	    *ep++ = c;
	    continue;

    defchar:
	default:
	    *ep++ = CCHR;
	    *ep++ = c;
	}
    }
}

/* 
 * match the argument string against the compiled re
 */

int
re_exec(p1)
char	*p1;
{
    char  *p2 = expbuf;
    int    c;
    int             rv;

    for (c = 0; c < NBRA; c++) {
	braslist[c] = 0;
	braelist[c] = 0;
    }
    if (circf)
	return ((advance(p1, p2)));

    /*
     * fast check for first character 
     */
    if (*p2 == CCHR) {
	c = p2[1];
	do {
	    if (*p1 != c)
		continue;
	    if (rv = advance(p1, p2))
		return (rv);
	} while (*p1++);
	return (0);
    }

    /*
     * regular algorithm 
     */
    do
	if (rv = advance(p1, p2))
	    return (rv);
    while (*p1++);
    return (0);
}

/* 
 * try to match the next thing in the dfa
 */

static	int
advance(lp, ep)
char	*lp, *ep;
{
    char  *curlp;
    int             ct, i;
    int             rv;

    for (;;)
	switch (*ep++) {

	case CCHR:
	    if (*ep++ == *lp++)
		continue;
	    return (0);

	case CDOT:
	    if (*lp++)
		continue;
	    return (0);

	case CDOL:
	    if (*lp == '\0')
		continue;
	    return (0);

	case CEOF:
	    return (1);

	case CCL:
	    if (cclass(ep, *lp++, 1)) {
		ep += *ep;
		continue;
	    }
	    return (0);

	case NCCL:
	    if (cclass(ep, *lp++, 0)) {
		ep += *ep;
		continue;
	    }
	    return (0);

	case CBRA:
	    braslist[*ep++] = lp;
	    continue;

	case CKET:
	    braelist[*ep++] = lp;
	    continue;

	case CBACK:
	    if (braelist[i = *ep++] == 0)
		return (-1);
	    if (backref(i, lp)) {
		lp += braelist[i] - braslist[i];
		continue;
	    }
	    return (0);

	case CBACK | CSTAR:
	    if (braelist[i = *ep++] == 0)
		return (-1);
	    curlp = lp;
	    ct = braelist[i] - braslist[i];
	    while (backref(i, lp))
		lp += ct;
	    while (lp >= curlp) {
		if (rv = advance(lp, ep))
		    return (rv);
		lp -= ct;
	    }
	    continue;

	case CDOT | CSTAR:
	    curlp = lp;
	    while (*lp++);
	    goto star;

	case CCHR | CSTAR:
	    curlp = lp;
	    while (*lp++ == *ep);
	    ep++;
	    goto star;

	case CCL | CSTAR:
	case NCCL | CSTAR:
	    curlp = lp;
	    while (cclass(ep, *lp++, ep[-1] == (CCL | CSTAR)));
	    ep += *ep;
	    goto star;

    star:
	    do {
		lp--;
		if (rv = advance(lp, ep))
		    return (rv);
	    } while (lp > curlp);
	    return (0);

	default:
	    return (-1);
	}
}

backref(i, lp)
int	i;
char	*lp;
{
    char  *bp;

    bp = braslist[i];
    while (*bp++ == *lp++)
	if (bp >= braelist[i])
	    return (1);
    return (0);
}

int
cclass(set, c, af)
char	*set, c;
int	af;
{
    int    n;

    if (c == 0)
	return (0);
    n = *set++;
    while (--n)
	if (*set++ == c)
	    return (af);
    return (!af);
}

int
re_subst(n, pp)
int 	n;
char	**pp;
{
    int length;

    if(n > numbra) {
	return (int) wool_error("match: Too many \"(\" asked (%d) ", n);
    }
    *pp = braslist[n-1];
    length = braelist[n-1] - *pp;
    return length;
}