File: compiler.c

package info (click to toggle)
libindi 0.9.8.1-5.1
  • links: PTS, VCS
  • area: main
  • in suites: jessie, jessie-kfreebsd
  • size: 4,600 kB
  • ctags: 7,283
  • sloc: cpp: 34,410; ansic: 20,227; xml: 294; makefile: 13
file content (669 lines) | stat: -rw-r--r-- 18,009 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
/* module to compile and execute a c-style arithmetic expression made of INDI
 * operands. operand names must contain 2 dots and be surrounded by quotes.
 * The expression is compiled and the names of each operand are stored. The
 * values of an operand can be set later by name. Evaluation uses the last
 * known operand value.
 *
 * one reason this is so nice and tight is that all opcodes are the same size
 * (an int) and the tokens the parser returns are directly usable as opcodes.
 * constants and variables are compiled as an opcode with an offset into the
 * auxiliary consts and vars arrays.
 *
 * this is not reentrant, but new expressions can be compiled as desired.
 */

#include <stdio.h>
#include <math.h>
#include <ctype.h>
#include <stdlib.h>
#include <string.h>

static int next_token (void);
static int chk_funcs (void);
static void skip_double (void);
static int compile (int prec);
static int execute (double *result);
static int parse_fieldname (char name[], int len);

/* parser tokens and opcodes, as necessary */
enum {
    HALT,			/* HALT = 0 serves as good initial default */

    /* binary operators (precedences in table, below) */
    ADD,
    SUB,
    MULT,
    DIV,
    AND,
    OR,
    GT,
    GE,
    EQ,
    NE,
    LT,
    LE,

    /* unary ops, precedence all UNI_PREC */
    NEG,
    NOT,

    /* symantically operands, ie, constants, variables and all functions */
    CONST,
    VAR,
    ABS,
    FLOOR,
    SIN,
    COS,
    TAN,
    ASIN,
    ACOS,
    ATAN,
    PITOK,			/* built-in constant, pi */
    DEGRAD,
    RADDEG,
    LOG,
    LOG10,
    EXP,
    SQRT,
    POW,
    ATAN2,
};

/* purely tokens - never get compiled as such */
#define	LPAREN	255
#define	RPAREN	254
#define	COMMA	253
#define	ERR	(-1)

/* precedence of each of the binary operators.
 * in case of a tie, compiler associates left-to-right.
 * N.B. each entry's index must correspond to its #define!
 */
static int precedence[] = {0,5,5,6,6,2,1,4,4,3,3,4,4};
#define	UNI_PREC	7	/* unary ops have highest precedence */

/* execute-time operand stack */
#define	MAX_STACK	32
static double stack[MAX_STACK], *sp;

/* space for compiled opcodes - the "program".
 * opcodes go in lower 8 bits.
 * when an opcode has an operand (as CONST and VAR) it is really an array
 *   index in the remaining upper bits.
 */
#define	MAX_PROG 32
static int program[MAX_PROG], *pc;
#define	OP_SHIFT	8
#define	OP_MASK		0xff

/* auxiliary operand info.
 * the operands (all but lower 8 bits) of CONST and VAR are really indeces
 * into these arrays. thus, no point in making them any longer than you have
 * bits more than 8 in your machine's int to index into it, ie, make
 *    MAX_OPX < 1 << ((sizeof(int)-1)*8)
 */
#define	MAX_OPX		64	/* max number of operands */
#define	MAXFLDLEN	64	/* longest allowed operand name */
typedef struct {
    int set;			/* 1 when v has been set */
    char name[MAXFLDLEN];	/* name of operand */
    double v;			/* last known value of this operand */
} Var;
static Var vars[MAX_OPX];	/* operands */
static int nvars;		/* number of vars[] in actual use */
static double consts[MAX_OPX];	/* constants */
static int nconsts;		/* number of consts[] in actual use */

/* these are global just for easy/rapid access */
static int parens_nest;	/* to check that parens end up nested */
static char *err_msg;   /* caller provides storage; we point at it with this */
static char *cexpr, *lcexpr; /* pointers that move along caller's expression */

/* compile the given c-style expression.
 * return 0 if ok, else return -1 and a reason message in errbuf.
 */
int
compileExpr (char *exp, char *errbuf)
{
	/* init the globals.
	 * also delete any flogs used in the previous program.
	 */
	cexpr = exp;
	err_msg = errbuf;
	pc = program;
	nvars = nconsts = 0;
	parens_nest = 0;

	pc = program;
	if (compile(0) == ERR) {
	    (void) sprintf (err_msg + strlen(err_msg), " near `%.10s'", lcexpr);
	    return (-1);
	}
	if (pc == program) {
	    (void) sprintf (err_msg, "Null program");
	    return (-1);
	}
	*pc++ = HALT;
	return (0);
}

/* execute the expression previously compiled with compileExpr().
 * return 0 with *vp set to the answer if ok, else return -1 with a reason
 * why not message in errbuf.
 */
int
evalExpr (double *vp, char *errbuf)
{
	err_msg = errbuf;
	sp = stack + MAX_STACK;	/* grows towards lower addresses */
	pc = program;
	return (execute(vp));
}

/* set the value for an operand with the given name to the given value.
 * return 0 if found else -1.
 */
int
setOperand (char *name, double valu)
{
	int i;

	for (i = 0; i < nvars; i++) {
	    if (strcmp (name, vars[i].name) == 0) {
		vars[i].v = valu;
		vars[i].set = 1;
		return(0);
	    }
	}

	return (-1);
}

/* return 0 if all operands are set, else -1 */
int 
allOperandsSet ()
{
	int i;

	for (i = 0; i < nvars; i++)
	    if (!vars[i].set)
		return (-1);
	return (0);
}

/* return a malloced array of each operand name.
 * N.B. caller must free array, and not modify names.
 */
int
getAllOperands (char ***ops)
{
	int i;

	*ops = (char **) malloc (nvars * sizeof(char *));

	for (i = 0; i < nvars; i++)
	    (*ops)[i] = vars[i].name;

	return (nvars);
}

/* return a malloced array of each initialized operand name.
 * N.B. caller must free array, and not modify the names.
 */
int
getSetOperands (char ***ops)
{
	int i, n;

	*ops = (char **) malloc (nvars * sizeof(char *));

	for (n = i = 0; i < nvars; i++)
	    if (vars[i].set)
		(*ops)[n++] = vars[i].name;

	return (n);
}

/* return a malloced array of each uninitialized operand name.
 * N.B. caller must free array, and not modify the names.
 */
int
getUnsetOperands (char ***ops)
{
	int i, n;

	*ops = (char **) malloc (nvars * sizeof(char *));

	for (n = i = 0; i < nvars; i++)
	    if (!vars[i].set)
		(*ops)[n++] = vars[i].name;

	return (n);
}

/* called when each different field is written.
 * this is just called by srch_log() to hide the fact from users of srch*
 * that srch is really using our vars array to store values.
 * since this gets called for all fields, it's not an error to not find name.
 * don't stop when see the first one because a term might appear more than once.
 */
void
compiler_log (name, value)
char *name;
double value;
{
	Var *vp;

	for (vp = vars; vp < &vars[nvars]; vp++)
	    if (vp->name && strcmp (vp->name, name) == 0)
		vp->v = value;
}

/* get and return the opcode corresponding to the next token.
 * leave with lcexpr pointing at the new token, cexpr just after it.
 * also watch for mismatches parens and proper operator/operand alternation.
 */
static int
next_token ()
{
	static const char toomv[] = "More than %d variables";
	static const char toomc[] = "More than %d constants";
	static const char badop[] = "Illegal operator";
	int tok = ERR;	/* just something illegal */
	char c;

	while (isspace(c = *cexpr))
	    cexpr++;
	lcexpr = cexpr++;

	/* mainly check for a binary operator */
	switch (c) {
	case ',': tok = COMMA; break;
	case '\0': --cexpr; tok = HALT; break; /* keep returning HALT */
	case '+': tok = ADD; break; /* compiler knows when it's really unary */
	case '-': tok = SUB; break; /* compiler knows when it's really negate */
	case '*': tok = MULT; break;
	case '/': tok = DIV; break;
	case '(': parens_nest++; tok = LPAREN; break;
	case ')':
	    if (--parens_nest < 0) {
	        (void) sprintf (err_msg, "Too many right parens");
		return (ERR);
	    } else
		tok = RPAREN;
	    break;
	case '|':
	    if (*cexpr == '|') { cexpr++; tok = OR; }
            else { (void) strcpy (err_msg, badop); return (ERR); }
	    break;
	case '&':
	    if (*cexpr == '&') { cexpr++; tok = AND; }
            else { (void) strcpy (err_msg, badop); return (ERR); }
	    break;
	case '=':
	    if (*cexpr == '=') { cexpr++; tok = EQ; }
            else { (void) strcpy (err_msg, badop); return (ERR); }
	    break;
	case '!':
	    if (*cexpr == '=') { cexpr++; tok = NE; } else { tok = NOT; }
	    break;
	case '<':
	    if (*cexpr == '=') { cexpr++; tok = LE; }
	    else tok = LT;
	    break;
	case '>':
	    if (*cexpr == '=') { cexpr++; tok = GE; }
	    else tok = GT;
	    break;
	}

	if (tok != ERR)
	    return (tok);

	/* not op so check for a constant, variable or function */
	if (isdigit(c) || c == '.') {
	    /* looks like a constant.
	     * leading +- already handled
	     */
	    if (nconsts > MAX_OPX) {
		(void) sprintf (err_msg, toomc, MAX_OPX);
		return (ERR);
	    }
	    consts[nconsts] = atof (lcexpr);
	    tok = CONST | (nconsts++ << OP_SHIFT);
	    skip_double();
	} else if (isalpha(c)) {
	    /* check list of functions */
	    tok = chk_funcs();
	    if (tok == ERR) {
		(void) sprintf (err_msg, "Bad function");
		return (ERR);
	    }
	} else if (c == '"') {
	    /* a variable */
	    if (nvars > MAX_OPX) {
		(void) sprintf (err_msg, toomv, MAX_OPX);
		return (ERR);
	    }
	    if (parse_fieldname (vars[nvars].name, MAXFLDLEN) < 0) {
		(void) sprintf (err_msg, "Bad field");
		return (ERR);
	    } else
		tok = VAR | (nvars++ << OP_SHIFT);
	}

	if (tok != ERR)
	    return (tok);

	/* what the heck is it? */
	(void) sprintf (err_msg, "Syntax error");
	return (ERR);
}

/* return funtion token, else ERR.
 * if find one, update cexpr too.
 */
static int
chk_funcs()
{
	static struct {
	    char *st_name;
	    int st_tok;
	} symtab[] = {
	     /* be sure to put short names AFTER longer ones.
	      * otherwise, order does not matter.
	      */
	    {"abs", ABS},	{"floor", FLOOR},	{"acos", ACOS},
	    {"asin", ASIN},	{"atan2", ATAN2},	{"atan", ATAN},
	    {"cos", COS},	{"degrad", DEGRAD},	{"exp", EXP},
	    {"log10", LOG10},	{"log", LOG},		{"pi", PITOK},
	    {"pow", POW},	{"raddeg", RADDEG},	{"sin", SIN},
	    {"sqrt", SQRT},	{"tan", TAN},
	};
	int i;

	for (i = 0; i < sizeof(symtab)/sizeof(symtab[0]); i++) {
	    int l = strlen (symtab[i].st_name);
	    if (strncmp (lcexpr, symtab[i].st_name, l) == 0) {
		cexpr += l-1;
		return (symtab[i].st_tok);
	    }
	}
	return (ERR);
}

/* move cexpr on past a double.
 * allow sci notation.
 * no need to worry about a leading '-' or '+' but allow them after an 'e'.
 * TODO: this handles all the desired cases, but also admits a bit too much
 *   such as things like 1eee2...3. geeze; to skip a double right you almost
 *   have to go ahead and crack it!
 */
static void
skip_double()
{
	int sawe = 0;	/* so we can allow '-' or '+' right after an 'e' */

	for (;;) {
	    char c = *cexpr;
	    if (isdigit(c) || c=='.' || (sawe && (c=='-' || c=='+'))) {
		sawe = 0;
		cexpr++;
	    } else if (c == 'e') {
		sawe = 1;
		cexpr++;
	    } else
		break;
	}
}

/* call this whenever you want to dig out the next (sub)expression.
 * keep compiling instructions as long as the operators are higher precedence
 * than prec (or until see HALT, COMMA or RPAREN) then return that
 * "look-ahead" token.
 * if error, fill in a message in err_msg[] and return ERR.
 */
static int
compile (prec)
int prec;
{
	int expect_binop = 0;	/* set after we have seen any operand.
				 * used by SUB so it can tell if it really 
				 * should be taken to be a NEG instead.
				 */
	int tok = next_token ();
	int *oldpc;

	for (;;) {
	    int p;
	    if (tok == ERR)
		return (ERR);
	    if (pc - program >= MAX_PROG) {
		(void) sprintf (err_msg, "Program is too long");
		return (ERR);
	    }

	    /* check for special things like functions, constants and parens */
            switch (tok & OP_MASK) {
	    case COMMA: return (tok);
            case HALT: return (tok);
	    case ADD:
		if (expect_binop)
		    break;	/* procede with binary addition */
		/* just skip a unary positive(?) */
		tok = next_token();
		if (tok == HALT) {
		    (void) sprintf (err_msg, "Term expected after unary +");
		    return (ERR);
		}
		continue;
	    case SUB:
		if (expect_binop)
		    break;	/* procede with binary subtract */
		oldpc = pc;
		tok = compile (UNI_PREC);
		if (oldpc == pc) {
		    (void) sprintf (err_msg, "Term expected after unary -");
		    return (ERR);
		}
		*pc++ = NEG;
		expect_binop = 1;
		continue;
	    case NOT:
		oldpc = pc;
		tok = compile (UNI_PREC);
		if (oldpc == pc) {
		    (void) sprintf (err_msg, "Term expected after unary !");
		    return (ERR);
		}
		*pc++ = NOT;
		expect_binop = 1;
		continue;
	    /* one-arg functions */
            case ABS: case FLOOR: case SIN: case COS: case TAN: case ASIN:
	    case ACOS: case ATAN: case DEGRAD: case RADDEG: case LOG:
	    case LOG10: case EXP: case SQRT:
		/* eat up the function's parenthesized argument */
		if (next_token() != LPAREN) {
		    (void) sprintf (err_msg, "expecting '(' after function");
		    return (ERR);
		}
		oldpc = pc;
		if (compile (0) != RPAREN || oldpc == pc) {
		    (void) sprintf (err_msg, "1-arg function arglist error");
		    return (ERR);
		}
		*pc++ = tok;
		tok = next_token();
		expect_binop = 1;
		continue;
	    /* two-arg functions */
	    case POW: case ATAN2:
		/* eat up the function's parenthesized arguments */
		if (next_token() != LPAREN) {
		    (void) sprintf (err_msg, "Saw a built-in function: expecting (");
		    return (ERR);
		}
		oldpc = pc;
		if (compile (0) != COMMA || oldpc == pc) {
		    (void) sprintf (err_msg, "1st of 2-arg function arglist error");
		    return (ERR);
		}
		oldpc = pc;
		if (compile (0) != RPAREN || oldpc == pc) {
		    (void) sprintf (err_msg, "2nd of 2-arg function arglist error");
		    return (ERR);
		}
		*pc++ = tok;
		tok = next_token();
		expect_binop = 1;
		continue;
	    /* constants and variables are just like 0-arg functions w/o ()'s */
            case CONST:
	    case PITOK:
	    case VAR:
		*pc++ = tok;
		tok = next_token();
		expect_binop = 1;
		continue;
            case LPAREN:
		oldpc = pc;
		if (compile (0) != RPAREN) {
		    (void) sprintf (err_msg, "Unmatched left paren");
		    return (ERR);
		}
		if (oldpc == pc) {
		    (void) sprintf (err_msg, "Null expression");
		    return (ERR);
		}
		tok = next_token();
		expect_binop = 1;
		continue;
            case RPAREN:
		return (RPAREN);
            }

	    /* everything else is a binary operator */
	    if (tok < ADD || tok > LE) {
		printf ("Bug! Bogus token: %d\n", tok);
		abort();
	    }
	    p = precedence[tok];
            if (p > prec) {
                int newtok;
		oldpc = pc;
                newtok = compile (p);
		if (newtok == ERR)
		    return (ERR);
		if (oldpc == pc) {
		    (void) strcpy (err_msg, "Term or factor expected");
		    return (ERR);
		}
                *pc++ = tok;
		expect_binop = 1;
                tok = newtok;
            } else
                return (tok);
        }
}

/* "run" the program[] compiled with compile().
 * if ok, return 0 and the final result,
 * else return -1 with a reason why not message in err_msg.
 */
static int
execute(result)
double *result;
{
	int instr; 

	do {
	    instr = *pc++;
	    switch (instr & OP_MASK) {
	    /* put these in numberic order so hopefully even the dumbest
	     * compiler will choose to use a jump table, not a cascade of ifs.
	     */
	    case HALT:	break;	/* outer loop will stop us */
	    case ADD:	sp[1] = sp[1] +  sp[0]; sp++; break;
	    case SUB:	sp[1] = sp[1] -  sp[0]; sp++; break;
	    case MULT:	sp[1] = sp[1] *  sp[0]; sp++; break;
	    case DIV:	sp[1] = sp[1] /  sp[0]; sp++; break;
	    case AND:	sp[1] = sp[1] && sp[0] ? 1 : 0; sp++; break;
	    case OR:	sp[1] = sp[1] || sp[0] ? 1 : 0; sp++; break;
	    case GT:	sp[1] = sp[1] >  sp[0] ? 1 : 0; sp++; break;
	    case GE:	sp[1] = sp[1] >= sp[0] ? 1 : 0; sp++; break;
	    case EQ:	sp[1] = sp[1] == sp[0] ? 1 : 0; sp++; break;
	    case NE:	sp[1] = sp[1] != sp[0] ? 1 : 0; sp++; break;
	    case LT:	sp[1] = sp[1] <  sp[0] ? 1 : 0; sp++; break;
	    case LE:	sp[1] = sp[1] <= sp[0] ? 1 : 0; sp++; break;
	    case NEG:	*sp = -*sp; break;
	    case NOT:	*sp = (double)(*sp==0); break;
	    case CONST:	*--sp = consts[instr >> OP_SHIFT]; break;
	    case VAR:	*--sp = vars[instr>>OP_SHIFT].v; break;
	    case PITOK:	*--sp = 4.0*atan(1.0); break;
	    case ABS:	*sp = fabs (*sp); break;
	    case FLOOR:	*sp = floor (*sp); break;
	    case SIN:	*sp = sin (*sp); break;
	    case COS:	*sp = cos (*sp); break;
	    case TAN:	*sp = tan (*sp); break;
	    case ASIN:	*sp = asin (*sp); break;
	    case ACOS:	*sp = acos (*sp); break;
	    case ATAN:	*sp = atan (*sp); break;
	    case DEGRAD:*sp *= atan(1.0)/45.0; break;
	    case RADDEG:*sp *= 45.0/atan(1.0); break;
	    case LOG:	*sp = log (*sp); break;
	    case LOG10:	*sp = log10 (*sp); break;
	    case EXP:	*sp = exp (*sp); break;
	    case SQRT:	*sp = sqrt (*sp); break;
	    case POW:	sp[1] = pow (sp[1], sp[0]); sp++; break;
	    case ATAN2:	sp[1] = atan2 (sp[1], sp[0]); sp++; break;
	    default:
		(void) sprintf (err_msg, "Bug! bad opcode: 0x%x", instr);
		return (-1);
	    }
	    if (sp < stack) {
		(void) sprintf (err_msg, "Runtime stack overflow");
		return (-1);
	    } else if (sp - stack > MAX_STACK) {
		(void) sprintf (err_msg, "Bug! runtime stack underflow");
		return (-1);
	    }
	} while (instr != HALT);

	/* result should now be on top of stack */
	if (sp != &stack[MAX_STACK - 1]) {
        (void) sprintf (err_msg, "Bug! stack has %ld items",
							MAX_STACK - (sp-stack));
	    return (-1);
	}
	*result = *sp;
	return (0);
}

/* starting with lcexpr pointing at a string expected to be a field name,
 * ie, at a '"', fill into up to the next '"' into name[], including trailing 0.
 * if there IS no '"' within len-1 chars, return -1, else 0.
 * the only sanity check is the string contains two dots.
 * when return, leave lcexpr alone but move cexpr to just after the second '"'.
 */
static int
parse_fieldname (name, len)
char name[];
int len;
{
	char c = '\0';
	int ndots = 0;

	cexpr = lcexpr + 1;
	while (--len > 0 && (c = *cexpr++) != '"' && c) {
	    *name++ = c;
	    ndots += c == '.';
	}
	if (len == 0 || c != '"' || ndots != 2)
	    return (-1);
	*name = '\0';
	return (0);
}

/* For RCS Only -- Do Not Edit */
static char *rcsid[2] = {(char *)rcsid, "@(#) $RCSfile: compiler.c,v $ $Date: 2005/11/21 21:33:32 $ $Revision: 1.1 $ $Name:  $"};