File: simple-lexers.c

package info (click to toggle)
saml 970418-9
links: PTS
area: main
in suites: woody
size: 1,188 kB
ctags: 1,703
sloc: ansic: 17,186; sh: 2,573; yacc: 497; perl: 264; makefile: 242; python: 242
file content (179 lines) | stat: -rw-r--r-- 3,986 bytes
parent folder | download | duplicates (3)
/*
 * Copyright 1996 Thierry Bousch
 * Licensed under the Gnu Public License, Version 2
 *
 * $Id: simple-lexers.c,v 1.5 1996/09/15 16:03:56 bousch Exp $
 *
 * Two lexers, to read from a file or a region of memory.
 */

#include <ctype.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "saml-parse.h"
#include "saml-util.h"

/*
 * Text of the token most recently scanned.  The buffer is grown with
 * realloc() by append_to_token() and is NUL-terminated whenever the
 * lexer returns STOK_INTEGER or STOK_LITERAL.
 */
char *saml_token = NULL;
/*
 * Currently selected lexer; installed by saml_init_lexer_fd() or
 * saml_init_lexer_mem().  NULL until one of them has been called.
 */
int (*saml_lexer)(void) = NULL;

/*
 * toklen:     number of bytes already stored in saml_token for the
 *             token being built.
 * tok_maxlen: number of bytes the lexer assumes are available in
 *             saml_token; append_to_token() enlarges the buffer once
 *             toklen reaches this value.
 */
static int toklen, tok_maxlen = 0;
/*
 * Forward declaration of the scanner shared by both lexers; it is
 * parameterized by a "read one character" and a "push one character
 * back" callback.
 */
static int saml_common_lexer (int (*getc)(void), void (*ungetc)(int));

/*
 * State and operations for the "file" lexer
 */
/* Stream the "file" lexer reads from; set by saml_init_lexer_fd(). */
static FILE *flx_fd;

/*
 * Character source for the "file" lexer: fetch the next character of
 * flx_fd.  Returns the character as an int, or EOF when getc() does.
 */
static int getc_fd (void)
{
	int ch = getc(flx_fd);

	return ch;
}

/*
 * Push-back operation for the "file" lexer: hand the character `c'
 * back to flx_fd through stdio.  The result of ungetc() is not checked.
 */
static void ungetc_fd (int c)
{
	(void) ungetc(c, flx_fd);
}

/*
 * Lexer entry point for file input: run the shared scanner with the
 * stdio-backed read and push-back callbacks and return its token code.
 */
static int saml_lexer_fd (void)
{
	int token;

	token = saml_common_lexer(getc_fd, ungetc_fd);
	return token;
}

/*
 * Select the "file" lexer and make it read from `fd'.
 *
 * The recorded buffer capacity is re-derived from the current contents
 * of saml_token: a NUL-terminated string occupies at least
 * strlen()+1 bytes, and no buffer at all means a capacity of zero.
 * NOTE(review): presumably this is done because saml_token may have
 * been replaced by the caller since the last scan -- confirm.
 */
void saml_init_lexer_fd (FILE *fd)
{
	if (saml_token != NULL)
		tok_maxlen = 1 + strlen(saml_token);
	else
		tok_maxlen = 0;

	saml_lexer = saml_lexer_fd;
	flx_fd = fd;
}

/*
 * State and operations for the "memory" lexer
 */
/*
 * Region scanned by the "memory" lexer: first byte, one past the last
 * byte, and the current read position.  Set by saml_init_lexer_mem().
 * The lexer only ever reads through these pointers, hence the const
 * qualification.
 */
static const unsigned char *mlx_start, *mlx_end, *mlx_curr;

/*
 * Character source for the "memory" lexer.  Returns the next byte of
 * the region as a non-negative int and advances the read position, or
 * returns EOF (without advancing) once the end has been reached.
 */
static int getc_mem (void)
{
	if (mlx_curr >= mlx_end)
		return EOF;
	return (int) *mlx_curr++;
}

/*
 * Push-back operation for the "memory" lexer: step the read position
 * back by one byte.  Nothing happens when `c' is EOF or when the read
 * position is already at the start of the region.  The value of `c' is
 * otherwise unused; the region itself is never modified.
 */
static void ungetc_mem (int c)
{
	if (c == EOF)
		return;
	if (mlx_curr <= mlx_start)
		return;
	mlx_curr -= 1;
}

/*
 * Lexer entry point for in-memory input: run the shared scanner with
 * the memory-backed read and push-back callbacks and return its token
 * code.
 */
static int saml_lexer_mem (void)
{
	int token;

	token = saml_common_lexer(getc_mem, ungetc_mem);
	return token;
}

/*
 * Select the "memory" lexer and make it scan the `length' bytes that
 * begin at `start'.  The region is only referenced, not copied.
 *
 * The recorded buffer capacity is re-derived from the current contents
 * of saml_token: a NUL-terminated string occupies at least
 * strlen()+1 bytes, and no buffer at all means a capacity of zero.
 * NOTE(review): presumably this is done because saml_token may have
 * been replaced by the caller since the last scan -- confirm.
 */
void saml_init_lexer_mem (const void *start, size_t length)
{
	unsigned char *base = (unsigned char *) start;

	mlx_start = base;
	mlx_curr = base;
	mlx_end = base + length;

	if (saml_token != NULL)
		tok_maxlen = 1 + strlen(saml_token);
	else
		tok_maxlen = 0;

	saml_lexer = saml_lexer_mem;
}

/*
 * The lexical analyser itself
 */
/*
 * Store `c' at the end of the token under construction, enlarging the
 * saml_token buffer first when it is full.
 *
 * The buffer grows by (toklen + 16) bytes each time.  The new pointer
 * and the new capacity are committed only after realloc() succeeded, so
 * a failed allocation leaves saml_token and tok_maxlen describing the
 * old, still valid buffer when panic_out_of_memory() is called.
 * NOTE(review): panic_out_of_memory() is assumed not to return --
 * confirm against its definition.
 */
static void append_to_token (int c)
{
	if (toklen >= tok_maxlen) {
		int new_size = tok_maxlen + toklen + 16;
		char *grown = realloc(saml_token, new_size);

		if (!grown) panic_out_of_memory();
		saml_token = grown;
		tok_maxlen = new_size;
	}
	saml_token[toklen++] = (char) c;
}

/*
 * Append the rest of a bracketed group to the current token.
 *
 * The caller has already appended the opening '['.  Characters are read
 * from `read_ch' and appended until the matching ']' has been stored
 * (nested brackets are balanced) or the input is exhausted.
 *
 * Returns the last character read: ']' when the group was closed, EOF
 * when the input ended first (EOF itself is not appended).
 */
static int scan_bracket_group (int (*read_ch)(void))
{
	int c, depth = 1;

	do {
		c = (*read_ch)();
		if (c == EOF)
			break;
		if (c == '[')
			++depth;
		else if (c == ']')
			--depth;
		append_to_token(c);
	} while (depth > 0);

	return c;
}

/*
 * The scanner shared by the "file" and "memory" lexers.
 *
 *   read_ch    returns the next input character, or EOF
 *   unread_ch  pushes back the character that was just read
 *
 * Returns
 *   STOK_EOF      at end of input;
 *   STOK_INTEGER  for a run of decimal digits;
 *   STOK_LITERAL  for a bracketed [...] form, or for an identifier
 *                 (letters, digits, '_', not starting with a digit)
 *                 optionally followed by bracketed groups;
 *   '*'           for a '.';
 *   the character itself for anything else.
 * Whitespace and NUL bytes between tokens are skipped.  For
 * STOK_INTEGER and STOK_LITERAL the token text is left, NUL-terminated,
 * in saml_token.  A bracketed group that is still open at end of input
 * is returned as it stands, without its closing bracket.
 */
static int saml_common_lexer (int (*read_ch)(void), void (*unread_ch)(int))
{
	int c;

	for (;;) {
		c = (*read_ch)();
		switch (c) {
		    case EOF:
			/* End of input */
			return STOK_EOF;

		    case '\0': case '\r':
		    case '\n': case '\f':
		    case '\t': case '\v':
		    case ' ':
			/* Ignore whitespace, try the next character */
			continue;

		    case '.':
			/* Another symbol for multiplication */
			return '*';

		    case '0': case '1': case '2': case '3': case '4':
		    case '5': case '6': case '7': case '8': case '9':
			/* Number: collect the whole run of digits */
			toklen = 0;
			do {
				append_to_token(c);
				c = (*read_ch)();
			} while (isdigit(c));
			/* Push back the first non-digit */
			(*unread_ch)(c);
			append_to_token(0);
			return STOK_INTEGER;

		    case '[':
			/* FORM literal: everything up to the matching ']' */
			toklen = 0;
			append_to_token(c);
			scan_bracket_group(read_ch);
			append_to_token(0);
			return STOK_LITERAL;

		    case 'a': case 'b': case 'c': case 'd': case 'e':
		    case 'f': case 'g': case 'h': case 'i': case 'j':
		    case 'k': case 'l': case 'm': case 'n': case 'o':
		    case 'p': case 'q': case 'r': case 's': case 't':
		    case 'u': case 'v': case 'w': case 'x': case 'y':
		    case 'z': case '_':
		    case 'A': case 'B': case 'C': case 'D': case 'E':
		    case 'F': case 'G': case 'H': case 'I': case 'J':
		    case 'K': case 'L': case 'M': case 'N': case 'O':
		    case 'P': case 'Q': case 'R': case 'S': case 'T':
		    case 'U': case 'V': case 'W': case 'X': case 'Y':
		    case 'Z':
			/* Literal: the name itself... */
			toklen = 0;
			do {
				append_to_token(c);
				c = (*read_ch)();
			} while (isascii(c) && (c == '_' || isalnum(c)));
			/* ...followed by any number of bracketed groups */
			while (c == '[') {
				append_to_token(c);
				c = scan_bracket_group(read_ch);
				/*
				 * Look at the character after the group, but
				 * never ask for more input once EOF was seen.
				 */
				if (c != EOF)
					c = (*read_ch)();
			}
			/* Push back the character that ended the literal */
			(*unread_ch)(c);
			append_to_token(0);
			return STOK_LITERAL;

		    default:
			/* Any other character stands for itself */
			return c;
		}
	}
}