1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179
|
/*
* Copyright 1996 Thierry Bousch
* Licensed under the Gnu Public License, Version 2
*
* $Id: simple-lexers.c,v 1.5 1996/09/15 16:03:56 bousch Exp $
*
* Two lexers, to read from a file or a region of memory.
*/
#include <ctype.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "saml-parse.h"
#include "saml-util.h"
/* Text of the last integer or literal token scanned, NUL-terminated;
 * the buffer is grown with realloc() by append_to_token(). */
char *saml_token = NULL;
/* Lexer currently selected; set by saml_init_lexer_fd() or
 * saml_init_lexer_mem(). */
int (*saml_lexer)(void) = NULL;
/* toklen: number of bytes stored in saml_token so far.
 * tok_maxlen: capacity assumed for the saml_token buffer. */
static int toklen, tok_maxlen = 0;
/* Scanner shared by both lexers; it reads and pushes back characters
 * only through the two callbacks it is given. */
static int saml_common_lexer (int (*getc)(void), void (*ungetc)(int));
/*
* State and operations for the "file" lexer
*/
/* Stream the "file" lexer reads from; assigned by saml_init_lexer_fd(). */
static FILE *flx_fd;
/* Read callback of the "file" lexer: next character of flx_fd, or EOF. */
static int getc_fd (void)
{
	int ch;

	ch = getc(flx_fd);
	return ch;
}
/* Push-back callback of the "file" lexer: hands `c' back to flx_fd. */
static void ungetc_fd (int c)
{
	/* The result of ungetc() is intentionally not examined. */
	(void) ungetc(c, flx_fd);
}
/* The "file" lexer: runs the common scanner over the flx_fd callbacks. */
static int saml_lexer_fd (void)
{
	int token;

	token = saml_common_lexer(&getc_fd, &ungetc_fd);
	return token;
}
/*
 * Select the "file" lexer: after this call saml_lexer points to a
 * function that scans its input from the stream `fd'.
 *
 * The capacity recorded for the token buffer is re-derived from the
 * string currently held in saml_token (zero when there is none), so the
 * next append never assumes more room than that string occupies.
 */
void saml_init_lexer_fd (FILE *fd)
{
	flx_fd = fd;
	saml_lexer = saml_lexer_fd;

	if (saml_token != NULL) {
		/* The size_t length is deliberately narrowed to the int counter. */
		tok_maxlen = (int) (1 + strlen(saml_token));
	} else {
		tok_maxlen = 0;
	}
}
/*
* State and operations for the "memory" lexer
*/
/* Region scanned by the "memory" lexer: first byte, one past the last
 * byte, and the current read position; set by saml_init_lexer_mem(). */
static unsigned char *mlx_start, *mlx_end, *mlx_curr;
/*
 * Read callback of the "memory" lexer: returns the byte at the read
 * position and advances past it, or EOF once the region is exhausted.
 */
static int getc_mem (void)
{
	if (mlx_curr >= mlx_end)
		return EOF;

	return (int) *mlx_curr++;
}
/*
 * Push-back callback of the "memory" lexer: steps the read position back
 * by one byte.  Nothing happens for EOF or when already at the start of
 * the region; the value of `c' is otherwise not used.
 */
static void ungetc_mem (int c)
{
	if (c == EOF || mlx_curr <= mlx_start)
		return;

	mlx_curr--;
}
/* The "memory" lexer: runs the common scanner over the region callbacks. */
static int saml_lexer_mem (void)
{
	int token;

	token = saml_common_lexer(&getc_mem, &ungetc_mem);
	return token;
}
/*
 * Select the "memory" lexer: after this call saml_lexer points to a
 * function that scans the `length' bytes beginning at `start'.
 *
 * The capacity recorded for the token buffer is re-derived from the
 * string currently held in saml_token (zero when there is none), so the
 * next append never assumes more room than that string occupies.
 */
void saml_init_lexer_mem (const void *start, size_t length)
{
	/* The cast only matches the declared type of the region pointers. */
	mlx_start = (unsigned char *) start;
	mlx_curr = mlx_start;
	mlx_end = mlx_start + length;
	saml_lexer = saml_lexer_mem;

	if (saml_token != NULL) {
		/* The size_t length is deliberately narrowed to the int counter. */
		tok_maxlen = (int) (1 + strlen(saml_token));
	} else {
		tok_maxlen = 0;
	}
}
/*
* The lexical analyser itself
*/
/*
 * Store `c' at position toklen of saml_token and advance toklen.
 * When the recorded capacity is used up, the buffer is first enlarged
 * by toklen + 16 bytes; panic_out_of_memory() is called if that fails.
 */
static void append_to_token (int c)
{
	if (tok_maxlen <= toklen) {
		tok_maxlen = tok_maxlen + toklen + 16;
		saml_token = realloc(saml_token, tok_maxlen);
		if (saml_token == NULL)
			panic_out_of_memory();
	}

	saml_token[toklen] = c;
	toklen++;
}
/*
 * Append a bracketed group to the token.  `open' is the '[' the caller
 * has just read; further characters are fetched with `read_char' and
 * appended until the matching ']' has been stored or the input ends.
 * Nested brackets are balanced by counting.
 */
static void scan_bracket_group (int open, int (*read_char)(void))
{
	int depth = 1;

	append_to_token(open);
	while (depth > 0) {
		int ch = (*read_char)();

		if (ch == EOF)
			break;
		if (ch == '[')
			++depth;
		else if (ch == ']')
			--depth;
		append_to_token(ch);
	}
}

/*
 * Scan one token, reading characters with `getc' and pushing back the
 * single look-ahead character with `ungetc'.
 *
 * Returns:
 *   STOK_EOF      when the input is exhausted;
 *   STOK_INTEGER  for a run of decimal digits;
 *   STOK_LITERAL  for a bracketed group, or for an identifier
 *                 (letters, digits, '_') optionally followed by
 *                 bracketed groups;
 *   '*'           for a '.';
 *   the character itself for anything else.
 * For STOK_INTEGER and STOK_LITERAL the NUL-terminated token text is
 * left in saml_token.  Whitespace and NUL bytes between tokens are
 * skipped.
 */
static int saml_common_lexer (int (*getc)(void), void (*ungetc)(int))
{
	int c;

	for (;;) {
		c = (*getc)();
		switch (c) {
		case EOF:
			/* End of input */
			return STOK_EOF;

		case '\0': case '\r':
		case '\n': case '\f':
		case '\t': case '\v':
		case ' ':
			/* Ignore whitespace: fetch the next character */
			continue;

		case '.':
			/* Another symbol for multiplication */
			return '*';

		case '0': case '1': case '2': case '3': case '4':
		case '5': case '6': case '7': case '8': case '9':
			/* Number: the case label guarantees one digit */
			toklen = 0;
			do {
				append_to_token(c);
				c = (*getc)();
			} while (isdigit(c));
			/* Push back the first non-digit */
			(*ungetc)(c);
			append_to_token(0);
			return STOK_INTEGER;

		case '[':
			/* FORM literal */
			toklen = 0;
			scan_bracket_group(c, getc);
			append_to_token(0);
			return STOK_LITERAL;

		case 'a': case 'b': case 'c': case 'd': case 'e':
		case 'f': case 'g': case 'h': case 'i': case 'j':
		case 'k': case 'l': case 'm': case 'n': case 'o':
		case 'p': case 'q': case 'r': case 's': case 't':
		case 'u': case 'v': case 'w': case 'x': case 'y':
		case 'z': case '_':
		case 'A': case 'B': case 'C': case 'D': case 'E':
		case 'F': case 'G': case 'H': case 'I': case 'J':
		case 'K': case 'L': case 'M': case 'N': case 'O':
		case 'P': case 'Q': case 'R': case 'S': case 'T':
		case 'U': case 'V': case 'W': case 'X': case 'Y':
		case 'Z':
			/* Literal: identifier characters first ... */
			toklen = 0;
			do {
				append_to_token(c);
				c = (*getc)();
				/*
				 * Only 7-bit characters may continue the name.
				 * The explicit range test stands in for the
				 * non-ISO isascii(); it also rejects EOF.
				 */
			} while (c >= 0 && c <= 0x7f
				 && (c == '_' || isalnum(c)));

			/* ... then any number of bracketed groups */
			while (c == '[') {
				scan_bracket_group(c, getc);
				/* Found the matching bracket or EOF */
				c = (*getc)();
			}
			/* Push back the character */
			(*ungetc)(c);
			append_to_token(0);
			return STOK_LITERAL;

		default:
			/* Any other character */
			return c;
		}
	}
}
|