1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164
|
// $Id$
using System;
using System.Text;
using System.IO;
namespace Lisp {
/// <summary>
/// Tokenizer for Lisp-style S-expression text. It reads characters from a
/// <see cref="StreamReader"/> and hands out one token per call to
/// <see cref="GetNextToken"/>; the text of the most recent token is available
/// through <see cref="TokenString"/>.
/// </summary>
public class Lexer {
    /// <summary>Maximum number of characters requested from the stream per read.</summary>
    private const int ReadChunkSize = 1024;

    private StreamReader stream;
    // One slot larger than ReadChunkSize so the end-of-input padding space always fits.
    private char[] buffer;
    // Current look-ahead character; every token routine leaves it on the first
    // character that does not belong to the token just read.
    private char c;
    int bufpos;
    int buflen;
    // Set once the stream reported end of input and the padding space was queued.
    private bool eofReached;

    /// <summary>Raised internally by NextChar when no more characters are available.</summary>
    public class EOFException : Exception {
    };

    /// <summary>Kinds of tokens produced by <see cref="GetNextToken"/>.</summary>
    public enum TokenType {
        EOF,
        OPEN_PAREN,
        CLOSE_PAREN,
        SYMBOL,
        STRING,
        INTEGER,
        REAL,
        TRUE,
        FALSE
    };

    // Initialised eagerly so TokenString is usable before the first token is read.
    private StringBuilder TokenStringBuilder = new StringBuilder();

    /// <summary>
    /// Text of the token returned by the last <see cref="GetNextToken"/> call
    /// (empty for parentheses and for EOF).
    /// </summary>
    public string TokenString {
        get { return TokenStringBuilder.ToString(); }
    }

    /// <summary>
    /// Zero-based count of raw newline characters consumed so far, i.e. the
    /// line on which the most recently returned token ended.
    /// </summary>
    public int LineNumber;

    /// <summary>Creates a lexer over <paramref name="stream"/> and primes the look-ahead character.</summary>
    /// <param name="stream">Source of the text to tokenize; must not be null.</param>
    /// <exception cref="ArgumentNullException"><paramref name="stream"/> is null.</exception>
    public Lexer(StreamReader stream) {
        if(stream == null)
            throw new ArgumentNullException("stream");
        this.stream = stream;
        buffer = new char[ReadChunkSize + 1];
        NextChar();
    }

    /// <summary>
    /// Skips whitespace and ';' comments, then reads the next token.
    /// </summary>
    /// <returns>
    /// The type of the token read, or <see cref="TokenType.EOF"/> when the
    /// input is exhausted (also when it ends inside a comment or string).
    /// Calling again after EOF keeps returning EOF.
    /// </returns>
    /// <exception cref="Exception">A '#' constant other than #t or #f was found.</exception>
    public TokenType GetNextToken() {
        TokenStringBuilder = new StringBuilder();
        try {
            // Loop instead of recursing so long runs of comment lines cannot
            // grow the call stack.
            while(true) {
                SkipWhiteSpace();
                if(c != ';')
                    break;
                SkipComment();
            }

            switch(c) {
                case '(':
                    NextChar();
                    return TokenType.OPEN_PAREN;
                case ')':
                    NextChar();
                    return TokenType.CLOSE_PAREN;
                case '"':
                    return ReadString();
                case '#':
                    return ReadConstant();
                default:
                    return ReadAtom();
            }
        } catch(EOFException) {
            return TokenType.EOF;
        }
    }

    // Advances past whitespace, counting each newline as it is consumed.
    private void SkipWhiteSpace() {
        while(Char.IsWhiteSpace(c)) {
            if(c == '\n')
                LineNumber++;
            NextChar();
        }
    }

    // Consumes a ';' comment up to and including the terminating newline.
    private void SkipComment() {
        do {
            NextChar();
        } while(c != '\n');
        LineNumber++;
        NextChar();
    }

    // Reads a double-quoted string; \n and \t are translated, any other
    // escaped character is taken literally.
    private TokenType ReadString() {
        while(true) {
            NextChar();
            if(c == '"')
                break;
            char literal = c;
            if(c == '\\') {
                NextChar();
                switch(c) {
                    case 'n':
                        literal = '\n';
                        break;
                    case 't':
                        literal = '\t';
                        break;
                    default:
                        literal = c;
                        break;
                }
            }
            // Only raw newlines in the input advance the line counter; the
            // two-character escape "\n" does not.
            if(c == '\n')
                LineNumber++;
            TokenStringBuilder.Append(literal);
        }
        NextChar();
        return TokenType.STRING;
    }

    // Reads a '#' constant: #t is TRUE, #f is FALSE, anything else is an error.
    private TokenType ReadConstant() {
        NextChar();
        while(Char.IsLetterOrDigit(c) || c == '_') {
            TokenStringBuilder.Append(c);
            NextChar();
        }
        if(TokenString == "t")
            return TokenType.TRUE;
        if(TokenString == "f")
            return TokenType.FALSE;
        throw new Exception("Unknown constant '"
            + TokenString + "'");
    }

    // Reads a symbol or number up to the next delimiter and classifies it.
    private TokenType ReadAtom() {
        bool numeric_start = Char.IsDigit(c) || c == '-';
        bool have_nondigits = false;
        bool have_digits = false;
        int have_floating_point = 0;
        bool first = true;
        do {
            if(Char.IsDigit(c))
                have_digits = true;
            else if(c == '.')
                have_floating_point++;
            else if(!(first && c == '-'))
                // Anything besides digits, '.', and a leading minus sign
                // (e.g. the '-' in "1-2") makes the token a symbol.
                have_nondigits = true;
            first = false;
            TokenStringBuilder.Append(c);
            NextChar();
        } while(!IsDelimiter(c));

        if(!numeric_start || have_nondigits || !have_digits
                || have_floating_point > 1)
            return TokenType.SYMBOL;
        if(have_floating_point == 1)
            return TokenType.REAL;
        return TokenType.INTEGER;
    }

    // True for characters that end a symbol or number.
    private static bool IsDelimiter(char ch) {
        return Char.IsWhiteSpace(ch) || ch == '\"' || ch == '('
            || ch == ')' || ch == ';';
    }

    // Loads the next character into c, refilling the buffer when it runs out.
    // Throws EOFException once the input and its padding space are consumed.
    private void NextChar() {
        if(bufpos >= buflen) {
            if(eofReached || !stream.BaseStream.CanRead)
                throw new EOFException();
            buflen = stream.Read(buffer, 0, ReadChunkSize);
            bufpos = 0;
            // A read of zero characters means end of input. Append one extra
            // ' ' so a token that runs up to the very end of the file is
            // still terminated by a delimiter; this avoids the need for an
            // unget function.
            if(buflen == 0) {
                eofReached = true;
                buffer[buflen] = ' ';
                ++buflen;
            }
        }
        c = buffer[bufpos++];
    }
}
}
|