using System;
using System.Text;
using System.Globalization;
namespace Monodoc.Ecma
{
public class EcmaUrlTokenizer : yyParser.yyInput
{
	// Sentinel returned by Read/Peek once the input string is exhausted.
	const char EndOfStream = (char)0;
	// Identifier length is artificially limited to 1024 bytes (512 UTF-16
	// chars) by implementations; unlike the previous stackalloc version,
	// this bound is now actually enforced while accumulating.
	const int MaxIdentifierLength = 512;

	string input;
	object val;            // value of the current token (identifier text or digit)
	int current_token;
	int current_pos;       // logical position, used for error reporting
	int real_current_pos;  // raw index into 'input'
	int identCount = 0;    // length of the identifier currently being accumulated

	public EcmaUrlTokenizer (string input)
	{
		this.input = input;
	}

	// True for characters that may begin an identifier: ASCII letters,
	// underscore, or any Unicode letter.
	static bool is_identifier_start_character (char c)
	{
		return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || c == '_' || Char.IsLetter (c);
	}

	// True for characters that may continue an identifier: letters, digits,
	// underscore, or Unicode connector punctuation.
	static bool is_identifier_part_character (char c)
	{
		if (c >= 'a' && c <= 'z')
			return true;
		if (c >= 'A' && c <= 'Z')
			return true;
		if (c == '_' || (c >= '0' && c <= '9'))
			return true;
		if (c < 0x80)
			return false;
		return Char.IsLetter (c) || Char.GetUnicodeCategory (c) == UnicodeCategory.ConnectorPunctuation;
	}

	// yyParser.yyInput: reports whether another token is available.
	public bool advance ()
	{
		return Peek () != EndOfStream;
	}

	public Object Value {
		get {
			return val;
		}
	}

	// yyParser.yyInput: value associated with the last token returned.
	public Object value ()
	{
		return val;
	}

	// yyParser.yyInput: returns the next token code, throwing on a
	// lexical error.
	public int token ()
	{
		int token = xtoken ();
		//Console.WriteLine ("Current token {0} with value {1}", token, val == null ? "(none)" : val.ToString ());
		if (token == Token.ERROR) {
			// BUGFIX: second placeholder was '{0}', so the url argument
			// was silently dropped from the message.
			throw new Exception (string.Format ("Error at position {0} parsing url '{1}'", current_pos, input));
		}
		current_token = token;
		return token;
	}

	// Core tokenizer: skips whitespace, maps single punctuation characters
	// to token codes, and defers everything else to
	// TokenizeIdentifierOrNumber.
	int xtoken ()
	{
		char next = Read ();
		while (char.IsWhiteSpace (next))
			next = Read ();
		current_pos++;
		val = null;
		switch (next) {
		case ',':
			return Token.COMMA;
		case '.':
			return Token.DOT;
		case '{':
		case '<':
			return Token.OP_GENERICS_LT;
		case '}':
		case '>':
			return Token.OP_GENERICS_GT;
		case '`':
			return Token.OP_GENERICS_BACKTICK;
		case '(':
			return Token.OP_OPEN_PAREN;
		case ')':
			return Token.OP_CLOSE_PAREN;
		case '+':
			return Token.INNER_TYPE_SEPARATOR;
		case ':':
			return Token.COLON;
		case '/':
			return Token.SLASH_SEPARATOR;
		case '[':
			return Token.OP_ARRAY_OPEN;
		case ']':
			return Token.OP_ARRAY_CLOSE;
		case '*':
			return Token.STAR;
		case '&':
			return Token.REF_ARG;
		case '@':
			return Token.OUT_ARG;
		case '$':
			return Token.EXPLICIT_IMPL_SEP;
		default:
			return TokenizeIdentifierOrNumber (next);
		}
	}

	int TokenizeIdentifierOrNumber (char current)
	{
		// We must first return the expression type which is an uppercase
		// letter and a colon; those first two characters are handed to the
		// grammar as raw character tokens.
		if (current_pos < 2) {
			val = null;
			return (int)current;
		}

		if (is_identifier_start_character (current) || current == '*') {
			// BUGFIX: the previous implementation stackalloc'ed 512 chars
			// but never checked identCount against that bound, overrunning
			// the stack buffer on identifiers longer than 512 characters.
			// A bounded StringBuilder is safe and needs no 'unsafe' block.
			var builder = new StringBuilder (MaxIdentifierLength);
			builder.Append (current);
			identCount = 1;
			char peek;
			while (identCount < MaxIdentifierLength
			       && (peek = Peek ()) != EndOfStream
			       && is_identifier_part_character (peek)) {
				builder.Append (Read ());
				++current_pos;
				++identCount;
			}
			val = builder.ToString ();
			return Token.IDENTIFIER;
		} else if (char.IsDigit (current)) {
			// Single digit; token value is its numeric value.
			val = current - '0';
			return Token.DIGIT;
		} else {
			val = null;
			return Token.ERROR;
		}
	}

	// Consumes and returns the next input character, or EndOfStream when
	// exhausted. Explicit bounds check replaces the original bare catch,
	// which used exceptions for ordinary control flow (and swallowed every
	// exception type). Null input still yields EndOfStream, matching the
	// old catch-all behavior.
	char Read ()
	{
		if (input == null || real_current_pos >= input.Length)
			return EndOfStream;
		return input[real_current_pos++];
	}

	// Returns the next input character without consuming it, or
	// EndOfStream when exhausted.
	char Peek ()
	{
		if (input == null || real_current_pos >= input.Length)
			return EndOfStream;
		return input[real_current_pos];
	}
}
}