File: ml.lex

package info (click to toggle)
mlton 20100608-2
  • links: PTS
  • area: main
  • in suites: squeeze
  • size: 34,980 kB
  • ctags: 69,089
  • sloc: ansic: 18,421; lisp: 2,879; makefile: 1,570; sh: 1,325; pascal: 256; asm: 97
file content (173 lines) | stat: -rw-r--r-- 9,216 bytes parent folder | download | duplicates (7)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
(* Copyright 1989 by AT&T Bell Laboratories *)
open ErrorMsg
type svalue = Tokens.svalue
type pos = int
type lexresult = (svalue,pos) Tokens.token
type lexarg = {comLevel : int ref, 
               lineNum : int ref,
               linePos : int list ref, (* offsets of lines in file *)
                charlist : string list ref,
                stringstart : int ref, (* start of current string or comment*)
               err : pos*pos -> ErrorMsg.severity -> string->unit}
type arg = lexarg
type ('a,'b) token = ('a,'b) Tokens.token
val eof = fn ({comLevel,err,linePos,stringstart,lineNum,charlist}:lexarg) => 
           let val pos = Integer.max(!stringstart+2, hd(!linePos))
            in if !comLevel>0 then err (!stringstart,pos) COMPLAIN
                                         "unclosed comment" 
                              else ();
               Tokens.EOF(pos,pos)
           end  
fun addString (charlist,s:string) = charlist := s :: (!charlist)
fun makeString charlist = (implode(rev(!charlist)) before charlist := nil)
fun makeHexInt sign s = let
      fun digit d = if (d < Ascii.uc_a) then (d - Ascii.zero)
            else (10 + (if (d < Ascii.lc_a) then (d - Ascii.uc_a) else (d - Ascii.lc_a)))
      in
        revfold (fn (c,a) => sign(a*16, digit(ord c))) (explode s) 0
      end
fun makeInt sign s =
    revfold (fn (c,a) => sign(a*10, ord c - Ascii.zero)) (explode s) 0
%% 
%s A S F;
%header (functor MLLexFun(structure Tokens : ML_TOKENS));
%arg ({comLevel,lineNum,err,linePos,charlist,stringstart});
idchars=[A-Za-z'_0-9];
id=[A-Za-z'_]{idchars}*;
ws=("\012"|[\t\ ])*;
sym=[!%&$+/:<=>?@~|#*`]|\\|\-|\^;
num=[0-9]+;
frac="."{num};
exp="E"(~?){num};
real=(~?)(({num}{frac}?{exp})|({num}{frac}{exp}?));
hexnum=[0-9a-fA-F]+;
%%
<INITIAL>{ws}   => (continue());
<INITIAL>\n     => (inc lineNum; linePos := yypos :: !linePos; continue());
<INITIAL>"*"    => (Tokens.ASTERISK(yypos,yypos+1));
<INITIAL>"|"    => (Tokens.BAR(yypos,yypos+1));
<INITIAL>":"    => (Tokens.COLON(yypos,yypos+1));
<INITIAL>"="    => (Tokens.EQUAL(yypos,yypos+1));
<INITIAL>"_"    => (Tokens.WILD(yypos,yypos+1));
<INITIAL>"#"    => (Tokens.HASH(yypos,yypos+1));
<INITIAL>","    => (Tokens.COMMA(yypos,yypos+1));
<INITIAL>"{"    => (Tokens.LBRACE(yypos,yypos+1));
<INITIAL>"}"    => (Tokens.RBRACE(yypos,yypos+1));
<INITIAL>"["    => (Tokens.LBRACKET(yypos,yypos+1));
<INITIAL>"]"    => (Tokens.RBRACKET(yypos,yypos+1));
<INITIAL>";"    => (Tokens.SEMICOLON(yypos,yypos+1));
<INITIAL>"("    => (Tokens.LPAREN(yypos,yypos+1));
<INITIAL>")"    => (Tokens.RPAREN(yypos,yypos+1));
<INITIAL>"and"  => (Tokens.AND(yypos,yypos+3));
<INITIAL>"abstraction"  => (Tokens.ABSTRACTION(yypos,yypos+11));
<INITIAL>"abstype"      => (Tokens.ABSTYPE(yypos,yypos+7));
<INITIAL>"->"           => (Tokens.ARROW(yypos,yypos+2));
<INITIAL>"as"           => (Tokens.AS(yypos,yypos+2));
<INITIAL>"case"         => (Tokens.CASE(yypos,yypos+4));
<INITIAL>"datatype"     => (Tokens.DATATYPE(yypos,yypos+8));
<INITIAL>"."            => (Tokens.DOT(yypos,yypos+1));
<INITIAL>"..."          => (Tokens.DOTDOTDOT(yypos,yypos+3));
<INITIAL>"else"         => (Tokens.ELSE(yypos,yypos+4));
<INITIAL>"end"          => (Tokens.END(yypos,yypos+3));
<INITIAL>"eqtype"       => (Tokens.EQTYPE(yypos,yypos+6));
<INITIAL>"exception"    => (Tokens.EXCEPTION(yypos,yypos+9));
<INITIAL>"do"           => (Tokens.DO(yypos,yypos+2));
<INITIAL>"=>"           => (Tokens.DARROW(yypos,yypos+2));
<INITIAL>"fn"           => (Tokens.FN(yypos,yypos+2));
<INITIAL>"fun"          => (Tokens.FUN(yypos,yypos+3));
<INITIAL>"functor"      => (Tokens.FUNCTOR(yypos,yypos+7));
<INITIAL>"handle"       => (Tokens.HANDLE(yypos,yypos+6));
<INITIAL>"if"           => (Tokens.IF(yypos,yypos+2));
<INITIAL>"in"           => (Tokens.IN(yypos,yypos+2));
<INITIAL>"include"      => (Tokens.INCLUDE(yypos,yypos+7));
<INITIAL>"infix"        => (Tokens.INFIX(yypos,yypos+5));
<INITIAL>"infixr"       => (Tokens.INFIXR(yypos,yypos+6));
<INITIAL>"let"          => (Tokens.LET(yypos,yypos+3));
<INITIAL>"local"        => (Tokens.LOCAL(yypos,yypos+5));
<INITIAL>"nonfix"       => (Tokens.NONFIX(yypos,yypos+6));
<INITIAL>"of"           => (Tokens.OF(yypos,yypos+2));
<INITIAL>"op"           => (Tokens.OP(yypos,yypos+2));
<INITIAL>"open"         => (Tokens.OPEN(yypos,yypos+4));
<INITIAL>"overload"     => (Tokens.OVERLOAD(yypos,yypos+8));
<INITIAL>"raise"        => (Tokens.RAISE(yypos,yypos+5));
<INITIAL>"rec"          => (Tokens.REC(yypos,yypos+3));
<INITIAL>"sharing"      => (Tokens.SHARING(yypos,yypos+7));
<INITIAL>"sig"          => (Tokens.SIG(yypos,yypos+3));
<INITIAL>"signature"    => (Tokens.SIGNATURE(yypos,yypos+9));
<INITIAL>"struct"       => (Tokens.STRUCT(yypos,yypos+6));
<INITIAL>"structure"    => (Tokens.STRUCTURE(yypos,yypos+9));
<INITIAL>"then"         => (Tokens.THEN(yypos,yypos+4));
<INITIAL>"type"         => (Tokens.TYPE(yypos,yypos+4));
<INITIAL>"val"          => (Tokens.VAL(yypos,yypos+3));
<INITIAL>"while"        => (Tokens.WHILE(yypos,yypos+5));
<INITIAL>"with"         => (Tokens.WITH(yypos,yypos+4));
<INITIAL>"withtype"     => (Tokens.WITHTYPE(yypos,yypos+8));
<INITIAL>"orelse"       => (Tokens.ORELSE(yypos,yypos+6));
<INITIAL>"andalso"      => (Tokens.ANDALSO(yypos,yypos+7));
<INITIAL>"import"       => (Tokens.IMPORT(yypos,yypos+6));
<INITIAL>"'"{idchars}*  => (Tokens.TYVAR(yytext, yypos, yypos+size yytext));
<INITIAL>({sym}+|{id})  => (Tokens.ID(yytext, yypos, yypos+size yytext));
<INITIAL>{real} => (Tokens.REAL(yytext,yypos,yypos+size yytext));
<INITIAL>[1-9][0-9]* => (Tokens.INT(makeInt (op +) yytext
                    handle Overflow => (err (yypos,yypos+size yytext)
                                          COMPLAIN "integer too large"; 1),
                        yypos,yypos+size yytext));
<INITIAL>{num}  => (Tokens.INT0(makeInt (op +) yytext
                    handle Overflow => (err (yypos,yypos+size yytext)
                                          COMPLAIN "integer too large"; 0),
                        yypos,yypos+size yytext));
<INITIAL>~{num} => (Tokens.INT0(makeInt (op -)
                                        (substring(yytext,1,size(yytext)-1))
                    handle Overflow => (err (yypos,yypos+size yytext)
                                         COMPLAIN "integer too large"; 0),
                        yypos,yypos+size yytext));
<INITIAL>"0x"{hexnum} => (
                    Tokens.INT0(makeHexInt (op +) (substring(yytext, 2, size(yytext)-2))
                        handle Overflow => (err (yypos,yypos+size yytext)
                                            COMPLAIN "integer too large"; 0),
                      yypos, yypos+size yytext));
<INITIAL>"~0x"{hexnum} => (
                    Tokens.INT0(makeHexInt (op -) (substring(yytext, 3, size(yytext)-3))
                        handle Overflow => (err (yypos,yypos+size yytext)
                                            COMPLAIN "integer too large"; 0),
                      yypos, yypos+size yytext));
<INITIAL>\"     => (charlist := [""]; stringstart := yypos;
                        YYBEGIN S; continue());
<INITIAL>"(*"   => (YYBEGIN A; stringstart := yypos; comLevel := 1; continue());
<INITIAL>\h     => (err (yypos,yypos) COMPLAIN "non-Ascii character"; continue());
<INITIAL>.      => (err (yypos,yypos) COMPLAIN "illegal token"; continue());
<A>"(*"         => (inc comLevel; continue());
<A>\n           => (inc lineNum; linePos := yypos :: !linePos; continue());
<A>"*)" => (dec comLevel; if !comLevel=0 then YYBEGIN INITIAL else (); continue());
<A>.            => (continue());
<S>\"           => (YYBEGIN INITIAL; Tokens.STRING(makeString charlist,
                                !stringstart,yypos+1));
<S>\n           => (err (!stringstart,yypos) COMPLAIN "unclosed string";
                    inc lineNum; linePos := yypos :: !linePos;
                    YYBEGIN INITIAL; Tokens.STRING(makeString charlist,!stringstart,yypos));
<S>[^"\\\n]*    => (addString(charlist,yytext); continue());
<S>\\\n         => (inc lineNum; linePos := yypos :: !linePos;
                    YYBEGIN F; continue());
<S>\\[\ \t]     => (YYBEGIN F; continue());
<F>\n           => (inc lineNum; linePos := yypos :: !linePos; continue());
<F>{ws}         => (continue());
<F>\\           => (YYBEGIN S; stringstart := yypos; continue());
<F>.            => (err (!stringstart,yypos) COMPLAIN "unclosed string"; 
                    YYBEGIN INITIAL; Tokens.STRING(makeString charlist,!stringstart,yypos+1));
<S>\\t          => (addString(charlist,"\t"); continue());
<S>\\n          => (addString(charlist,"\n"); continue());
<S>\\\\         => (addString(charlist,"\\"); continue());
<S>\\\"         => (addString(charlist,chr(Ascii.dquote)); continue());
<S>\\\^[@-_]    => (addString(charlist,chr(ordof(yytext,2)-ord("@"))); continue());
<S>\\[0-9]{3}   =>
 (let val x = ordof(yytext,1)*100
             +ordof(yytext,2)*10
             +ordof(yytext,3)
             -(Ascii.zero*111)
  in (if x>255
      then err (yypos,yypos+4) COMPLAIN "illegal ascii escape"
      else addString(charlist,chr x);
      continue())
  end);
<S>\\           => (err (yypos,yypos+1) COMPLAIN "illegal string escape"; 
                    continue());