1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175
|
--
-- Lexical syntax for Haskell 98.
--
-- (c) Simon Marlow 2003, with the caveat that much of this is
-- translated directly from the syntax in the Haskell 98 report.
--
-- This isn't a complete Haskell 98 lexer - it doesn't handle layout
-- for one thing. However, it could be adapted with a small
-- amount of effort.
--
{
module Main (main) where
import Data.Char (chr)
}
%wrapper "monad"
$whitechar = [ \t\n\r\f\v]
$special = [\(\)\,\;\[\]\`\{\}]
$ascdigit = 0-9
$unidigit = [] -- TODO
$digit = [$ascdigit $unidigit]
$ascsymbol = [\!\#\$\%\&\*\+\.\/\<\=\>\?\@\\\^\|\-\~]
$unisymbol = [] -- TODO
$symbol = [$ascsymbol $unisymbol] # [$special \_\:\"\']
$large = [A-Z \xc0-\xd6 \xd8-\xde]
$small = [a-z \xdf-\xf6 \xf8-\xff \_]
$alpha = [$small $large]
$graphic = [$small $large $symbol $digit $special \:\"\']
$octit = 0-7
$hexit = [0-9 A-F a-f]
$idchar = [$alpha $digit \']
$symchar = [$symbol \:]
$nl = [\n\r]
@reservedid =
as|case|class|data|default|deriving|do|else|hiding|if|
import|in|infix|infixl|infixr|instance|let|module|newtype|
of|qualified|then|type|where
@reservedop =
".." | ":" | "::" | "=" | \\ | "|" | "<-" | "->" | "@" | "~" | "=>"
@varid = $small $idchar*
@conid = $large $idchar*
@varsym = $symbol $symchar*
@consym = \: $symchar*
@decimal = $digit+
@octal = $octit+
@hexadecimal = $hexit+
@exponent = [eE] [\-\+] @decimal
$cntrl = [$large \@\[\\\]\^\_]
@ascii = \^ $cntrl | NUL | SOH | STX | ETX | EOT | ENQ | ACK
| BEL | BS | HT | LF | VT | FF | CR | SO | SI | DLE
| DC1 | DC2 | DC3 | DC4 | NAK | SYN | ETB | CAN | EM
| SUB | ESC | FS | GS | RS | US | SP | DEL
$charesc = [abfnrtv\\\"\'\&]
@escape = \\ ($charesc | @ascii | @decimal | o @octal | x @hexadecimal)
@gap = \\ $whitechar+ \\
@string = $graphic # [\"\\] | " " | @escape | @gap
haskell :-
<0> $white+ { skip }
<0> "--"\-*[^$symbol].* { skip }
"{-" { nested_comment }
<0> $special { mkL LSpecial }
<0> @reservedid { mkL LReservedId }
<0> @conid \. @varid { mkL LQVarId }
<0> @conid \. @conid { mkL LQConId }
<0> @varid { mkL LVarId }
<0> @conid { mkL LConId }
<0> @reservedop { mkL LReservedOp }
<0> @conid \. @varsym { mkL LVarSym }
<0> @conid \. @consym { mkL LConSym }
<0> @varsym { mkL LVarSym }
<0> @consym { mkL LConSym }
<0> @decimal
| 0[oO] @octal
| 0[xX] @hexadecimal { mkL LInteger }
<0> @decimal \. @decimal @exponent?
| @decimal @exponent { mkL LFloat }
<0> \' ($graphic # [\'\\] | " " | @escape) \'
{ mkL LChar }
<0> \" @string* \" { mkL LString }
{
data Lexeme = L AlexPosn LexemeClass String
data LexemeClass
= LInteger
| LFloat
| LChar
| LString
| LSpecial
| LReservedId
| LReservedOp
| LVarId
| LQVarId
| LConId
| LQConId
| LVarSym
| LQVarSym
| LConSym
| LQConSym
| LEOF
deriving Eq
mkL :: LexemeClass -> AlexInput -> Int -> Alex Lexeme
mkL c (p,_,_,str) len = return (L p c (take len str))
nested_comment :: AlexInput -> Int -> Alex Lexeme
nested_comment _ _ = do
input <- alexGetInput
go 1 input
where go 0 input = do alexSetInput input; alexMonadScan
go n input = do
case alexGetByte input of
Nothing -> err input
Just (c,input) -> do
case chr (fromIntegral c) of
'-' -> do
let temp = input
case alexGetByte input of
Nothing -> err input
Just (125,input) -> go (n-1) input
Just (45, input) -> go n temp
Just (c,input) -> go n input
'\123' -> do
case alexGetByte input of
Nothing -> err input
Just (c,input) | c == fromIntegral (ord '-') -> go (n+1) input
Just (c,input) -> go n input
c -> go n input
err input = do alexSetInput input; lexError "error in nested comment"
lexError s = do
(p,c,_,input) <- alexGetInput
alexError (showPosn p ++ ": " ++ s ++
(if (not (null input))
then " before " ++ show (head input)
else " at end of file"))
scanner str = runAlex str $ do
let loop i = do tok@(L _ cl _) <- alexMonadScan;
if cl == LEOF
then return i
else do loop $! (i+1)
loop 0
alexEOF = return (L undefined LEOF "")
showPosn (AlexPn _ line col) = show line ++ ':': show col
main = do
s <- getContents
print (scanner s)
}
|