File: haskell.x

package info (click to toggle)
alex 3.5.4.0-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 824 kB
  • sloc: haskell: 4,772; makefile: 148; yacc: 56; ansic: 4
file content (175 lines) | stat: -rw-r--r-- 4,384 bytes parent folder | download | duplicates (4)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
--
-- Lexical syntax for Haskell 98.
--
-- (c) Simon Marlow 2003, with the caveat that much of this is
-- translated directly from the syntax in the Haskell 98 report.
--
-- This isn't a complete Haskell 98 lexer - it doesn't handle layout
-- for one thing.  However, it could be adapted with a small
-- amount of effort.
--

{
module Main (main) where
import Data.Char (chr)
}

%wrapper "monad"

$whitechar = [ \t\n\r\f\v]
$special   = [\(\)\,\;\[\]\`\{\}]

$ascdigit  = 0-9
$unidigit  = [] -- TODO
$digit     = [$ascdigit $unidigit]

$ascsymbol = [\!\#\$\%\&\*\+\.\/\<\=\>\?\@\\\^\|\-\~]
$unisymbol = [] -- TODO
$symbol    = [$ascsymbol $unisymbol] # [$special \_\:\"\']

$large     = [A-Z \xc0-\xd6 \xd8-\xde]
$small     = [a-z \xdf-\xf6 \xf8-\xff \_]
$alpha     = [$small $large]

$graphic   = [$small $large $symbol $digit $special \:\"\']

$octit	   = 0-7
$hexit     = [0-9 A-F a-f]
$idchar    = [$alpha $digit \']
$symchar   = [$symbol \:]
$nl        = [\n\r]

@reservedid = 
	as|case|class|data|default|deriving|do|else|hiding|if|
	import|in|infix|infixl|infixr|instance|let|module|newtype|
	of|qualified|then|type|where

@reservedop =
	".." | ":" | "::" | "=" | \\ | "|" | "<-" | "->" | "@" | "~" | "=>"

@varid  = $small $idchar*
@conid  = $large $idchar*
@varsym = $symbol $symchar*
@consym = \: $symchar*

@decimal     = $digit+
@octal       = $octit+
@hexadecimal = $hexit+
@exponent    = [eE] [\-\+] @decimal

$cntrl   = [$large \@\[\\\]\^\_]
@ascii   = \^ $cntrl | NUL | SOH | STX | ETX | EOT | ENQ | ACK
	 | BEL | BS | HT | LF | VT | FF | CR | SO | SI | DLE
	 | DC1 | DC2 | DC3 | DC4 | NAK | SYN | ETB | CAN | EM
	 | SUB | ESC | FS | GS | RS | US | SP | DEL
$charesc = [abfnrtv\\\"\'\&]
@escape  = \\ ($charesc | @ascii | @decimal | o @octal | x @hexadecimal)
@gap     = \\ $whitechar+ \\
@string  = $graphic # [\"\\] | " " | @escape | @gap

haskell :-

<0> $white+			{ skip }
<0> "--"\-*[^$symbol].*		{ skip }

"{-"				{ nested_comment }

<0> $special			{ mkL LSpecial }

<0> @reservedid			{ mkL LReservedId }
<0> @conid \. @varid		{ mkL LQVarId }
<0> @conid \. @conid		{ mkL LQConId }
<0> @varid			{ mkL LVarId }
<0> @conid			{ mkL LConId }

<0> @reservedop			{ mkL LReservedOp }
<0> @conid \. @varsym		{ mkL LVarSym }
<0> @conid \. @consym		{ mkL LConSym }
<0> @varsym			{ mkL LVarSym }
<0> @consym			{ mkL LConSym }

<0> @decimal 
  | 0[oO] @octal
  | 0[xX] @hexadecimal		{ mkL LInteger }

<0> @decimal \. @decimal @exponent?
  | @decimal @exponent		{ mkL LFloat }

<0> \' ($graphic # [\'\\] | " " | @escape) \'
				{ mkL LChar }

<0> \" @string* \"		{ mkL LString }

{
data Lexeme = L AlexPosn LexemeClass String

data LexemeClass
  = LInteger
  | LFloat
  | LChar
  | LString
  | LSpecial
  | LReservedId
  | LReservedOp
  | LVarId
  | LQVarId
  | LConId
  | LQConId
  | LVarSym
  | LQVarSym
  | LConSym
  | LQConSym
  | LEOF
  deriving Eq
  
mkL :: LexemeClass -> AlexInput -> Int -> Alex Lexeme
mkL c (p,_,_,str) len = return (L p c (take len str))

nested_comment :: AlexInput -> Int -> Alex Lexeme
nested_comment _ _ = do
  input <- alexGetInput
  go 1 input
  where go 0 input = do alexSetInput input; alexMonadScan
	go n input = do
          case alexGetByte input of
	    Nothing  -> err input
	    Just (c,input) -> do
              case chr (fromIntegral c) of
	    	'-' -> do
                  let temp = input
                  case alexGetByte input of
		    Nothing  -> err input
                    Just (125,input) -> go (n-1) input
                    Just (45, input) -> go n temp
                    Just (c,input)   -> go n input
	     	'\123' -> do
                  case alexGetByte input of
		    Nothing  -> err input
                    Just (c,input) | c == fromIntegral (ord '-') -> go (n+1) input
		    Just (c,input)   -> go n input
	    	c -> go n input

        err input = do alexSetInput input; lexError "error in nested comment"  

lexError s = do
  (p,c,_,input) <- alexGetInput
  alexError (showPosn p ++ ": " ++ s ++ 
		   (if (not (null input))
		     then " before " ++ show (head input)
		     else " at end of file"))

scanner str = runAlex str $ do
  let loop i = do tok@(L _ cl _) <- alexMonadScan; 
		  if cl == LEOF
			then return i
			else do loop $! (i+1)
  loop 0

alexEOF = return (L undefined LEOF "")

showPosn (AlexPn _ line col) = show line ++ ':': show col

main = do
  s <- getContents
  print (scanner s)
}