File: ParseMonad.hs

package info (click to toggle)
haskell-haskell-src 1.0.4.1-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 204 kB
  • sloc: haskell: 1,741; makefile: 2
file content (301 lines) | stat: -rw-r--r-- 9,891 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
-----------------------------------------------------------------------------
-- |
-- Module      :  Language.Haskell.ParseMonad
-- Copyright   :  (c) The GHC Team, 1997-2000
-- License     :  BSD-3-Clause
--
-- Maintainer  :  Andreas Abel
-- Stability   :  stable
-- Portability :  portable
--
-- Monads for the Haskell parser and lexer.
--
-----------------------------------------------------------------------------

{-# LANGUAGE CPP #-}

#if __GLASGOW_HASKELL__ >= 902
{-# OPTIONS_GHC -fno-warn-incomplete-uni-patterns #-}
#endif

module Language.Haskell.ParseMonad(
                -- * Parsing
                P, ParseResult(..), atSrcLoc, LexContext(..),
                ParseMode(..), defaultParseMode,
                runParserWithMode, runParser,
                getSrcLoc, pushCurrentContext, popContext,
                -- * Lexing
                Lex(runL), getInput, discard, lexNewline, lexTab, lexWhile,
                alternative, checkBOL, setBOL, startToken, getOffside,
                pushContextL, popContextL
        ) where

import           Control.Applicative     as App
import           Control.Monad           (ap, liftM)
import qualified Control.Monad.Fail      as Fail
import           Data.Semigroup          as Semi
import           Language.Haskell.Syntax (SrcLoc (..))

-- | The result of a parse.
data ParseResult a
        = ParseOk a             -- ^ The parse succeeded, yielding a value.
        | ParseFailed SrcLoc String
                                -- ^ The parse failed at the specified
                                -- source location, with an error message.
        deriving Show

instance Functor ParseResult where
  fmap f (ParseOk x)           = ParseOk $ f x
  fmap _ (ParseFailed loc msg) = ParseFailed loc msg

instance App.Applicative ParseResult where
  pure = ParseOk
  ParseOk f           <*> x = f <$> x
  ParseFailed loc msg <*> _ = ParseFailed loc msg

instance Monad ParseResult where
  return = pure
  ParseOk x           >>= f = f x
  ParseFailed loc msg >>= _ = ParseFailed loc msg

-- TODO: relax constraint to 'Semigroup s => Semigroup (ParseResult
-- s)' in the long distant future

-- | @since 1.0.3.0
instance Monoid m => Semi.Semigroup (ParseResult m) where
  ParseOk x <> ParseOk y = ParseOk $ x `mappend` y
  ParseOk _ <> err       = err
  err       <> _         = err -- left-biased

instance Monoid m => Monoid (ParseResult m) where
  mempty = ParseOk mempty
  mappend = (<>)

-- internal version
data ParseStatus a = Ok ParseState a | Failed SrcLoc String
        deriving Show

data LexContext = NoLayout | Layout Int
        deriving (Eq,Ord,Show)

type ParseState = [LexContext]

indentOfParseState :: ParseState -> Int
indentOfParseState (Layout n:_) = n
indentOfParseState _            = 0

-- | Static parameters governing a parse.
-- More to come later, e.g. literate mode, language extensions.

data ParseMode = ParseMode {
                                -- | original name of the file being parsed
                parseFilename :: String
                }

-- | Default parameters for a parse,
-- currently just a marker for an unknown filename.

defaultParseMode :: ParseMode
defaultParseMode = ParseMode {
                parseFilename = "<unknown>"
                }

-- | Monad for parsing

newtype P a = P { runP ::
                        String          -- input string
                     -> Int             -- current column
                     -> Int             -- current line
                     -> SrcLoc          -- location of last token read
                     -> ParseState      -- layout info.
                     -> ParseMode       -- parse parameters
                     -> ParseStatus a
                }

runParserWithMode :: ParseMode -> P a -> String -> ParseResult a
runParserWithMode mode (P m) s = case m s 0 1 start [] mode of
        Ok _ a         -> ParseOk a
        Failed loc msg -> ParseFailed loc msg
    where start = SrcLoc {
                srcFilename = parseFilename mode,
                srcLine = 1,
                srcColumn = 1
        }

runParser :: P a -> String -> ParseResult a
runParser = runParserWithMode defaultParseMode

-- | @since 1.0.2.0
instance Functor P where
        fmap = liftM

-- | @since 1.0.2.0
instance Applicative P where
        pure a = P $ \_i _x _y _l s _m -> Ok s a
        (<*>) = ap

instance Monad P where
        return = pure
        P m >>= k = P $ \i x y l s mode ->
                case m i x y l s mode of
                    Failed loc msg -> Failed loc msg
                    Ok s' a        -> runP (k a) i x y l s' mode
#if !(MIN_VERSION_base(4,13,0))
        fail = Fail.fail
#endif

-- | @since 1.0.3.0
instance Fail.MonadFail P where
        fail s = P $ \_r _col _line loc _stk _m -> Failed loc s

atSrcLoc :: P a -> SrcLoc -> P a
P m `atSrcLoc` loc = P $ \i x y _l -> m i x y loc

getSrcLoc :: P SrcLoc
getSrcLoc = P $ \_i _x _y l s _m -> Ok s l

-- Enter a new layout context.  If we are already in a layout context,
-- ensure that the new indent is greater than the indent of that context.
-- (So if the source loc is not to the right of the current indent, an
-- empty list {} will be inserted.)

pushCurrentContext :: P ()
pushCurrentContext = do
        loc <- getSrcLoc
        indent <- currentIndent
        pushContext (Layout (max (indent+1) (srcColumn loc)))

currentIndent :: P Int
currentIndent = P $ \_r _x _y _loc stk _mode -> Ok stk (indentOfParseState stk)

pushContext :: LexContext -> P ()
pushContext ctxt =
--trace ("pushing lexical scope: " ++ show ctxt ++"\n") $
        P $ \_i _x _y _l s _m -> Ok (ctxt:s) ()

popContext :: P ()
popContext = P $ \_i _x _y _l stk _m ->
      case stk of
        (_:s) -> --trace ("popping lexical scope, context now "++show s ++ "\n") $
            Ok s ()
        []    -> error "Internal error: empty context in popContext"

-- Monad for lexical analysis:
-- a continuation-passing version of the parsing monad

newtype Lex r a = Lex { runL :: (a -> P r) -> P r }

-- | @since 1.0.2.0
instance Functor (Lex r) where
        fmap = liftM

-- | @since 1.0.2.0
instance Applicative (Lex r) where
        pure a = Lex $ \k -> k a
        (<*>) = ap
        Lex v *> Lex w = Lex $ \k -> v (\_ -> w k)

instance Monad (Lex r) where
        return = pure
        Lex v >>= f = Lex $ \k -> v (\a -> runL (f a) k)
        (>>) = (*>)
#if !(MIN_VERSION_base(4,13,0))
        fail = Fail.fail
#endif

-- | @since 1.0.3.0
instance Fail.MonadFail (Lex r) where
        fail s = Lex $ \_ -> Fail.fail s

-- Operations on this monad

getInput :: Lex r String
getInput = Lex $ \cont -> P $ \r -> runP (cont r) r

-- | Discard some input characters (these must not include tabs or newlines).

discard :: Int -> Lex r ()
discard n = Lex $ \cont -> P $ \r x -> runP (cont ()) (drop n r) (x+n)

-- | Discard the next character, which must be a newline.

lexNewline :: Lex a ()
lexNewline = Lex $ \cont -> P $ \(_:r) _x y -> runP (cont ()) r 1 (y+1)

-- | Discard the next character, which must be a tab.

lexTab :: Lex a ()
lexTab = Lex $ \cont -> P $ \(_:r) x -> runP (cont ()) r (nextTab x)

nextTab :: Int -> Int
nextTab x = x + (tAB_LENGTH - (x-1) `mod` tAB_LENGTH)

tAB_LENGTH :: Int
tAB_LENGTH = 8

-- Consume and return the largest string of characters satisfying p

lexWhile :: (Char -> Bool) -> Lex a String
lexWhile p = Lex $ \cont -> P $ \r x ->
        let (cs,rest) = span p r in
        runP (cont cs) rest (x + length cs)

-- An alternative scan, to which we can return if subsequent scanning
-- is unsuccessful.

alternative :: Lex a v -> Lex a (Lex a v)
alternative (Lex v) = Lex $ \cont -> P $ \r x y ->
        runP (cont (Lex $ \cont' -> P $ \_r _x _y ->
                runP (v cont') r x y)) r x y

-- The source location is the coordinates of the previous token,
-- or, while scanning a token, the start of the current token.

-- col is the current column in the source file.
-- We also need to remember between scanning tokens whether we are
-- somewhere at the beginning of the line before the first token.
-- This could be done with an extra Bool argument to the P monad,
-- but as a hack we use a col value of 0 to indicate this situation.

-- Setting col to 0 is used in two places: just after emitting a virtual
-- close brace due to layout, so that next time through we check whether
-- we also need to emit a semi-colon, and at the beginning of the file,
-- by runParser, to kick off the lexer.
-- Thus when col is zero, the true column can be taken from the loc.

checkBOL :: Lex a Bool
checkBOL = Lex $ \cont -> P $ \r x y loc ->
                if x == 0 then runP (cont True) r (srcColumn loc) y loc
                        else runP (cont False) r x y loc

setBOL :: Lex a ()
setBOL = Lex $ \cont -> P $ \r _ -> runP (cont ()) r 0

-- Set the loc to the current position

startToken :: Lex a ()
startToken = Lex $ \cont -> P $ \s x y _ stk mode ->
        let loc = SrcLoc {
                srcFilename = parseFilename mode,
                srcLine = y,
                srcColumn = x
        } in
        runP (cont ()) s x y loc stk mode

-- Current status with respect to the offside (layout) rule:
-- LT: we are to the left of the current indent (if any)
-- EQ: we are at the current indent (if any)
-- GT: we are to the right of the current indent, or not subject to layout

getOffside :: Lex a Ordering
getOffside = Lex $ \cont -> P $ \r x y loc stk ->
                runP (cont (compare x (indentOfParseState stk))) r x y loc stk

pushContextL :: LexContext -> Lex a ()
pushContextL ctxt = Lex $ \cont -> P $ \r x y loc stk ->
                runP (cont ()) r x y loc (ctxt:stk)

popContextL :: String -> Lex a ()
popContextL fn = Lex $ \cont -> P $ \r x y loc stk -> case stk of
                (_:ctxt) -> runP (cont ()) r x y loc ctxt
                []       -> error ("Internal error: empty context in " ++ fn)