1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301
|
-----------------------------------------------------------------------------
-- |
-- Module : Language.Haskell.ParseMonad
-- Copyright : (c) The GHC Team, 1997-2000
-- License : BSD-3-Clause
--
-- Maintainer : Andreas Abel
-- Stability : stable
-- Portability : portable
--
-- Monads for the Haskell parser and lexer.
--
-----------------------------------------------------------------------------
{-# LANGUAGE CPP #-}
#if __GLASGOW_HASKELL__ >= 902
{-# OPTIONS_GHC -fno-warn-incomplete-uni-patterns #-}
#endif
module Language.Haskell.ParseMonad(
-- * Parsing
P, ParseResult(..), atSrcLoc, LexContext(..),
ParseMode(..), defaultParseMode,
runParserWithMode, runParser,
getSrcLoc, pushCurrentContext, popContext,
-- * Lexing
Lex(runL), getInput, discard, lexNewline, lexTab, lexWhile,
alternative, checkBOL, setBOL, startToken, getOffside,
pushContextL, popContextL
) where
import Control.Applicative as App
import Control.Monad (ap, liftM)
import qualified Control.Monad.Fail as Fail
import Data.Semigroup as Semi
import Language.Haskell.Syntax (SrcLoc (..))
-- | The result of a parse.
data ParseResult a
= ParseOk a -- ^ The parse succeeded, yielding a value.
| ParseFailed SrcLoc String
-- ^ The parse failed at the specified
-- source location, with an error message.
deriving Show
instance Functor ParseResult where
fmap f (ParseOk x) = ParseOk $ f x
fmap _ (ParseFailed loc msg) = ParseFailed loc msg
instance App.Applicative ParseResult where
pure = ParseOk
ParseOk f <*> x = f <$> x
ParseFailed loc msg <*> _ = ParseFailed loc msg
instance Monad ParseResult where
return = pure
ParseOk x >>= f = f x
ParseFailed loc msg >>= _ = ParseFailed loc msg
-- TODO: relax constraint to 'Semigroup s => Semigroup (ParseResult
-- s)' in the long distant future
-- | @since 1.0.3.0
instance Monoid m => Semi.Semigroup (ParseResult m) where
ParseOk x <> ParseOk y = ParseOk $ x `mappend` y
ParseOk _ <> err = err
err <> _ = err -- left-biased
instance Monoid m => Monoid (ParseResult m) where
mempty = ParseOk mempty
mappend = (<>)
-- internal version
data ParseStatus a = Ok ParseState a | Failed SrcLoc String
deriving Show
data LexContext = NoLayout | Layout Int
deriving (Eq,Ord,Show)
type ParseState = [LexContext]
indentOfParseState :: ParseState -> Int
indentOfParseState (Layout n:_) = n
indentOfParseState _ = 0
-- | Static parameters governing a parse.
-- More to come later, e.g. literate mode, language extensions.
data ParseMode = ParseMode {
-- | original name of the file being parsed
parseFilename :: String
}
-- | Default parameters for a parse,
-- currently just a marker for an unknown filename.
defaultParseMode :: ParseMode
defaultParseMode = ParseMode {
parseFilename = "<unknown>"
}
-- | Monad for parsing
newtype P a = P { runP ::
String -- input string
-> Int -- current column
-> Int -- current line
-> SrcLoc -- location of last token read
-> ParseState -- layout info.
-> ParseMode -- parse parameters
-> ParseStatus a
}
runParserWithMode :: ParseMode -> P a -> String -> ParseResult a
runParserWithMode mode (P m) s = case m s 0 1 start [] mode of
Ok _ a -> ParseOk a
Failed loc msg -> ParseFailed loc msg
where start = SrcLoc {
srcFilename = parseFilename mode,
srcLine = 1,
srcColumn = 1
}
runParser :: P a -> String -> ParseResult a
runParser = runParserWithMode defaultParseMode
-- | @since 1.0.2.0
instance Functor P where
fmap = liftM
-- | @since 1.0.2.0
instance Applicative P where
pure a = P $ \_i _x _y _l s _m -> Ok s a
(<*>) = ap
instance Monad P where
return = pure
P m >>= k = P $ \i x y l s mode ->
case m i x y l s mode of
Failed loc msg -> Failed loc msg
Ok s' a -> runP (k a) i x y l s' mode
#if !(MIN_VERSION_base(4,13,0))
fail = Fail.fail
#endif
-- | @since 1.0.3.0
instance Fail.MonadFail P where
fail s = P $ \_r _col _line loc _stk _m -> Failed loc s
atSrcLoc :: P a -> SrcLoc -> P a
P m `atSrcLoc` loc = P $ \i x y _l -> m i x y loc
getSrcLoc :: P SrcLoc
getSrcLoc = P $ \_i _x _y l s _m -> Ok s l
-- Enter a new layout context. If we are already in a layout context,
-- ensure that the new indent is greater than the indent of that context.
-- (So if the source loc is not to the right of the current indent, an
-- empty list {} will be inserted.)
pushCurrentContext :: P ()
pushCurrentContext = do
loc <- getSrcLoc
indent <- currentIndent
pushContext (Layout (max (indent+1) (srcColumn loc)))
currentIndent :: P Int
currentIndent = P $ \_r _x _y _loc stk _mode -> Ok stk (indentOfParseState stk)
pushContext :: LexContext -> P ()
pushContext ctxt =
--trace ("pushing lexical scope: " ++ show ctxt ++"\n") $
P $ \_i _x _y _l s _m -> Ok (ctxt:s) ()
popContext :: P ()
popContext = P $ \_i _x _y _l stk _m ->
case stk of
(_:s) -> --trace ("popping lexical scope, context now "++show s ++ "\n") $
Ok s ()
[] -> error "Internal error: empty context in popContext"
-- Monad for lexical analysis:
-- a continuation-passing version of the parsing monad
newtype Lex r a = Lex { runL :: (a -> P r) -> P r }
-- | @since 1.0.2.0
instance Functor (Lex r) where
fmap = liftM
-- | @since 1.0.2.0
instance Applicative (Lex r) where
pure a = Lex $ \k -> k a
(<*>) = ap
Lex v *> Lex w = Lex $ \k -> v (\_ -> w k)
instance Monad (Lex r) where
return = pure
Lex v >>= f = Lex $ \k -> v (\a -> runL (f a) k)
(>>) = (*>)
#if !(MIN_VERSION_base(4,13,0))
fail = Fail.fail
#endif
-- | @since 1.0.3.0
instance Fail.MonadFail (Lex r) where
fail s = Lex $ \_ -> Fail.fail s
-- Operations on this monad
getInput :: Lex r String
getInput = Lex $ \cont -> P $ \r -> runP (cont r) r
-- | Discard some input characters (these must not include tabs or newlines).
discard :: Int -> Lex r ()
discard n = Lex $ \cont -> P $ \r x -> runP (cont ()) (drop n r) (x+n)
-- | Discard the next character, which must be a newline.
lexNewline :: Lex a ()
lexNewline = Lex $ \cont -> P $ \(_:r) _x y -> runP (cont ()) r 1 (y+1)
-- | Discard the next character, which must be a tab.
lexTab :: Lex a ()
lexTab = Lex $ \cont -> P $ \(_:r) x -> runP (cont ()) r (nextTab x)
nextTab :: Int -> Int
nextTab x = x + (tAB_LENGTH - (x-1) `mod` tAB_LENGTH)
tAB_LENGTH :: Int
tAB_LENGTH = 8
-- Consume and return the largest string of characters satisfying p
lexWhile :: (Char -> Bool) -> Lex a String
lexWhile p = Lex $ \cont -> P $ \r x ->
let (cs,rest) = span p r in
runP (cont cs) rest (x + length cs)
-- An alternative scan, to which we can return if subsequent scanning
-- is unsuccessful.
alternative :: Lex a v -> Lex a (Lex a v)
alternative (Lex v) = Lex $ \cont -> P $ \r x y ->
runP (cont (Lex $ \cont' -> P $ \_r _x _y ->
runP (v cont') r x y)) r x y
-- The source location is the coordinates of the previous token,
-- or, while scanning a token, the start of the current token.
-- col is the current column in the source file.
-- We also need to remember between scanning tokens whether we are
-- somewhere at the beginning of the line before the first token.
-- This could be done with an extra Bool argument to the P monad,
-- but as a hack we use a col value of 0 to indicate this situation.
-- Setting col to 0 is used in two places: just after emitting a virtual
-- close brace due to layout, so that next time through we check whether
-- we also need to emit a semi-colon, and at the beginning of the file,
-- by runParser, to kick off the lexer.
-- Thus when col is zero, the true column can be taken from the loc.
checkBOL :: Lex a Bool
checkBOL = Lex $ \cont -> P $ \r x y loc ->
if x == 0 then runP (cont True) r (srcColumn loc) y loc
else runP (cont False) r x y loc
setBOL :: Lex a ()
setBOL = Lex $ \cont -> P $ \r _ -> runP (cont ()) r 0
-- Set the loc to the current position
startToken :: Lex a ()
startToken = Lex $ \cont -> P $ \s x y _ stk mode ->
let loc = SrcLoc {
srcFilename = parseFilename mode,
srcLine = y,
srcColumn = x
} in
runP (cont ()) s x y loc stk mode
-- Current status with respect to the offside (layout) rule:
-- LT: we are to the left of the current indent (if any)
-- EQ: we are at the current indent (if any)
-- GT: we are to the right of the current indent, or not subject to layout
getOffside :: Lex a Ordering
getOffside = Lex $ \cont -> P $ \r x y loc stk ->
runP (cont (compare x (indentOfParseState stk))) r x y loc stk
pushContextL :: LexContext -> Lex a ()
pushContextL ctxt = Lex $ \cont -> P $ \r x y loc stk ->
runP (cont ()) r x y loc (ctxt:stk)
popContextL :: String -> Lex a ()
popContextL fn = Lex $ \cont -> P $ \r x y loc stk -> case stk of
(_:ctxt) -> runP (cont ()) r x y loc ctxt
[] -> error ("Internal error: empty context in " ++ fn)
|