1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455
|
{-
GLR_Lib.lhs
$Id: GLR_Lib.lhs,v 1.5 2005/08/03 13:42:23 paulcc Exp $
-}
{-
Parser driver for the GLR parser.
(c) University of Durham, Ben Medlock 2001
-- initial code, for structure parsing
(c) University of Durham, Paul Callaghan 2004-05
-- extension to semantic rules
-- shifting to chart data structure
-- supporting hidden left recursion
-- many optimisations
-}
{- supplied by Happy
<> module XYZ (
<> lexer -- conditional
-}
-- probable, but might want to parametrise
, doParse
, TreeDecode(..), decode -- only for tree decode
, LabelDecode(..) -- only for label decode
-- standard exports
, Tokens
, GLRResult(..)
, NodeMap
, RootNode
, ForestId
, GSymbol(..)
, Branch(..)
, GSem(..)
)
where
#if !defined(__GLASGOW_HASKELL__)
# error This code isn't being built with GHC.
#endif
import Data.Char
import qualified Data.Map as Map
import Control.Applicative (Applicative(..))
import Control.Monad (foldM, ap)
import Data.Maybe (fromJust)
import Data.List (insertBy, nub, maximumBy, partition, find, groupBy, delete)
import GHC.Prim
import GHC.Exts
#if defined(HAPPY_DEBUG)
import System.IO
import System.IO.Unsafe
import Text.PrettyPrint
#endif
{- these inserted by Happy -}
fakeimport DATA
{- borrowed from GenericTemplate.hs -}
#define ILIT(n) n#
#define BANG !
#define IBOX(n) (I# (n))
#define FAST_INT Int#
#if __GLASGOW_HASKELL__ >= 708
#define ULT(n,m) (isTrue# (n <# m))
#define GTE(n,m) (isTrue# (n >=# m))
#define UEQ(n,m) (isTrue# (n ==# m))
#else
#define ULT(n,m) (n <# m)
#define GTE(n,m) (n >=# m)
#define UEQ(n,m) (n ==# m)
#endif
#define PLUS(n,m) (n +# m)
#define MINUS(n,m) (n -# m)
#define TIMES(n,m) (n *# m)
#define NEGATE(n) (negateInt# (n))
#define IF_GHC(x) (x)
#if defined(HAPPY_DEBUG)
#define DEBUG_TRACE(s) (happyTrace (s) $ return ())
happyTrace string expr = unsafePerformIO $ do
hPutStr stderr string
return expr
#else
#define DEBUG_TRACE(s) {- nothing -}
#endif
doParse = glr_parse
----------------------------------------------------------------------------
-- Main data types
-- A forest is a map of `spans' to branches, where a span is a start position,
-- and end position, and a grammatical category for that interval. Branches
-- are lists of conjunctions of symbols which can be matched in that span.
-- Note that tokens are stored as part of the spans.
type Forest = Map.Map ForestId [Branch]
---
-- End result of parsing:
-- - successful parse with rooted forest
-- - else syntax error or premature eof
type NodeMap = [(ForestId, [Branch])]
type RootNode = ForestId
type Tokens = [[(Int, GSymbol)]] -- list of ambiguous lexemes
data GLRResult
= ParseOK RootNode Forest -- forest with root
| ParseError Tokens Forest -- partial forest with bad input
| ParseEOF Forest -- partial forest (missing input)
-----------------------
-- Forest to simplified output
forestResult :: Int -> Forest -> GLRResult
forestResult length f
= case roots of
[] -> ParseEOF f
[r] -> ParseOK r f
rs@(_:_) -> error $ "multiple roots in forest, = " ++ show rs
++ unlines (map show ns_map)
where
ns_map = Map.toList f
roots = [ r | (r@(0,sz,sym),_) <- ns_map
, sz == length
, sym == top_symbol ]
----------------------------------------------------------------------------
glr_parse :: [[UserDefTok]] -> GLRResult
glr_parse toks
= case runST Map.empty [0..] (tp toks) of
(f,Left ts) -> ParseError ts f
-- Error within sentence
(f,Right ss) -> forestResult (length toks) f
-- Either good parse or EOF
where
tp tss = doActions [initTS 0]
$ zipWith (\i ts -> [(i, t) | t <- ts]) [0..]
$ [ [ HappyTok {-j-} t | (j,t) <- zip [0..] ts ] | ts <- tss ]
++ [[HappyEOF]]
---
type PM a = ST Forest [Int] a
type FStack = TStack ForestId
---
-- main function
doActions :: [FStack] -> Tokens -> PM (Either Tokens [FStack])
doActions ss [] -- no more tokens (this is ok)
= return (Right ss) -- return the stacks (may be empty)
doActions stks (tok:toks)
= do
stkss <- sequence [ do
stks' <- reduceAll [] tok_form stks
shiftAll tok_form stks'
| tok_form <- tok ]
let new_stks = merge $ concat stkss
DEBUG_TRACE(unlines $ ("Stacks after R*/S pass" ++ show tok)
: map show new_stks)
case new_stks of -- did this token kill stacks?
[] -> case toks of
[] -> return $ Right [] -- ok if no more tokens
_:_ -> return $ Left (tok:toks) -- not ok if some input left
_ -> doActions new_stks toks
reduceAll
:: [GSymbol] -> (Int, GSymbol) -> [FStack] -> PM [(FStack, Int)]
reduceAll _ tok [] = return []
reduceAll cyclic_names itok@(i,tok) (stk:stks)
= do
case action this_state tok of
Accept -> reduceAll [] itok stks
Error -> reduceAll [] itok stks
Shift st rs -> do { ss <- redAll rs ; return $ (stk,st) : ss }
Reduce rs -> redAll rs
where
this_state = top stk
redAll rs
= do
let reds = [ (bf fids,stk',m)
| (m,n,bf) <- rs
, not (n == 0 && m `elem` cyclic_names) -- remove done ones
, (fids,stk') <- pop n stk
]
-- WARNING: incomplete if more than one Empty in a prod(!)
-- WARNING: can avoid by splitting emps/non-emps
DEBUG_TRACE(unlines $ ("Packing reds = " ++ show (length reds))
: map show reds)
stks' <- foldM (pack i) stks reds
let new_cyclic = [ m | (m,0,_) <- rs
, UEQ(this_state, goto this_state m)
, m `notElem` cyclic_names ]
reduceAll (cyclic_names ++ new_cyclic) itok $ merge stks'
shiftAll :: (Int, GSymbol) -> [(FStack, Int)] -> PM [FStack]
shiftAll tok [] = return []
shiftAll (j,tok) stks
= do
let end = j + 1
let key = end `seq` (j,end,tok)
newNode key
let mss = [ (stk, st)
| ss@((_,st):_) <- groupBy (\a b -> snd a == snd b) stks
, stk <- merge $ map fst ss ]
stks' <- sequence [ do { nid <- getID ; return (push key st nid stk) }
| (stk,IBOX(st)) <- mss ]
return stks'
pack
:: Int -> [FStack] -> (Branch, FStack, GSymbol) -> PM [FStack]
pack e_i stks (fids,stk,m)
| ULT(st, ILIT(0))
= return stks
| otherwise
= do
let s_i = endpoint stk
let key = (s_i,e_i,m)
DEBUG_TRACE( unlines
$ ("Pack at " ++ show key ++ " " ++ show fids)
: ("**" ++ show stk)
: map show stks)
duplicate <- addBranch key fids
let stack_matches = [ s | s <- stks
, UEQ(top s, st)
, let (k,s') = case ts_tail s of x:_ -> x
, stk == s'
, k == key
] -- look for first obvious packing site
let appears_in = not $ null stack_matches
DEBUG_TRACE( unlines
$ ("Stack Matches: " ++ show (length stack_matches))
: map show stack_matches)
DEBUG_TRACE( if not (duplicate && appears_in) then "" else
unlines
$ ("DROP:" ++ show (IBOX(st),key) ++ " -- " ++ show stk)
: "*****"
: map show stks)
if duplicate && appears_in
then return stks -- because already there
else do
nid <- getID
case stack_matches of
[] -> return $ insertStack (push key st nid stk) stks
-- No prior stacks
s:_ -> return $ insertStack (push key st nid stk) (delete s stks)
-- pack into an existing stack
where
st = goto (top stk) m
---
-- record an entry
-- - expected: "i" will contain a token
newNode :: ForestId -> PM ()
newNode i
= chgS $ \f -> ((), Map.insert i [] f)
---
-- add a new branch
-- - due to packing, we check to see if a branch is already there
-- - return True if the branch is already there
addBranch :: ForestId -> Branch -> PM Bool
addBranch i b
= do
f <- useS id
case Map.lookup i f of
Nothing -> chgS $ \f -> (False, Map.insert i [b] f)
Just bs | b `elem` bs -> return True
| otherwise -> chgS $ \f -> (True, Map.insert i (b:bs) f)
---
-- only for use with nodes that exist
getBranches :: ForestId -> PM [Branch]
getBranches i
= useS $ \s -> Map.findWithDefault no_such_node i s
where
no_such_node = error $ "No such node in Forest: " ++ show i
-----------------------------------------------------------------------------
-- Auxiliary functions
(<>) x y = (x,y) -- syntactic sugar
-- Tomita stack
-- - basic idea taken from Peter Ljungloef's Licentiate thesis
data TStack a
= TS { top :: FAST_INT -- state
, ts_id :: FAST_INT -- ID
, stoup :: !(Maybe a) -- temp holding place, for left rec.
, ts_tail :: ![(a,TStack a)] -- [(element on arc , child)]
}
instance Show a => Show (TStack a) where
show ts
= "St" ++ show (IBOX(top ts))
#if defined(HAPPY_DEBUG)
++ "\n" ++ render (spp $ ts_tail ts)
where
spp ss = nest 2
$ vcat [ vcat [text (show (v,IBOX(top s))), spp (ts_tail s)]
| (v,s) <- ss ]
#endif
---
-- id uniquely identifies a stack
instance Eq (TStack a) where
s1 == s2 = UEQ(ts_id s1, ts_id s2)
--instance Ord (TStack a) where
-- s1 `compare` s2 = IBOX(ts_id s1) `compare` IBOX(ts_id s2)
---
-- Nothing special done for insertion
-- - NB merging done at strategic points
insertStack :: TStack a -> [TStack a] -> [TStack a]
insertStack = (:)
---
initTS :: Int -> TStack a
initTS IBOX(id) = TS ILIT(0) id Nothing []
---
push :: ForestId -> FAST_INT -> Int -> TStack ForestId -> TStack ForestId
push x@(s_i,e_i,m) st IBOX(id) stk
= TS st id stoup [(x,stk)]
where
-- only fill stoup for cyclic states that don't consume input
stoup | s_i == e_i && UEQ(st, goto st m) = Just x
| otherwise = Nothing
---
pop :: Int -> TStack a -> [([a],TStack a)]
pop 0 ts = [([],ts)]
pop 1 st@TS{stoup=Just x}
= pop 1 st{stoup=Nothing} ++ [ ([x],st) ]
pop n ts = [ (xs ++ [x] , stk')
| (x,stk) <- ts_tail ts
, (xs,stk') <- pop (n-1) stk ]
---
popF :: TStack a -> TStack a
popF ts = case ts_tail ts of (_,c):_ -> c
---
endpoint stk
= case ts_tail stk of
[] -> 0
((_,e_i,_),_):_ -> e_i
---
merge :: (Eq a, Show a) => [TStack a] -> [TStack a]
merge stks
= [ TS st id ss (nub ch)
| IBOX(st) <- nub (map (\s -> IBOX(top s)) stks)
, let ch = concat [ x | TS st2 _ _ x <- stks, UEQ(st,st2) ]
ss = mkss [ s | TS st2 _ s _ <- stks, UEQ(st,st2) ]
(BANG IBOX(id)) = head [ IBOX(i) | TS st2 i _ _ <- stks, UEQ(st,st2) ]
-- reuse of id is ok, since merge discards old stacks
]
where
mkss s = case nub [ x | Just x <- s ] of
[] -> Nothing
[x] -> Just x
xs -> error $ unlines $ ("Stoup merge: " ++ show xs)
: map show stks
----------------------------------------------------------------------------
-- Monad
-- TODO (pcc): combine the s/i, or use the modern libraries - might be faster?
-- but some other things are much, much, much more expensive!
data ST s i a = MkST (s -> i -> (a,s,i))
instance Functor (ST s i) where
fmap f (MkST sf)
= MkST $ \s i -> case sf s i of (a,s',i') -> (f a,s',i')
instance Applicative (ST s i) where
pure a = MkST $ \s i -> (a,s,i)
(<*>) = ap
instance Monad (ST s i) where
return = pure
MkST sf >>= k
= MkST $ \s i ->
case sf s i of
(a,s',i') -> let (MkST sf') = k a in sf' s' i'
runST :: s -> i -> ST s i a -> (s,a)
runST s i (MkST sf) = case sf s i of
(a,s,_) -> (s,a)
chgS :: (s -> (a,s)) -> ST s i a
chgS sf = MkST $ \s i -> let (a,s') = sf s in (a,s',i)
useS :: (s -> b) -> ST s i b
useS fn = MkST $ \s i -> (fn s,s,i)
getID :: ST s [Int] Int
getID = MkST $ \s (i:is) -> (i,s,is)
|