1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437
|
-- A bunch of megaparsec helpers for re-parsing etc.
-- I think these are generic apart from the HledgerParseError name.
{-# LANGUAGE BangPatterns #-}
{-# LANGUAGE CPP #-}
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE FlexibleInstances #-} -- new
{-# LANGUAGE LambdaCase #-}
{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE StandaloneDeriving #-} -- new
module Text.Megaparsec.Custom (
-- * Custom parse error types
HledgerParseErrorData,
HledgerParseErrors,
-- * Failing with an arbitrary source position
parseErrorAt,
parseErrorAtRegion,
-- * Re-parsing
SourceExcerpt,
getExcerptText,
excerpt_,
reparseExcerpt,
-- * Pretty-printing custom parse errors
customErrorBundlePretty,
-- * "Final" parse errors
FinalParseError,
FinalParseError',
FinalParseErrorBundle,
FinalParseErrorBundle',
-- * Constructing "final" parse errors
finalError,
finalFancyFailure,
finalFail,
finalCustomFailure,
-- * Pretty-printing "final" parse errors
finalErrorBundlePretty,
attachSource,
-- * Handling parse errors from include files with "final" parse errors
parseIncludeFile,
)
where
import Control.Monad.Except (ExceptT, MonadError, catchError, throwError)
import Control.Monad.State.Strict (StateT, evalStateT)
import Control.Monad.Trans.Class (lift)
import qualified Data.List.NonEmpty as NE
import Data.Monoid (Alt(..))
import qualified Data.Set as S
import Data.Text (Text)
import Text.Megaparsec
--- * Custom parse error types
-- | Custom error data for hledger parsers. Specialised for a 'Text' parse stream.
-- ReparseableTextParseErrorData ?
data HledgerParseErrorData
-- | Fail with a message at a specific source position interval. The
-- interval must be contained within a single line.
= ErrorFailAt Int -- Starting offset
Int -- Ending offset
String -- Error message
-- | Re-throw parse errors obtained from the "re-parsing" of an excerpt
-- of the source text.
| ErrorReparsing
(NE.NonEmpty (ParseError Text HledgerParseErrorData)) -- Source fragment parse errors
deriving (Show, Eq, Ord)
-- | A specialised version of ParseErrorBundle:
-- a non-empty collection of hledger parse errors,
-- equipped with PosState to help pretty-print them.
-- Specialised for a 'Text' parse stream.
type HledgerParseErrors = ParseErrorBundle Text HledgerParseErrorData
-- We require an 'Ord' instance for 'CustomError' so that they may be
-- stored in a 'Set'. The actual instance is inconsequential, so we just
-- derive it, but the derived instance requires an (orphan) instance for
-- 'ParseError'. Hopefully this does not cause any trouble.
deriving instance Ord (ParseError Text HledgerParseErrorData)
-- Note: the pretty-printing of our 'HledgerParseErrorData' type is only partally
-- defined in its 'ShowErrorComponent' instance; we perform additional
-- adjustments in 'customErrorBundlePretty'.
instance ShowErrorComponent HledgerParseErrorData where
showErrorComponent (ErrorFailAt _ _ errMsg) = errMsg
showErrorComponent (ErrorReparsing _) = "" -- dummy value
errorComponentLen (ErrorFailAt startOffset endOffset _) =
endOffset - startOffset
errorComponentLen (ErrorReparsing _) = 1 -- dummy value
--- * Failing with an arbitrary source position
-- | Fail at a specific source position, given by the raw offset from the
-- start of the input stream (the number of tokens processed at that
-- point).
parseErrorAt :: Int -> String -> HledgerParseErrorData
parseErrorAt offset = ErrorFailAt offset (offset+1)
-- | Fail at a specific source interval, given by the raw offsets of its
-- endpoints from the start of the input stream (the numbers of tokens
-- processed at those points).
--
-- Note that care must be taken to ensure that the specified interval does
-- not span multiple lines of the input source. This will not be checked.
parseErrorAtRegion
:: Int -- ^ Start offset
-> Int -- ^ End end offset
-> String -- ^ Error message
-> HledgerParseErrorData
parseErrorAtRegion startOffset endOffset msg =
if startOffset < endOffset
then ErrorFailAt startOffset endOffset msg'
else ErrorFailAt startOffset (startOffset+1) msg'
where
msg' = "\n" ++ msg
--- * Re-parsing
-- | A fragment of source suitable for "re-parsing". The purpose of this
-- data type is to preserve the content and source position of the excerpt
-- so that parse errors raised during "re-parsing" may properly reference
-- the original source.
data SourceExcerpt = SourceExcerpt Int -- Offset of beginning of excerpt
Text -- Fragment of source file
-- | Get the raw text of a source excerpt.
getExcerptText :: SourceExcerpt -> Text
getExcerptText (SourceExcerpt _ txt) = txt
-- | 'excerpt_ p' applies the given parser 'p' and extracts the portion of
-- the source consumed by 'p', along with the source position of this
-- portion. This is the only way to create a source excerpt suitable for
-- "re-parsing" by 'reparseExcerpt'.
-- This function could be extended to return the result of 'p', but we don't
-- currently need this.
excerpt_ :: MonadParsec HledgerParseErrorData Text m => m a -> m SourceExcerpt
excerpt_ p = do
offset <- getOffset
(!txt, _) <- match p
pure $ SourceExcerpt offset txt
-- | 'reparseExcerpt s p' "re-parses" the source excerpt 's' using the
-- parser 'p'. Parse errors raised by 'p' will be re-thrown at the source
-- position of the source excerpt.
--
-- In order for the correct source file to be displayed when re-throwing
-- parse errors, we must ensure that the source file during the use of
-- 'reparseExcerpt s p' is the same as that during the use of 'excerpt_'
-- that generated the source excerpt 's'. However, we can usually expect
-- this condition to be satisfied because, at the time of writing, the
-- only changes of source file in the codebase take place through include
-- files, and the parser for include files neither accepts nor returns
-- 'SourceExcerpt's.
reparseExcerpt
:: Monad m
=> SourceExcerpt
-> ParsecT HledgerParseErrorData Text m a
-> ParsecT HledgerParseErrorData Text m a
reparseExcerpt (SourceExcerpt offset txt) p = do
(_, res) <- lift $ runParserT' p (offsetInitialState offset txt)
case res of
Right result -> pure result
Left errBundle -> customFailure $ ErrorReparsing $ bundleErrors errBundle
where
offsetInitialState :: Int -> s ->
#if MIN_VERSION_megaparsec(8,0,0)
State s e
#else
State s
#endif
offsetInitialState initialOffset s = State
{ stateInput = s
, stateOffset = initialOffset
, statePosState = PosState
{ pstateInput = s
, pstateOffset = initialOffset
, pstateSourcePos = initialPos ""
, pstateTabWidth = defaultTabWidth
, pstateLinePrefix = ""
}
#if MIN_VERSION_megaparsec(8,0,0)
, stateParseErrors = []
#endif
}
--- * Pretty-printing custom parse errors
-- | Pretty-print our custom parse errors. It is necessary to use this
-- instead of 'errorBundlePretty' when custom parse errors are thrown.
--
-- This function intercepts our custom parse errors and applies final
-- adjustments ('finalizeCustomError') before passing them to
-- 'errorBundlePretty'. These adjustments are part of the implementation
-- of the behaviour of our custom parse errors.
--
-- Note: We must ensure that the offset of the 'PosState' of the provided
-- 'ParseErrorBundle' is no larger than the offset specified by a
-- 'ErrorFailAt' constructor. This is guaranteed if this offset is set to
-- 0 (that is, the beginning of the source file), which is the
-- case for 'ParseErrorBundle's returned from 'runParserT'.
customErrorBundlePretty :: HledgerParseErrors -> String
customErrorBundlePretty errBundle =
let errBundle' = errBundle { bundleErrors =
NE.sortWith errorOffset $ -- megaparsec requires that the list of errors be sorted by their offsets
bundleErrors errBundle >>= finalizeCustomError }
in errorBundlePretty errBundle'
where
finalizeCustomError
:: ParseError Text HledgerParseErrorData -> NE.NonEmpty (ParseError Text HledgerParseErrorData)
finalizeCustomError err = case findCustomError err of
Nothing -> pure err
Just errFailAt@(ErrorFailAt startOffset _ _) ->
-- Adjust the offset
pure $ FancyError startOffset $ S.singleton $ ErrorCustom errFailAt
Just (ErrorReparsing errs) ->
-- Extract and finalize the inner errors
errs >>= finalizeCustomError
-- If any custom errors are present, arbitrarily take the first one
-- (since only one custom error should be used at a time).
findCustomError :: ParseError Text HledgerParseErrorData -> Maybe HledgerParseErrorData
findCustomError err = case err of
FancyError _ errSet ->
finds (\case {ErrorCustom e -> Just e; _ -> Nothing}) errSet
_ -> Nothing
finds :: (Foldable t) => (a -> Maybe b) -> t a -> Maybe b
finds f = getAlt . foldMap (Alt . f)
--- * "Final" parse errors
--
-- | A type representing "final" parse errors that cannot be backtracked
-- from and are guaranteed to halt parsing. The anti-backtracking
-- behaviour is implemented by an 'ExceptT' layer in the parser's monad
-- stack, using this type as the 'ExceptT' error type.
--
-- We have three goals for this type:
-- (1) it should be possible to convert any parse error into a "final"
-- parse error,
-- (2) it should be possible to take a parse error thrown from an include
-- file and re-throw it in the parent file, and
-- (3) the pretty-printing of "final" parse errors should be consistent
-- with that of ordinary parse errors, but should also report a stack of
-- files for errors thrown from include files.
--
-- In order to pretty-print a "final" parse error (goal 3), it must be
-- bundled with include filepaths and its full source text. When a "final"
-- parse error is thrown from within a parser, we do not have access to
-- the full source, so we must hold the parse error until it can be joined
-- with its source (and include filepaths, if it was thrown from an
-- include file) by the parser's caller.
--
-- A parse error with include filepaths and its full source text is
-- represented by the 'FinalParseErrorBundle' type, while a parse error in
-- need of either include filepaths, full source text, or both is
-- represented by the 'FinalParseError' type.
data FinalParseError' e
-- a parse error thrown as a "final" parse error
= FinalError (ParseError Text e)
-- a parse error obtained from running a parser, e.g. using 'runParserT'
| FinalBundle (ParseErrorBundle Text e)
-- a parse error thrown from an include file
| FinalBundleWithStack (FinalParseErrorBundle' e)
deriving (Show)
type FinalParseError = FinalParseError' HledgerParseErrorData
-- We need a 'Monoid' instance for 'FinalParseError' so that 'ExceptT
-- FinalParseError m' is an instance of Alternative and MonadPlus, which
-- is needed to use some parser combinators, e.g. 'many'.
--
-- This monoid instance simply takes the first (left-most) error.
instance Semigroup (FinalParseError' e) where
e <> _ = e
instance Monoid (FinalParseError' e) where
mempty = FinalError $ FancyError 0 $
S.singleton (ErrorFail "default parse error")
mappend = (<>)
-- | A type bundling a 'ParseError' with its full source text, filepath,
-- and stack of include files. Suitable for pretty-printing.
--
-- Megaparsec's 'ParseErrorBundle' type already bundles a parse error with
-- its full source text and filepath, so we just add a stack of include
-- files.
data FinalParseErrorBundle' e = FinalParseErrorBundle'
{ finalErrorBundle :: ParseErrorBundle Text e
, includeFileStack :: [FilePath]
} deriving (Show)
type FinalParseErrorBundle = FinalParseErrorBundle' HledgerParseErrorData
--- * Constructing and throwing final parse errors
-- | Convert a "regular" parse error into a "final" parse error.
finalError :: ParseError Text e -> FinalParseError' e
finalError = FinalError
-- | Like megaparsec's 'fancyFailure', but as a "final" parse error.
finalFancyFailure
:: (MonadParsec e s m, MonadError (FinalParseError' e) m)
=> S.Set (ErrorFancy e) -> m a
finalFancyFailure errSet = do
offset <- getOffset
throwError $ FinalError $ FancyError offset errSet
-- | Like 'fail', but as a "final" parse error.
finalFail
:: (MonadParsec e s m, MonadError (FinalParseError' e) m) => String -> m a
finalFail = finalFancyFailure . S.singleton . ErrorFail
-- | Like megaparsec's 'customFailure', but as a "final" parse error.
finalCustomFailure
:: (MonadParsec e s m, MonadError (FinalParseError' e) m) => e -> m a
finalCustomFailure = finalFancyFailure . S.singleton . ErrorCustom
--- * Pretty-printing "final" parse errors
-- | Pretty-print a "final" parse error: print the stack of include files,
-- then apply the pretty-printer for parse error bundles. Note that
-- 'attachSource' must be used on a "final" parse error before it can be
-- pretty-printed.
finalErrorBundlePretty :: FinalParseErrorBundle' HledgerParseErrorData -> String
finalErrorBundlePretty bundle =
concatMap showIncludeFilepath (includeFileStack bundle)
<> customErrorBundlePretty (finalErrorBundle bundle)
where
showIncludeFilepath path = "in file included from " <> path <> ",\n"
-- | Supply a filepath and source text to a "final" parse error so that it
-- can be pretty-printed. You must ensure that you provide the appropriate
-- source text and filepath.
attachSource
:: FilePath -> Text -> FinalParseError' e -> FinalParseErrorBundle' e
attachSource filePath sourceText finalParseError = case finalParseError of
-- A parse error thrown directly with the 'FinalError' constructor
-- requires both source and filepath.
FinalError err ->
let bundle = ParseErrorBundle
{ bundleErrors = err NE.:| []
, bundlePosState = initialPosState filePath sourceText }
in FinalParseErrorBundle'
{ finalErrorBundle = bundle
, includeFileStack = [] }
-- A 'ParseErrorBundle' already has the appropriate source and filepath
-- and so needs neither.
FinalBundle peBundle -> FinalParseErrorBundle'
{ finalErrorBundle = peBundle
, includeFileStack = [] }
-- A parse error from a 'FinalParseErrorBundle' was thrown from an
-- include file, so we add the filepath to the stack.
FinalBundleWithStack fpeBundle -> fpeBundle
{ includeFileStack = filePath : includeFileStack fpeBundle }
--- * Handling parse errors from include files with "final" parse errors
-- | Parse a file with the given parser and initial state, discarding the
-- final state and re-throwing any parse errors as "final" parse errors.
parseIncludeFile
:: Monad m
=> StateT st (ParsecT HledgerParseErrorData Text (ExceptT FinalParseError m)) a
-> st
-> FilePath
-> Text
-> StateT st (ParsecT HledgerParseErrorData Text (ExceptT FinalParseError m)) a
parseIncludeFile parser initialState filepath text =
catchError parser' handler
where
parser' = do
eResult <- lift $ lift $
runParserT (evalStateT parser initialState) filepath text
case eResult of
Left parseErrorBundle -> throwError $ FinalBundle parseErrorBundle
Right result -> pure result
-- Attach source and filepath of the include file to its parse errors
handler e = throwError $ FinalBundleWithStack $ attachSource filepath text e
--- * Helpers
-- Like megaparsec's 'initialState', but instead for 'PosState'. Used when
-- constructing 'ParseErrorBundle's. The values for "tab width" and "line
-- prefix" are taken from 'initialState'.
initialPosState :: FilePath -> Text -> PosState Text
initialPosState filePath sourceText = PosState
{ pstateInput = sourceText
, pstateOffset = 0
, pstateSourcePos = initialPos filePath
, pstateTabWidth = defaultTabWidth
, pstateLinePrefix = "" }
|