File: Custom.hs

package info (click to toggle)
haskell-hledger-lib 1.32.3-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 1,336 kB
  • sloc: haskell: 14,109; makefile: 7
file content (437 lines) | stat: -rw-r--r-- 15,976 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
-- A bunch of megaparsec helpers for re-parsing etc.
-- I think these are generic apart from the HledgerParseError name.

{-# LANGUAGE BangPatterns #-}
{-# LANGUAGE CPP #-}
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE FlexibleInstances #-} -- new
{-# LANGUAGE LambdaCase #-}
{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE StandaloneDeriving #-} -- new

module Text.Megaparsec.Custom (
  -- * Custom parse error types
  HledgerParseErrorData,
  HledgerParseErrors,

  -- * Failing with an arbitrary source position
  parseErrorAt,
  parseErrorAtRegion,

  -- * Re-parsing
  SourceExcerpt,
  getExcerptText,

  excerpt_,
  reparseExcerpt,

  -- * Pretty-printing custom parse errors
  customErrorBundlePretty,


  -- * "Final" parse errors
  FinalParseError,
  FinalParseError',
  FinalParseErrorBundle,
  FinalParseErrorBundle',

  -- * Constructing "final" parse errors
  finalError,
  finalFancyFailure,
  finalFail,
  finalCustomFailure,

  -- * Pretty-printing "final" parse errors
  finalErrorBundlePretty,
  attachSource,

  -- * Handling parse errors from include files with "final" parse errors
  parseIncludeFile,
)
where

import Control.Monad.Except (ExceptT, MonadError, catchError, throwError)
import Control.Monad.State.Strict (StateT, evalStateT)
import Control.Monad.Trans.Class (lift)
import qualified Data.List.NonEmpty as NE
import Data.Monoid (Alt(..))
import qualified Data.Set as S
import Data.Text (Text)
import Text.Megaparsec


--- * Custom parse error types

-- | Custom error data for hledger parsers. Specialised for a 'Text' parse stream.
-- ReparseableTextParseErrorData ?
data HledgerParseErrorData
  -- | Fail with a message at a specific source position interval. The
  -- interval must be contained within a single line.
  = ErrorFailAt Int -- Starting offset
                Int -- Ending offset
                String -- Error message
  -- | Re-throw parse errors obtained from the "re-parsing" of an excerpt
  -- of the source text.
  | ErrorReparsing
      (NE.NonEmpty (ParseError Text HledgerParseErrorData)) -- Source fragment parse errors
  deriving (Show, Eq, Ord)

-- | A specialised version of ParseErrorBundle: 
-- a non-empty collection of hledger parse errors, 
-- equipped with PosState to help pretty-print them.
-- Specialised for a 'Text' parse stream.
type HledgerParseErrors = ParseErrorBundle Text HledgerParseErrorData

-- We require an 'Ord' instance for 'CustomError' so that they may be
-- stored in a 'Set'. The actual instance is inconsequential, so we just
-- derive it, but the derived instance requires an (orphan) instance for
-- 'ParseError'. Hopefully this does not cause any trouble.

deriving instance Ord (ParseError Text HledgerParseErrorData)

-- Note: the pretty-printing of our 'HledgerParseErrorData' type is only partally
-- defined in its 'ShowErrorComponent' instance; we perform additional
-- adjustments in 'customErrorBundlePretty'.

instance ShowErrorComponent HledgerParseErrorData where
  showErrorComponent (ErrorFailAt _ _ errMsg) = errMsg
  showErrorComponent (ErrorReparsing _) = "" -- dummy value

  errorComponentLen (ErrorFailAt startOffset endOffset _) =
    endOffset - startOffset
  errorComponentLen (ErrorReparsing _) = 1 -- dummy value


--- * Failing with an arbitrary source position

-- | Fail at a specific source position, given by the raw offset from the
-- start of the input stream (the number of tokens processed at that
-- point).

parseErrorAt :: Int -> String -> HledgerParseErrorData
parseErrorAt offset = ErrorFailAt offset (offset+1)

-- | Fail at a specific source interval, given by the raw offsets of its
-- endpoints from the start of the input stream (the numbers of tokens
-- processed at those points).
--
-- Note that care must be taken to ensure that the specified interval does
-- not span multiple lines of the input source. This will not be checked.

parseErrorAtRegion
  :: Int    -- ^ Start offset
  -> Int    -- ^ End end offset
  -> String -- ^ Error message
  -> HledgerParseErrorData
parseErrorAtRegion startOffset endOffset msg =
  if startOffset < endOffset
    then ErrorFailAt startOffset endOffset msg'
    else ErrorFailAt startOffset (startOffset+1) msg'
  where
    msg' = "\n" ++ msg


--- * Re-parsing

-- | A fragment of source suitable for "re-parsing". The purpose of this
-- data type is to preserve the content and source position of the excerpt
-- so that parse errors raised during "re-parsing" may properly reference
-- the original source.

data SourceExcerpt = SourceExcerpt Int  -- Offset of beginning of excerpt
                                   Text -- Fragment of source file

-- | Get the raw text of a source excerpt.

getExcerptText :: SourceExcerpt -> Text
getExcerptText (SourceExcerpt _ txt) = txt

-- | 'excerpt_ p' applies the given parser 'p' and extracts the portion of
-- the source consumed by 'p', along with the source position of this
-- portion. This is the only way to create a source excerpt suitable for
-- "re-parsing" by 'reparseExcerpt'.

-- This function could be extended to return the result of 'p', but we don't
-- currently need this.

excerpt_ :: MonadParsec HledgerParseErrorData Text m => m a -> m SourceExcerpt
excerpt_ p = do
  offset <- getOffset
  (!txt, _) <- match p
  pure $ SourceExcerpt offset txt

-- | 'reparseExcerpt s p' "re-parses" the source excerpt 's' using the
-- parser 'p'. Parse errors raised by 'p' will be re-thrown at the source
-- position of the source excerpt.
--
-- In order for the correct source file to be displayed when re-throwing
-- parse errors, we must ensure that the source file during the use of
-- 'reparseExcerpt s p' is the same as that during the use of 'excerpt_'
-- that generated the source excerpt 's'. However, we can usually expect
-- this condition to be satisfied because, at the time of writing, the
-- only changes of source file in the codebase take place through include
-- files, and the parser for include files neither accepts nor returns
-- 'SourceExcerpt's.

reparseExcerpt
  :: Monad m
  => SourceExcerpt
  -> ParsecT HledgerParseErrorData Text m a
  -> ParsecT HledgerParseErrorData Text m a
reparseExcerpt (SourceExcerpt offset txt) p = do
  (_, res) <- lift $ runParserT' p (offsetInitialState offset txt)
  case res of
    Right result -> pure result
    Left errBundle -> customFailure $ ErrorReparsing $ bundleErrors errBundle

  where
    offsetInitialState :: Int -> s ->
#if MIN_VERSION_megaparsec(8,0,0)
      State s e
#else
      State s
#endif
    offsetInitialState initialOffset s = State
      { stateInput  = s
      , stateOffset = initialOffset
      , statePosState = PosState
        { pstateInput = s
        , pstateOffset = initialOffset
        , pstateSourcePos = initialPos ""
        , pstateTabWidth = defaultTabWidth
        , pstateLinePrefix = ""
        }
#if MIN_VERSION_megaparsec(8,0,0)
      , stateParseErrors = []
#endif
      }

--- * Pretty-printing custom parse errors

-- | Pretty-print our custom parse errors. It is necessary to use this
-- instead of 'errorBundlePretty' when custom parse errors are thrown.
--
-- This function intercepts our custom parse errors and applies final
-- adjustments ('finalizeCustomError') before passing them to
-- 'errorBundlePretty'. These adjustments are part of the implementation
-- of the behaviour of our custom parse errors.
--
-- Note: We must ensure that the offset of the 'PosState' of the provided
-- 'ParseErrorBundle' is no larger than the offset specified by a
-- 'ErrorFailAt' constructor. This is guaranteed if this offset is set to
-- 0 (that is, the beginning of the source file), which is the
-- case for 'ParseErrorBundle's returned from 'runParserT'.

customErrorBundlePretty :: HledgerParseErrors -> String
customErrorBundlePretty errBundle =
  let errBundle' = errBundle { bundleErrors =
        NE.sortWith errorOffset $ -- megaparsec requires that the list of errors be sorted by their offsets
        bundleErrors errBundle >>= finalizeCustomError }
  in  errorBundlePretty errBundle'

  where
    finalizeCustomError
      :: ParseError Text HledgerParseErrorData -> NE.NonEmpty (ParseError Text HledgerParseErrorData)
    finalizeCustomError err = case findCustomError err of
      Nothing -> pure err

      Just errFailAt@(ErrorFailAt startOffset _ _) ->
        -- Adjust the offset
        pure $ FancyError startOffset $ S.singleton $ ErrorCustom errFailAt

      Just (ErrorReparsing errs) ->
        -- Extract and finalize the inner errors
        errs >>= finalizeCustomError

    -- If any custom errors are present, arbitrarily take the first one
    -- (since only one custom error should be used at a time).
    findCustomError :: ParseError Text HledgerParseErrorData -> Maybe HledgerParseErrorData
    findCustomError err = case err of
      FancyError _ errSet ->
        finds (\case {ErrorCustom e -> Just e; _ -> Nothing}) errSet
      _ -> Nothing

    finds :: (Foldable t) => (a -> Maybe b) -> t a -> Maybe b
    finds f = getAlt . foldMap (Alt . f)


--- * "Final" parse errors
--
-- | A type representing "final" parse errors that cannot be backtracked
-- from and are guaranteed to halt parsing. The anti-backtracking
-- behaviour is implemented by an 'ExceptT' layer in the parser's monad
-- stack, using this type as the 'ExceptT' error type.
--
-- We have three goals for this type:
-- (1) it should be possible to convert any parse error into a "final"
-- parse error,
-- (2) it should be possible to take a parse error thrown from an include
-- file and re-throw it in the parent file, and
-- (3) the pretty-printing of "final" parse errors should be consistent
-- with that of ordinary parse errors, but should also report a stack of
-- files for errors thrown from include files.
--
-- In order to pretty-print a "final" parse error (goal 3), it must be
-- bundled with include filepaths and its full source text. When a "final"
-- parse error is thrown from within a parser, we do not have access to
-- the full source, so we must hold the parse error until it can be joined
-- with its source (and include filepaths, if it was thrown from an
-- include file) by the parser's caller.
--
-- A parse error with include filepaths and its full source text is
-- represented by the 'FinalParseErrorBundle' type, while a parse error in
-- need of either include filepaths, full source text, or both is
-- represented by the 'FinalParseError' type.

data FinalParseError' e
  -- a parse error thrown as a "final" parse error
  = FinalError           (ParseError Text e)
  -- a parse error obtained from running a parser, e.g. using 'runParserT'
  | FinalBundle          (ParseErrorBundle Text e)
  -- a parse error thrown from an include file
  | FinalBundleWithStack (FinalParseErrorBundle' e)
  deriving (Show)

type FinalParseError = FinalParseError' HledgerParseErrorData

-- We need a 'Monoid' instance for 'FinalParseError' so that 'ExceptT
-- FinalParseError m' is an instance of Alternative and MonadPlus, which
-- is needed to use some parser combinators, e.g. 'many'.
--
-- This monoid instance simply takes the first (left-most) error.

instance Semigroup (FinalParseError' e) where
  e <> _ = e

instance Monoid (FinalParseError' e) where
  mempty = FinalError $ FancyError 0 $
            S.singleton (ErrorFail "default parse error")
  mappend = (<>)

-- | A type bundling a 'ParseError' with its full source text, filepath,
-- and stack of include files. Suitable for pretty-printing.
--
-- Megaparsec's 'ParseErrorBundle' type already bundles a parse error with
-- its full source text and filepath, so we just add a stack of include
-- files.

data FinalParseErrorBundle' e = FinalParseErrorBundle'
  { finalErrorBundle :: ParseErrorBundle Text e
  , includeFileStack :: [FilePath]
  } deriving (Show)

type FinalParseErrorBundle = FinalParseErrorBundle' HledgerParseErrorData


--- * Constructing and throwing final parse errors

-- | Convert a "regular" parse error into a "final" parse error.

finalError :: ParseError Text e -> FinalParseError' e
finalError = FinalError

-- | Like megaparsec's 'fancyFailure', but as a "final" parse error.

finalFancyFailure
  :: (MonadParsec e s m, MonadError (FinalParseError' e) m)
  => S.Set (ErrorFancy e) -> m a
finalFancyFailure errSet = do
  offset <- getOffset
  throwError $ FinalError $ FancyError offset errSet

-- | Like 'fail', but as a "final" parse error.

finalFail
  :: (MonadParsec e s m, MonadError (FinalParseError' e) m) => String -> m a
finalFail = finalFancyFailure . S.singleton . ErrorFail

-- | Like megaparsec's 'customFailure', but as a "final" parse error.

finalCustomFailure
  :: (MonadParsec e s m, MonadError (FinalParseError' e) m) => e -> m a
finalCustomFailure = finalFancyFailure . S.singleton . ErrorCustom


--- * Pretty-printing "final" parse errors

-- | Pretty-print a "final" parse error: print the stack of include files,
-- then apply the pretty-printer for parse error bundles. Note that
-- 'attachSource' must be used on a "final" parse error before it can be
-- pretty-printed.

finalErrorBundlePretty :: FinalParseErrorBundle' HledgerParseErrorData -> String
finalErrorBundlePretty bundle =
     concatMap showIncludeFilepath (includeFileStack bundle)
  <> customErrorBundlePretty (finalErrorBundle bundle)
  where
    showIncludeFilepath path = "in file included from " <> path <> ",\n"

-- | Supply a filepath and source text to a "final" parse error so that it
-- can be pretty-printed. You must ensure that you provide the appropriate
-- source text and filepath.

attachSource
  :: FilePath -> Text -> FinalParseError' e -> FinalParseErrorBundle' e
attachSource filePath sourceText finalParseError = case finalParseError of

  -- A parse error thrown directly with the 'FinalError' constructor
  -- requires both source and filepath.
  FinalError err ->
    let bundle = ParseErrorBundle
          { bundleErrors = err NE.:| []
          , bundlePosState = initialPosState filePath sourceText }
    in  FinalParseErrorBundle'
          { finalErrorBundle = bundle
          , includeFileStack  = [] }

  -- A 'ParseErrorBundle' already has the appropriate source and filepath
  -- and so needs neither.
  FinalBundle peBundle -> FinalParseErrorBundle'
    { finalErrorBundle = peBundle
    , includeFileStack = [] }

  -- A parse error from a 'FinalParseErrorBundle' was thrown from an
  -- include file, so we add the filepath to the stack.
  FinalBundleWithStack fpeBundle -> fpeBundle
    { includeFileStack = filePath : includeFileStack fpeBundle }


--- * Handling parse errors from include files with "final" parse errors

-- | Parse a file with the given parser and initial state, discarding the
-- final state and re-throwing any parse errors as "final" parse errors.

parseIncludeFile
  :: Monad m
  => StateT st (ParsecT HledgerParseErrorData Text (ExceptT FinalParseError m)) a
  -> st
  -> FilePath
  -> Text
  -> StateT st (ParsecT HledgerParseErrorData Text (ExceptT FinalParseError m)) a
parseIncludeFile parser initialState filepath text =
  catchError parser' handler
  where
    parser' = do
      eResult <- lift $ lift $
                  runParserT (evalStateT parser initialState) filepath text
      case eResult of
        Left parseErrorBundle -> throwError $ FinalBundle parseErrorBundle
        Right result -> pure result

    -- Attach source and filepath of the include file to its parse errors
    handler e = throwError $ FinalBundleWithStack $ attachSource filepath text e


--- * Helpers

-- Like megaparsec's 'initialState', but instead for 'PosState'. Used when
-- constructing 'ParseErrorBundle's. The values for "tab width" and "line
-- prefix" are taken from 'initialState'.

initialPosState :: FilePath -> Text -> PosState Text
initialPosState filePath sourceText = PosState
  { pstateInput      = sourceText
  , pstateOffset     = 0
  , pstateSourcePos  = initialPos filePath
  , pstateTabWidth   = defaultTabWidth
  , pstateLinePrefix = "" }