File: TDFA.hs

package info (click to toggle)
haskell-regex 1.1.0.2-4
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 424 kB
  • sloc: haskell: 4,533; makefile: 3
file content (487 lines) | stat: -rw-r--r-- 16,316 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
{-# LANGUAGE NoImplicitPrelude          #-}
{-# LANGUAGE RecordWildCards            #-}
{-# LANGUAGE MultiParamTypeClasses      #-}
{-# LANGUAGE TypeSynonymInstances       #-}
{-# LANGUAGE FlexibleInstances          #-}
{-# LANGUAGE FlexibleContexts           #-}
{-# LANGUAGE CPP                        #-}
#if __GLASGOW_HASKELL__ >= 800
{-# OPTIONS_GHC -fno-warn-redundant-constraints #-}
{-# LANGUAGE TemplateHaskellQuotes      #-}
#else
{-# LANGUAGE QuasiQuotes                #-}
{-# LANGUAGE TemplateHaskell            #-}
#endif
{-# OPTIONS_GHC -fno-warn-orphans               #-}
{-# OPTIONS_GHC -fno-warn-unused-imports        #-}

module Text.RE.ZeInternals.TDFA
  ( -- * About
    -- $about

    -- * RE Type
    RE
  , regexType
  , reOptions
  , reSource
  , reCaptureNames
  , reRegex
  -- * IsOptions Class and REOptions Type
  , IsOption(..)
  , REOptions
  , defaultREOptions
  , noPreludeREOptions
  , unpackSimpleREOptions
  -- * Compiling Regular Expressions
  , compileRegex
  , compileRegexWith
  , compileRegexWithOptions
  , compileRegexWithOptionsForQQ
  -- * Compiling Search-Replace Templates
  , compileSearchReplace
  , compileSearchReplaceWith
  , compileSearchReplaceWithOptions
  -- * Escaping String
  , escape
  , escapeWith
  , escapeWithOptions
  , escapeREString
  -- * Macros Standard Environment
  , prelude
  , preludeEnv
  , preludeTestsFailing
  , preludeTable
  , preludeSummary
  , preludeSources
  , preludeSource
  -- * The Quasi Quoters
  , re
  , reMS
  , reMI
  , reBS
  , reBI
  , reMultilineSensitive
  , reMultilineInsensitive
  , reBlockSensitive
  , reBlockInsensitive
  , re_
  , cp
  ) where

import           Control.Monad.Fail
import           Data.Functor.Identity
import           Language.Haskell.TH
import           Language.Haskell.TH.Quote
import           Prelude.Compat                       hiding (fail)
import           Text.RE.REOptions
import           Text.RE.Replace
import           Text.RE.TestBench
import           Text.RE.Tools
import           Text.RE.ZeInternals
import           Text.RE.ZeInternals.Types.Poss
import           Text.Regex.TDFA


-- | the RE type for this back end representing a well-formed, compiled
-- RE
data RE =
  RE
    { _re_options :: !REOptions
    , _re_source  :: !String
    , _re_cnames  :: !CaptureNames
    , _re_regex   :: !Regex
    }

-- | some functions in the "Text.RE.TestBench" need the back end to
-- be passed dynamically as a 'RegexType' parameters: use 'regexType'
-- for this
regexType :: RegexType
regexType =
  mkTDFA $ \txt env md -> txt =~ mdRegexSource regexType ExclCaptures env md

-- | extract the 'REOptions' from the @RE@
reOptions :: RE -> REOptions
reOptions = _re_options

-- | extract the RE source string from the @RE@
reSource :: RE -> String
reSource = _re_source

-- | extract the 'CaptureNames' from the @RE@
reCaptureNames :: RE -> CaptureNames
reCaptureNames = _re_cnames

-- | extract the back end compiled 'Regex' type from the @RE@
reRegex :: RE -> Regex
reRegex = _re_regex


------------------------------------------------------------------------
-- IsOption & REOptions
------------------------------------------------------------------------

-- | a number of types can be used to encode 'REOptions_', each of which
-- is made a member of this class
class IsOption o where
  -- | convert the @o@ type into an @REOptions@
  makeREOptions :: o -> REOptions

-- | and the REOptions for this back end (see "Text.RE.REOptions"
-- for details)
type REOptions = REOptions_ RE CompOption ExecOption

instance IsOption SimpleREOptions where
  makeREOptions    = unpackSimpleREOptions

instance IsOption (Macros RE) where
  makeREOptions ms = REOptions ms def_comp_option def_exec_option

instance IsOption CompOption where
  makeREOptions co = REOptions prelude co def_exec_option

instance IsOption ExecOption where
  makeREOptions eo = REOptions prelude def_comp_option eo

instance IsOption REOptions where
  makeREOptions    = id

instance IsOption () where
  makeREOptions _  = unpackSimpleREOptions minBound

-- | the default 'REOptions'
defaultREOptions :: REOptions
defaultREOptions = makeREOptions (minBound::SimpleREOptions)

-- | the default 'REOptions' but with no RE macros defined
noPreludeREOptions :: REOptions
noPreludeREOptions = defaultREOptions { optionsMacs = emptyMacros }

-- | convert a universal 'SimpleReOptions' into the 'REOptions' used
-- by this back end
unpackSimpleREOptions :: SimpleREOptions -> REOptions
unpackSimpleREOptions sro =
  REOptions
    { optionsMacs = prelude
    , optionsComp = comp
    , optionsExec = defaultExecOpt
    }
  where
    comp = defaultCompOpt
      { caseSensitive = cs
      , multiline     = ml
      }

    (ml,cs) = case sro of
        MultilineSensitive    -> (,) True  True
        MultilineInsensitive  -> (,) True  False
        BlockSensitive        -> (,) False True
        BlockInsensitive      -> (,) False False


------------------------------------------------------------------------
-- Compiling Regular Expressions
------------------------------------------------------------------------

-- | compile a 'String' into a 'RE' with the default options,
-- generating an error if the RE is not well formed
compileRegex :: (Functor m,Monad m,MonadFail m) => String -> m RE
compileRegex = compileRegexWith minBound

-- | compile a 'String' into a 'RE' using the given @SimpleREOptions@,
-- generating an error if the RE is not well formed
compileRegexWith :: (Functor m,Monad m,MonadFail m) => SimpleREOptions -> String -> m RE
compileRegexWith = compileRegexWithOptions

-- | compile a 'String' into a 'RE' using the given @SimpleREOptions@,
-- generating an error if the RE is not well formed
compileRegexWithOptions :: (IsOption o, Functor m, Monad   m, MonadFail m)
                        => o
                        -> String
                        -> m RE
compileRegexWithOptions = compileRegex_ RPM_raw . makeREOptions

-- | compile a 'String' into a 'RE' for q quasi quoter, using the given
-- @SimpleREOptions@, generating an error if the RE is not well formed
compileRegexWithOptionsForQQ :: (IsOption o, Functor m, Monad   m,MonadFail m)
                             => o
                             -> String
                             -> m RE
compileRegexWithOptionsForQQ = compileRegex_ RPM_qq . makeREOptions


------------------------------------------------------------------------
-- Compiling Search Replace Templates
------------------------------------------------------------------------

-- | compile a SearchReplace template generating errors if the RE or
-- the template are not well formed, all capture references being checked
compileSearchReplace :: (Monad m,MonadFail m,Functor m,IsRegex RE s)
                     => String
                     -> String
                     -> m (SearchReplace RE s)
compileSearchReplace = compileSearchReplaceWith minBound

-- | compile a SearchReplace template, with simple options, generating
-- errors if the RE or the template are not well formed, all capture
-- references being checked
compileSearchReplaceWith :: (Monad m,MonadFail m,Functor m,IsRegex RE s)
                         => SimpleREOptions
                         -> String
                         -> String
                         -> m (SearchReplace RE s)
compileSearchReplaceWith sro = compileSearchAndReplace_ packR $ poss2either . compileRegexWith sro

-- | compile a SearchReplace template, with general options, generating
-- errors if the RE or the template are not well formed, all capture
-- references being checked
compileSearchReplaceWithOptions :: (Monad m,MonadFail m,Functor m,IsRegex RE s)
                                => REOptions
                                -> String
                                -> String
                                -> m (SearchReplace RE s)
compileSearchReplaceWithOptions os = compileSearchAndReplace_ packR $ poss2either . compileRegexWithOptions os


------------------------------------------------------------------------
-- Escaping Strings
------------------------------------------------------------------------

-- | convert a string into a RE that matches that string, and apply it
-- to an argument continuation function to make up the RE string to be
-- compiled; e.g., to compile a RE that will only match the string:
--
--  @maybe undefined id . escape ((\"^\"++) . (++\"$\"))@
--
escape :: (Functor m,Monad m,MonadFail m)
       => (String->String)
       -> String
       -> m RE
escape = escapeWith minBound

-- | a variant of 'escape' where the 'SimpleREOptions' are specified
escapeWith :: (Functor m,Monad m,MonadFail m)
           => SimpleREOptions
           -> (String->String)
           -> String
           -> m RE
escapeWith = escapeWithOptions

-- | a variant of 'escapeWith' that allows an 'IsOption' RE option
-- to be specified
escapeWithOptions :: ( IsOption o, Functor m, Monad m,MonadFail m)
                  => o
                  -> (String->String)
                  -> String
                  -> m RE
escapeWithOptions o f = compileRegexWithOptions o . f . escapeREString


------------------------------------------------------------------------
-- Macro Standard Environment
------------------------------------------------------------------------

-- | the standard table of 'Macros' used to compile REs (which can be
-- extended or replace: see "Text.RE.TestBench")
prelude :: Macros RE
prelude = runIdentity $ preludeMacros mk regexType ExclCaptures
  where
    mk = Identity . unsafeCompileRegex_ RPM_raw noPreludeREOptions

-- | the standard 'MacroEnv' for this back end (see "Text.RE.TestBench")
preludeEnv :: MacroEnv
preludeEnv = preludeMacroEnv regexType

-- | the macros in the standard environment that are failing their tests
-- (checked by the test suite to be empty)
preludeTestsFailing :: [MacroID]
preludeTestsFailing = badMacros $ preludeMacroEnv regexType

-- | a table the standard macros in markdown format
preludeTable :: String
preludeTable = preludeMacroTable regexType

-- | a summary of the macros in the standard environment for this back
-- end in plain text
preludeSummary :: PreludeMacro -> String
preludeSummary = preludeMacroSummary regexType

-- | a listing of the RE text for each macro in the standard environment
-- with all macros expanded to normal form
preludeSources :: String
preludeSources = preludeMacroSources regexType

-- | the prelude source of a given macro in the standard environment
preludeSource :: PreludeMacro -> String
preludeSource = preludeMacroSource regexType


------------------------------------------------------------------------
-- Quasi Quoters
------------------------------------------------------------------------

-- | @[re| ... |]@, is equivalent to @[reMultilineSensitive| ... |]@,
-- compiling a case-sensitive, multi-line RE
re                      :: QuasiQuoter
re                       = re' $ Just minBound

-- | @[reMultilineSensitive| ... |]@, compiles a case-sensitive, multi-line RE
reMultilineSensitive    :: QuasiQuoter
reMultilineSensitive     = re' $ Just  MultilineSensitive

-- | @[reMultilineInsensitive| ... |]@, compiles a case-insensitive, multi-line RE
reMultilineInsensitive  :: QuasiQuoter
reMultilineInsensitive   = re' $ Just  MultilineInsensitive

-- | @[reMultilineInsensitive| ... |]@, compiles a case-sensitive, non-multi-line RE
reBlockSensitive        :: QuasiQuoter
reBlockSensitive         = re' $ Just  BlockSensitive

-- | @[reMultilineInsensitive| ... |]@, compiles a case-insensitive, non-multi-line RE
reBlockInsensitive      :: QuasiQuoter
reBlockInsensitive       = re' $ Just  BlockInsensitive

-- | @[reMS| ... |]@ is a shorthand for @[reMultilineSensitive| ... |]@
reMS                     :: QuasiQuoter
reMS                     = reMultilineSensitive

-- | @[reMI| ... |]@ is a shorthand for @[reMultilineInsensitive| ... |]@
reMI                    :: QuasiQuoter
reMI                     = reMultilineInsensitive

-- | @[reBS| ... |]@ is a shorthand for @[reBlockSensitive| ... |]@
reBS                    :: QuasiQuoter
reBS                     = reBlockSensitive

-- | @[reBI| ... |]@ is a shorthand for @[reBlockInsensitive| ... |]@
reBI                    :: QuasiQuoter
reBI                     = reBlockInsensitive

-- | @[re_| ... |]@ compiles a RE to produce a function that takes
-- the RE options (e.g., a 'SimpleREOptions' value) and yields the
-- RE compiled with those options. For example,
--
--   @countMatches $ s *=~ [re_|[0-9a-f]+|] MultilineInsensitive@
--
-- counts the number of hexadecimal digit strings in 's', allowing
-- for upper- or lower-case hex digits (which is entirely equivalent
-- in this example to just using @[reMultilineInsensitive|[0-9a-f]+|]@).
re_                     :: QuasiQuoter
re_                      = re'   Nothing


------------------------------------------------------------------------
-- re Helpers
------------------------------------------------------------------------

re' :: Maybe SimpleREOptions -> QuasiQuoter
re' mb = case mb of
  Nothing  ->
    (qq0 "re'")
      { quoteExp = parse minBound (\rs->[|flip unsafeCompileRegex rs|])
      }
  Just sro ->
    (qq0 "re'")
      { quoteExp = parse sro (\rs->[|unsafeCompileRegexSimple sro rs|])
      }
  where
    parse :: SimpleREOptions -> (String->Q Exp) -> String -> Q Exp
    parse sro mk rs = poss error (\_->mk rs) $ compileRegex_ RPM_qq os rs
      where
        os = unpackSimpleREOptions sro

data RegexParseMode
  = RPM_qq
  | RPM_raw
  deriving (Eq,Show)

unsafeCompileRegexSimple :: SimpleREOptions -> String -> RE
unsafeCompileRegexSimple sro re_s = unsafeCompileRegex_ RPM_qq os re_s
  where
    os = unpackSimpleREOptions sro

unsafeCompileRegex :: IsOption o
                   => o
                   -> String
                   -> RE
unsafeCompileRegex = unsafeCompileRegex_ RPM_qq . makeREOptions

unsafeCompileRegex_ :: RegexParseMode -> REOptions -> String -> RE
unsafeCompileRegex_ rpm os = poss oops id . compileRegex_ rpm os
  where
    oops = error . ("unsafeCompileRegex: " ++)

compileRegex_ :: (Functor m,MonadFail m,Monad m)
              => RegexParseMode
              -> REOptions
              -> String
              -> m RE
compileRegex_ rpm os re_s = uncurry mk <$> compileRegex' rpm os re_s
  where
    mk cnms rx =
      RE
        { _re_options = os
        , _re_source  = re_s
        , _re_cnames  = cnms
        , _re_regex   = rx
        }

compileRegex' :: (Functor m,MonadFail m,Monad m)
              => RegexParseMode
              -> REOptions
              -> String
              -> m (CaptureNames,Regex)
compileRegex' rpm REOptions{..} s0 = do
    ((_,cnms),s2) <- either fail return $ extractNamedCaptures s1
    (,) cnms <$> makeRegexOptsM optionsComp optionsExec s2
  where
    s1 = expandMacros reSource optionsMacs $ pp s0

    pp = case rpm of
      RPM_qq  -> qq_prep
      RPM_raw -> id


------------------------------------------------------------------------
-- Preprocessing Literal REs
------------------------------------------------------------------------

qq_prep :: String -> String
qq_prep s0 = case s0 of
    ""  -> ""
    c:s -> case c of
      '\\' -> backslash s
      _    -> c : qq_prep s
  where
    backslash s1 = case s1 of
      ""  -> "\\"
      c:s -> case c of
        'a'  -> '\a'    : qq_prep s
        'b'  -> '\b'    : qq_prep s
        'f'  -> '\f'    : qq_prep s
        'n'  -> '\n'    : qq_prep s
        'r'  -> '\r'    : qq_prep s
        't'  -> '\t'    : qq_prep s
        'v'  -> '\v'    : qq_prep s
        _    -> '\\': c : qq_prep s


------------------------------------------------------------------------
-- Options Helpers
------------------------------------------------------------------------

def_comp_option :: CompOption
def_comp_option = optionsComp defaultREOptions

def_exec_option :: ExecOption
def_exec_option = optionsExec defaultREOptions


------------------------------------------------------------------------
-- Haddock Sections
------------------------------------------------------------------------

-- $about
--
-- This module provides the regex PCRE back end. Most of the functions that
-- you will need for day to day use are provided by the primary API modules
-- (e.g., "Text.RE.TDFA.Text").