1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487
|
{-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE RecordWildCards #-}
{-# LANGUAGE MultiParamTypeClasses #-}
{-# LANGUAGE TypeSynonymInstances #-}
{-# LANGUAGE FlexibleInstances #-}
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE CPP #-}
#if __GLASGOW_HASKELL__ >= 800
{-# OPTIONS_GHC -fno-warn-redundant-constraints #-}
{-# LANGUAGE TemplateHaskellQuotes #-}
#else
{-# LANGUAGE QuasiQuotes #-}
{-# LANGUAGE TemplateHaskell #-}
#endif
{-# OPTIONS_GHC -fno-warn-orphans #-}
{-# OPTIONS_GHC -fno-warn-unused-imports #-}
module Text.RE.ZeInternals.TDFA
( -- * About
-- $about
-- * RE Type
RE
, regexType
, reOptions
, reSource
, reCaptureNames
, reRegex
-- * IsOptions Class and REOptions Type
, IsOption(..)
, REOptions
, defaultREOptions
, noPreludeREOptions
, unpackSimpleREOptions
-- * Compiling Regular Expressions
, compileRegex
, compileRegexWith
, compileRegexWithOptions
, compileRegexWithOptionsForQQ
-- * Compiling Search-Replace Templates
, compileSearchReplace
, compileSearchReplaceWith
, compileSearchReplaceWithOptions
-- * Escaping String
, escape
, escapeWith
, escapeWithOptions
, escapeREString
-- * Macros Standard Environment
, prelude
, preludeEnv
, preludeTestsFailing
, preludeTable
, preludeSummary
, preludeSources
, preludeSource
-- * The Quasi Quoters
, re
, reMS
, reMI
, reBS
, reBI
, reMultilineSensitive
, reMultilineInsensitive
, reBlockSensitive
, reBlockInsensitive
, re_
, cp
) where
import Control.Monad.Fail
import Data.Functor.Identity
import Language.Haskell.TH
import Language.Haskell.TH.Quote
import Prelude.Compat hiding (fail)
import Text.RE.REOptions
import Text.RE.Replace
import Text.RE.TestBench
import Text.RE.Tools
import Text.RE.ZeInternals
import Text.RE.ZeInternals.Types.Poss
import Text.Regex.TDFA
-- | the RE type for this back end representing a well-formed, compiled
-- RE
data RE =
RE
{ _re_options :: !REOptions
, _re_source :: !String
, _re_cnames :: !CaptureNames
, _re_regex :: !Regex
}
-- | some functions in the "Text.RE.TestBench" need the back end to
-- be passed dynamically as a 'RegexType' parameters: use 'regexType'
-- for this
regexType :: RegexType
regexType =
mkTDFA $ \txt env md -> txt =~ mdRegexSource regexType ExclCaptures env md
-- | extract the 'REOptions' from the @RE@
reOptions :: RE -> REOptions
reOptions = _re_options
-- | extract the RE source string from the @RE@
reSource :: RE -> String
reSource = _re_source
-- | extract the 'CaptureNames' from the @RE@
reCaptureNames :: RE -> CaptureNames
reCaptureNames = _re_cnames
-- | extract the back end compiled 'Regex' type from the @RE@
reRegex :: RE -> Regex
reRegex = _re_regex
------------------------------------------------------------------------
-- IsOption & REOptions
------------------------------------------------------------------------
-- | a number of types can be used to encode 'REOptions_', each of which
-- is made a member of this class
class IsOption o where
-- | convert the @o@ type into an @REOptions@
makeREOptions :: o -> REOptions
-- | and the REOptions for this back end (see "Text.RE.REOptions"
-- for details)
type REOptions = REOptions_ RE CompOption ExecOption
instance IsOption SimpleREOptions where
makeREOptions = unpackSimpleREOptions
instance IsOption (Macros RE) where
makeREOptions ms = REOptions ms def_comp_option def_exec_option
instance IsOption CompOption where
makeREOptions co = REOptions prelude co def_exec_option
instance IsOption ExecOption where
makeREOptions eo = REOptions prelude def_comp_option eo
instance IsOption REOptions where
makeREOptions = id
instance IsOption () where
makeREOptions _ = unpackSimpleREOptions minBound
-- | the default 'REOptions'
defaultREOptions :: REOptions
defaultREOptions = makeREOptions (minBound::SimpleREOptions)
-- | the default 'REOptions' but with no RE macros defined
noPreludeREOptions :: REOptions
noPreludeREOptions = defaultREOptions { optionsMacs = emptyMacros }
-- | convert a universal 'SimpleReOptions' into the 'REOptions' used
-- by this back end
unpackSimpleREOptions :: SimpleREOptions -> REOptions
unpackSimpleREOptions sro =
REOptions
{ optionsMacs = prelude
, optionsComp = comp
, optionsExec = defaultExecOpt
}
where
comp = defaultCompOpt
{ caseSensitive = cs
, multiline = ml
}
(ml,cs) = case sro of
MultilineSensitive -> (,) True True
MultilineInsensitive -> (,) True False
BlockSensitive -> (,) False True
BlockInsensitive -> (,) False False
------------------------------------------------------------------------
-- Compiling Regular Expressions
------------------------------------------------------------------------
-- | compile a 'String' into a 'RE' with the default options,
-- generating an error if the RE is not well formed
compileRegex :: (Functor m,Monad m,MonadFail m) => String -> m RE
compileRegex = compileRegexWith minBound
-- | compile a 'String' into a 'RE' using the given @SimpleREOptions@,
-- generating an error if the RE is not well formed
compileRegexWith :: (Functor m,Monad m,MonadFail m) => SimpleREOptions -> String -> m RE
compileRegexWith = compileRegexWithOptions
-- | compile a 'String' into a 'RE' using the given @SimpleREOptions@,
-- generating an error if the RE is not well formed
compileRegexWithOptions :: (IsOption o, Functor m, Monad m, MonadFail m)
=> o
-> String
-> m RE
compileRegexWithOptions = compileRegex_ RPM_raw . makeREOptions
-- | compile a 'String' into a 'RE' for q quasi quoter, using the given
-- @SimpleREOptions@, generating an error if the RE is not well formed
compileRegexWithOptionsForQQ :: (IsOption o, Functor m, Monad m,MonadFail m)
=> o
-> String
-> m RE
compileRegexWithOptionsForQQ = compileRegex_ RPM_qq . makeREOptions
------------------------------------------------------------------------
-- Compiling Search Replace Templates
------------------------------------------------------------------------
-- | compile a SearchReplace template generating errors if the RE or
-- the template are not well formed, all capture references being checked
compileSearchReplace :: (Monad m,MonadFail m,Functor m,IsRegex RE s)
=> String
-> String
-> m (SearchReplace RE s)
compileSearchReplace = compileSearchReplaceWith minBound
-- | compile a SearchReplace template, with simple options, generating
-- errors if the RE or the template are not well formed, all capture
-- references being checked
compileSearchReplaceWith :: (Monad m,MonadFail m,Functor m,IsRegex RE s)
=> SimpleREOptions
-> String
-> String
-> m (SearchReplace RE s)
compileSearchReplaceWith sro = compileSearchAndReplace_ packR $ poss2either . compileRegexWith sro
-- | compile a SearchReplace template, with general options, generating
-- errors if the RE or the template are not well formed, all capture
-- references being checked
compileSearchReplaceWithOptions :: (Monad m,MonadFail m,Functor m,IsRegex RE s)
=> REOptions
-> String
-> String
-> m (SearchReplace RE s)
compileSearchReplaceWithOptions os = compileSearchAndReplace_ packR $ poss2either . compileRegexWithOptions os
------------------------------------------------------------------------
-- Escaping Strings
------------------------------------------------------------------------
-- | convert a string into a RE that matches that string, and apply it
-- to an argument continuation function to make up the RE string to be
-- compiled; e.g., to compile a RE that will only match the string:
--
-- @maybe undefined id . escape ((\"^\"++) . (++\"$\"))@
--
escape :: (Functor m,Monad m,MonadFail m)
=> (String->String)
-> String
-> m RE
escape = escapeWith minBound
-- | a variant of 'escape' where the 'SimpleREOptions' are specified
escapeWith :: (Functor m,Monad m,MonadFail m)
=> SimpleREOptions
-> (String->String)
-> String
-> m RE
escapeWith = escapeWithOptions
-- | a variant of 'escapeWith' that allows an 'IsOption' RE option
-- to be specified
escapeWithOptions :: ( IsOption o, Functor m, Monad m,MonadFail m)
=> o
-> (String->String)
-> String
-> m RE
escapeWithOptions o f = compileRegexWithOptions o . f . escapeREString
------------------------------------------------------------------------
-- Macro Standard Environment
------------------------------------------------------------------------
-- | the standard table of 'Macros' used to compile REs (which can be
-- extended or replace: see "Text.RE.TestBench")
prelude :: Macros RE
prelude = runIdentity $ preludeMacros mk regexType ExclCaptures
where
mk = Identity . unsafeCompileRegex_ RPM_raw noPreludeREOptions
-- | the standard 'MacroEnv' for this back end (see "Text.RE.TestBench")
preludeEnv :: MacroEnv
preludeEnv = preludeMacroEnv regexType
-- | the macros in the standard environment that are failing their tests
-- (checked by the test suite to be empty)
preludeTestsFailing :: [MacroID]
preludeTestsFailing = badMacros $ preludeMacroEnv regexType
-- | a table the standard macros in markdown format
preludeTable :: String
preludeTable = preludeMacroTable regexType
-- | a summary of the macros in the standard environment for this back
-- end in plain text
preludeSummary :: PreludeMacro -> String
preludeSummary = preludeMacroSummary regexType
-- | a listing of the RE text for each macro in the standard environment
-- with all macros expanded to normal form
preludeSources :: String
preludeSources = preludeMacroSources regexType
-- | the prelude source of a given macro in the standard environment
preludeSource :: PreludeMacro -> String
preludeSource = preludeMacroSource regexType
------------------------------------------------------------------------
-- Quasi Quoters
------------------------------------------------------------------------
-- | @[re| ... |]@, is equivalent to @[reMultilineSensitive| ... |]@,
-- compiling a case-sensitive, multi-line RE
re :: QuasiQuoter
re = re' $ Just minBound
-- | @[reMultilineSensitive| ... |]@, compiles a case-sensitive, multi-line RE
reMultilineSensitive :: QuasiQuoter
reMultilineSensitive = re' $ Just MultilineSensitive
-- | @[reMultilineInsensitive| ... |]@, compiles a case-insensitive, multi-line RE
reMultilineInsensitive :: QuasiQuoter
reMultilineInsensitive = re' $ Just MultilineInsensitive
-- | @[reMultilineInsensitive| ... |]@, compiles a case-sensitive, non-multi-line RE
reBlockSensitive :: QuasiQuoter
reBlockSensitive = re' $ Just BlockSensitive
-- | @[reMultilineInsensitive| ... |]@, compiles a case-insensitive, non-multi-line RE
reBlockInsensitive :: QuasiQuoter
reBlockInsensitive = re' $ Just BlockInsensitive
-- | @[reMS| ... |]@ is a shorthand for @[reMultilineSensitive| ... |]@
reMS :: QuasiQuoter
reMS = reMultilineSensitive
-- | @[reMI| ... |]@ is a shorthand for @[reMultilineInsensitive| ... |]@
reMI :: QuasiQuoter
reMI = reMultilineInsensitive
-- | @[reBS| ... |]@ is a shorthand for @[reBlockSensitive| ... |]@
reBS :: QuasiQuoter
reBS = reBlockSensitive
-- | @[reBI| ... |]@ is a shorthand for @[reBlockInsensitive| ... |]@
reBI :: QuasiQuoter
reBI = reBlockInsensitive
-- | @[re_| ... |]@ compiles a RE to produce a function that takes
-- the RE options (e.g., a 'SimpleREOptions' value) and yields the
-- RE compiled with those options. For example,
--
-- @countMatches $ s *=~ [re_|[0-9a-f]+|] MultilineInsensitive@
--
-- counts the number of hexadecimal digit strings in 's', allowing
-- for upper- or lower-case hex digits (which is entirely equivalent
-- in this example to just using @[reMultilineInsensitive|[0-9a-f]+|]@).
re_ :: QuasiQuoter
re_ = re' Nothing
------------------------------------------------------------------------
-- re Helpers
------------------------------------------------------------------------
re' :: Maybe SimpleREOptions -> QuasiQuoter
re' mb = case mb of
Nothing ->
(qq0 "re'")
{ quoteExp = parse minBound (\rs->[|flip unsafeCompileRegex rs|])
}
Just sro ->
(qq0 "re'")
{ quoteExp = parse sro (\rs->[|unsafeCompileRegexSimple sro rs|])
}
where
parse :: SimpleREOptions -> (String->Q Exp) -> String -> Q Exp
parse sro mk rs = poss error (\_->mk rs) $ compileRegex_ RPM_qq os rs
where
os = unpackSimpleREOptions sro
data RegexParseMode
= RPM_qq
| RPM_raw
deriving (Eq,Show)
unsafeCompileRegexSimple :: SimpleREOptions -> String -> RE
unsafeCompileRegexSimple sro re_s = unsafeCompileRegex_ RPM_qq os re_s
where
os = unpackSimpleREOptions sro
unsafeCompileRegex :: IsOption o
=> o
-> String
-> RE
unsafeCompileRegex = unsafeCompileRegex_ RPM_qq . makeREOptions
unsafeCompileRegex_ :: RegexParseMode -> REOptions -> String -> RE
unsafeCompileRegex_ rpm os = poss oops id . compileRegex_ rpm os
where
oops = error . ("unsafeCompileRegex: " ++)
compileRegex_ :: (Functor m,MonadFail m,Monad m)
=> RegexParseMode
-> REOptions
-> String
-> m RE
compileRegex_ rpm os re_s = uncurry mk <$> compileRegex' rpm os re_s
where
mk cnms rx =
RE
{ _re_options = os
, _re_source = re_s
, _re_cnames = cnms
, _re_regex = rx
}
compileRegex' :: (Functor m,MonadFail m,Monad m)
=> RegexParseMode
-> REOptions
-> String
-> m (CaptureNames,Regex)
compileRegex' rpm REOptions{..} s0 = do
((_,cnms),s2) <- either fail return $ extractNamedCaptures s1
(,) cnms <$> makeRegexOptsM optionsComp optionsExec s2
where
s1 = expandMacros reSource optionsMacs $ pp s0
pp = case rpm of
RPM_qq -> qq_prep
RPM_raw -> id
------------------------------------------------------------------------
-- Preprocessing Literal REs
------------------------------------------------------------------------
qq_prep :: String -> String
qq_prep s0 = case s0 of
"" -> ""
c:s -> case c of
'\\' -> backslash s
_ -> c : qq_prep s
where
backslash s1 = case s1 of
"" -> "\\"
c:s -> case c of
'a' -> '\a' : qq_prep s
'b' -> '\b' : qq_prep s
'f' -> '\f' : qq_prep s
'n' -> '\n' : qq_prep s
'r' -> '\r' : qq_prep s
't' -> '\t' : qq_prep s
'v' -> '\v' : qq_prep s
_ -> '\\': c : qq_prep s
------------------------------------------------------------------------
-- Options Helpers
------------------------------------------------------------------------
def_comp_option :: CompOption
def_comp_option = optionsComp defaultREOptions
def_exec_option :: ExecOption
def_exec_option = optionsExec defaultREOptions
------------------------------------------------------------------------
-- Haddock Sections
------------------------------------------------------------------------
-- $about
--
-- This module provides the regex PCRE back end. Most of the functions that
-- you will need for day to day use are provided by the primary API modules
-- (e.g., "Text.RE.TDFA.Text").
|