1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197
|
{-# OPTIONS_GHC -fno-warn-name-shadowing #-}
-----------------------------------------------------------------------------
-- |
-- Module : Text.Regex
-- Copyright : (c) Chris Kuklewicz 2006, derived from (c) The University of Glasgow 2001
-- License : BSD-style (see the file LICENSE)
--
-- Maintainer : hvr@gnu.org
-- Stability : experimental
-- Portability : non-portable (regex-base needs MPTC+FD)
--
-- Regular expression matching. Uses the POSIX regular expression
-- interface in "Text.Regex.Posix".
--
---------------------------------------------------------------------------
--
-- Modified by Chris Kuklewicz to be a thin layer over the regex-posix
-- package, and moved into a regex-compat package.
--
module Text.Regex (
-- * Regular expressions
Regex,
mkRegex,
mkRegexWithOpts,
matchRegex,
matchRegexAll,
subRegex,
splitRegex
) where
import Data.Array((!))
import Data.Bits((.|.))
import Text.Regex.Base(RegexMaker(makeRegexOpts),defaultExecOpt,RegexLike(matchAll,matchAllText),RegexContext(matchM),MatchText)
import Text.Regex.Posix(Regex,compNewline,compIgnoreCase,compExtended)
-- | Compile a pattern using the default options: multi-line matching
-- and case-sensitive comparison. Apart from that, the pattern syntax
-- is that of @egrep@ (i.e. POSIX \"extended\" regular expressions).
mkRegex :: String -> Regex
mkRegex source =
  let -- Extended syntax combined with the newline compile flag.
      defaultComp = compExtended .|. compNewline
  in makeRegexOpts defaultComp defaultExecOpt source
-- | Compile a pattern like 'mkRegex', but let the caller choose the
-- multi-line and case-sensitivity settings instead of the defaults.
mkRegexWithOpts
  :: String  -- ^ The regular expression to compile.
  -> Bool    -- ^ 'True' exactly when @\'^\'@ and @\'$\'@ should match the
             -- beginning and end of individual lines respectively, and
             -- @\'.\'@ should /not/ match the newline character.
  -> Bool    -- ^ 'True' exactly when matching should be case-sensitive.
  -> Regex   -- ^ Returns: the compiled regular expression.
mkRegexWithOpts source multiLine caseSensitive =
  makeRegexOpts compOpts defaultExecOpt source
  where
    -- Adds the newline flag only when multi-line behaviour was requested.
    newlineFlag
      | multiLine = (compNewline .|.)
      | otherwise = id
    -- Adds the ignore-case flag only when case-insensitivity was requested.
    caseFlag
      | caseSensitive = id
      | otherwise = (compIgnoreCase .|.)
    -- Extended syntax is always on; the optional flags are layered on top.
    compOpts = newlineFlag (caseFlag compExtended)
-- | Match a regular expression against a string, keeping only the
-- subexpression matches.
matchRegex
  :: Regex           -- ^ The regular expression.
  -> String          -- ^ The string to match against.
  -> Maybe [String]  -- ^ Returns: @'Just' strs@ if the match succeeded
                     -- (where @strs@ is the list of subexpression
                     -- matches), or 'Nothing' otherwise.
matchRegex regex input = subgroups `fmap` matchRegexAll regex input
  where
    -- Project the fourth component out of the 'matchRegexAll' result.
    subgroups (_, _, _, groups) = groups
-- | Match a regular expression against a string and report the text
-- surrounding the match as well as the subexpression matches.
matchRegexAll
  :: Regex   -- ^ The regular expression.
  -> String  -- ^ The string to match against.
  -> Maybe (String, String, String, [String])
             -- ^ Returns: 'Nothing' if the match failed, or:
             --
             -- > Just ( everything before match,
             -- >        portion matched,
             -- >        everything after the match,
             -- >        subexpression matches )
matchRegexAll regex input = regex `matchM` input
{- | Substitutes the replacement string for every occurrence of the
given regexp in the input.

Inside the replacement string, @\"\\1\"@ stands for the first
subexpression match, @\"\\2\"@ for the second, and so on, while
@\"\\0\"@ stands for the entire match.  @\"\\\\\\\\\"@ inserts a
literal backslash.

This does not advance if the regex matches an empty string.  This
misfeature is here to match the behavior of the original
@Text.Regex@ API.
-}
subRegex :: Regex   -- ^ Search pattern
         -> String  -- ^ Input string
         -> String  -- ^ Replacement text
         -> String  -- ^ Output string
subRegex _ "" _ = ""
subRegex regexp inp repl = substitute 0 inp (matchAllText regexp inp)
  where
    -- Matches a backslash followed by either another backslash or a run of
    -- digits; the single capture group holds whatever follows the backslash.
    refPattern = mkRegex "\\\\(\\\\|[0-9]+)"

    -- For each reference found in the replacement text: the captured
    -- reference text, paired with the (offset, length) of the whole
    -- reference within the replacement text.
    references =
      [ (fst (hit ! 1), snd (hit ! 0)) | hit <- matchAllText refPattern repl ]

    -- Given one match of the search pattern, prepend the expanded
    -- replacement text to the string passed as the second argument.
    expand :: MatchText String -> String -> String
    expand = build 0 repl references

    -- Walk the replacement text.  'pos' is the absolute offset at which
    -- 'template' (the not-yet-consumed tail of the replacement) begins.
    build _ template [] = \_ -> (template ++)
    build pos template ((ref, (start, size)) : more)
      | null after = \hit -> (literal ++) . splice hit
      | otherwise = \hit -> (literal ++) . splice hit . build next after more hit
      where
        next = start + size
        -- Plain text between the previous reference and this one.
        literal = take (start - pos) template
        -- Replacement text that follows this reference.
        after = drop (next - pos) template
        -- An escaped backslash yields one backslash; a numeric reference
        -- yields the text of that group from the current match.
        splice hit
          | ref == "\\" = ('\\' :)
          | otherwise = (fst (hit ! (read ref :: Int)) ++)

    -- Walk the input.  'pos' is the absolute offset at which 'text' (the
    -- not-yet-consumed tail of the input) begins.
    substitute _ text [] = text
    substitute pos text (hit : hits)
      | null after = before ++ expand hit ""
      | otherwise = before ++ expand hit (substitute next after hits)
      where
        (_, (start, size)) = hit ! 0
        next = start + size
        -- Unmatched input preceding this match is copied through unchanged.
        before = take (start - pos) text
        after = drop (next - pos) text
{- | Breaks a string into pieces at each match of a regular expression.
The regular expression should identify one delimiter.

This does not advance and produces an infinite list of @[]@ if the regex
matches an empty string.  This misfeature is here to match the
behavior of the original @Text.Regex@ API.
-}
splitRegex :: Regex -> String -> [String]
splitRegex _ [] = []
splitRegex delim strIn = pieces 0 strIn delimiters
  where
    -- (offset, length) of the whole match for every delimiter occurrence.
    delimiters = [ hit ! 0 | hit <- matchAll delim strIn ]

    -- 'pos' is the absolute offset at which 'text' (the not-yet-split tail
    -- of the input) begins.
    pieces _ text [] = [text]
    pieces pos text ((start, size) : more) = next `seq` chunk
      where
        next = start + size
        -- Text between the previous delimiter and this one.
        field = take (start - pos) text
        -- Text that follows this delimiter.
        leftover = drop (next - pos) text
        -- A delimiter at the very end of the input yields a trailing
        -- empty field.
        chunk
          | null leftover = [field, ""]
          | otherwise = field : pieces next leftover more
{-
-- These are the older versions, kept for reference. They gave the wrong
-- result for the example below (the correct answer is shown on the line
-- after it):
-- let r = mkRegex "^(.)" in subRegex2 r "abc\ndef" "|\\1"
-- "|abc\n|def"
subRegex :: Regex -- ^ Search pattern
-> String -- ^ Input string
-> String -- ^ Replacement text
-> String -- ^ Output string
subRegex _ "" _ = ""
subRegex regexp inp repl =
let -- bre matches a backslash then capture either a backslash or some digits
bre = mkRegex "\\\\(\\\\|[0-9]+)"
lookup _ [] _ = []
lookup [] _ _ = []
lookup match repl groups =
case matchRegexAll bre repl of
Nothing -> repl
Just (lead, _, trail, bgroups) ->
let newval =
if (head bgroups) == "\\"
then "\\"
else let index :: Int
index = (read (head bgroups)) - 1
in if index == -1
then match
else groups !! index
in lead ++ newval ++ lookup match trail groups
in case matchRegexAll regexp inp of
Nothing -> inp
Just (lead, match, trail, groups) ->
lead ++ lookup match repl groups ++ (subRegex regexp trail repl)
splitRegex :: Regex -> String -> [String]
splitRegex _ [] = []
splitRegex delim strIn = loop strIn where
loop str = case matchOnceText delim str of
Nothing -> [str]
Just (firstline, _, remainder) ->
if null remainder
then [firstline,""]
else firstline : loop remainder
-}
|