File: Regex.hs

package info (click to toggle)
haskell-regex-compat 0.95.2.1-3
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 84 kB
  • sloc: haskell: 85; makefile: 3
file content (197 lines) | stat: -rw-r--r-- 7,644 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
{-# OPTIONS_GHC -fno-warn-name-shadowing #-}
-----------------------------------------------------------------------------
-- |
-- Module      :  Text.Regex
-- Copyright   :  (c) Chris Kuklewicz 2006, derived from (c) The University of Glasgow 2001
-- License     :  BSD-style (see the file LICENSE)
--
-- Maintainer  :  hvr@gnu.org
-- Stability   :  experimental
-- Portability :  non-portable (regex-base needs MPTC+FD)
--
-- Regular expression matching.  Uses the POSIX regular expression
-- interface in "Text.Regex.Posix".
--
---------------------------------------------------------------------------

--
-- Modified by Chris Kuklewicz to be a thin layer over the regex-posix
-- package, and moved into a regex-compat package.
--
module Text.Regex (
    -- * Regular expressions
    Regex,
    mkRegex,
    mkRegexWithOpts,
    matchRegex,
    matchRegexAll,
    subRegex,
    splitRegex
  ) where

import Data.Array((!))
import Data.Bits((.|.))
import Text.Regex.Base(RegexMaker(makeRegexOpts),defaultExecOpt,RegexLike(matchAll,matchAllText),RegexContext(matchM),MatchText)
import Text.Regex.Posix(Regex,compNewline,compIgnoreCase,compExtended)

-- | Makes a regular expression with the default options (multi-line,
-- case-sensitive).  The syntax of regular expressions is
-- otherwise that of @egrep@ (i.e. POSIX \"extended\" regular
-- expressions).
mkRegex :: String -> Regex
mkRegex s = makeRegexOpts opt defaultExecOpt s
  where opt = compExtended .|. compNewline

-- | Makes a regular expression, where the multi-line and
-- case-sensitive options can be changed from the default settings.
mkRegexWithOpts
   :: String  -- ^ The regular expression to compile.
   -> Bool    -- ^ 'True' iff @\'^\'@ and @\'$\'@ match the beginning and
              -- end of individual lines respectively, and @\'.\'@ does /not/
              -- match the newline character.
   -> Bool    -- ^ 'True' iff matching is case-sensitive.
   -> Regex   -- ^ Returns: the compiled regular expression.

mkRegexWithOpts s single_line case_sensitive
  = let opt = (if single_line then (compNewline .|.) else id) .
              (if case_sensitive then id else (compIgnoreCase .|.)) $
              compExtended
    in makeRegexOpts opt defaultExecOpt s

-- | Match a regular expression against a string.
matchRegex
   :: Regex     -- ^ The regular expression.
   -> String    -- ^ The string to match against.
   -> Maybe [String]    -- ^ Returns: @'Just' strs@ if the match succeeded
                        -- (and @strs@ is the list of subexpression matches),
                        -- or 'Nothing' otherwise.
matchRegex p str = fmap (\(_,_,_,str) -> str) (matchRegexAll p str)

-- | Match a regular expression against a string, returning more information
-- about the match.
matchRegexAll
   :: Regex     -- ^ The regular expression.
   -> String    -- ^ The string to match against.
   -> Maybe ( String, String, String, [String] )
                -- ^ Returns: 'Nothing' if the match failed, or:
                --
                -- >  Just ( everything before match,
                -- >         portion matched,
                -- >         everything after the match,
                -- >         subexpression matches )

matchRegexAll p str = matchM p str

{- | Replaces every occurrence of the given regexp with the replacement string.

In the replacement string, @\"\\1\"@ refers to the first substring;
@\"\\2\"@ to the second, etc; and @\"\\0\"@ to the entire match.
@\"\\\\\\\\\"@ will insert a literal backslash.

This does not advance if the regex matches an empty string.  This
misfeature is here to match the behavior of the original
@Text.Regex@ API.
-}

subRegex :: Regex                          -- ^ Search pattern
         -> String                         -- ^ Input string
         -> String                         -- ^ Replacement text
         -> String                         -- ^ Output string
subRegex _ "" _ = ""
subRegex regexp inp repl =
  let compile _i str [] = \ _m ->  (str++)
      compile i str (("\\",(off,len)):rest) =
        let i' = off+len
            pre = take (off-i) str
            str' = drop (i'-i) str
        in if null str' then \ _m -> (pre ++) . ('\\':)
             else \  m -> (pre ++) . ('\\' :) . compile i' str' rest m
      compile i str ((xstr,(off,len)):rest) =
        let i' = off+len
            pre = take (off-i) str
            str' = drop (i'-i) str
            x = read xstr
        in if null str' then \ m -> (pre ++) . (fst (m ! x) ++)
             else \ m -> (pre ++) . (fst (m ! x) ++) . compile i' str' rest m
      compiled :: MatchText String -> String -> String
      compiled = compile 0 repl findrefs where
        -- bre matches a backslash then capture either a backslash or some digits
        bre = mkRegex "\\\\(\\\\|[0-9]+)"
        findrefs = map (\m -> (fst (m ! 1), snd (m ! 0))) (matchAllText bre repl)
      go _i str [] = str
      go i str (m:ms) =
        let (_, (off, len)) = m ! 0
            i' = off+len
            pre = take (off-i) str
            str' = drop (i'-i) str
        in if null str' then pre ++ (compiled m "")
             else pre ++ (compiled m (go i' str' ms))
  in go 0 inp (matchAllText regexp inp)

{- | Splits a string based on a regular expression.  The regular expression
should identify one delimiter.

This does not advance and produces an infinite list of @[]@ if the regex
matches an empty string.  This misfeature is here to match the
behavior of the original @Text.Regex@ API.
-}

splitRegex :: Regex -> String -> [String]
splitRegex _ [] = []
splitRegex delim strIn =
  let matches = map (! 0) (matchAll delim strIn)
      go _i str [] = str : []
      go i str ((off,len):rest) =
        let i' = off+len
            firstline = take (off-i) str
            remainder = drop (i'-i) str
        in seq i' $
           if null remainder then [firstline,""]
             else firstline : go i' remainder rest
  in go 0 strIn matches

{-

-- These are the older versions which failed on (correct answer:)
-- let r = mkRegex "^(.)" in subRegex2 r "abc\ndef" "|\\1"
-- "|abc\n|def"

subRegex :: Regex                          -- ^ Search pattern
      -> String                         -- ^ Input string
      -> String                         -- ^ Replacement text
      -> String                         -- ^ Output string
subRegex _ "" _ = ""
subRegex regexp inp repl =
  let -- bre matches a backslash then capture either a backslash or some digits
      bre = mkRegex "\\\\(\\\\|[0-9]+)"
      lookup _ [] _ = []
      lookup [] _ _ = []
      lookup match repl groups =
        case matchRegexAll bre repl of
          Nothing -> repl
          Just (lead, _, trail, bgroups) ->
            let newval =
                 if (head bgroups) == "\\"
                   then "\\"
                   else let index :: Int
                            index = (read (head bgroups)) - 1
                        in if index == -1
                             then match
                             else groups !! index
            in lead ++ newval ++ lookup match trail groups
  in case matchRegexAll regexp inp of
       Nothing -> inp
       Just (lead, match, trail, groups) ->
         lead ++ lookup match repl groups ++ (subRegex regexp trail repl)

splitRegex :: Regex -> String -> [String]
splitRegex _ [] = []
splitRegex delim strIn = loop strIn where
  loop str = case matchOnceText delim str of
                Nothing -> [str]
                Just (firstline, _, remainder) ->
                  if null remainder
                    then [firstline,""]
                    else firstline : loop remainder

-}