File: Main.hs

package info (click to toggle)
haskell-regex-tdfa 1.3.2.2-2
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 436 kB
  • sloc: haskell: 4,250; makefile: 3
file content (209 lines) | stat: -rw-r--r-- 7,989 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
{-# LANGUAGE CPP                        #-}
{-# LANGUAGE GeneralizedNewtypeDeriving #-}
{-# LANGUAGE MonoLocalBinds             #-}

#if __GLASGOW_HASKELL__ >= 902
{-# OPTIONS_GHC -Wno-incomplete-uni-patterns #-}
#endif

module Main where

import           Control.Applicative  as App
import           Control.Monad
import qualified Control.Monad.Fail   as Fail
import           Data.Array
import qualified Data.ByteString      as BS
import qualified Data.ByteString.UTF8 as UTF8
import           Data.List            (isInfixOf, mapAccumL, sort)
import           Data.String
import           Data.Typeable
import           Data.Version         ()
import           System.Directory     (getDirectoryContents)
import           System.Environment
import           System.Exit
import           System.FilePath      ((</>))
import           Text.Regex.Base

import qualified Text.Regex.TDFA      as TDFA

default(Int)

type RSource = String
type RType = String -- can be changed to any Extract instance
newtype RegexSource = RegexSource {unSource :: RSource} deriving Show
newtype RegexStringOf a = RegexString {unString :: a} deriving Show
type RegexString = RegexStringOf RType

dictionary :: [Char]
dictionary = ['a'..'c']++['A'..'C']++"_"


type A = Array Int (Int,Int)

maxItems :: Int
maxItems=100

testOne :: t -> (t -> t1 -> Array Int (Int, Int)) -> t1 -> String
testOne s op r =
  let foo ::  String
      foo = concatMap (\(o,l) -> show (o,(o+l))) (take maxItems $ elems (op s r :: Array Int (Int,Int)))
  in if null foo then "NOMATCH" else foo

testOne' :: A -> String
testOne' input =
  let foo ::  String
      foo = concatMap (\(o,l) -> show (o,(o+l))) (take maxItems $ elems input)
  in if null foo then "NOMATCH" else foo

toTest :: String -> (Int,String,String,String)
toTest line = let [n,regex,input,output] = words line
                  noQ []       = []
                  noQ ('?':xs) = '-':'1':noQ xs
                  noQ (x:xs)   = x:noQ xs
                  input' = if input == "NULL" then "" else unN input
              in (read n,regex,input',noQ output)

toTest' :: String -> String -> (String,(Int,String,String,String))
toTest' oldRegex line =
  let [n,regex,input,output] = words line
      noQ []       = []
      noQ ('?':xs) = '-':'1':noQ xs
      noQ (x:xs)   = x:noQ xs
      input' = if input == "NULL" then "" else input
      regex' = if regex == "SAME" then oldRegex else regex
  in (regex',(read n,regex',input',noQ output))

load,load' :: String -> [(Int, String, String, String)]
load = map toTest . lines
load' = snd . mapAccumL toTest' "X_X_X_" . lines

checkTest :: PFT A -> (Int,String,String,String) -> IO [Int]
checkTest opM (n,regex,input,output) = do
  let Result output'e = opM input regex
      p = putStrLn
  p ""
  case output'e of
    Left msg -> do
      p ("############################# Unexpected Error # "++show n ++ " #############################" )
      p ("Searched text: "++show input)
      p ("Regex pattern: "++show regex)
      p ("Expected output: "++show output)
      p ("Error message: "++msg)
      return [n]
    Right output'a -> do
      let output' = testOne' output'a
      case (n<0 , output==output') of
        (False,True) -> p ("Expected Pass #"++show n)
        (False,False) -> p ("############################# Unexpected Fail # "++show n ++ " #############################" )
        (True,True) -> p ("############################# Unexpected Pass # "++show n ++ " #############################" )
        (True,False) ->  p ("Expected Fail #"++show n)
      if (output == output')
        then do p ("text and pattern: "++show input)
                p ("Regex pattern: "++show regex)
                p ("Outputs agree: "++show output)
                return (if n<0 then [n] else [])
        else do p ""
                p ("Searched text: "++show input)
                p ("Regex pattern: "++show regex)
                p ("Expected output: "++show output)
                p ("Actual result  : "++show output')
                return (if n<0 then [] else [n])

checkFile :: (RType -> RSource -> Result A) -> (FilePath, String) -> IO (FilePath,[Int])
checkFile opM (filepath, contents) = do
  putStrLn $ "\nUsing Tests from: "++filepath
  vals <- liftM concat (mapM (checkTest opM) (load' contents))
  return (filepath,vals)

checkTests :: (RType -> RSource -> Result A) -> [(FilePath,String)] -> IO [(String, [Int])]
checkTests opM testCases = mapM (checkFile opM) testCases

readTestCases :: FilePath -> IO [(String, String)]
readTestCases folder = do
  fns <- filter (".txt" `isInfixOf`) <$> getDirectoryContents folder
  when (null fns) $
    fail ("readTestCases: No test-cases found in " ++ show folder)
  forM (sort fns) $ \fn -> do
    bs <- BS.readFile (folder </> fn)
    return (fn, UTF8.toString bs)

newtype Result a = Result (Either String a)
  deriving (Eq, Show, Functor, App.Applicative, Monad)

instance Fail.MonadFail Result where
  fail = Result . Left

type PFT a = RegexContext TDFA.Regex RType a => RType -> RSource -> Result a

posix :: PFT a
posix x reg =
  let q :: Result TDFA.Regex
      q = makeRegexOptsM (defaultCompOpt { TDFA.caseSensitive = False}) defaultExecOpt reg
  in q >>= \ s -> return (match s x)

unN :: String -> String
unN ('\\':'n':xs) = '\n':unN xs
unN (x:xs)        = x:unN xs
unN []            = []

manual :: [String] -> IO ()
manual [sIn,rIn] = do
  let s :: RType
      r :: String
      s = fromString (unN sIn)
      r = (unN rIn)
  -- first match
  let r1 :: TDFA.Regex
      r1 = makeRegex r
  let b1u@(_,_b1s,_,_)=(match r1 s :: (RType,RType,RType,[RType]))
  putStrLn ("Searched text: "++show s)
  putStrLn ("Regex pattern: "++show r)
  print b1u
  -- multiple matches and counting
  let b1 = (match r1 s :: [MatchArray])
      c1 = (match r1 s :: Int)
  putStrLn $ "Count of matches = "++show c1
  putStrLn $ "Matches found = "++show (length b1)
  mapM_ (putStrLn . testOne') b1
manual _ = error "wrong arguments to regex-posix-unittest's manual function"

main :: IO ()
main = do
  putStr "Testing Text.Regex.TDFA version: "
  print TDFA.getVersion_Text_Regex_TDFA
  a <- getArgs
  if length a == 2
    then manual a
    else do
      putStrLn $ "Explanation and discussion of these tests on the wiki at http://www.haskell.org/haskellwiki/Regex_Posix including comparing results from different operating systems"
      putStrLn $ "Questions about this package to the author at email <TextRegexLazy@personal.mightyreason.com>"
      putStrLn $ "The type of both the pattern and test is " ++ show (typeOf (undefined :: RType))
      putStrLn $ "Without extactly two arguments:"
      putStrLn $ "    This program runs all test files listed in test/data-dir/test-manifest.txt"
      putStrLn $ "    Lines with negative number are expected to fail, others are expected to pass."
      putStrLn $ "With exactly two arguments:"
      putStrLn $ "    The first argument is the text to be searched."
      putStrLn $ "    The second argument is the regular expression pattern to search with."
      vals <- checkTests posix =<< readTestCases ("test" </> "cases")
      if null (concatMap snd vals)
        then putStrLn "\nWow, all the tests passed!"
        else do
          putStrLn $ "\nBoo, tests failed!\n"++unlines (map show vals)
          exitFailure

{-
-- for TRE
posix x r = let q :: Posix.Regex
                q = makeRegexOpts (defaultCompOpt .|. Posix.compRightAssoc .|. Posix.compIgnoreCase) defaultExecOpt r
            in match q x

tdfa x r = let q :: TDFA.Wrap.Regex
               q = makeRegexOpts (defaultCompOpt { TDFA.Wrap.caseSensitive = False
                                                 , TDFA.Wrap.rightAssoc = True }) defaultExecOpt r
           in match q x

tdfa2 x r = let q :: TDFA2.Wrap.Regex
                q = makeRegexOpts (defaultCompOpt { TDFA2.Wrap.caseSensitive = False
                                                  , TDFA2.Wrap.rightAssoc = True }) defaultExecOpt r
            in match q x
-}