File: Regress.hs

package info (click to toggle)
haskell-tagsoup 0.6-1
  • links: PTS
  • area: main
  • in suites: lenny
  • size: 148 kB
  • ctags: 10
  • sloc: haskell: 1,063; makefile: 1
file content (143 lines) | stat: -rw-r--r-- 5,084 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143

module Example.Regress (regress) where

import Text.HTML.TagSoup
import Text.HTML.TagSoup.Entity
import qualified Text.HTML.TagSoup.Match as Match
import Control.Exception

-- * The Test Monad

data Test a = Pass
instance Monad Test where
    a >> b = a `seq` b
    return = error "No return for Monad Test"
    (>>=) = error "No bind (>>=) for Monad Test"
instance Show (Test a) where
    show x = x `seq` "All tests passed"

pass :: Test ()
pass = Pass

(===) :: (Show a, Eq a) => a -> a -> Test ()
a === b = if a == b then pass else fail $ "Does not equal: " ++ show a ++ " =/= " ++ show b

-- * The Main section

regress :: IO ()
regress = print $ do
    parseTests
    combiTests
    entityTests
    lazyTags == lazyTags `seq` pass
    matchCombinators


{- |
This routine tests the laziness of the TagSoup parser.
For each critical part of the parser we provide a test input
with a token of infinite size.
Then the output must be infinite too.
If the laziness is broken, then the output will stop early.
We collect the thousandth character of the output of each test case.
If computation of the list stops somewhere,
you have found a laziness stopper.
-}


lazyTags :: [Char]
lazyTags =
   map ((!!1000) . show . parseTags) $
      (cycle "Rhabarber") :
      (repeat '&') :
      ("<"++cycle "html") :
      ("<html "++cycle "na!me=value ") :
      ("<html name="++cycle "value") :
      ("<html name=\""++cycle "value") :
      ("<html name="++cycle "val!ue") :
      ("<html "++cycle "name") :
      ("</"++cycle "html") :
      ("<!-- "++cycle "comment") :
      ("<!"++cycle "doctype") :
      ("<!DOCTYPE"++cycle " description") :
      (cycle "1<2 ") :
      
      -- need further analysis
      ("<html name="++cycle "val&ue") :
      ("<html name="++cycle "va&l!ue") :
      ("&" ++ cycle "t") :
      (cycle "&amp; test") :

      -- i don't see how this can work unless the junk gets into the AST?
      --("</html "++cycle "junk") :

      []



matchCombinators :: Test ()
matchCombinators = assert (and tests) pass
    where
        tests =
            Match.tagText (const True) (TagText "test") :
            Match.tagText ("test"==) (TagText "test") :
            Match.tagText ("soup"/=) (TagText "test") :
            Match.tagOpenNameLit "table"
               (TagOpen "table" [("id", "name")]) :
            Match.tagOpenLit "table" (Match.anyAttrLit ("id", "name"))
               (TagOpen "table" [("id", "name")]) :
            Match.tagOpenLit "table" (Match.anyAttrNameLit "id")
               (TagOpen "table" [("id", "name")]) :
            not (Match.tagOpenLit "table" (Match.anyAttrLit ("id", "name"))
                  (TagOpen "table" [("id", "other name")])) :
            []


parseTests :: Test ()
parseTests = do
    parseTags "<!DOCTYPE TEST>" === [TagOpen "!DOCTYPE" [("TEST","")]]
    parseTags "<test \"foo bar\">" === [TagOpen "test" [("","foo bar")]]
    parseTags "<test \'foo bar\'>" === [TagOpen "test" [("","foo bar")]]
    parseTags "<:test \'foo bar\'>" === [TagOpen ":test" [("","foo bar")]]
    parseTags "hello &amp; world" === [TagText "hello & world"]
    parseTags "hello &#64; world" === [TagText "hello @ world"]
    parseTags "hello &#x40; world" === [TagText "hello @ world"]
    parseTags "hello &haskell; world" === [TagText "hello &haskell; world"]
    parseTags "hello \n\t world" === [TagText "hello \n\t world"]

    parseTags "<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.01//EN\" \"http://www.w3.org/TR/html4/strict.dtd\">" ===
        [TagOpen "!DOCTYPE" [("HTML",""),("PUBLIC",""),("","-//W3C//DTD HTML 4.01//EN"),("","http://www.w3.org/TR/html4/strict.dtd")]]
    parseTags "<script src=\"http://edge.jobthread.com/feeds/jobroll/?s_user_id=100540&subtype=slashdot\">" ===
        [TagOpen "script" [("src","http://edge.jobthread.com/feeds/jobroll/?s_user_id=100540&subtype=slashdot")]]

    parseTags "<a title='foo'bar' href=correct>text" === [TagOpen "a" [("title", "foo"),
                                                                       ("bar",   ""),
                                                                       ("href", "correct")],
                                                         TagText "text"]


entityTests :: Test ()
entityTests = do
    lookupNumericEntity "65" === Just 'A'
    lookupNumericEntity "x41" === Just 'A'
    lookupNumericEntity "x4E" === Just 'N'
    lookupNumericEntity "x4e" === Just 'N'
    lookupNumericEntity "Haskell" === Nothing
    lookupNumericEntity "" === Nothing
    lookupNumericEntity "89439085908539082" === Nothing
    lookupNamedEntity "amp" === Just '&'
    lookupNamedEntity "haskell" === Nothing
    escapeXMLChar 'a' === Nothing
    escapeXMLChar '&' === Just "amp"


combiTests :: Test ()
combiTests = do
    (TagText "test" ~== TagText ""    ) === True
    (TagText "test" ~== TagText "test") === True
    (TagText "test" ~== TagText "soup") === False
    (TagText "test" ~== "test") === True
    (TagOpen "test" [] ~== "<test>") === True
    (TagOpen "test" [] ~== "<soup>") === False
    (TagOpen "test" [] ~/= "<soup>") === True