1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143
|
module Example.Regress (regress) where
import Text.HTML.TagSoup
import Text.HTML.TagSoup.Entity
import qualified Text.HTML.TagSoup.Match as Match
import Control.Exception
-- * The Test Monad
data Test a = Pass
instance Monad Test where
a >> b = a `seq` b
return = error "No return for Monad Test"
(>>=) = error "No bind (>>=) for Monad Test"
instance Show (Test a) where
show x = x `seq` "All tests passed"
pass :: Test ()
pass = Pass
(===) :: (Show a, Eq a) => a -> a -> Test ()
a === b = if a == b then pass else fail $ "Does not equal: " ++ show a ++ " =/= " ++ show b
-- * The Main section
regress :: IO ()
regress = print $ do
parseTests
combiTests
entityTests
lazyTags == lazyTags `seq` pass
matchCombinators
{- |
This routine tests the laziness of the TagSoup parser.
For each critical part of the parser we provide a test input
with a token of infinite size.
Then the output must be infinite too.
If the laziness is broken, then the output will stop early.
We collect the thousandth character of the output of each test case.
If computation of the list stops somewhere,
you have found a laziness stopper.
-}
lazyTags :: [Char]
lazyTags =
map ((!!1000) . show . parseTags) $
(cycle "Rhabarber") :
(repeat '&') :
("<"++cycle "html") :
("<html "++cycle "na!me=value ") :
("<html name="++cycle "value") :
("<html name=\""++cycle "value") :
("<html name="++cycle "val!ue") :
("<html "++cycle "name") :
("</"++cycle "html") :
("<!-- "++cycle "comment") :
("<!"++cycle "doctype") :
("<!DOCTYPE"++cycle " description") :
(cycle "1<2 ") :
-- need further analysis
("<html name="++cycle "val&ue") :
("<html name="++cycle "va&l!ue") :
("&" ++ cycle "t") :
(cycle "& test") :
-- i don't see how this can work unless the junk gets into the AST?
--("</html "++cycle "junk") :
[]
matchCombinators :: Test ()
matchCombinators = assert (and tests) pass
where
tests =
Match.tagText (const True) (TagText "test") :
Match.tagText ("test"==) (TagText "test") :
Match.tagText ("soup"/=) (TagText "test") :
Match.tagOpenNameLit "table"
(TagOpen "table" [("id", "name")]) :
Match.tagOpenLit "table" (Match.anyAttrLit ("id", "name"))
(TagOpen "table" [("id", "name")]) :
Match.tagOpenLit "table" (Match.anyAttrNameLit "id")
(TagOpen "table" [("id", "name")]) :
not (Match.tagOpenLit "table" (Match.anyAttrLit ("id", "name"))
(TagOpen "table" [("id", "other name")])) :
[]
parseTests :: Test ()
parseTests = do
parseTags "<!DOCTYPE TEST>" === [TagOpen "!DOCTYPE" [("TEST","")]]
parseTags "<test \"foo bar\">" === [TagOpen "test" [("","foo bar")]]
parseTags "<test \'foo bar\'>" === [TagOpen "test" [("","foo bar")]]
parseTags "<:test \'foo bar\'>" === [TagOpen ":test" [("","foo bar")]]
parseTags "hello & world" === [TagText "hello & world"]
parseTags "hello @ world" === [TagText "hello @ world"]
parseTags "hello @ world" === [TagText "hello @ world"]
parseTags "hello &haskell; world" === [TagText "hello &haskell; world"]
parseTags "hello \n\t world" === [TagText "hello \n\t world"]
parseTags "<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.01//EN\" \"http://www.w3.org/TR/html4/strict.dtd\">" ===
[TagOpen "!DOCTYPE" [("HTML",""),("PUBLIC",""),("","-//W3C//DTD HTML 4.01//EN"),("","http://www.w3.org/TR/html4/strict.dtd")]]
parseTags "<script src=\"http://edge.jobthread.com/feeds/jobroll/?s_user_id=100540&subtype=slashdot\">" ===
[TagOpen "script" [("src","http://edge.jobthread.com/feeds/jobroll/?s_user_id=100540&subtype=slashdot")]]
parseTags "<a title='foo'bar' href=correct>text" === [TagOpen "a" [("title", "foo"),
("bar", ""),
("href", "correct")],
TagText "text"]
entityTests :: Test ()
entityTests = do
lookupNumericEntity "65" === Just 'A'
lookupNumericEntity "x41" === Just 'A'
lookupNumericEntity "x4E" === Just 'N'
lookupNumericEntity "x4e" === Just 'N'
lookupNumericEntity "Haskell" === Nothing
lookupNumericEntity "" === Nothing
lookupNumericEntity "89439085908539082" === Nothing
lookupNamedEntity "amp" === Just '&'
lookupNamedEntity "haskell" === Nothing
escapeXMLChar 'a' === Nothing
escapeXMLChar '&' === Just "amp"
combiTests :: Test ()
combiTests = do
(TagText "test" ~== TagText "" ) === True
(TagText "test" ~== TagText "test") === True
(TagText "test" ~== TagText "soup") === False
(TagText "test" ~== "test") === True
(TagOpen "test" [] ~== "<test>") === True
(TagOpen "test" [] ~== "<soup>") === False
(TagOpen "test" [] ~/= "<soup>") === True
|