1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199
|
{-# LANGUAGE CPP #-}
{-# LANGUAGE BangPatterns #-}
{-# LANGUAGE OverloadedStrings #-}
-- | Simple test suite.
module Main where
import Data.Either (isRight)
import Data.ByteString (ByteString)
import qualified Data.ByteString as BS
import Test.Hspec
import Xeno.SAX (validate, skipDoctype)
import Xeno.DOM (Node, Content(..), parse, name, contents, attributes, children)
import qualified Xeno.DOM.Robust as RDOM
import Xeno.Types
import qualified Debug.Trace as Debug(trace)
-- | Test-suite entry point: hands the whole @spec@ tree to the hspec runner.
main :: IO ()
main = hspec spec
-- | The complete hspec tree for the xeno parsers.
--
-- Groups, in order:
--
-- * two "Xeno.DOM tests" groups exercising @Xeno.DOM.parse@ (the first one
--   reads the fixture file @data/books-4kb.xml@),
-- * "hexml tests": table-driven SAX validation and CDATA cases,
-- * "robust XML tests": the same scenarios run through @RDOM.parse@, plus
--   recovery from unbalanced tags,
-- * "skipDoctype": doctype stripping.
--
-- The CPP conditional must stay in column 0; it adds the
-- whitespace-around-equals fixtures only when WHITESPACE_AROUND_EQUALS is
-- defined.
spec :: SpecWith ()
spec = do
  describe "Xeno.DOM tests" $ do
    it "test 1" $ do
      xml <- BS.readFile "data/books-4kb.xml"
      -- fromRightE reports the parser exception on failure instead of an
      -- opaque irrefutable-pattern error.
      let dom = fromRightE (parse xml)
          descendants = allChildrens dom
      name dom `shouldBe` "catalog"
      length (contents dom) `shouldBe` 25
      length (children dom) `shouldBe` 12
      length descendants `shouldBe` 84
      length (concatMap attributes descendants) `shouldBe` 12
      concatMap attributes descendants `shouldBe`
        [("id","bk101"),("id","bk102"),("id","bk103"),("id","bk104")
        ,("id","bk105"),("id","bk106"),("id","bk107"),("id","bk108")
        ,("id","bk109"),("id","bk110"),("id","bk111"),("id","bk112")]
      map name descendants `shouldBe`
        (replicate 12 "book" ++
         concat (replicate 12 ["author","title","genre","price","publish_date","description"]))
  describe "Xeno.DOM tests" $ do
    it "DOM from bytestring substring" $ do
      let substr = BS.drop 5 "5<8& <valid>xml<here/></valid>"
          parsedRoot = fromRightE $ parse substr
      name parsedRoot `shouldBe` "valid"
    it "Leading whitespace characters are accepted by parse" $
      -- shouldSatisfy prints the actual Left value when the parse fails.
      parse "\n<a></a>" `shouldSatisfy` isRight
    let doc =
          parse
            "<root><test id=\"1\" extra=\"2\" />\n<test id=\"2\" /><b><test id=\"3\" /></b><test id=\"4\" /><test /></root>"
    it "children test" $
      map name (children $ fromRightE doc) `shouldBe` ["test", "test", "b", "test", "test"]
    it "attributes" $
      -- take 1 keeps the check total: an empty child list shows up as a
      -- regular expectation failure rather than a crash inside head.
      map attributes (take 1 (children $ fromRightE doc)) `shouldBe` [[("id", "1"), ("extra", "2")]]
    it "xml prologue test" $ do
      let docWithPrologue = "<?xml version=\"1.1\"?>\n<greeting>Hello, world!</greeting>"
          parsedRoot = fromRightE $ parse docWithPrologue
      name parsedRoot `shouldBe` "greeting"
  describe "hexml tests" $ do
    mapM_ saxCase $
      concat
        [ hexml_examples_sax
        , extra_examples_sax
#ifdef WHITESPACE_AROUND_EQUALS
        , ws_around_equals_sax
#endif
        ]
    mapM_ cdataCase cdata_tests
    -- If this works without crashing we're happy.
    let nsdoc = ("<ns:tag os:attr=\"Namespaced attribute value\">Content.</ns:tag>" :: ByteString)
    it "namespaces" $
      validate nsdoc `shouldBe` True
  describe "robust XML tests" $ do
    it "DOM from bytestring substring" $ do
      let substr = BS.drop 5 "5<8& <valid>xml<here/></valid>"
          parsedRoot = fromRightE $ RDOM.parse substr
      name parsedRoot `shouldBe` "valid"
    it "Leading whitespace characters are accepted by parse" $
      RDOM.parse "\n<a></a>" `shouldSatisfy` isRight
    let doc =
          RDOM.parse
            "<root><test id=\"1\" extra=\"2\" />\n<test id=\"2\" /><b><test id=\"3\" /></b><test id=\"4\" /><test /></root>"
    it "children test" $
      map name (children $ fromRightE doc) `shouldBe` ["test", "test", "b", "test", "test"]
    it "attributes" $
      map attributes (take 1 (children $ fromRightE doc)) `shouldBe` [[("id", "1"), ("extra", "2")]]
    it "xml prologue test" $ do
      let docWithPrologue = "<?xml version=\"1.1\"?>\n<greeting>Hello, world!</greeting>"
          parsedRoot = fromRightE $ RDOM.parse docWithPrologue
      name parsedRoot `shouldBe` "greeting"
    it "html doctype test" $ do
      let docWithPrologue = "<!DOCTYPE html>\n<greeting>Hello, world!</greeting>"
          parsedRoot = fromRightE $ RDOM.parse docWithPrologue
      name parsedRoot `shouldBe` "greeting"
    -- NOTE(review): the cases in this nested group still go through validate
    -- and Xeno.DOM.parse, not RDOM.parse; confirm whether the CDATA cases were
    -- meant to exercise the robust parser.
    describe "hexml tests" $ do
      mapM_ saxCase (hexml_examples_sax ++ extra_examples_sax)
      mapM_ cdataCase cdata_tests
      -- If this works without crashing we're happy.
      let nsdoc = ("<ns:tag os:attr=\"Namespaced attribute value\">Content.</ns:tag>" :: ByteString)
      it "namespaces" $
        validate nsdoc `shouldBe` True
    it "recovers unclosed tag" $ do
      let parsed = RDOM.parse "<a attr='a'><img></a>"
      -- NOTE(review): this trace writes the parse result to stderr on every
      -- run; it looks like leftover debugging output.
      Debug.trace (show parsed) $ do
        name (fromRightE parsed) `shouldBe` "a"
        RDOM.attributes (fromRightE parsed) `shouldBe` [("attr", "a")]
        map name (RDOM.children $ fromRightE parsed) `shouldBe` ["img"]
    it "ignores too many closing tags" $ do
      let parsed = RDOM.parse "<a></a></b></c>"
      parsed `shouldSatisfy` isRight
  describe "skipDoctype" $ do
    it "strips initial doctype declaration" $ do
      skipDoctype "<!DOCTYPE html><?xml version=\"1.0\" encoding=\"UTF-8\"?>Hello" `shouldBe` "<?xml version=\"1.0\" encoding=\"UTF-8\"?>Hello"
    it "strips doctype after spaces" $ do
      skipDoctype " \n<!DOCTYPE html><?xml version=\"1.0\" encoding=\"UTF-8\"?>Hello" `shouldBe` "<?xml version=\"1.0\" encoding=\"UTF-8\"?>Hello"
    it "does not strip anything after or inside element" $ do
      let insideElt = "<xml><?xml version=\"1.0\" encoding=\"UTF-8\"?>Hello</xml>"
      skipDoctype insideElt `shouldBe` insideElt
  where
    -- One SAX validation case: the shown input is the test label and
    -- validate must return the expected flag.
    saxCase :: (Bool, ByteString) -> SpecWith ()
    saxCase (expected, input) =
      it (show input) (validate input `shouldBe` expected)

    -- One CDATA case: parse with Xeno.DOM.parse and compare the contents of
    -- the root node; a parse error is rendered with show so that both sides
    -- of the comparison have the same type.
    cdataCase :: (Either String [Content], ByteString) -> SpecWith ()
    cdataCase (expected, input) =
      it (show input) $
        either (Left . show) Right (contents <$> parse input) `shouldBe` expected
-- | Fixtures for the "hexml tests" groups: each pair holds the result
-- expected from @validate@ and the document passed to it.
hexml_examples_sax :: [(Bool, ByteString)]
hexml_examples_sax =
  [ accepted "<test id='bob'>here<extra/>there</test>"
  , accepted "<test /><close />"
  , accepted "<test /><!-- comment > --><close />"
  , accepted "<test id=\"bob value\" another-attr=\"test with <\">here </test> more text at the end<close />"
  , accepted "<test></more>" -- SAX doesn't care about tag balancing
  , rejected "<test"
  , accepted "<?xml version=\"1.1\"?>\n<greeting>Hello, world!</greeting>"
  ]
  where
    -- Tag a document with the validation outcome the suite expects.
    accepted document = (True, document)
    rejected document = (False, document)
-- | Additional documents that @validate@ is expected to accept: names
-- containing a dash, a digit, or dots.
extra_examples_sax :: [(Bool, ByteString)]
extra_examples_sax =
  zip
    (repeat True)
    [ "<some-example/>"
    , "<a numeric1=\"attribute\"/>"
    , "<also.a.dot></also.a.dot>"
    ]
-- | Document with whitespace and newlines around the @=@ of its attributes.
-- The suite only includes it when WHITESPACE_AROUND_EQUALS is defined.
ws_around_equals_sax :: [(Bool, ByteString)]
ws_around_equals_sax = [(True, spacedAttributes)]
  where
    spacedAttributes :: ByteString
    spacedAttributes = "<o \nm = \"100\"\n gee = \"0\">"
-- | CDATA fixtures: the expected contents of the root node, paired with the
-- document to parse.  The last case guards against the parser leaving the
-- CDATA section too early when it meets a single @]@.
cdata_tests :: [(Either a [Content], ByteString)]
cdata_tests =
  [ "Oneliner CDATA."
      `parsedFrom` "<test><![CDATA[Oneliner CDATA.]]></test>"
  , "<strong>This is strong but not XML tags.</strong>"
      `parsedFrom` "<test><![CDATA[<strong>This is strong but not XML tags.</strong>]]></test>"
  , "A lonely ], sad isn't it?"
      `parsedFrom` "<test><![CDATA[A lonely ], sad isn't it?]]></test>"
  ]
  where
    -- Expect exactly one CData node carrying the payload.
    payload `parsedFrom` document = (Right [CData payload], document)
-- | Unwrap a parse result, aborting via @error@ with the shown exception on
-- @Left@.  A deliberate shortcut that is only acceptable inside tests.
fromRightE :: Either XenoException a -> a
fromRightE (Left failure) = error (show failure)
fromRightE (Right value) = value
-- | Run the handler on a @Left@ value; lift a @Right@ value unchanged with
-- @pure@.
mapLeft :: Applicative f => (a -> f b) -> Either a b -> f b
mapLeft handler (Left failure) = handler failure
mapLeft _ (Right value) = pure value
-- | Run the handler on a @Right@ value; lift a @Left@ value unchanged with
-- @pure@.
mapRight :: Applicative f => (b -> f a) -> Either a b -> f a
mapRight _ (Left value) = pure value
mapRight handler (Right payload) = handler payload
-- | Collect every descendant of a node (the node itself is not included),
-- one generation at a time: all children first, then all grandchildren, and
-- so on until a generation is empty.
allChildrens :: Node -> [Node]
allChildrens root = concat (takeWhile (not . null) generations)
  where
    -- Successive generations below the root; the level holding only the
    -- root is dropped.
    generations :: [[Node]]
    generations = drop 1 (iterate (concatMap children) [root])
|