File: Main.hs

package info (click to toggle)
haskell-xeno 0.6-4
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 212 kB
  • sloc: haskell: 1,324; xml: 120; makefile: 7
file content (199 lines) | stat: -rw-r--r-- 7,852 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
{-# LANGUAGE CPP               #-}
{-# LANGUAGE BangPatterns      #-}
{-# LANGUAGE OverloadedStrings #-}

-- | Simple test suite.

module Main where

import           Data.Either (isRight)
import           Data.ByteString (ByteString)
import qualified Data.ByteString as BS
import           Test.Hspec
import           Xeno.SAX  (validate, skipDoctype)
import           Xeno.DOM  (Node, Content(..), parse, name, contents, attributes, children)
import qualified Xeno.DOM.Robust as RDOM
import           Xeno.Types
import qualified Debug.Trace as Debug(trace)

-- | Test-suite entry point: hands the whole 'spec' tree to hspec's runner.
main :: IO ()
main = hspec spec


-- | The complete hspec test tree.
--
-- Groups, in order:
--
--   * Xeno.DOM tests: 'parse' against a fixture file and inline documents.
--   * hexml tests: 'validate' over the example tables, plus CDATA content
--     checks.
--   * robust XML tests: the same checks through @RDOM.parse@, plus
--     recovery cases (unclosed tags, surplus closing tags).
--   * skipDoctype: stripping of a leading DOCTYPE declaration.
spec :: SpecWith ()
spec = do
  describe "Xeno.DOM tests" $ do
    it "test 1" $ do
      xml <- BS.readFile "data/books-4kb.xml"
      -- Go through 'fromRightE' instead of a partial @let (Right dom)@
      -- pattern, so that a parse failure reports the actual exception.
      let dom = fromRightE (parse xml)
          descendants = allChildrens dom
      name dom `shouldBe` "catalog"
      length (contents dom) `shouldBe` 25
      length (children dom) `shouldBe` 12
      length descendants `shouldBe` 84
      length (concatMap attributes descendants) `shouldBe` 12
      concatMap attributes descendants `shouldBe`
          [("id","bk101"),("id","bk102"),("id","bk103"),("id","bk104")
          ,("id","bk105"),("id","bk106"),("id","bk107"),("id","bk108")
          ,("id","bk109"),("id","bk110"),("id","bk111"),("id","bk112")]
      map name descendants `shouldBe`
          (replicate 12 "book" ++ concat
            (replicate 12 ["author","title","genre","price","publish_date","description"]))

    it "DOM from bytestring substring" $ do
      let substr = BS.drop 5 "5<8& <valid>xml<here/></valid>"
          parsedRoot = fromRightE $ parse substr
      name parsedRoot `shouldBe` "valid"

    it "Leading whitespace characters are accepted by parse" $
      isRight (parse "\n<a></a>") `shouldBe` True

    let doc =
              parse
                "<root><test id=\"1\" extra=\"2\" />\n<test id=\"2\" /><b><test id=\"3\" /></b><test id=\"4\" /><test /></root>"

    it "children test" $
      map name (children $ fromRightE doc) `shouldBe` ["test", "test", "b", "test", "test"]

    -- 'take 1' keeps the check total: a root without children fails the
    -- comparison with a readable diff instead of crashing inside 'head'.
    it "attributes" $
      map attributes (take 1 (children $ fromRightE doc)) `shouldBe` [[("id", "1"), ("extra", "2")]]

    it "xml prologue test" $ do
      let docWithPrologue = "<?xml version=\"1.1\"?>\n<greeting>Hello, world!</greeting>"
          parsedRoot = fromRightE $ parse docWithPrologue
      name parsedRoot `shouldBe` "greeting"

  describe "hexml tests" $ do
    mapM_ saxCase $ concat
      [ hexml_examples_sax
      , extra_examples_sax
#ifdef WHITESPACE_AROUND_EQUALS
      , ws_around_equals_sax
#endif
      ]
    mapM_ cdataCase cdata_tests

    -- If this works without crashing we're happy.
    let nsdoc = ("<ns:tag os:attr=\"Namespaced attribute value\">Content.</ns:tag>" :: ByteString)
    it "namespaces" $
      validate nsdoc `shouldBe` True

  describe "robust XML tests" $ do
    it "DOM from bytestring substring" $ do
      let substr = BS.drop 5 "5<8& <valid>xml<here/></valid>"
          parsedRoot = fromRightE $ RDOM.parse substr
      name parsedRoot `shouldBe` "valid"

    it "Leading whitespace characters are accepted by parse" $
      isRight (RDOM.parse "\n<a></a>") `shouldBe` True

    let doc =
              RDOM.parse
                "<root><test id=\"1\" extra=\"2\" />\n<test id=\"2\" /><b><test id=\"3\" /></b><test id=\"4\" /><test /></root>"

    it "children test" $
      map name (children $ fromRightE doc) `shouldBe` ["test", "test", "b", "test", "test"]

    it "attributes" $
      map attributes (take 1 (children $ fromRightE doc)) `shouldBe` [[("id", "1"), ("extra", "2")]]

    it "xml prologue test" $ do
      let docWithPrologue = "<?xml version=\"1.1\"?>\n<greeting>Hello, world!</greeting>"
          parsedRoot = fromRightE $ RDOM.parse docWithPrologue
      name parsedRoot `shouldBe` "greeting"

    it "html doctype test" $ do
      let docWithPrologue = "<!DOCTYPE html>\n<greeting>Hello, world!</greeting>"
          parsedRoot = fromRightE $ RDOM.parse docWithPrologue
      name parsedRoot `shouldBe` "greeting"

    describe "hexml tests" $ do
      mapM_ saxCase (hexml_examples_sax ++ extra_examples_sax)
      -- NOTE(review): these CDATA checks run the non-robust 'parse' (via
      -- 'cdataCase') even though they sit in the robust group; possibly
      -- RDOM.parse was intended -- confirm before changing.
      mapM_ cdataCase cdata_tests

      -- If this works without crashing we're happy.
      let nsdoc = ("<ns:tag os:attr=\"Namespaced attribute value\">Content.</ns:tag>" :: ByteString)
      it "namespaces" $
        validate nsdoc `shouldBe` True

    it "recovers unclosed tag" $ do
      let parsed = RDOM.parse "<a attr='a'><img></a>"
      -- The trace prints the parse result while the test runs, as a
      -- diagnostic aid for the recovery behaviour.
      Debug.trace (show parsed) $ do
        name (fromRightE parsed) `shouldBe` "a"
        RDOM.attributes (fromRightE parsed) `shouldBe` [("attr", "a")]
        map name (RDOM.children $ fromRightE parsed) `shouldBe` ["img"]

    it "ignores too many closing tags" $ do
      let parsed = RDOM.parse "<a></a></b></c>"
      isRight parsed `shouldBe` True

  describe "skipDoctype" $ do
    it "strips initial doctype declaration" $ do
      skipDoctype "<!DOCTYPE html><?xml version=\"1.0\" encoding=\"UTF-8\"?>Hello" `shouldBe` "<?xml version=\"1.0\" encoding=\"UTF-8\"?>Hello"
    it "strips doctype after spaces" $ do
      skipDoctype "  \n<!DOCTYPE html><?xml version=\"1.0\" encoding=\"UTF-8\"?>Hello" `shouldBe` "<?xml version=\"1.0\" encoding=\"UTF-8\"?>Hello"
    it "does not strip anything after or inside element" $ do
      let insideElt = "<xml><?xml version=\"1.0\" encoding=\"UTF-8\"?>Hello</xml>"
      skipDoctype insideElt `shouldBe` insideElt
  where
    -- One spec item per SAX example: 'validate' must return the expected flag.
    saxCase :: (Bool, ByteString) -> SpecWith ()
    saxCase (expected, input) =
      it (show input) $ validate input `shouldBe` expected

    -- One spec item per CDATA example: the contents of the parsed root must
    -- match; a parse error is rendered with 'show' so both sides compare as
    -- @Either String [Content]@.
    cdataCase :: (Either String [Content], ByteString) -> SpecWith ()
    cdataCase (expected, input) =
      it (show input) $
        either (Left . show) Right (contents <$> parse input) `shouldBe` expected

-- | Example documents fed to 'validate' by the "hexml tests" groups.
-- Each entry pairs the expected validation result with the input document.
hexml_examples_sax :: [(Bool, ByteString)]
hexml_examples_sax =
  [ accepted "<test id='bob'>here<extra/>there</test>"
  , accepted "<test /><close />"
  , accepted "<test /><!-- comment > --><close />"
  , accepted "<test id=\"bob value\" another-attr=\"test with <\">here </test> more text at the end<close />"
  , accepted "<test></more>" -- SAX doesn't care about tag balancing
  , rejected "<test"
  , accepted "<?xml version=\"1.1\"?>\n<greeting>Hello, world!</greeting>"
  ]
  where
    -- Tag a document with the validation outcome it is expected to produce.
    accepted, rejected :: ByteString -> (Bool, ByteString)
    accepted document = (True, document)
    rejected document = (False, document)

-- | Additional documents for the 'validate' checks; every one of them is
-- expected to validate successfully.
extra_examples_sax :: [(Bool, ByteString)]
extra_examples_sax =
  [ (True, document)
  | document <-
      [ "<some-example/>"
      , "<a numeric1=\"attribute\"/>"
      , "<also.a.dot></also.a.dot>"
      ]
  ]

-- | Document with whitespace (including newlines) around the @=@ of its
-- attributes; expected to validate. Only added to the test run when the
-- WHITESPACE_AROUND_EQUALS CPP flag is defined.
ws_around_equals_sax :: [(Bool, ByteString)]
ws_around_equals_sax = [(True, spacedAttributes)]
  where
    spacedAttributes :: ByteString
    spacedAttributes = "<o  \nm   = \"100\"\n  gee =  \"0\">"

-- | CDATA regression cases: the parser must not leave a CDATA section early
-- just because it meets a single @]@. Each entry pairs the expected contents
-- of the root element with the document to parse.
cdata_tests :: [(Either a [Content], ByteString)]
cdata_tests =
  [ "Oneliner CDATA."
      `isCDataOf` "<test><![CDATA[Oneliner CDATA.]]></test>"
  , "<strong>This is strong but not XML tags.</strong>"
      `isCDataOf` "<test><![CDATA[<strong>This is strong but not XML tags.</strong>]]></test>"
  , "A lonely ], sad isn't it?"
      `isCDataOf` "<test><![CDATA[A lonely ], sad isn't it?]]></test>"
  ]
  where
    -- Expect the document's root to contain exactly one CDATA node holding
    -- the given payload.
    isCDataOf :: ByteString -> ByteString -> (Either e [Content], ByteString)
    isCDataOf payload document = (Right [CData payload], document)

-- | Horrible hack, for tests only: unwrap a 'Right', or abort through
-- 'error' with the shown exception when given a 'Left'.
fromRightE :: Either XenoException a -> a
fromRightE outcome =
  case outcome of
    Right value  -> value
    Left failure -> error (show failure)

-- | Collapse an 'Either' into an applicative action: a 'Left' is passed to
-- the supplied handler, a 'Right' is lifted unchanged with 'pure'.
mapLeft :: Applicative f => (a -> f b) -> Either a b -> f b
mapLeft handler outcome =
  case outcome of
    Left failure -> handler failure
    Right value  -> pure value

-- | Mirror image of 'mapLeft': a 'Right' is passed to the supplied handler,
-- a 'Left' is lifted unchanged with 'pure'.
mapRight :: Applicative f => (b -> f a) -> Either a b -> f a
mapRight handler outcome =
  case outcome of
    Left value    -> pure value
    Right payload -> handler payload

-- | Every descendant of a node (the node itself excluded), listed one depth
-- level at a time: direct children first, then their children, and so on.
allChildrens :: Node -> [Node]
allChildrens root =
    concat (takeWhile (not . null) deeperLevels)
  where
    -- Each level is the concatenated children of the level before it; the
    -- starting level (the root alone) is dropped from the result.
    deeperLevels :: [[Node]]
    deeperLevels = drop 1 (iterate (concatMap children) [root])