File: test_push_parser.rb

package info (click to toggle)
ruby-nokogiri 1.13.10+dfsg-2
  • links: PTS, VCS
  • area: main
  • in suites: bookworm
  • size: 7,416 kB
  • sloc: ansic: 38,198; xml: 28,086; ruby: 22,271; java: 15,517; cpp: 7,037; yacc: 244; sh: 148; makefile: 136
file content (243 lines) | stat: -rw-r--r-- 6,832 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
# -*- coding: utf-8 -*-
# frozen_string_literal: true

require "helper"

# Specs for Nokogiri::XML::SAX::PushParser: XML is pushed in chunks with
# #<< / #write and the parse is completed with #finish.
#
# Each example gets a fresh parser wrapping a new Doc. Doc is defined outside
# this file (presumably in the test helper); the examples only rely on it
# recording the SAX callbacks it receives (start_elements, end_elements,
# comments, data, errors, ...).
class Nokogiri::SAX::TestCase
  describe Nokogiri::XML::SAX::PushParser do
    let(:parser) { Nokogiri::XML::SAX::PushParser.new(Doc.new) }

    # A second root element is a syntax error, and a nil chunk pushed to the
    # same parser afterwards raises as well.
    it :test_exception do
      assert_raises(Nokogiri::XML::SyntaxError) do
        parser << "<foo /><foo />"
      end

      assert_raises(Nokogiri::XML::SyntaxError) do
        parser << nil
      end
    end

    # Finishing while <foo> is still open raises.
    it :test_early_finish do
      parser << "<foo>"
      assert_raises(Nokogiri::XML::SyntaxError) do
        parser.finish
      end
    end

    # write(chunk, true) is used instead of a separate #finish call; the
    # second argument presumably marks the chunk as the last one.
    it :test_write_last_chunk do
      parser << "<foo>"
      parser.write("</foo>", true)
      assert_equal [["foo", []]], parser.document.start_elements
      assert_equal [["foo"]], parser.document.end_elements
    end

    # With RECOVER set, an empty final chunk does not raise and no element
    # events are recorded (the Doc accessors are still nil).
    it :test_empty_doc do
      parser.options |= Nokogiri::XML::ParseOptions::RECOVER
      parser.write("", true)
      assert_nil parser.document.start_elements
      assert_nil parser.document.end_elements
    end

    # The error raised by #finish carries the same message as the error
    # raised earlier by the failing push.
    it :test_finish_should_rethrow_last_error do
      expected = assert_raises(Nokogiri::XML::SyntaxError) { parser << "</foo>" }
      actual = assert_raises(Nokogiri::XML::SyntaxError) { parser.finish }
      # Minitest's signature is assert_equal(expected, actual).
      assert_equal expected.message, actual.message
    end

    # An exception raised inside the document's #error callback propagates
    # out of the push unchanged.
    it :test_should_throw_error_returned_by_document do
      doc = Doc.new
      # Override the error callback on this single instance only.
      class << doc
        def error(_msg)
          raise "parse error"
        end
      end
      # Named differently from the `parser` helper so the let is not shadowed.
      raising_parser = Nokogiri::XML::SAX::PushParser.new(doc)

      exception = assert_raises(RuntimeError) { raising_parser << "</foo>" }
      assert_equal("parse error", exception.message)
    end

    # On a fresh parser, #write(nil) returns the parser itself.
    it :test_writing_nil do
      assert_equal parser, parser.write(nil)
    end

    # end_document is reported only once #finish is called, not when the
    # closing root tag has merely been pushed.
    it :test_end_document_called do
      parser.<<(<<~EOF)
        <p id="asdfasdf">
          <!-- This is a comment -->
          Paragraph 1
        </p>
      EOF
      refute parser.document.end_document_called
      parser.finish
      assert parser.document.end_document_called
    end

    # Events are delivered as chunks arrive: the start tag is reported before
    # the rest of the document has been pushed.
    it :test_start_element do
      parser.<<(<<~EOF)
        <p id="asdfasdf">
      EOF

      assert_equal [["p", [["id", "asdfasdf"]]]],
        parser.document.start_elements

      parser.<<(<<~EOF)
          <!-- This is a comment -->
          Paragraph 1
        </p>
      EOF
      assert_equal [" This is a comment "], parser.document.comments
      parser.finish
    end

    # In the non-namespaced start_elements record, an xmlns:* declaration
    # shows up as an ordinary attribute pair.
    it :test_start_element_with_namespaces do
      parser.<<(<<~EOF)
        <p xmlns:foo="http://foo.example.com/">
      EOF

      assert_equal [["p", [["xmlns:foo", "http://foo.example.com/"]]]],
        parser.document.start_elements

      parser.<<(<<~EOF)
          <!-- This is a comment -->
          Paragraph 1
        </p>
      EOF
      assert_equal [" This is a comment "], parser.document.comments
      parser.finish
    end

    # Checks the namespace-aware start record: local name, attributes,
    # prefix and namespace URI, in that positional order.
    it :test_start_element_ns do
      parser.<<(<<~EOF)
        <stream:stream xmlns='jabber:client' xmlns:stream='http://etherx.jabber.org/streams' version='1.0' size='large'></stream:stream>
      EOF

      assert_equal 1, parser.document.start_elements_namespace.length
      el = parser.document.start_elements_namespace.first

      assert_equal "stream", el.first
      # Only version and size are expected here; the two xmlns declarations
      # are not counted among the attributes.
      assert_equal 2, el[1].length
      assert_equal [["version", "1.0"], ["size", "large"]],
        el[1].map { |attr| [attr.localname, attr.value] }

      assert_equal "stream", el[2]
      assert_equal "http://etherx.jabber.org/streams", el[3]
      parser.finish
    end

    # The namespace-aware end record is [local name, prefix, namespace URI].
    it :test_end_element_ns do
      parser.<<(<<~EOF)
        <stream:stream xmlns='jabber:client' xmlns:stream='http://etherx.jabber.org/streams' version='1.0'></stream:stream>
      EOF

      assert_equal [["stream", "stream", "http://etherx.jabber.org/streams"]],
        parser.document.end_elements_namespace
      parser.finish
    end

    # A document split across two pushes: the comment from the second chunk
    # is already recorded before #finish is called.
    it :test_chevron_partial_xml do
      parser.<<(<<~EOF)
        <p id="asdfasdf">
      EOF

      parser.<<(<<~EOF)
          <!-- This is a comment -->
          Paragraph 1
        </p>
      EOF
      assert_equal [" This is a comment "], parser.document.comments
      parser.finish
    end

    # A whole document pushed in one chunk and then finished.
    it :test_chevron do
      parser.<<(<<~EOF)
        <p id="asdfasdf">
          <!-- This is a comment -->
          Paragraph 1
        </p>
      EOF
      parser.finish
      assert_equal [" This is a comment "], parser.document.comments
    end

    # A freshly built parser has no parse options set.
    it :test_default_options do
      assert_equal 0, parser.options
    end

    # With RECOVER set, the unclosed <bar> is recorded as an error instead of
    # raising, and element and character events are still delivered.
    it :test_recover do
      parser.options |= Nokogiri::XML::ParseOptions::RECOVER
      parser.<<(<<~EOF)
        <p>
          Foo
          <bar>
          Bar
        </p>
      EOF
      parser.finish
      # assert_operator reports both operands on failure, unlike assert(a >= b).
      assert_operator(parser.document.errors.size, :>=, 1)
      assert_equal [["p", []], ["bar", []]], parser.document.start_elements

      # Whitespace is stripped from each data chunk so the comparison does
      # not depend on how the text was split into chunks.
      squashed = parser.document.data.map { |chunk| chunk.gsub(/\s/, "") }.join
      assert_equal "FooBar", squashed
    end

    # Input that declares UTF-8 but contains a byte that is not valid UTF-8:
    # with RECOVER set, the error is recorded and the data is passed through.
    it :test_broken_encoding do
      skip_unless_libxml2("ultra hard to fix for pure Java version")
      parser.options |= Nokogiri::XML::ParseOptions::RECOVER
      # This is ISO_8859-1: \337 is the single byte 0xDF.
      parser << "<?xml version='1.0' encoding='UTF-8'?><r>Gau\337</r>"
      parser.finish
      assert_operator(parser.document.errors.size, :>=, 1)
      assert_equal "Gau\337", parser.document.data.join
      assert_equal [["r"]], parser.document.end_elements
    end

    # replace_entities starts false and is settable when libxml is in use;
    # otherwise it starts true and stays true.
    it :test_replace_entities_attribute_behavior do
      if Nokogiri.uses_libxml?
        # initially false
        refute parser.replace_entities

        # can be set to true
        parser.replace_entities = true
        assert parser.replace_entities

        # can be set to false
        parser.replace_entities = false
        refute parser.replace_entities
      else
        # initially true
        assert parser.replace_entities

        # ignore attempts to set to false
        parser.replace_entities = false # TODO: should we raise an exception here?
        assert parser.replace_entities
      end
    end

    # Without replace_entities, the attribute value is reported as
    # "asdf&#38;asdf" while the character data is reported as "&".
    it :test_untouched_entities do
      skip_unless_libxml2("entities are always replaced in pure Java version")
      parser.<<(<<~EOF)
        <p id="asdf&amp;asdf">
          <!-- This is a comment -->
          Paragraph 1 &amp; 2
        </p>
      EOF
      parser.finish
      assert_equal [["p", [["id", "asdf&#38;asdf"]]]], parser.document.start_elements
      assert_equal "Paragraph 1 & 2", parser.document.data.join.strip
    end

    # With replace_entities enabled, &amp; is reported as "&" in both the
    # attribute value and the character data.
    it :test_replaced_entities do
      parser.replace_entities = true
      parser.<<(<<~EOF)
        <p id="asdf&amp;asdf">
          <!-- This is a comment -->
          Paragraph 1 &amp; 2
        </p>
      EOF
      parser.finish
      assert_equal [["p", [["id", "asdf&asdf"]]]], parser.document.start_elements
      assert_equal "Paragraph 1 & 2", parser.document.data.join.strip
    end
  end
end