File: tc_html_parser.rb

package info (click to toggle)
ruby-libxml 2.3.2-1
  • links: PTS, VCS
  • area: main
  • in suites: wheezy
  • size: 1,812 kB
  • sloc: xml: 9,628; ruby: 7,119; ansic: 6,665; makefile: 2
file content (153 lines) | stat: -rw-r--r-- 3,768 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
# encoding: UTF-8

require './test_helper'
require 'stringio'
require 'test/unit'

class HTMLParserTest < Test::Unit::TestCase
  def html_file
    File.expand_path(File.join(File.dirname(__FILE__), 'model/ruby-lang.html'))
  end

  # -----  Sources  ------
  def test_file
    xp = XML::HTMLParser.file(html_file)
    assert_instance_of(XML::HTMLParser, xp)
    doc = xp.parse
    assert_not_nil(doc)
  end

  def test_noexistent_file
    error = assert_raise(XML::Error) do
      XML::HTMLParser.file('i_dont_exist.xml')
    end

    assert_equal('Warning: failed to load external entity "i_dont_exist.xml".', error.to_s)
  end

  def test_nil_file
    error = assert_raise(TypeError) do
      XML::HTMLParser.file(nil)
    end

    assert_equal("can't convert nil into String", error.to_s)
  end

  def test_io
    File.open(html_file) do |io|
      xp = XML::HTMLParser.io(io)
      assert_instance_of(XML::HTMLParser, xp)

      doc = xp.parse
      assert_instance_of(XML::Document, doc)
    end
  end

  def test_io_gc
    # Test that the reader keeps a reference
    # to the io object
    file = File.open(html_file)
    parser = XML::HTMLParser.io(file)
    file = nil
    GC.start
    assert(parser.parse)
  end

  def test_nil_io
    error = assert_raise(TypeError) do
      XML::HTMLParser.io(nil)
    end

    assert_equal("Must pass in an IO object", error.to_s)
  end

  def test_string_io
    data = File.read(html_file)
    io = StringIO.new(data)
    xp = XML::HTMLParser.io(io)
    assert_instance_of(XML::HTMLParser, xp)

    doc = xp.parse
    assert_instance_of(XML::Document, doc)
  end

  def test_string
    str = '<html><body><p>hi</p></body></html>'
    xp = XML::HTMLParser.string(str)

    assert_instance_of(XML::HTMLParser, xp)
    assert_instance_of(XML::HTMLParser, xp)

    doc = xp.parse
    assert_instance_of(XML::Document, doc)
  end

  def test_nil_string
    error = assert_raise(TypeError) do
      XML::HTMLParser.string(nil)
    end

    assert_equal("wrong argument type nil (expected String)", error.to_s)
  end

  def test_parse
    html = <<-EOS
      <html>
        <head>
          <meta name=keywords content=nasty>
        </head>
        <body>Hello<br>World</html>
   EOS

    parser = XML::HTMLParser.string(html)
    doc = parser.parse
    assert_instance_of XML::Document, doc

    root = doc.root
    assert_instance_of XML::Node, root
    assert_equal 'html', root.name

    head = root.child
    assert_instance_of XML::Node, head
    assert_equal 'head', head.name

    meta = head.child
    assert_instance_of XML::Node, meta
    assert_equal 'meta', meta.name
    assert_equal 'keywords', meta[:name]
    assert_equal 'nasty', meta[:content]

    body = head.next
    assert_instance_of XML::Node, body
    assert_equal 'body', body.name

    hello = body.child
    # It appears that some versions of libxml2 add a layer of <p>
    # cant figure our why or how, so this skips it if there
    hello = hello.child if hello.name == "p"

    assert_instance_of XML::Node, hello
    assert_equal 'Hello', hello.content

    br = hello.next
    assert_instance_of XML::Node, br
    assert_equal 'br', br.name

    world = br.next
    assert_instance_of XML::Node, world
    assert_equal 'World', world.content
  end

  def test_no_implied
    html = "hello world"
    parser = XML::HTMLParser.string(html, :options => XML::HTMLParser::Options::NOIMPLIED)
    doc = parser.parse
    assert_equal("<p>#{html}</p>", doc.root.to_s)
  end

  def test_open_many_files
    1000.times do
      doc = XML::HTMLParser.file('model/ruby-lang.html').parse
    end
  end
end