File: test_node_encoding.rb

package info (click to toggle)
ruby-nokogiri 1.11.1%2Bdfsg-2
  • links: PTS, VCS
  • area: main
  • in suites: bullseye
  • size: 5,576 kB
  • sloc: xml: 28,086; ruby: 18,456; java: 13,067; ansic: 5,138; yacc: 265; sh: 208; makefile: 27
file content (95 lines) | stat: -rw-r--r-- 2,628 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
# -*- coding: utf-8 -*-
require "helper"

module Nokogiri
  module HTML
    class TestNodeEncoding < Nokogiri::TestCase
      # Parses the NICH_FILE fixture into @html before each test.
      #
      # The fixture is opened in binary mode ("rb") and handed to Nokogiri as
      # an IO. The block form of File.open closes the handle as soon as the
      # parse call returns, so no descriptor is left open per test.
      def setup
        super
        @html = File.open(NICH_FILE, "rb") { |io| Nokogiri::HTML(io) }
      end

      # The 'href' attribute value read from the first node matching 'a'
      # must be a UTF-8 string.
      def test_get_attribute
        anchor = @html.css('a').first
        href = anchor['href']
        assert_equal('UTF-8', href.encoding.name)
      end

      # The document's #text must come back as a UTF-8 string.
      def test_text_encoding_is_utf_8
        document_text = @html.text
        assert_equal('UTF-8', document_text.encoding.name)
      end

      # Round-trips the fixture through serialization: the serialized string's
      # encoding name must equal the document's encoding (compared
      # case-insensitively), and re-parsing that output must serialize to the
      # same string again.
      def test_serialize_encoding_html
        # Skipped when running on libxml2 built without iconv.
        iconv_missing = Nokogiri.uses_libxml? && !Nokogiri::VersionInfo.instance.libxml2_has_iconv?
        skip "libxml2 without iconv does not pass this test" if iconv_missing

        declared = @html.encoding.downcase
        produced = @html.serialize.encoding.name.downcase
        assert_equal(declared, produced)

        @doc = Nokogiri::HTML(@html.serialize)
        assert_equal(@html.serialize, @doc.serialize)
      end

      # A document built from nil reports no encoding, while its serialized
      # output is still a UTF-8 string.
      def test_default_encoding
        blank = Nokogiri::HTML(nil)
        assert_nil(blank.encoding)

        output = blank.serialize
        assert_equal('UTF-8', output.encoding.name)
      end

      # encode_special_chars, called on the first node matching 'a', must
      # return a UTF-8 string.
      def test_encode_special_chars
        anchor = @html.css('a').first
        escaped = anchor.encode_special_chars('foo')
        assert_equal('UTF-8', escaped.encoding.name)
      end

      # The #content of the first node matching 'a' must be a UTF-8 string.
      def test_content
        anchor = @html.css('a').first
        body = anchor.content
        assert_equal('UTF-8', body.encoding.name)
      end

      # The #name of the first node matching 'a' must be a UTF-8 string.
      def test_name
        anchor = @html.css('a').first
        tag_name = anchor.name
        assert_equal('UTF-8', tag_name.encoding.name)
      end

      # The #path of the first node matching 'a' must be a UTF-8 string.
      def test_path
        anchor = @html.css('a').first
        location = anchor.path
        assert_equal('UTF-8', location.encoding.name)
      end

      # Checks the encoding of #inner_html for the 'h2' node of the
      # SHIFT_JIS_HTML fixture:
      #
      # * with no options, the result's encoding name equals the document's
      #   encoding and the text matches the Shift_JIS-encoded greeting;
      # * with :encoding => 'UTF-8', the text matches the UTF-8 greeting;
      # * after the document's encoding is set to 'UTF-8', a plain call also
      #   matches the UTF-8 greeting.
      def test_inner_html
        # Block form closes the fixture's file handle once parsing returns,
        # instead of leaving it open.
        doc = File.open(SHIFT_JIS_HTML, 'rb') { |io| Nokogiri::HTML(io) }

        hello = "こんにちは"

        contents = doc.at('h2').inner_html
        assert_equal doc.encoding, contents.encoding.name
        assert_match hello.encode('Shift_JIS'), contents

        contents = doc.at('h2').inner_html(:encoding => 'UTF-8')
        assert_match hello, contents

        doc.encoding = 'UTF-8'
        contents = doc.at('h2').inner_html
        assert_match hello, contents
      end

      # Serializes fragments containing U+1F340 under two document encodings.
      # (The name presumably refers to GitHub issue 1113 - not verifiable
      # from this file.)
      #
      # With a UTF-8 document, the hex entity, the decimal entity and the
      # literal character must all serialize to the literal character. With a
      # US-ASCII document, both entity forms must serialize to one entity
      # form: hex when Nokogiri.jruby? is true, decimal otherwise.
      def test_encoding_GH_1113
        hex_entity = '<p>&#x1f340;</p>'
        decimal_entity = '<p>&#127808;</p>'
        literal = '<p>🍀</p>'

        doc = Nokogiri::HTML::Document.new

        doc.encoding = 'UTF-8'
        [hex_entity, decimal_entity, literal].each do |markup|
          assert_equal(literal, doc.fragment(markup).to_s)
        end

        doc.encoding = 'US-ASCII'
        expected =
          if Nokogiri.jruby?
            hex_entity
          else
            decimal_entity
          end
        [hex_entity, decimal_entity].each do |markup|
          assert_equal(expected, doc.fragment(markup).to_s)
        end
      end
    end
  end
end