File: test_node_encoding.rb

package info (click to toggle)
ruby-nokogiri 1.13.10%2Bdfsg-2
  • links: PTS, VCS
  • area: main
  • in suites: bookworm
  • size: 7,416 kB
  • sloc: ansic: 38,198; xml: 28,086; ruby: 22,271; java: 15,517; cpp: 7,037; yacc: 244; sh: 148; makefile: 136
file content (97 lines) | stat: -rw-r--r-- 2,674 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
# -*- coding: utf-8 -*-
# frozen_string_literal: true

require "helper"

module Nokogiri
  module HTML
    # Checks the Ruby string encodings reported by HTML documents, nodes and
    # fragments: attribute values, text content, node names, XPath paths,
    # inner HTML and serialized output.
    #
    # Most tests run against @html, which is parsed from NICH_FILE in #setup.
    class TestNodeEncoding < Nokogiri::TestCase
      # Parses the NICH_FILE fixture into @html before every test.
      def setup
        super
        @html = parse_binary_html_fixture(NICH_FILE)
      end

      # Attribute values read with Node#[] are UTF-8 strings.
      def test_get_attribute
        node = @html.css("a").first
        assert_equal("UTF-8", node["href"].encoding.name)
      end

      # Document#text is a UTF-8 string.
      def test_text_encoding_is_utf_8
        assert_equal("UTF-8", @html.text.encoding.name)
      end

      # Serialized output is tagged with the document's own encoding, and the
      # serialized form survives a parse/serialize round trip unchanged.
      def test_serialize_encoding_html
        if Nokogiri.uses_libxml? && !Nokogiri::VersionInfo.instance.libxml2_has_iconv?
          skip("libxml2 without iconv does not pass this test")
        end

        serialized = @html.serialize

        # Encoding names are compared case-insensitively.
        assert_equal(@html.encoding.downcase, serialized.encoding.name.downcase)

        reparsed = Nokogiri::HTML(serialized)
        assert_equal(serialized, reparsed.serialize)
      end

      # A document built from nil reports no encoding, yet still serializes
      # to a UTF-8 string.
      def test_default_encoding
        doc = Nokogiri::HTML(nil)
        assert_nil(doc.encoding)
        assert_equal("UTF-8", doc.serialize.encoding.name)
      end

      # Node#encode_special_chars returns a UTF-8 string.
      def test_encode_special_chars
        foo = @html.css("a").first.encode_special_chars("foo")
        assert_equal("UTF-8", foo.encoding.name)
      end

      # Node#content is a UTF-8 string.
      def test_content
        node = @html.css("a").first
        assert_equal("UTF-8", node.content.encoding.name)
      end

      # Node#name is a UTF-8 string.
      def test_name
        node = @html.css("a").first
        assert_equal("UTF-8", node.name.encoding.name)
      end

      # Node#path is a UTF-8 string.
      def test_path
        node = @html.css("a").first
        assert_equal("UTF-8", node.path.encoding.name)
      end

      # Node#inner_html follows the document encoding by default, honours an
      # explicit :encoding option, and follows a later change to the
      # document's encoding.
      def test_inner_html
        doc = parse_binary_html_fixture(SHIFT_JIS_HTML)

        hello = "こんにちは"

        # Default: the result is tagged with the document's encoding and
        # contains the greeting encoded as Shift_JIS.
        contents = doc.at("h2").inner_html
        assert_equal(doc.encoding, contents.encoding.name)
        assert_match(hello.encode("Shift_JIS"), contents)

        # Explicit per-call override.
        contents = doc.at("h2").inner_html(encoding: "UTF-8")
        assert_match(hello, contents)

        # After re-labelling the whole document as UTF-8.
        doc.encoding = "UTF-8"
        contents = doc.at("h2").inner_html
        assert_match(hello, contents)
      end

      # Regression test for GH #1113: how a character given as a hex
      # reference, a decimal reference, or a literal is serialized by a
      # fragment, depending on the owning document's encoding.
      def test_encoding_GH_1113
        doc = Nokogiri::HTML::Document.new
        hex = "<p>&#x1f340;</p>"
        decimal = "<p>&#127808;</p>"
        encoded = "<p>🍀</p>"

        # With a UTF-8 document every input form comes back as the literal
        # character.
        doc.encoding = "UTF-8"
        [hex, decimal, encoded].each do |markup|
          assert_equal(encoded, doc.fragment(markup).to_s)
        end

        # With a US-ASCII document the character comes back as a numeric
        # reference: hexadecimal on JRuby, decimal otherwise. Only the two
        # reference forms are fed in here.
        doc.encoding = "US-ASCII"
        expected = Nokogiri.jruby? ? hex : decimal
        [hex, decimal].each do |markup|
          assert_equal(expected, doc.fragment(markup).to_s)
        end
      end

      private

      # Opens +path+ in binary mode, parses it with Nokogiri::HTML and returns
      # the resulting document. The block form of File.open closes the handle
      # as soon as parsing returns, so tests do not leak file descriptors.
      def parse_binary_html_fixture(path)
        File.open(path, "rb") { |io| Nokogiri::HTML(io) }
      end
    end
  end
end