File: test_serialization_encoding.rb

package info (click to toggle)
ruby-nokogiri 1.18.10%2Bdfsg-3
  • links: PTS, VCS
  • area: main
  • in suites: forky
  • size: 8,124 kB
  • sloc: ansic: 38,893; xml: 27,665; ruby: 27,510; java: 15,398; cpp: 7,107; yacc: 244; sh: 208; makefile: 149; sed: 14
file content (114 lines) | stat: -rw-r--r-- 4,652 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
# coding: utf-8
# frozen_string_literal: true

require "helper"

class TestSerializationEncoding < Nokogiri::TestCase
  # Hands a temporary file to the caller's block so it can write into it,
  # then rewinds the file and returns whatever is read back from it.
  def round_trip_through_file
    Tempfile.create do |tempfile|
      yield tempfile
      tempfile.rewind
      tempfile.read
    end
  end

  describe "serialization encoding" do
    # Fixture files used for both HTML document classes; each entry records
    # the encoding its file is read with.
    html_fixtures = [
      { encoding: Encoding::UTF_8, path: HTML_FILE },
      { encoding: Encoding::Shift_JIS, path: SHIFT_JIS_HTML },
    ]

    # Maps each document class under test to the fixtures it should parse.
    fixtures_by_class = {
      Nokogiri::XML::Document => [
        { encoding: Encoding::UTF_8, path: ADDRESS_XML_FILE },
        { encoding: Encoding::Shift_JIS, path: SHIFT_JIS_XML },
      ],
      Nokogiri::HTML4::Document => html_fixtures,
    }

    # HTML5 is only exercised when Nokogiri reports that it uses gumbo.
    fixtures_by_class[Nokogiri::HTML5::Document] = html_fixtures if Nokogiri.uses_gumbo?

    fixtures_by_class.each do |document_class, fixtures|
      describe document_class do
        let(:klass) { document_class }

        fixtures.each do |fixture|
          describe fixture[:encoding] do
            it "serializes with the expected encoding" do
              source = File.read(fixture[:path], encoding: fixture[:encoding])
              doc = klass.parse(source)

              parsed_as_html5 = defined?(Nokogiri::HTML5::Document) && klass == Nokogiri::HTML5::Document
              # FIXME: see #2801, the HTML5 case should be fixture[:encoding] as well
              expected_default_encoding = parsed_as_html5 ? Encoding::UTF_8 : fixture[:encoding]

              assert_equal(expected_default_encoding, doc.to_s.encoding)

              # [expected encoding of the result, value passed as `encoding:`];
              # the option is given both as a name string and as an Encoding.
              explicit_encodings = [
                [Encoding::UTF_8, "UTF-8"],
                [Encoding::Shift_JIS, "SHIFT_JIS"],
                [Encoding::UTF_8, Encoding::UTF_8],
                [Encoding::Shift_JIS, Encoding::Shift_JIS],
              ]

              # Every serializer gets the same checks: the default encoding
              # first, then each explicitly requested encoding.
              [:to_xml, :to_xhtml, :to_html, :serialize].each do |serializer|
                assert_equal(expected_default_encoding, doc.public_send(serializer).encoding)

                explicit_encodings.each do |expected_encoding, requested|
                  assert_equal(
                    expected_encoding,
                    doc.public_send(serializer, encoding: requested).encoding,
                  )
                end
              end

              # Writing to an IO must produce the same bytes as serializing
              # to a string, with and without an explicit encoding.
              assert_equal(
                doc.serialize.bytes,
                round_trip_through_file { |io| doc.write_to(io) }.bytes,
              )

              # [encoding given to #serialize, encoding given to #write_to]
              [
                ["UTF-8", "UTF-8"],
                ["SHIFT_JIS", "SHIFT_JIS"],
                ["UTF-8", Encoding::UTF_8],
                ["Shift_JIS", Encoding::Shift_JIS],
              ].each do |serialize_encoding, write_encoding|
                assert_equal(
                  doc.serialize(encoding: serialize_encoding).bytes,
                  round_trip_through_file { |io| doc.write_to(io, encoding: write_encoding) }.bytes,
                )
              end
            end
          end
        end
      end
    end
  end
end