File: tc_encoding.rb

package info (click to toggle)
ruby-libxml 2.9.0-2
  • links: PTS, VCS
  • area: main
  • in suites: stretch
  • size: 1,964 kB
  • ctags: 2,319
  • sloc: xml: 8,711; ansic: 8,472; ruby: 7,563; makefile: 3
file content (132 lines) | stat: -rw-r--r-- 4,840 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
# encoding: UTF-8

require './test_helper'

# Char  UTF-8       Latin-1     Hex (UTF-8 / Latin-1)
# m      109          109        6D
# ö      195 182      246        C3 B6 / F6
# t      116          116        74
# l      108          108        6C
# e      101          101        65
# y      121          121        79
# _       95           95        5F
# c       99           99        63
# r      114          114        72
# ü      195 188      252        C3 BC / FC
# e      101          101        65

# See:
#  http://en.wikipedia.org/wiki/ISO/IEC_8859-1
#  http://en.wikipedia.org/wiki/List_of_Unicode_characters

# Checks how libxml-ruby reports a document's encoding and which Ruby string
# encoding node names/content come back in, with and without
# Encoding.default_internal being set.
#
# Fixture documents are read from the model/ directory next to this file.
class TestEncoding < Minitest::Test
  # Space-separated decimal bytes of the band name when encoded as UTF-8
  # (o-umlaut is 195 182, u-umlaut is 195 188).
  UTF_8_NAME_BYTES = "109 195 182 116 108 101 121 95 99 114 195 188 101".freeze

  # Space-separated decimal bytes of the band name when encoded as ISO-8859-1
  # (o-umlaut is 246, u-umlaut is 252).
  ISO_8859_1_NAME_BYTES = "109 246 116 108 101 121 95 99 114 252 101".freeze

  # Remembers the process-wide default internal encoding and clears it so that
  # every test starts from the same state.
  def setup
    @backup_internal_encoding = Encoding.default_internal
    Encoding.default_internal = nil
  end

  # Puts back the default internal encoding saved by #setup.
  def teardown
    Encoding.default_internal = @backup_internal_encoding
  end

  # Builds the path of the fixture for +encoding+:
  # model/bands.<downcased encoding name>.xml, relative to this file's directory
  # (e.g. Encoding::UTF_8 -> model/bands.utf-8.xml).
  def file_for_encoding(encoding)
    file_name = "model/bands.#{encoding.name.downcase}.xml"
    File.join(File.dirname(__FILE__), file_name)
  end

  # Parses the fixture for +encoding+ into @doc and records the encoding in
  # @encoding.
  def load_encoding(encoding)
    @encoding = encoding
    file = file_for_encoding(encoding)

    # Strip spaces to make testing easier
    XML.default_keep_blanks = false
    begin
      @doc = XML::Document.file(file)
    ensure
      # default_keep_blanks is a global setting: switch it back on even when
      # parsing raises, otherwise the change leaks into every later test.
      XML.default_keep_blanks = true
    end
  end

  # A new document has no XML encoding; a parsed one reports the encoding it
  # was read with, and assigning #encoding= is reflected by both accessors.
  def test_encoding
    doc = XML::Document.new
    assert_equal(XML::Encoding::NONE, doc.encoding)
    assert_equal(Encoding::ASCII_8BIT, doc.rb_encoding)

    file = File.expand_path(File.join(File.dirname(__FILE__), 'model/bands.xml'))
    doc = XML::Document.file(file)
    assert_equal(XML::Encoding::UTF_8, doc.encoding)
    assert_equal(Encoding::UTF_8, doc.rb_encoding)

    doc.encoding = XML::Encoding::ISO_8859_1
    assert_equal(XML::Encoding::ISO_8859_1, doc.encoding)
    assert_equal(Encoding::ISO_8859_1, doc.rb_encoding)
  end

  # ISO-8859-1 fixture, no default internal encoding: strings come back as
  # UTF-8 and can be transcoded to ISO-8859-1 explicitly.
  def test_no_internal_encoding_iso_8859_1
    Encoding.default_internal = nil
    load_encoding(Encoding::ISO_8859_1)
    node = @doc.root.children.first

    name = node.name
    assert_equal(Encoding::UTF_8, name.encoding)
    assert_equal("m\u00F6tley_cr\u00FCe", name)
    assert_equal(UTF_8_NAME_BYTES, byte_values(name))
    assert_equal("M\u00F6tley Cr\u00FCe is an American heavy metal band formed in Los Angeles, California in 1981.",
                 node.content)

    name = name.encode(Encoding::ISO_8859_1)
    assert_equal(Encoding::ISO_8859_1, name.encoding)
    assert_equal("m\xF6tley_cr\xFCe".force_encoding(Encoding::ISO_8859_1), name)
    assert_equal(ISO_8859_1_NAME_BYTES, byte_values(name))
    assert_equal("M\xF6tley Cr\xFCe is an American heavy metal band formed in Los Angeles, California in 1981.".force_encoding(Encoding::ISO_8859_1),
                 node.content.encode(Encoding::ISO_8859_1))
  end

  # ISO-8859-1 fixture, default internal encoding ISO-8859-1: the node name
  # comes back already encoded as ISO-8859-1.
  def test_internal_encoding_iso_8859_1
    Encoding.default_internal = Encoding::ISO_8859_1
    load_encoding(Encoding::ISO_8859_1)
    node = @doc.root.children.first

    name = node.name
    assert_equal(Encoding::ISO_8859_1, name.encoding)
    assert_equal(ISO_8859_1_NAME_BYTES, byte_values(name))
    assert_equal("m\xF6tley_cr\xFCe".force_encoding(Encoding::ISO_8859_1), name)
    assert_equal("M\xF6tley Cr\xFCe is an American heavy metal band formed in Los Angeles, California in 1981.".force_encoding(Encoding::ISO_8859_1),
                 node.content.encode(Encoding::ISO_8859_1))
  end

  # UTF-8 fixture, no default internal encoding: the node name comes back in
  # the fixture's encoding and can be transcoded to ISO-8859-1 explicitly.
  def test_no_internal_encoding_utf_8
    Encoding.default_internal = nil
    load_encoding(Encoding::UTF_8)
    node = @doc.root.children.first

    name = node.name
    assert_equal(@encoding, name.encoding)
    assert_equal(UTF_8_NAME_BYTES, byte_values(name))

    name = name.encode(Encoding::ISO_8859_1)
    assert_equal(Encoding::ISO_8859_1, name.encoding)
    assert_equal(ISO_8859_1_NAME_BYTES, byte_values(name))
  end

  # UTF-8 fixture, default internal encoding ISO-8859-1: the node name comes
  # back encoded as ISO-8859-1.
  def test_internal_encoding_utf_8
    Encoding.default_internal = Encoding::ISO_8859_1
    load_encoding(Encoding::UTF_8)
    node = @doc.root.children.first

    name = node.name
    assert_equal(Encoding::ISO_8859_1, name.encoding)
    assert_equal(ISO_8859_1_NAME_BYTES, byte_values(name))
  end

  # XML::Encoding constants round-trip to and from their string names.
  def test_encoding_conversions
    assert_equal("UTF-8", XML::Encoding.to_s(XML::Encoding::UTF_8))
    assert_equal(XML::Encoding::UTF_8, XML::Encoding.from_s("UTF-8"))
  end

  private

  # Renders +string+ as its decimal byte values joined by single spaces,
  # e.g. "me" -> "109 101".
  def byte_values(string)
    # to_a keeps this working where String#bytes yields an Enumerator.
    string.bytes.to_a.join(" ")
  end
end