1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95
|
# -*- coding: utf-8 -*-
require "helper"
module Nokogiri
module HTML
class TestNodeEncoding < Nokogiri::TestCase
# Parses the NICH_FILE fixture and stores the resulting document in @html.
def setup
  super
  # Block form closes the file handle as soon as parsing has finished,
  # instead of leaving it open until the File object is garbage collected.
  @html = File.open(NICH_FILE, "rb") { |file| Nokogiri::HTML(file) }
end
# The 'href' attribute value of the first <a> element must be UTF-8.
def test_get_attribute
  anchor = @html.css('a').first
  href = anchor['href']
  assert_equal 'UTF-8', href.encoding.name
end
# The text of the whole document must be returned as UTF-8.
def test_text_encoding_is_utf_8
  document_text = @html.text
  assert_equal 'UTF-8', document_text.encoding.name
end
# Serialized output must carry the document's own encoding, and re-parsing
# that output must serialize to the same string again.
def test_serialize_encoding_html
  libxml_without_iconv =
    Nokogiri.uses_libxml? && !Nokogiri::VersionInfo.instance.libxml2_has_iconv?
  skip "libxml2 without iconv does not pass this test" if libxml_without_iconv

  # Compare case-insensitively: both names are downcased before the check.
  document_encoding = @html.encoding.downcase
  serialized_encoding = @html.serialize.encoding.name.downcase
  assert_equal document_encoding, serialized_encoding

  # Round trip: parse the serialized HTML and serialize it once more.
  @doc = Nokogiri::HTML(@html.serialize)
  assert_equal @html.serialize, @doc.serialize
end
# A document built from nil reports no encoding, yet serializes as UTF-8.
def test_default_encoding
  blank_document = Nokogiri::HTML(nil)
  assert_nil blank_document.encoding

  serialized = blank_document.serialize
  assert_equal 'UTF-8', serialized.encoding.name
end
# The string returned by Node#encode_special_chars must be UTF-8.
def test_encode_special_chars
  anchor = @html.css('a').first
  escaped = anchor.encode_special_chars('foo')
  assert_equal 'UTF-8', escaped.encoding.name
end
# The content of the first <a> element must be UTF-8.
def test_content
  anchor_content = @html.css('a').first.content
  assert_equal 'UTF-8', anchor_content.encoding.name
end
# The node name of the first <a> element must be UTF-8.
def test_name
  anchor_name = @html.css('a').first.name
  assert_equal 'UTF-8', anchor_name.encoding.name
end
# The path string of the first <a> element must be UTF-8.
def test_path
  anchor_path = @html.css('a').first.path
  assert_equal 'UTF-8', anchor_path.encoding.name
end
# Node#inner_html must follow the document's encoding unless an explicit
# :encoding option is passed.
def test_inner_html
  # Block form closes the fixture file once it has been parsed, rather than
  # leaking the handle until garbage collection.
  doc = File.open(SHIFT_JIS_HTML, 'rb') { |file| Nokogiri::HTML(file) }
  hello = "こんにちは"

  # Without options the result is tagged with the document's own encoding.
  contents = doc.at('h2').inner_html
  assert_equal doc.encoding, contents.encoding.name
  assert_match hello.encode('Shift_JIS'), contents

  # An explicit :encoding option overrides the document encoding.
  contents = doc.at('h2').inner_html(:encoding => 'UTF-8')
  assert_match hello, contents

  # After the document encoding is reassigned, the default output follows it.
  doc.encoding = 'UTF-8'
  contents = doc.at('h2').inner_html
  assert_match hello, contents
end
# Fragment serialization of a character outside the ASCII range, under a
# UTF-8 and then a US-ASCII document encoding.
def test_encoding_GH_1113
  doc = Nokogiri::HTML::Document.new
  # The same character (U+1F340) written three ways: as a hexadecimal
  # character reference, as a decimal character reference, and literally.
  hex = '<p>&#x1f340;</p>'
  decimal = '<p>&#127808;</p>'
  encoded = '<p>🍀</p>'

  # With a UTF-8 document every input form serializes to the literal character.
  doc.encoding = 'UTF-8'
  [hex, decimal, encoded].each do |document|
    assert_equal encoded, doc.fragment(document).to_s
  end

  # US-ASCII cannot hold the character itself, so a character reference is
  # expected; the hexadecimal form on JRuby, the decimal form otherwise.
  doc.encoding = 'US-ASCII'
  expected = Nokogiri.jruby? ? hex : decimal
  [hex, decimal].each do |document|
    assert_equal expected, doc.fragment(document).to_s
  end
end
end
end
end
|