1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97
|
# -*- coding: utf-8 -*-
# frozen_string_literal: true
require "helper"
module Nokogiri
  module HTML
    # Checks which Ruby string encoding is reported by values read from a
    # parsed HTML document (attributes, text, names, paths) and by its
    # serialized output.
    class TestNodeEncoding < Nokogiri::TestCase
      def setup
        super
        # Block form so the fixture's file handle is closed as soon as the
        # parse call returns instead of lingering until GC.
        @html = File.open(NICH_FILE, "rb") { |file| Nokogiri::HTML(file) }
      end

      # Attribute values read through Node#[] are UTF-8 strings.
      def test_get_attribute
        assert_equal("UTF-8", first_anchor["href"].encoding.name)
      end

      # The document's text is a UTF-8 string.
      def test_text_encoding_is_utf_8
        assert_equal("UTF-8", @html.text.encoding.name)
      end

      # Serialized output is tagged with the document's own encoding, and
      # re-parsing that output serializes to the same string again.
      def test_serialize_encoding_html
        if Nokogiri.uses_libxml? && !Nokogiri::VersionInfo.instance.libxml2_has_iconv?
          skip("libxml2 without iconv does not pass this test")
        end

        assert_equal(@html.encoding.downcase,
          @html.serialize.encoding.name.downcase)

        reparsed = Nokogiri::HTML(@html.serialize)
        assert_equal(@html.serialize, reparsed.serialize)
      end

      # A document built from nil has no declared encoding but still
      # serializes to a UTF-8 string.
      def test_default_encoding
        doc = Nokogiri::HTML(nil)
        assert_nil(doc.encoding)
        assert_equal("UTF-8", doc.serialize.encoding.name)
      end

      # Node#encode_special_chars returns a UTF-8 string.
      def test_encode_special_chars
        escaped = first_anchor.encode_special_chars("foo")
        assert_equal("UTF-8", escaped.encoding.name)
      end

      # Node#content returns a UTF-8 string.
      def test_content
        assert_equal("UTF-8", first_anchor.content.encoding.name)
      end

      # Node#name returns a UTF-8 string.
      def test_name
        assert_equal("UTF-8", first_anchor.name.encoding.name)
      end

      # Node#path returns a UTF-8 string.
      def test_path
        assert_equal("UTF-8", first_anchor.path.encoding.name)
      end

      # inner_html follows the document encoding by default, honours an
      # explicit encoding: option, and follows a later change of the
      # document encoding.
      def test_inner_html
        # Block form closes the fixture handle once parsing is done.
        doc = File.open(SHIFT_JIS_HTML, "rb") { |file| Nokogiri::HTML(file) }
        hello = "こんにちは"

        # Default: output is in the document's own encoding.
        contents = doc.at("h2").inner_html
        assert_equal(doc.encoding, contents.encoding.name)
        assert_match(hello.encode("Shift_JIS"), contents)

        # Explicit per-call override.
        contents = doc.at("h2").inner_html(encoding: "UTF-8")
        assert_match(hello, contents)

        # Changing the document encoding changes the default output.
        doc.encoding = "UTF-8"
        contents = doc.at("h2").inner_html
        assert_match(hello, contents)
      end

      # GH-1113: numeric character references for a non-BMP character
      # (U+1F340, four-leaf clover) are emitted as the literal character in
      # a UTF-8 document and stay escaped in a US-ASCII document.
      def test_encoding_GH_1113
        doc = Nokogiri::HTML::Document.new
        # The three spellings must differ: hexadecimal reference, decimal
        # reference (0x1f340 == 127808), and the literal character.
        hex = "<p>&#x1f340;</p>"
        decimal = "<p>&#127808;</p>"
        encoded = "<p>🍀</p>"

        doc.encoding = "UTF-8"
        [hex, decimal, encoded].each do |document|
          assert_equal(encoded, doc.fragment(document).to_s)
        end

        doc.encoding = "US-ASCII"
        # The expected escaped form depends on the backend in use.
        expected = Nokogiri.jruby? ? hex : decimal
        [hex, decimal].each do |document|
          assert_equal(expected, doc.fragment(document).to_s)
        end
      end

      private

      # First <a> element of the fixture parsed in #setup.
      def first_anchor
        @html.css("a").first
      end
    end
  end
end
|