1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187
|
# -*- coding: utf-8 -*-
require 'mechanize/test_case'
# tests for Page encoding and charset and parsing
class TestMechanizePageEncoding < Mechanize::TestCase
MECH_ASCII_ENCODING = 'US-ASCII'
def setup
super
@uri = URI('http://localhost/')
@response_headers = { 'content-type' => 'text/html' }
@body = '<title>hi</title>'
end
def util_page body = @body, headers = @response_headers
Mechanize::Page.new @uri, headers, body && body.force_encoding(Encoding::BINARY), 200, @mech
end
def test_page_charset
charset = Mechanize::Page.charset 'text/html;charset=vAlue'
assert_equal 'vAlue', charset
charset = Mechanize::Page.charset 'text/html;charset=vaLue, text/html'
assert_equal 'vaLue', charset
charset = Mechanize::Page.charset 'text/html ; charset = valUe, text/html'
assert_equal 'valUe', charset
end
def test_page_charset_upcase
charset = Mechanize::Page.charset 'TEXT/HTML;CHARSET=UTF-8'
assert_equal 'UTF-8', charset
end
def test_page_charset_semicolon
charset = Mechanize::Page.charset 'text/html;charset=UTF-8;'
assert_equal 'UTF-8', charset
end
def test_page_charset_no_chaset_token
charset = Mechanize::Page.charset 'text/html'
assert_nil charset
end
def test_page_charset_returns_nil_when_charset_says_none
charset = Mechanize::Page.charset 'text/html;charset=none'
assert_nil charset
end
def test_page_charset_multiple
charset = Mechanize::Page.charset 'text/html;charset=111;charset=222'
assert_equal '111', charset
end
def test_page_response_header_charset
headers = { 'content-type' => 'text/html;charset=HEADER' }
charsets = Mechanize::Page.response_header_charset(headers)
assert_equal ['HEADER'], charsets
end
def test_page_response_header_charset_no_token
headers = {'content-type' => 'text/html'}
charsets = Mechanize::Page.response_header_charset(headers)
assert_equal [], charsets
headers = {'X-My-Header' => 'hello'}
charsets = Mechanize::Page.response_header_charset(headers)
assert_equal [], charsets
end
def test_page_response_header_charset_wrong_header
headers = { 'x-content-type' => 'text/html;charset=bogus' }
charsets = Mechanize::Page.response_header_charset(headers)
assert_equal [], charsets
end
def test_response_header_charset
page = util_page nil, {'content-type' => 'text/html;charset=HEADER'}
assert_equal ['HEADER'], page.response_header_charset
end
def test_page_meta_charset
body = '<meta http-equiv="content-type" content="text/html;charset=META">'
charsets = Mechanize::Page.meta_charset(body)
assert_equal ['META'], charsets
end
def test_page_meta_charset_is_empty_when_no_charset_meta
body = '<meta http-equiv="refresh" content="5; url=index.html">'
charsets = Mechanize::Page.meta_charset(body)
assert_equal [], charsets
end
def test_page_meta_charset_no_content
body = '<meta http-equiv="content-type">'
charsets = Mechanize::Page.meta_charset(body)
assert_empty charsets
end
# Test to fix issue: https://github.com/sparklemotion/mechanize/issues/143
def test_page_meta_charset_handles_whitespace
body = '<meta http-equiv = "Content-Type" content = "text/html; charset=iso-8859-1">'
charsets = Mechanize::Page.meta_charset(body)
assert_equal ["iso-8859-1"], charsets
end
def test_meta_charset
body = '<meta http-equiv="content-type" content="text/html;charset=META">'
page = util_page body
assert_equal ['META'], page.meta_charset
end
def test_detected_encoding
page = util_page
assert_equal MECH_ASCII_ENCODING, page.detected_encoding
end
def test_encodings
response = {'content-type' => 'text/html;charset=HEADER'}
body = '<meta http-equiv="content-type" content="text/html;charset=META">'
@mech.default_encoding = 'DEFAULT'
page = util_page body, response
assert_equal true, page.encodings.include?('HEADER')
assert_equal true, page.encodings.include?('META')
assert_equal true, page.encodings.include?(MECH_ASCII_ENCODING)
assert_equal true, page.encodings.include?('DEFAULT')
end
def test_parser_with_default_encoding
# pre test
assert_equal false, util_page.encodings.include?('Windows-1252')
@mech.default_encoding = 'Windows-1252'
page = util_page
assert_equal true, page.encodings.include?('Windows-1252')
end
def test_parser_force_default_encoding
@mech.default_encoding = 'Windows-1252'
@mech.force_default_encoding = true
page = util_page
assert page.encodings.include? 'Windows-1252'
end
def test_parser_encoding_equals_overwrites_force_default_encoding
@mech.default_encoding = 'Windows-1252'
@mech.force_default_encoding = true
page = util_page
assert_equal 'Windows-1252', page.encoding
page.encoding = 'ISO-8859-2'
assert_equal 'ISO-8859-2', page.encoding
end
def test_parser_encoding_when_searching_elements
skip "Encoding not implemented" unless have_encoding?
body = '<span id="latin1">hi</span>'
page = util_page body, 'content-type' => 'text/html,charset=ISO-8859-1'
result = page.search('#latin1')
assert_equal Encoding::UTF_8, result.text.encoding
end
end
|