1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136
|
# frozen_string_literal: true
require_relative "../test_helper"
module Prism
class StringEncodingTest < TestCase
# Generates one test method per encoding yielded by each_encoding. Every
# generated test simply runs assert_encoding for its encoding.
each_encoding do |encoding, _|
  test_name = :"test_#{encoding.name}"
  define_method(test_name) { assert_encoding(encoding) }
end
# A string literal that follows a `# coding: utf-8` comment should unescape
# to a UTF-8 string.
def test_coding
  string = Prism.parse_statement("# coding: utf-8\n'string'")
  assert_equal Encoding::UTF_8, string.unescaped.encoding
end
# The magic comment is padded with spaces, tabs, carriage returns and
# vertical tabs around the colon; the string should still come back as
# ASCII-8BIT.
def test_coding_with_whitespace
  source = "# coding \t \r \v : \t \v \r ascii-8bit \n'string'"
  string = Prism.parse_statement(source)
  assert_equal Encoding::ASCII_8BIT, string.unescaped.encoding
end
def test_emacs_style
actual = Prism.parse_statement("# -*- coding: utf-8 -*-\n'string'").unescaped.encoding
assert_equal Encoding::UTF_8, actual
end
# `utf-8-unix` in the magic comment is expected to resolve to UTF-8.
def test_utf_8_unix
  string = Prism.parse_statement("# coding: utf-8-unix\n'string'")
  assert_equal Encoding::UTF_8, string.unescaped.encoding
end
# `utf-8-dos` in the magic comment is expected to resolve to UTF-8.
def test_utf_8_dos
  string = Prism.parse_statement("# coding: utf-8-dos\n'string'")
  assert_equal Encoding::UTF_8, string.unescaped.encoding
end
# `utf-8-mac` in the magic comment is expected to resolve to UTF-8.
def test_utf_8_mac
  string = Prism.parse_statement("# coding: utf-8-mac\n'string'")
  assert_equal Encoding::UTF_8, string.unescaped.encoding
end
# `utf-8-*` (a literal asterisk suffix) in the magic comment is expected to
# resolve to UTF-8.
def test_utf_8_star
  string = Prism.parse_statement("# coding: utf-8-*\n'string'")
  assert_equal Encoding::UTF_8, string.unescaped.encoding
end
# Lexes a source made up of only an encoding magic comment and checks that
# the value of the very first token is tagged ASCII-8BIT.
def test_first_lexed_token
  first_token = Prism.lex("# encoding: ascii-8bit").value.first.first
  assert_equal Encoding::ASCII_8BIT, first_token.value.encoding
end
if !ENV["PRISM_BUILD_MINIMAL"]
# This test may be a little confusing. Basically when we use our strpbrk,
# it takes into account the encoding of the file.
def test_strpbrk_multibyte
  # The %w[] literal holds the two bytes 0x81 0x5c under a Shift_JIS magic
  # comment. The second byte has the same value as an ASCII backslash.
  source = <<~RUBY
    # encoding: Shift_JIS
    %w[\x81\x5c]
  RUBY

  parsed = Prism.parse(source)
  assert(parsed.errors.empty?)

  # The single array element must come back as those same two bytes,
  # tagged as Shift_JIS.
  expected = (+"\x81\x5c").force_encoding(Encoding::Shift_JIS)
  assert_equal(expected, parsed.statement.elements.first.unescaped)
end
# The slice of the parsed program should carry the encoding named by the
# magic comment (Shift_JIS) while keeping the original bytes of the source.
def test_slice_encoding
  source = "# encoding: Shift_JIS\nア"
  slice = Prism.parse(source).value.slice
  expected = (+"ア").force_encoding(Encoding::SHIFT_JIS)

  assert_equal(expected, slice)
  assert_equal(Encoding::SHIFT_JIS, slice.encoding)
end
# Checks that a backslash followed by the bytes 0x82 0xA0 parses without
# errors under a shift_jis magic comment, once for every pair of delimiters
# listed below (quotes, backticks, slashes and two heredoc forms).
# NOTE(review): 0x82 0xA0 is presumably a single double-byte Shift_JIS
# character; confirm against the encoding table.
def test_multibyte_escapes
  [
    ["'", "'"],
    ["\"", "\""],
    ["`", "`"],
    ["/", "/"],
    ["<<'HERE'\n", "\nHERE"],
    ["<<-HERE\n", "\nHERE"]
  ].each do |opening, closing|
    # Wrap the escape in the current delimiters. Previously the pair was
    # ignored and the same single-quoted source was parsed on every pass.
    source = "# encoding: shift_jis\n#{opening}\\\x82\xA0#{closing}\n"
    assert Prism.parse_success?(source), "failed to parse with opening #{opening.inspect}"
  end
end
end
private
# Compares Prism against Ruby's own eval for a set of escape sequences
# placed in a double-quoted string under an `# encoding:` magic comment.
#
# For every single escape and every ordered pair of escapes:
# * expected is the encoding of the string eval returns, or, when eval
#   raises a SyntaxError mentioning "UTF-8 mixed within", the matching
#   fragment of that message;
# * actual is the encoding implied by the flags on Prism's node (forced
#   UTF-8, forced binary, otherwise the given encoding), or the message of
#   the first Prism error when it mentions "mixed".
#
# Any other failure from either side is raised rather than compared.
def assert_encoding(encoding)
  singles = ["\\x00", "\\x7F", "\\x80", "\\xFF", "\\u{00}", "\\u{7F}", "\\u{80}", "\\M-\\C-?"]
  candidates = singles + singles.product(singles).map(&:join)

  candidates.each do |escaped|
    source = "# encoding: #{encoding.name}\n\"#{escaped}\""

    expected =
      begin
        eval(source).encoding
      rescue SyntaxError => syntax_error
        # Only the "mixed" diagnostic is comparable; re-raise anything else.
        raise unless syntax_error.message.include?("UTF-8 mixed within")
        syntax_error.message[/UTF-8 mixed within .+? source/]
      end

    result = Prism.parse(source)
    actual =
      if result.success?
        node = result.statement

        if node.forced_utf8_encoding?
          Encoding::UTF_8
        elsif node.forced_binary_encoding?
          Encoding::ASCII_8BIT
        else
          encoding
        end
      else
        message = result.errors.first.message
        raise message unless message.include?("mixed")
        message
      end

    assert_equal expected, actual
  end
end
end
end
|