mirror of
https://github.com/ruby/ruby.git
synced 2025-08-15 13:39:04 +02:00
136 lines
3.9 KiB
Ruby
136 lines
3.9 KiB
Ruby
# frozen_string_literal: true
|
|
|
|
require_relative "../test_helper"
|
|
|
|
module Prism
|
|
class StringEncodingTest < TestCase
|
|
each_encoding do |encoding, _|
|
|
define_method(:"test_#{encoding.name}") do
|
|
assert_encoding(encoding)
|
|
end
|
|
end
|
|
|
|
def test_coding
|
|
actual = Prism.parse_statement("# coding: utf-8\n'string'").unescaped.encoding
|
|
assert_equal Encoding::UTF_8, actual
|
|
end
|
|
|
|
def test_coding_with_whitespace
|
|
actual = Prism.parse_statement("# coding \t \r \v : \t \v \r ascii-8bit \n'string'").unescaped.encoding
|
|
assert_equal Encoding::ASCII_8BIT, actual
|
|
end
|
|
|
|
def test_emacs_style
|
|
actual = Prism.parse_statement("# -*- coding: utf-8 -*-\n'string'").unescaped.encoding
|
|
assert_equal Encoding::UTF_8, actual
|
|
end
|
|
|
|
def test_utf_8_unix
|
|
actual = Prism.parse_statement("# coding: utf-8-unix\n'string'").unescaped.encoding
|
|
assert_equal Encoding::UTF_8, actual
|
|
end
|
|
|
|
def test_utf_8_dos
|
|
actual = Prism.parse_statement("# coding: utf-8-dos\n'string'").unescaped.encoding
|
|
assert_equal Encoding::UTF_8, actual
|
|
end
|
|
|
|
def test_utf_8_mac
|
|
actual = Prism.parse_statement("# coding: utf-8-mac\n'string'").unescaped.encoding
|
|
assert_equal Encoding::UTF_8, actual
|
|
end
|
|
|
|
def test_utf_8_star
|
|
actual = Prism.parse_statement("# coding: utf-8-*\n'string'").unescaped.encoding
|
|
assert_equal Encoding::UTF_8, actual
|
|
end
|
|
|
|
def test_first_lexed_token
|
|
encoding = Prism.lex("# encoding: ascii-8bit").value[0][0].value.encoding
|
|
assert_equal Encoding::ASCII_8BIT, encoding
|
|
end
|
|
|
|
if !ENV["PRISM_BUILD_MINIMAL"]
|
|
# This test may be a little confusing. Basically when we use our strpbrk,
|
|
# it takes into account the encoding of the file.
|
|
def test_strpbrk_multibyte
|
|
result = Prism.parse(<<~RUBY)
|
|
# encoding: Shift_JIS
|
|
%w[\x81\x5c]
|
|
RUBY
|
|
|
|
assert(result.errors.empty?)
|
|
assert_equal(
|
|
(+"\x81\x5c").force_encoding(Encoding::Shift_JIS),
|
|
result.statement.elements.first.unescaped
|
|
)
|
|
end
|
|
|
|
def test_slice_encoding
|
|
slice = Prism.parse("# encoding: Shift_JIS\nア").value.slice
|
|
assert_equal (+"ア").force_encoding(Encoding::SHIFT_JIS), slice
|
|
assert_equal Encoding::SHIFT_JIS, slice.encoding
|
|
end
|
|
|
|
def test_multibyte_escapes
|
|
[
|
|
["'", "'"],
|
|
["\"", "\""],
|
|
["`", "`"],
|
|
["/", "/"],
|
|
["<<'HERE'\n", "\nHERE"],
|
|
["<<-HERE\n", "\nHERE"]
|
|
].each do |opening, closing|
|
|
assert Prism.parse_success?("# encoding: shift_jis\n'\\\x82\xA0'\n")
|
|
end
|
|
end
|
|
end
|
|
|
|
private
|
|
|
|
def assert_encoding(encoding)
|
|
escapes = ["\\x00", "\\x7F", "\\x80", "\\xFF", "\\u{00}", "\\u{7F}", "\\u{80}", "\\M-\\C-?"]
|
|
escapes = escapes.concat(escapes.product(escapes).map(&:join))
|
|
|
|
escapes.each do |escaped|
|
|
source = "# encoding: #{encoding.name}\n\"#{escaped}\""
|
|
|
|
expected =
|
|
begin
|
|
eval(source).encoding
|
|
rescue SyntaxError => error
|
|
if error.message.include?("UTF-8 mixed within")
|
|
error.message[/UTF-8 mixed within .+? source/]
|
|
else
|
|
raise
|
|
end
|
|
end
|
|
|
|
actual =
|
|
Prism.parse(source).then do |result|
|
|
if result.success?
|
|
string = result.statement
|
|
|
|
if string.forced_utf8_encoding?
|
|
Encoding::UTF_8
|
|
elsif string.forced_binary_encoding?
|
|
Encoding::ASCII_8BIT
|
|
else
|
|
encoding
|
|
end
|
|
else
|
|
error = result.errors.first
|
|
|
|
if error.message.include?("mixed")
|
|
error.message
|
|
else
|
|
raise error.message
|
|
end
|
|
end
|
|
end
|
|
|
|
assert_equal expected, actual
|
|
end
|
|
end
|
|
end
|
|
end
|