ruby/test/prism/encoding/string_encoding_test.rb
2024-05-30 15:18:20 -04:00

136 lines
3.9 KiB
Ruby

# frozen_string_literal: true
require_relative "../test_helper"
module Prism
class StringEncodingTest < TestCase
each_encoding do |encoding, _|
define_method(:"test_#{encoding.name}") do
assert_encoding(encoding)
end
end
def test_coding
actual = Prism.parse_statement("# coding: utf-8\n'string'").unescaped.encoding
assert_equal Encoding::UTF_8, actual
end
def test_coding_with_whitespace
actual = Prism.parse_statement("# coding \t \r \v : \t \v \r ascii-8bit \n'string'").unescaped.encoding
assert_equal Encoding::ASCII_8BIT, actual
end
def test_emacs_style
actual = Prism.parse_statement("# -*- coding: utf-8 -*-\n'string'").unescaped.encoding
assert_equal Encoding::UTF_8, actual
end
def test_utf_8_unix
actual = Prism.parse_statement("# coding: utf-8-unix\n'string'").unescaped.encoding
assert_equal Encoding::UTF_8, actual
end
def test_utf_8_dos
actual = Prism.parse_statement("# coding: utf-8-dos\n'string'").unescaped.encoding
assert_equal Encoding::UTF_8, actual
end
def test_utf_8_mac
actual = Prism.parse_statement("# coding: utf-8-mac\n'string'").unescaped.encoding
assert_equal Encoding::UTF_8, actual
end
def test_utf_8_star
actual = Prism.parse_statement("# coding: utf-8-*\n'string'").unescaped.encoding
assert_equal Encoding::UTF_8, actual
end
def test_first_lexed_token
encoding = Prism.lex("# encoding: ascii-8bit").value[0][0].value.encoding
assert_equal Encoding::ASCII_8BIT, encoding
end
if !ENV["PRISM_BUILD_MINIMAL"]
# This test may be a little confusing. Basically when we use our strpbrk,
# it takes into account the encoding of the file.
def test_strpbrk_multibyte
result = Prism.parse(<<~RUBY)
# encoding: Shift_JIS
%w[\x81\x5c]
RUBY
assert(result.errors.empty?)
assert_equal(
(+"\x81\x5c").force_encoding(Encoding::Shift_JIS),
result.statement.elements.first.unescaped
)
end
def test_slice_encoding
slice = Prism.parse("# encoding: Shift_JIS\n").value.slice
assert_equal (+"").force_encoding(Encoding::SHIFT_JIS), slice
assert_equal Encoding::SHIFT_JIS, slice.encoding
end
def test_multibyte_escapes
[
["'", "'"],
["\"", "\""],
["`", "`"],
["/", "/"],
["<<'HERE'\n", "\nHERE"],
["<<-HERE\n", "\nHERE"]
].each do |opening, closing|
assert Prism.parse_success?("# encoding: shift_jis\n'\\\x82\xA0'\n")
end
end
end
private
def assert_encoding(encoding)
escapes = ["\\x00", "\\x7F", "\\x80", "\\xFF", "\\u{00}", "\\u{7F}", "\\u{80}", "\\M-\\C-?"]
escapes = escapes.concat(escapes.product(escapes).map(&:join))
escapes.each do |escaped|
source = "# encoding: #{encoding.name}\n\"#{escaped}\""
expected =
begin
eval(source).encoding
rescue SyntaxError => error
if error.message.include?("UTF-8 mixed within")
error.message[/UTF-8 mixed within .+? source/]
else
raise
end
end
actual =
Prism.parse(source).then do |result|
if result.success?
string = result.statement
if string.forced_utf8_encoding?
Encoding::UTF_8
elsif string.forced_binary_encoding?
Encoding::ASCII_8BIT
else
encoding
end
else
error = result.errors.first
if error.message.include?("mixed")
error.message
else
raise error.message
end
end
end
assert_equal expected, actual
end
end
end
end