ruby/test/prism/encoding/encodings_test.rb
2024-05-30 15:18:20 -04:00

101 lines
3.4 KiB
Ruby

# frozen_string_literal: true
return if RUBY_ENGINE != "ruby"
require_relative "../test_helper"
module Prism
class EncodingsTest < TestCase
class ConstantContext < BasicObject
def self.const_missing(const)
const
end
end
class IdentifierContext < BasicObject
def method_missing(name, *)
name
end
end
# These test that we're correctly parsing codepoints for each alias of each
# encoding that prism supports.
each_encoding do |encoding, range|
(encoding.names - %w[external internal filesystem locale]).each do |name|
define_method(:"test_encoding_#{name}") do
assert_encoding(encoding, name, range)
end
end
end
private
def assert_encoding_constant(name, character)
source = "# encoding: #{name}\n#{character}"
expected = ConstantContext.new.instance_eval(source)
result = Prism.parse(source)
assert result.success?
actual = result.value.statements.body.last
assert_kind_of ConstantReadNode, actual
assert_equal expected, actual.name
end
def assert_encoding_identifier(name, character)
source = "# encoding: #{name}\n#{character}"
expected = IdentifierContext.new.instance_eval(source)
result = Prism.parse(source)
assert result.success?
actual = result.value.statements.body.last
assert_kind_of CallNode, actual
assert_equal expected, actual.name
end
# Check that we can properly parse every codepoint in the given encoding.
def assert_encoding(encoding, name, range)
# I'm not entirely sure, but I believe these codepoints are incorrect in
# their parsing in CRuby. They all report as matching `[[:lower:]]` but
# then they are parsed as constants. This is because CRuby determines if
# an identifier is a constant or not by case folding it down to lowercase
# and checking if there is a difference. And even though they report
# themselves as lowercase, their case fold is different. I have reported
# this bug upstream.
case encoding
when Encoding::UTF_8, Encoding::UTF_8_MAC, Encoding::UTF8_DoCoMo, Encoding::UTF8_KDDI, Encoding::UTF8_SoftBank, Encoding::CESU_8
range = range.to_a - [
0x01c5, 0x01c8, 0x01cb, 0x01f2, 0x1f88, 0x1f89, 0x1f8a, 0x1f8b,
0x1f8c, 0x1f8d, 0x1f8e, 0x1f8f, 0x1f98, 0x1f99, 0x1f9a, 0x1f9b,
0x1f9c, 0x1f9d, 0x1f9e, 0x1f9f, 0x1fa8, 0x1fa9, 0x1faa, 0x1fab,
0x1fac, 0x1fad, 0x1fae, 0x1faf, 0x1fbc, 0x1fcc, 0x1ffc,
]
when Encoding::Windows_1253
range = range.to_a - [0xb5]
end
range.each do |codepoint|
character = codepoint.chr(encoding)
if character.match?(/[[:alpha:]]/)
if character.match?(/[[:upper:]]/)
assert_encoding_constant(name, character)
else
assert_encoding_identifier(name, character)
end
elsif character.match?(/[[:alnum:]]/)
assert_encoding_identifier(name, "_#{character}")
else
next if ["/", "{"].include?(character)
source = "# encoding: #{name}\n/(?##{character})/\n"
assert Prism.parse_success?(source), "Expected #{source.inspect} to parse successfully."
end
rescue RangeError
source = "# encoding: #{name}\n\\x#{codepoint.to_s(16)}"
assert Prism.parse_failure?(source)
end
end
end
end