From 566f9463c2be0010815c6521b32c5067a6fff699 Mon Sep 17 00:00:00 2001 From: Earlopain <14981592+Earlopain@users.noreply.github.com> Date: Sun, 12 Jan 2025 13:49:44 +0100 Subject: [PATCH] [ruby/prism] Fix parser translator tSPACE tokens for percent arrays Tests worked around this but the incompatibility is not hard to fix. This fixes 17 token incompatibilities in tests here that were previously passing https://github.com/ruby/prism/commit/101962526d --- lib/prism/translation/parser/lexer.rb | 10 +++++++++- test/prism/ruby/parser_test.rb | 7 ------- 2 files changed, 9 insertions(+), 8 deletions(-) diff --git a/lib/prism/translation/parser/lexer.rb b/lib/prism/translation/parser/lexer.rb index fd1d0243ca..1808473cad 100644 --- a/lib/prism/translation/parser/lexer.rb +++ b/lib/prism/translation/parser/lexer.rb @@ -457,7 +457,15 @@ module Prism location = range(token.location.start_offset, token.location.start_offset + 1) end - quote_stack.pop + if percent_array?(quote_stack.pop) + prev_token = lexed[index - 2][0] if index - 2 >= 0 + empty = %i[PERCENT_LOWER_I PERCENT_LOWER_W PERCENT_UPPER_I PERCENT_UPPER_W].include?(prev_token&.type) + ends_with_whitespace = prev_token&.type == :WORDS_SEP + # parser always emits a space token after content in a percent array, even if no actual whitespace is present. 
+ if !empty && !ends_with_whitespace + tokens << [:tSPACE, [nil, range(token.location.start_offset, token.location.start_offset)]] + end + end when :tSYMBEG if (next_token = lexed[index][0]) && next_token.type != :STRING_CONTENT && next_token.type != :EMBEXPR_BEGIN && next_token.type != :EMBVAR && next_token.type != :STRING_END next_location = token.location.join(next_token.location) diff --git a/test/prism/ruby/parser_test.rb b/test/prism/ruby/parser_test.rb index 2e9211e70d..1542bc6562 100644 --- a/test/prism/ruby/parser_test.rb +++ b/test/prism/ruby/parser_test.rb @@ -219,13 +219,6 @@ module Prism expected_index += 1 actual_index += 1 - # The parser gem always has a space before a string end in list - # literals, but we don't. So we'll skip over the space. - if expected_token[0] == :tSPACE && actual_token[0] == :tSTRING_END - expected_index += 1 - next - end - # There are a lot of tokens that have very specific meaning according # to the context of the parser. We don't expose that information in # prism, so we need to normalize these tokens a bit.