[ruby/prism] Decode %r like % strings

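%r regular expression literals need to have their contents decoded the
same way % string literals are. In particular, when the delimiter is a
newline, a CRLF must also be recognized as the terminator, and when the
delimiter is a bare CR, a CRLF must not be. This commit fixes %r decoding
so that it behaves like % string decoding.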

85bfd9c0cd
This commit is contained in:
Aaron Patterson 2024-12-11 15:54:56 -08:00 committed by git
parent 0a1fa99482
commit 9181e8bc87
2 changed files with 62 additions and 18 deletions

View file

@ -12110,9 +12110,28 @@ parser_lex(pm_parser_t *parser) {
pm_regexp_token_buffer_t token_buffer = { 0 }; pm_regexp_token_buffer_t token_buffer = { 0 };
while (breakpoint != NULL) { while (breakpoint != NULL) {
uint8_t term = lex_mode->as.regexp.terminator;
bool is_terminator = (*breakpoint == term);
// If the terminator is newline, we need to consider \r\n _also_ a newline
// For example: `%r\nfoo\r\n`
// The regexp should be /foo/, not /foo\r/
if (*breakpoint == '\r' && peek_at(parser, breakpoint + 1) == '\n') {
if (term == '\n') {
is_terminator = true;
}
// If the terminator is a CR, but we see a CRLF, we need to
// treat the CRLF as a newline, meaning this is _not_ the
// terminator
if (term == '\r') {
is_terminator = false;
}
}
// If we hit the terminator, we need to determine what kind of // If we hit the terminator, we need to determine what kind of
// token to return. // token to return.
if (*breakpoint == lex_mode->as.regexp.terminator) { if (is_terminator) {
if (lex_mode->as.regexp.nesting > 0) { if (lex_mode->as.regexp.nesting > 0) {
parser->current.end = breakpoint + 1; parser->current.end = breakpoint + 1;
breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, false); breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, false);
@ -12342,20 +12361,21 @@ parser_lex(pm_parser_t *parser) {
continue; continue;
} }
bool is_terminator = (*breakpoint == lex_mode->as.string.terminator); uint8_t term = lex_mode->as.string.terminator;
bool is_terminator = (*breakpoint == term);
// If the terminator is newline, we need to consider \r\n _also_ a newline // If the terminator is newline, we need to consider \r\n _also_ a newline
// For example: `%\nfoo\r\n` // For example: `%\nfoo\r\n`
// The string should be "foo", not "foo\r" // The string should be "foo", not "foo\r"
if (*breakpoint == '\r' && peek_at(parser, breakpoint + 1) == '\n') { if (*breakpoint == '\r' && peek_at(parser, breakpoint + 1) == '\n') {
if (lex_mode->as.string.terminator == '\n') { if (term == '\n') {
is_terminator = true; is_terminator = true;
} }
// If the terminator is a CR, but we see a CRLF, we need to // If the terminator is a CR, but we see a CRLF, we need to
// treat the CRLF as a newline, meaning this is _not_ the // treat the CRLF as a newline, meaning this is _not_ the
// terminator // terminator
if (lex_mode->as.string.terminator == '\r') { if (term == '\r') {
is_terminator = false; is_terminator = false;
} }
} }

View file

@ -3,56 +3,80 @@
require_relative "test_helper" require_relative "test_helper"
module Prism module Prism
class PercentDelimiterStringTest < TestCase module PercentDelimiterTests
def test_newline_terminator_with_lf_crlf def test_newline_terminator_with_lf_crlf
str = "%\n123456\r\n" str = l "\n123456\r\n"
assert_parse "123456", str assert_parse "123456", str
end end
def test_newline_terminator_with_lf_crlf_with_extra_cr def test_newline_terminator_with_lf_crlf_with_extra_cr
str = "%\n123456\r\r\n" str = l "\n123456\r\r\n"
assert_parse "123456\r", str assert_parse "123456\r", str
end end
def test_newline_terminator_with_crlf_pair def test_newline_terminator_with_crlf_pair
str = "%\r\n123456\r\n" str = l "\r\n123456\r\n"
assert_parse "123456", str assert_parse "123456", str
end end
def test_newline_terminator_with_crlf_crlf_with_extra_cr def test_newline_terminator_with_crlf_crlf_with_extra_cr
str = "%\r\n123456\r\r\n" str = l "\r\n123456\r\r\n"
assert_parse "123456\r", str assert_parse "123456\r", str
end end
def test_newline_terminator_with_cr_cr def test_newline_terminator_with_cr_cr
str = "%\r123456\r;\n" str = l "\r123456\r;\n"
assert_parse "123456", str assert_parse "123456", str
end end
def test_newline_terminator_with_crlf_lf def test_newline_terminator_with_crlf_lf
str = "%\r\n123456\n;\n" str = l "\r\n123456\n;\n"
assert_parse "123456", str assert_parse "123456", str
end end
def test_cr_crlf def test_cr_crlf
str = "%\r1\r\n \r" str = l "\r1\r\n \r"
assert_parse "1\n ", str assert_parse "1\n ", str
end end
def test_lf_crlf def test_lf_crlf
str = "%\n1\r\n \n" str = l "\n1\r\n \n"
assert_parse "1", str assert_parse "1", str
end end
def test_lf_lf def test_lf_lf
str = "%\n1\n \n" str = l "\n1\n \n"
assert_parse "1", str assert_parse "1", str
end end
def assert_parse(expected, str) def assert_parse(expected, str)
assert_equal expected, find_node(str).unescaped
end
end
class PercentDelimiterStringTest < TestCase
include PercentDelimiterTests
def find_node(str)
tree = Prism.parse str tree = Prism.parse str
node = tree.value.breadth_first_search { |x| Prism::StringNode === x } tree.value.breadth_first_search { |x| Prism::StringNode === x }
assert_equal expected, node.unescaped end
def l(str)
"%" + str
end
end
class PercentDelimiterRegexpTest < TestCase
include PercentDelimiterTests
def l(str)
"%r" + str
end
def find_node(str)
tree = Prism.parse str
tree.value.breadth_first_search { |x| Prism::RegularExpressionNode === x }
end end
end end
end end