From d6424453dbb8cd2794e22a7d3ba400732e709ec2 Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Wed, 11 Oct 2023 11:12:41 -0400 Subject: [PATCH] [ruby/prism] Enable remaining heredoc unescape tests https://github.com/ruby/prism/commit/c7ea4941c5 --- prism/prism.c | 27 ++++++++++--------- ...ser_slash_slash_n_escaping_in_literals.txt | 2 +- test/prism/unescape_test.rb | 8 ++++++ 3 files changed, 23 insertions(+), 14 deletions(-) diff --git a/prism/prism.c b/prism/prism.c index c9cfe65d76..0dbe408659 100644 --- a/prism/prism.c +++ b/prism/prism.c @@ -8482,6 +8482,7 @@ parser_lex(pm_parser_t *parser) { const uint8_t *breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end); pm_token_buffer_t token_buffer = { 0 }; + bool was_escaped_newline = false; while (breakpoint != NULL) { switch (*breakpoint) { @@ -8509,6 +8510,7 @@ parser_lex(pm_parser_t *parser) { // content. Then, the next time a token is lexed, it will match // again and return the end of the heredoc. if ( + !was_escaped_newline && (start + ident_length <= parser->end) && (memcmp(start, ident_start, ident_length) == 0) ) { @@ -8550,6 +8552,9 @@ parser_lex(pm_parser_t *parser) { case '\r': parser->current.end++; if (peek(parser) != '\n') { + if (quote == PM_HEREDOC_QUOTE_SINGLE) { + pm_token_buffer_push(&token_buffer, '\\'); + } pm_token_buffer_push(&token_buffer, '\r'); break; } @@ -8559,25 +8564,19 @@ parser_lex(pm_parser_t *parser) { // to leave the escaped newline in place so that // it can be removed later when we dedent the // heredoc. - if (lex_mode->as.heredoc.indent == PM_HEREDOC_INDENT_TILDE) { + if (quote == PM_HEREDOC_QUOTE_SINGLE || lex_mode->as.heredoc.indent == PM_HEREDOC_INDENT_TILDE) { pm_token_buffer_push(&token_buffer, '\\'); pm_token_buffer_push(&token_buffer, '\n'); } - if (parser->heredoc_end) { - // ... if we are on the same line as a heredoc, - // flush the heredoc and continue parsing after - // heredoc_end. - parser_flush_heredoc_end(parser); - pm_token_buffer_copy(parser, &token_buffer); - LEX(PM_TOKEN_STRING_CONTENT); - } else { - // ... else track the newline. - pm_newline_list_append(&parser->newline_list, parser->current.end); + token_buffer.cursor = parser->current.end + 1; + breakpoint = parser->current.end; + + if (quote != PM_HEREDOC_QUOTE_SINGLE) { + was_escaped_newline = true; } - parser->current.end++; - break; + continue; default: if (quote == PM_HEREDOC_QUOTE_SINGLE) { pm_token_buffer_push(&token_buffer, '\\'); @@ -8616,6 +8615,8 @@ parser_lex(pm_parser_t *parser) { default: assert(false && "unreachable"); } + + was_escaped_newline = false; } // If we've hit the end of the string, then this is an unterminated diff --git a/test/prism/snapshots/whitequark/parser_slash_slash_n_escaping_in_literals.txt b/test/prism/snapshots/whitequark/parser_slash_slash_n_escaping_in_literals.txt index ad38f53cee..20496c3cfc 100644 --- a/test/prism/snapshots/whitequark/parser_slash_slash_n_escaping_in_literals.txt +++ b/test/prism/snapshots/whitequark/parser_slash_slash_n_escaping_in_literals.txt @@ -114,7 +114,7 @@ │ ├── opening_loc: (51,0)-(51,9) = "<<-'HERE'" │ ├── content_loc: (52,0)-(53,0) = "a\\\nb\n" │ ├── closing_loc: (54,0)-(54,0) = "HERE\n" - │ └── unescaped: "ab\n" + │ └── unescaped: "a\\\nb\n" ├── @ XStringNode (location: (56,0)-(56,9)) │ ├── opening_loc: (56,0)-(56,9) = "<<-`HERE`" │ ├── content_loc: (57,0)-(58,0) = "a\\\nb\n" diff --git a/test/prism/unescape_test.rb b/test/prism/unescape_test.rb index d431fe9afd..de4d1bf1b9 100644 --- a/test/prism/unescape_test.rb +++ b/test/prism/unescape_test.rb @@ -102,6 +102,14 @@ module Prism [Context::String.new("%[", "]"), escapes], [Context::String.new("`", "`"), escapes], [Context::String.new("%x[", "]"), escapes], + [Context::String.new("<