[ruby/prism] Enable remaining heredoc unescape tests

c7ea4941c5
2025-08-24 13:34:17 +02:00 · 2023-10-11 11:12:41 -04:00 · 2023-10-11 11:12:41 -04:00 · d6424453db
commit d6424453db
parent 24768d8a57
3 changed files with 23 additions and 14 deletions
--- a/prism/prism.c
+++ b/prism/prism.c
@@ -8482,6 +8482,7 @@ parser_lex(pm_parser_t *parser) {

            const uint8_t *breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
            pm_token_buffer_t token_buffer = { 0 };
+            bool was_escaped_newline = false;

            while (breakpoint != NULL) {
                switch (*breakpoint) {
@@ -8509,6 +8510,7 @@ parser_lex(pm_parser_t *parser) {
                        // content. Then, the next time a token is lexed, it will match
                        // again and return the end of the heredoc.
                        if (
+                            !was_escaped_newline &&
                            (start + ident_length <= parser->end) &&
                            (memcmp(start, ident_start, ident_length) == 0)
                        ) {
@@ -8550,6 +8552,9 @@ parser_lex(pm_parser_t *parser) {
                            case '\r':
                                parser->current.end++;
                                if (peek(parser) != '\n') {
+                                    if (quote == PM_HEREDOC_QUOTE_SINGLE) {
+                                        pm_token_buffer_push(&token_buffer, '\\');
+                                    }
                                    pm_token_buffer_push(&token_buffer, '\r');
                                    break;
                                }
@@ -8559,25 +8564,19 @@ parser_lex(pm_parser_t *parser) {
                                // to leave the escaped newline in place so that
                                // it can be removed later when we dedent the
                                // heredoc.
-                                if (lex_mode->as.heredoc.indent == PM_HEREDOC_INDENT_TILDE) {
+                                if (quote == PM_HEREDOC_QUOTE_SINGLE || lex_mode->as.heredoc.indent == PM_HEREDOC_INDENT_TILDE) {
                                    pm_token_buffer_push(&token_buffer, '\\');
                                    pm_token_buffer_push(&token_buffer, '\n');
                                }

-                                if (parser->heredoc_end) {
-                                    // ... if we are on the same line as a heredoc,
-                                    // flush the heredoc and continue parsing after
-                                    // heredoc_end.
-                                    parser_flush_heredoc_end(parser);
-                                    pm_token_buffer_copy(parser, &token_buffer);
-                                    LEX(PM_TOKEN_STRING_CONTENT);
-                                } else {
-                                    // ... else track the newline.
-                                    pm_newline_list_append(&parser->newline_list, parser->current.end);
+                                token_buffer.cursor = parser->current.end + 1;
+                                breakpoint = parser->current.end;
+
+                                if (quote != PM_HEREDOC_QUOTE_SINGLE) {
+                                    was_escaped_newline = true;
                                }

-                                parser->current.end++;
-                                break;
+                                continue;
                            default:
                                if (quote == PM_HEREDOC_QUOTE_SINGLE) {
                                    pm_token_buffer_push(&token_buffer, '\\');
@@ -8616,6 +8615,8 @@ parser_lex(pm_parser_t *parser) {
                    default:
                        assert(false && "unreachable");
                }
+
+                was_escaped_newline = false;
            }

            // If we've hit the end of the string, then this is an unterminated
--- a/test/prism/snapshots/whitequark/parser_slash_slash_n_escaping_in_literals.txt
+++ b/test/prism/snapshots/whitequark/parser_slash_slash_n_escaping_in_literals.txt
@@ -114,7 +114,7 @@
        │   ├── opening_loc: (51,0)-(51,9) = "<<-'HERE'"
        │   ├── content_loc: (52,0)-(53,0) = "a\\\nb\n"
        │   ├── closing_loc: (54,0)-(54,0) = "HERE\n"
-        │   └── unescaped: "ab\n"
+        │   └── unescaped: "a\\\nb\n"
        ├── @ XStringNode (location: (56,0)-(56,9))
        │   ├── opening_loc: (56,0)-(56,9) = "<<-`HERE`"
        │   ├── content_loc: (57,0)-(58,0) = "a\\\nb\n"
--- a/test/prism/unescape_test.rb
+++ b/test/prism/unescape_test.rb
@@ -102,6 +102,14 @@ module Prism
      [Context::String.new("%[", "]"),           escapes],
      [Context::String.new("`", "`"),            escapes],
      [Context::String.new("%x[", "]"),          escapes],
+      [Context::String.new("<<H\n", "\nH"),      escapes],
+      [Context::String.new("<<'H'\n", "\nH"),    escapes],
+      [Context::String.new("<<\"H\"\n", "\nH"),  escapes],
+      [Context::String.new("<<`H`\n", "\nH"),    escapes],
+      [Context::String.new("<<-H\n", "\nH"),      escapes],
+      [Context::String.new("<<-'H'\n", "\nH"),    escapes],
+      [Context::String.new("<<-\"H\"\n", "\nH"),  escapes],
+      [Context::String.new("<<-`H`\n", "\nH"),    escapes],
      # [Context::String.new("<<~H\n", "\nH"),     escapes],
      # [Context::String.new("<<~'H'\n", "\nH"),   escapes],
      # [Context::String.new("<<~\"H\"\n", "\nH"), escapes],