mirror of
https://github.com/ruby/ruby.git
synced 2025-09-16 00:54:01 +02:00
parent
18ee7c9a10
commit
76e11595e2
7 changed files with 72 additions and 21 deletions
|
@ -234,6 +234,9 @@ typedef struct pm_lex_mode {
|
|||
* a tilde heredoc.
|
||||
*/
|
||||
size_t common_whitespace;
|
||||
|
||||
/** True if the previous token ended with a line continuation. */
|
||||
bool line_continuation;
|
||||
} heredoc;
|
||||
} as;
|
||||
|
||||
|
|
|
@ -9450,7 +9450,8 @@ parser_lex(pm_parser_t *parser) {
|
|||
.next_start = parser->current.end,
|
||||
.quote = quote,
|
||||
.indent = indent,
|
||||
.common_whitespace = (size_t) -1
|
||||
.common_whitespace = (size_t) -1,
|
||||
.line_continuation = false
|
||||
}
|
||||
});
|
||||
|
||||
|
@ -10719,6 +10720,9 @@ parser_lex(pm_parser_t *parser) {
|
|||
// current lex mode.
|
||||
pm_lex_mode_t *lex_mode = parser->lex_modes.current;
|
||||
|
||||
bool line_continuation = lex_mode->as.heredoc.line_continuation;
|
||||
lex_mode->as.heredoc.line_continuation = false;
|
||||
|
||||
// We'll check if we're at the end of the file. If we are, then we
|
||||
// will add an error (because we weren't able to find the
|
||||
// terminator) but still continue parsing so that content after the
|
||||
|
@ -10736,7 +10740,7 @@ parser_lex(pm_parser_t *parser) {
|
|||
|
||||
// If we are immediately following a newline and we have hit the
|
||||
// terminator, then we need to return the ending of the heredoc.
|
||||
if (current_token_starts_line(parser)) {
|
||||
if (!line_continuation && current_token_starts_line(parser)) {
|
||||
const uint8_t *start = parser->current.start;
|
||||
if (start + ident_length <= parser->end) {
|
||||
const uint8_t *newline = next_newline(start, parser->end - start);
|
||||
|
@ -10808,7 +10812,7 @@ parser_lex(pm_parser_t *parser) {
|
|||
|
||||
const uint8_t *breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
|
||||
pm_token_buffer_t token_buffer = { { 0 }, 0 };
|
||||
bool was_escaped_newline = false;
|
||||
bool was_line_continuation = false;
|
||||
|
||||
while (breakpoint != NULL) {
|
||||
switch (*breakpoint) {
|
||||
|
@ -10831,7 +10835,7 @@ parser_lex(pm_parser_t *parser) {
|
|||
// some leading whitespace.
|
||||
const uint8_t *start = breakpoint + 1;
|
||||
|
||||
if (!was_escaped_newline && (start + ident_length <= parser->end)) {
|
||||
if (!was_line_continuation && (start + ident_length <= parser->end)) {
|
||||
// We want to match the terminator starting from the end of the line in case
|
||||
// there is whitespace in the ident such as <<-' DOC' or <<~' DOC'.
|
||||
const uint8_t *newline = next_newline(start, parser->end - start);
|
||||
|
@ -10873,7 +10877,6 @@ parser_lex(pm_parser_t *parser) {
|
|||
// heredoc here as string content. Then, the next time a
|
||||
// token is lexed, it will match again and return the
|
||||
// end of the heredoc.
|
||||
|
||||
if (lex_mode->as.heredoc.indent == PM_HEREDOC_INDENT_TILDE) {
|
||||
if ((lex_mode->as.heredoc.common_whitespace > whitespace) && peek_at(parser, start) != '\n') {
|
||||
lex_mode->as.heredoc.common_whitespace = whitespace;
|
||||
|
@ -10881,7 +10884,7 @@ parser_lex(pm_parser_t *parser) {
|
|||
|
||||
parser->current.end = breakpoint + 1;
|
||||
|
||||
if (!was_escaped_newline) {
|
||||
if (!was_line_continuation) {
|
||||
pm_token_buffer_flush(parser, &token_buffer);
|
||||
LEX(PM_TOKEN_STRING_CONTENT);
|
||||
}
|
||||
|
@ -10943,7 +10946,26 @@ parser_lex(pm_parser_t *parser) {
|
|||
}
|
||||
/* fallthrough */
|
||||
case '\n':
|
||||
was_escaped_newline = true;
|
||||
// If we are in a tilde heredoc here, we should
|
||||
// break out of the loop and return the
|
||||
// string content.
|
||||
if (lex_mode->as.heredoc.indent == PM_HEREDOC_INDENT_TILDE) {
|
||||
const uint8_t *end = parser->current.end;
|
||||
pm_newline_list_append(&parser->newline_list, end);
|
||||
|
||||
// Here we want the buffer to only
|
||||
// include up to the backslash.
|
||||
parser->current.end = breakpoint;
|
||||
pm_token_buffer_flush(parser, &token_buffer);
|
||||
|
||||
// Now we can advance the end of the
|
||||
// token past the newline.
|
||||
parser->current.end = end + 1;
|
||||
lex_mode->as.heredoc.line_continuation = true;
|
||||
LEX(PM_TOKEN_STRING_CONTENT);
|
||||
}
|
||||
|
||||
was_line_continuation = true;
|
||||
token_buffer.cursor = parser->current.end + 1;
|
||||
breakpoint = parser->current.end;
|
||||
continue;
|
||||
|
@ -10980,7 +11002,7 @@ parser_lex(pm_parser_t *parser) {
|
|||
assert(false && "unreachable");
|
||||
}
|
||||
|
||||
was_escaped_newline = false;
|
||||
was_line_continuation = false;
|
||||
}
|
||||
|
||||
if (parser->current.end > parser->current.start) {
|
||||
|
|
|
@ -71,6 +71,7 @@ module Prism
|
|||
# https://github.com/seattlerb/ruby_parser/issues/344
|
||||
failures = crlf | %w[
|
||||
alias.txt
|
||||
heredocs_with_ignored_newlines.txt
|
||||
method_calls.txt
|
||||
methods.txt
|
||||
multi_write.txt
|
||||
|
@ -94,6 +95,7 @@ module Prism
|
|||
whitequark/lvar_injecting_match.txt
|
||||
whitequark/not.txt
|
||||
whitequark/op_asgn_cmd.txt
|
||||
whitequark/parser_bug_640.txt
|
||||
whitequark/parser_slash_slash_n_escaping_in_literals.txt
|
||||
whitequark/pattern_matching_single_line_allowed_omission_of_parentheses.txt
|
||||
whitequark/pattern_matching_single_line.txt
|
||||
|
|
|
@ -11,7 +11,7 @@
|
|||
│ └── unescaped: ""
|
||||
└── @ InterpolatedStringNode (location: (4,0)-(4,8))
|
||||
├── opening_loc: (4,0)-(4,8) = "<<~THERE"
|
||||
├── parts: (length: 8)
|
||||
├── parts: (length: 9)
|
||||
│ ├── @ StringNode (location: (5,0)-(6,0))
|
||||
│ │ ├── flags: ∅
|
||||
│ │ ├── opening_loc: ∅
|
||||
|
@ -42,12 +42,18 @@
|
|||
│ │ ├── content_loc: (9,0)-(10,0) = "\n"
|
||||
│ │ ├── closing_loc: ∅
|
||||
│ │ └── unescaped: "\n"
|
||||
│ ├── @ StringNode (location: (10,0)-(12,0))
|
||||
│ ├── @ StringNode (location: (10,0)-(11,0))
|
||||
│ │ ├── flags: ∅
|
||||
│ │ ├── opening_loc: ∅
|
||||
│ │ ├── content_loc: (10,0)-(12,0) = " <<~BUT\\\n but\n"
|
||||
│ │ ├── content_loc: (10,0)-(11,0) = " <<~BUT\\\n"
|
||||
│ │ ├── closing_loc: ∅
|
||||
│ │ └── unescaped: "<<~BUT but\n"
|
||||
│ │ └── unescaped: "<<~BUT"
|
||||
│ ├── @ StringNode (location: (11,0)-(12,0))
|
||||
│ │ ├── flags: ∅
|
||||
│ │ ├── opening_loc: ∅
|
||||
│ │ ├── content_loc: (11,0)-(12,0) = " but\n"
|
||||
│ │ ├── closing_loc: ∅
|
||||
│ │ └── unescaped: " but\n"
|
||||
│ ├── @ StringNode (location: (12,0)-(13,0))
|
||||
│ │ ├── flags: ∅
|
||||
│ │ ├── opening_loc: ∅
|
||||
|
|
|
@ -3,9 +3,19 @@
|
|||
└── statements:
|
||||
@ StatementsNode (location: (1,0)-(1,6))
|
||||
└── body: (length: 1)
|
||||
└── @ StringNode (location: (1,0)-(1,6))
|
||||
├── flags: ∅
|
||||
└── @ InterpolatedStringNode (location: (1,0)-(1,6))
|
||||
├── opening_loc: (1,0)-(1,6) = "<<~FOO"
|
||||
├── content_loc: (2,0)-(4,0) = " baz\\\n qux\n"
|
||||
├── closing_loc: (4,0)-(5,0) = "FOO\n"
|
||||
└── unescaped: "baz qux\n"
|
||||
├── parts: (length: 2)
|
||||
│ ├── @ StringNode (location: (2,0)-(3,0))
|
||||
│ │ ├── flags: ∅
|
||||
│ │ ├── opening_loc: ∅
|
||||
│ │ ├── content_loc: (2,0)-(3,0) = " baz\\\n"
|
||||
│ │ ├── closing_loc: ∅
|
||||
│ │ └── unescaped: "baz"
|
||||
│ └── @ StringNode (location: (3,0)-(4,0))
|
||||
│ ├── flags: ∅
|
||||
│ ├── opening_loc: ∅
|
||||
│ ├── content_loc: (3,0)-(4,0) = " qux\n"
|
||||
│ ├── closing_loc: ∅
|
||||
│ └── unescaped: "qux\n"
|
||||
└── closing_loc: (4,0)-(5,0) = "FOO\n"
|
||||
|
|
|
@ -11,13 +11,19 @@
|
|||
│ └── unescaped: " 1 2\n 3\n"
|
||||
└── @ InterpolatedStringNode (location: (8,0)-(8,4))
|
||||
├── opening_loc: (8,0)-(8,4) = "<<~E"
|
||||
├── parts: (length: 2)
|
||||
│ ├── @ StringNode (location: (9,0)-(11,0))
|
||||
├── parts: (length: 3)
|
||||
│ ├── @ StringNode (location: (9,0)-(10,0))
|
||||
│ │ ├── flags: ∅
|
||||
│ │ ├── opening_loc: ∅
|
||||
│ │ ├── content_loc: (9,0)-(11,0) = " 1 \\\n 2\n"
|
||||
│ │ ├── content_loc: (9,0)-(10,0) = " 1 \\\n"
|
||||
│ │ ├── closing_loc: ∅
|
||||
│ │ └── unescaped: "1 2\n"
|
||||
│ │ └── unescaped: "1 "
|
||||
│ ├── @ StringNode (location: (10,0)-(11,0))
|
||||
│ │ ├── flags: ∅
|
||||
│ │ ├── opening_loc: ∅
|
||||
│ │ ├── content_loc: (10,0)-(11,0) = " 2\n"
|
||||
│ │ ├── closing_loc: ∅
|
||||
│ │ └── unescaped: "2\n"
|
||||
│ └── @ StringNode (location: (11,0)-(12,0))
|
||||
│ ├── flags: ∅
|
||||
│ ├── opening_loc: ∅
|
||||
|
|
|
@ -230,6 +230,8 @@ module Prism
|
|||
else
|
||||
assert_equal expected.bytes, actual.bytes, message
|
||||
end
|
||||
rescue Exception
|
||||
binding.irb
|
||||
end
|
||||
end
|
||||
end
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue