diff --git a/prism/prism.c b/prism/prism.c index fc74270c34..f44ac73ea8 100644 --- a/prism/prism.c +++ b/prism/prism.c @@ -672,6 +672,26 @@ pm_parser_warn_node(pm_parser_t *parser, const pm_node_t *node, pm_diagnostic_id #define PM_PARSER_WARN_NODE_FORMAT(parser, node, diag_id, ...) \ PM_PARSER_WARN_FORMAT(parser, (node)->location.start, (node)->location.end, diag_id, __VA_ARGS__) +/** + * Add an error for an expected heredoc terminator. This is a special function + * only because it grabs its location off of a lex mode instead of a node or a + * token. + */ +static void +pm_parser_err_heredoc_term(pm_parser_t *parser, pm_lex_mode_t *lex_mode) { + const uint8_t *ident_start = lex_mode->as.heredoc.ident_start; + size_t ident_length = lex_mode->as.heredoc.ident_length; + + PM_PARSER_ERR_FORMAT( + parser, + ident_start, + ident_start + ident_length, + PM_ERR_HEREDOC_TERM, + (int) ident_length, + (const char *) ident_start + ); +} + /******************************************************************************/ /* Scope-related functions */ /******************************************************************************/ @@ -10836,7 +10856,7 @@ parser_lex(pm_parser_t *parser) { // this is not a valid heredoc declaration. In this case we // will add an error, but we will still return a heredoc // start. - pm_parser_err_current(parser, PM_ERR_HEREDOC_TERM); + pm_parser_err_heredoc_term(parser, parser->lex_modes.current); body_start = parser->end; } else { // Otherwise, we want to indicate that the body of the @@ -12163,7 +12183,7 @@ parser_lex(pm_parser_t *parser) { // terminator) but still continue parsing so that content after the // declaration of the heredoc can be parsed. if (parser->current.end >= parser->end) { - pm_parser_err_current(parser, PM_ERR_HEREDOC_TERM); + pm_parser_err_heredoc_term(parser, lex_mode); parser->next_start = lex_mode->as.heredoc.next_start; parser->heredoc_end = parser->current.end; lex_state_set(parser, PM_LEX_STATE_END); @@ -17468,8 +17488,16 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b if (match2(parser, PM_TOKEN_HEREDOC_END, PM_TOKEN_EOF)) { // If we get here, then we have an empty heredoc. We'll create // an empty content token and return an empty string node. - lex_mode_pop(parser); - expect1(parser, PM_TOKEN_HEREDOC_END, PM_ERR_HEREDOC_TERM); + if (match1(parser, PM_TOKEN_HEREDOC_END)) { + lex_mode_pop(parser); + parser_lex(parser); + } else { + pm_parser_err_heredoc_term(parser, lex_mode); + lex_mode_pop(parser); + parser->previous.start = parser->previous.end; + parser->previous.type = PM_TOKEN_MISSING; + } + pm_token_t content = parse_strings_empty_content(parser->previous.start); if (quote == PM_HEREDOC_QUOTE_BACKTICK) { @@ -17510,8 +17538,16 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b } node = (pm_node_t *) cast; - lex_mode_pop(parser); - expect1(parser, PM_TOKEN_HEREDOC_END, PM_ERR_HEREDOC_TERM); + + if (match1(parser, PM_TOKEN_HEREDOC_END)) { + lex_mode_pop(parser); + parser_lex(parser); + } else { + pm_parser_err_heredoc_term(parser, lex_mode); + lex_mode_pop(parser); + parser->previous.start = parser->previous.end; + parser->previous.type = PM_TOKEN_MISSING; + } } else { // If we get here, then we have multiple parts in the heredoc, // so we'll need to create an interpolated string node to hold @@ -17533,8 +17569,15 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b pm_interpolated_x_string_node_t *cast = pm_interpolated_xstring_node_create(parser, &opening, &opening); cast->parts = parts; - lex_mode_pop(parser); - expect1(parser, PM_TOKEN_HEREDOC_END, PM_ERR_HEREDOC_TERM); + if (match1(parser, PM_TOKEN_HEREDOC_END)) { + lex_mode_pop(parser); + parser_lex(parser); + } else { + pm_parser_err_heredoc_term(parser, lex_mode); + lex_mode_pop(parser); + parser->previous.start = parser->previous.end; + parser->previous.type = PM_TOKEN_MISSING; + } pm_interpolated_xstring_node_closing_set(cast, &parser->previous); cast->base.location = cast->opening_loc; @@ -17543,8 +17586,15 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b pm_interpolated_string_node_t *cast = pm_interpolated_string_node_create(parser, &opening, &parts, &opening); pm_node_list_free(&parts); - lex_mode_pop(parser); - expect1(parser, PM_TOKEN_HEREDOC_END, PM_ERR_HEREDOC_TERM); + if (match1(parser, PM_TOKEN_HEREDOC_END)) { + lex_mode_pop(parser); + parser_lex(parser); + } else { + pm_parser_err_heredoc_term(parser, lex_mode); + lex_mode_pop(parser); + parser->previous.start = parser->previous.end; + parser->previous.type = PM_TOKEN_MISSING; + } pm_interpolated_string_node_closing_set(cast, &parser->previous); cast->base.location = cast->opening_loc; diff --git a/prism/templates/src/diagnostic.c.erb b/prism/templates/src/diagnostic.c.erb index 78dc6c2249..bd6f4012bc 100644 --- a/prism/templates/src/diagnostic.c.erb +++ b/prism/templates/src/diagnostic.c.erb @@ -207,7 +207,7 @@ static const pm_diagnostic_data_t diagnostic_messages[PM_DIAGNOSTIC_ID_MAX] = { [PM_ERR_HASH_ROCKET] = { "expected a `=>` between the hash key and value", PM_ERROR_LEVEL_SYNTAX }, [PM_ERR_HASH_TERM] = { "expected a `}` to close the hash literal", PM_ERROR_LEVEL_SYNTAX }, [PM_ERR_HASH_VALUE] = { "expected a value in the hash literal", PM_ERROR_LEVEL_SYNTAX }, - [PM_ERR_HEREDOC_TERM] = { "could not find a terminator for the heredoc", PM_ERROR_LEVEL_SYNTAX }, + [PM_ERR_HEREDOC_TERM] = { "unterminated heredoc; can't find string \"%.*s\"", PM_ERROR_LEVEL_SYNTAX }, [PM_ERR_INCOMPLETE_QUESTION_MARK] = { "incomplete expression at `?`", PM_ERROR_LEVEL_SYNTAX }, [PM_ERR_INCOMPLETE_VARIABLE_CLASS_3_3] = { "`%.*s' is not allowed as a class variable name", PM_ERROR_LEVEL_SYNTAX }, [PM_ERR_INCOMPLETE_VARIABLE_CLASS] = { "'%.*s' is not allowed as a class variable name", PM_ERROR_LEVEL_SYNTAX }, @@ -243,13 +243,13 @@ static const pm_diagnostic_data_t diagnostic_messages[PM_DIAGNOSTIC_ID_MAX] = { [PM_ERR_LAMBDA_TERM_BRACE] = { "expected a lambda block beginning with `{` to end with `}`", PM_ERROR_LEVEL_SYNTAX }, [PM_ERR_LAMBDA_TERM_END] = { "expected a lambda block beginning with `do` to end with `end`", PM_ERROR_LEVEL_SYNTAX }, [PM_ERR_LIST_I_LOWER_ELEMENT] = { "expected a symbol in a `%i` list", PM_ERROR_LEVEL_SYNTAX }, - [PM_ERR_LIST_I_LOWER_TERM] = { "expected a closing delimiter for the `%i` list", PM_ERROR_LEVEL_SYNTAX }, + [PM_ERR_LIST_I_LOWER_TERM] = { "unterminated list; expected a closing delimiter for the `%i`", PM_ERROR_LEVEL_SYNTAX }, [PM_ERR_LIST_I_UPPER_ELEMENT] = { "expected a symbol in a `%I` list", PM_ERROR_LEVEL_SYNTAX }, - [PM_ERR_LIST_I_UPPER_TERM] = { "expected a closing delimiter for the `%I` list", PM_ERROR_LEVEL_SYNTAX }, + [PM_ERR_LIST_I_UPPER_TERM] = { "unterminated list; expected a closing delimiter for the `%I`", PM_ERROR_LEVEL_SYNTAX }, [PM_ERR_LIST_W_LOWER_ELEMENT] = { "expected a string in a `%w` list", PM_ERROR_LEVEL_SYNTAX }, - [PM_ERR_LIST_W_LOWER_TERM] = { "expected a closing delimiter for the `%w` list", PM_ERROR_LEVEL_SYNTAX }, + [PM_ERR_LIST_W_LOWER_TERM] = { "unterminated list; expected a closing delimiter for the `%w`", PM_ERROR_LEVEL_SYNTAX }, [PM_ERR_LIST_W_UPPER_ELEMENT] = { "expected a string in a `%W` list", PM_ERROR_LEVEL_SYNTAX }, - [PM_ERR_LIST_W_UPPER_TERM] = { "expected a closing delimiter for the `%W` list", PM_ERROR_LEVEL_SYNTAX }, + [PM_ERR_LIST_W_UPPER_TERM] = { "unterminated list; expected a closing delimiter for the `%W`", PM_ERROR_LEVEL_SYNTAX }, [PM_ERR_MALLOC_FAILED] = { "failed to allocate memory", PM_ERROR_LEVEL_SYNTAX }, [PM_ERR_MIXED_ENCODING] = { "UTF-8 mixed within %s source", PM_ERROR_LEVEL_SYNTAX }, [PM_ERR_MODULE_IN_METHOD] = { "unexpected module definition in method body", PM_ERROR_LEVEL_SYNTAX }, @@ -306,7 +306,7 @@ static const pm_diagnostic_data_t diagnostic_messages[PM_DIAGNOSTIC_ID_MAX] = { [PM_ERR_REGEXP_NON_ESCAPED_MBC] = { "/.../n has a non escaped non ASCII character in non ASCII-8BIT script: /%.*s/", PM_ERROR_LEVEL_SYNTAX }, [PM_ERR_REGEXP_INVALID_UNICODE_RANGE] = { "invalid Unicode range: /%.*s/", PM_ERROR_LEVEL_SYNTAX }, [PM_ERR_REGEXP_UNKNOWN_OPTIONS] = { "unknown regexp %s: %.*s", PM_ERROR_LEVEL_SYNTAX }, - [PM_ERR_REGEXP_TERM] = { "expected a closing delimiter for the regular expression", PM_ERROR_LEVEL_SYNTAX }, + [PM_ERR_REGEXP_TERM] = { "unterminated regular expression; expected a closing delimiter", PM_ERROR_LEVEL_SYNTAX }, [PM_ERR_REGEXP_UTF8_CHAR_NON_UTF8_REGEXP] = { "UTF-8 character in non UTF-8 regexp: /%s/", PM_ERROR_LEVEL_SYNTAX }, [PM_ERR_RESCUE_EXPRESSION] = { "expected a rescued expression", PM_ERROR_LEVEL_SYNTAX }, [PM_ERR_RESCUE_MODIFIER_VALUE] = { "expected a value after the `rescue` modifier", PM_ERROR_LEVEL_SYNTAX }, @@ -323,7 +323,7 @@ static const pm_diagnostic_data_t diagnostic_messages[PM_DIAGNOSTIC_ID_MAX] = { [PM_ERR_STRING_LITERAL_EOF] = { "unterminated string meets end of file", PM_ERROR_LEVEL_SYNTAX }, [PM_ERR_STRING_LITERAL_TERM] = { "unexpected %s, expected a string literal terminator", PM_ERROR_LEVEL_SYNTAX }, [PM_ERR_SYMBOL_INVALID] = { "invalid symbol", PM_ERROR_LEVEL_SYNTAX }, // TODO expected symbol? prism.c ~9719 - [PM_ERR_SYMBOL_TERM_DYNAMIC] = { "expected a closing delimiter for the dynamic symbol", PM_ERROR_LEVEL_SYNTAX }, + [PM_ERR_SYMBOL_TERM_DYNAMIC] = { "unterminated quoted string; expected a closing delimiter for the dynamic symbol", PM_ERROR_LEVEL_SYNTAX }, [PM_ERR_SYMBOL_TERM_INTERPOLATED] = { "expected a closing delimiter for the interpolated symbol", PM_ERROR_LEVEL_SYNTAX }, [PM_ERR_TERNARY_COLON] = { "expected a `:` after the true expression of a ternary operator", PM_ERROR_LEVEL_SYNTAX }, [PM_ERR_TERNARY_EXPRESSION_FALSE] = { "expected an expression after `:` in the ternary operator", PM_ERROR_LEVEL_SYNTAX },