[ruby/prism] Better invalid token messages

Upstream commit: ruby/prism@8c9bed2a4d
This commit is contained in:
Kevin Newton 2024-02-06 12:59:47 -05:00 committed by git
parent ccec209b2c
commit f5b368df0c
4 changed files with 21 additions and 7 deletions

View file

@ -198,8 +198,10 @@ static const pm_diagnostic_data_t diagnostic_messages[PM_DIAGNOSTIC_ID_LEN] = {
[PM_ERR_INVALID_NUMBER_HEXADECIMAL] = { "invalid hexadecimal number", PM_ERROR_LEVEL_FATAL }, [PM_ERR_INVALID_NUMBER_HEXADECIMAL] = { "invalid hexadecimal number", PM_ERROR_LEVEL_FATAL },
[PM_ERR_INVALID_NUMBER_OCTAL] = { "invalid octal number", PM_ERROR_LEVEL_FATAL }, [PM_ERR_INVALID_NUMBER_OCTAL] = { "invalid octal number", PM_ERROR_LEVEL_FATAL },
[PM_ERR_INVALID_NUMBER_UNDERSCORE] = { "invalid underscore placement in number", PM_ERROR_LEVEL_FATAL }, [PM_ERR_INVALID_NUMBER_UNDERSCORE] = { "invalid underscore placement in number", PM_ERROR_LEVEL_FATAL },
[PM_ERR_INVALID_CHARACTER] = { "invalid character 0x%X", PM_ERROR_LEVEL_FATAL },
[PM_ERR_INVALID_MULTIBYTE_CHARACTER] = { "invalid multibyte character 0x%X", PM_ERROR_LEVEL_FATAL },
[PM_ERR_INVALID_PRINTABLE_CHARACTER] = { "invalid character `%c`", PM_ERROR_LEVEL_FATAL },
[PM_ERR_INVALID_PERCENT] = { "invalid `%` token", PM_ERROR_LEVEL_FATAL }, // TODO WHAT? [PM_ERR_INVALID_PERCENT] = { "invalid `%` token", PM_ERROR_LEVEL_FATAL }, // TODO WHAT?
[PM_ERR_INVALID_TOKEN] = { "invalid token", PM_ERROR_LEVEL_FATAL }, // TODO WHAT?
[PM_ERR_INVALID_VARIABLE_GLOBAL] = { "invalid global variable", PM_ERROR_LEVEL_FATAL }, [PM_ERR_INVALID_VARIABLE_GLOBAL] = { "invalid global variable", PM_ERROR_LEVEL_FATAL },
[PM_ERR_IT_NOT_ALLOWED] = { "`it` is not allowed when an ordinary parameter is defined", PM_ERROR_LEVEL_FATAL }, [PM_ERR_IT_NOT_ALLOWED] = { "`it` is not allowed when an ordinary parameter is defined", PM_ERROR_LEVEL_FATAL },
[PM_ERR_LAMBDA_OPEN] = { "expected a `do` keyword or a `{` to open the lambda block", PM_ERROR_LEVEL_FATAL }, [PM_ERR_LAMBDA_OPEN] = { "expected a `do` keyword or a `{` to open the lambda block", PM_ERROR_LEVEL_FATAL },

View file

@ -196,8 +196,10 @@ typedef enum {
PM_ERR_INVALID_NUMBER_HEXADECIMAL, PM_ERR_INVALID_NUMBER_HEXADECIMAL,
PM_ERR_INVALID_NUMBER_OCTAL, PM_ERR_INVALID_NUMBER_OCTAL,
PM_ERR_INVALID_NUMBER_UNDERSCORE, PM_ERR_INVALID_NUMBER_UNDERSCORE,
PM_ERR_INVALID_CHARACTER,
PM_ERR_INVALID_MULTIBYTE_CHARACTER,
PM_ERR_INVALID_PRINTABLE_CHARACTER,
PM_ERR_INVALID_PERCENT, PM_ERR_INVALID_PERCENT,
PM_ERR_INVALID_TOKEN,
PM_ERR_INVALID_VARIABLE_GLOBAL, PM_ERR_INVALID_VARIABLE_GLOBAL,
PM_ERR_IT_NOT_ALLOWED, PM_ERR_IT_NOT_ALLOWED,
PM_ERR_LAMBDA_OPEN, PM_ERR_LAMBDA_OPEN,

View file

@ -9590,11 +9590,21 @@ parser_lex(pm_parser_t *parser) {
if (*parser->current.start != '_') { if (*parser->current.start != '_') {
size_t width = char_is_identifier_start(parser, parser->current.start); size_t width = char_is_identifier_start(parser, parser->current.start);
// If this isn't the beginning of an identifier, then it's an invalid // If this isn't the beginning of an identifier, then
// token as we've exhausted all of the other options. We'll skip past // it's an invalid token as we've exhausted all of the
// it and return the next token. // other options. We'll skip past it and return the next
// token after adding an appropriate error message.
if (!width) { if (!width) {
pm_parser_err_current(parser, PM_ERR_INVALID_TOKEN); pm_diagnostic_id_t diag_id;
if (*parser->current.start >= 0x80) {
diag_id = PM_ERR_INVALID_MULTIBYTE_CHARACTER;
} else if (char_is_ascii_printable(*parser->current.start) || (*parser->current.start == '\\')) {
diag_id = PM_ERR_INVALID_PRINTABLE_CHARACTER;
} else {
diag_id = PM_ERR_INVALID_CHARACTER;
}
PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, diag_id, *parser->current.start);
goto lex_next_token; goto lex_next_token;
} }

View file

@ -16,7 +16,7 @@ module Prism
assert_equal <<~'ERROR', Debug.format_errors('"%W"\u"', false) assert_equal <<~'ERROR', Debug.format_errors('"%W"\u"', false)
> 1 | "%W"\u" > 1 | "%W"\u"
| ^ expected a newline or semicolon after the statement | ^ expected a newline or semicolon after the statement
| ^ invalid token | ^ invalid character `\`
| ^ expected a closing delimiter for the string literal | ^ expected a closing delimiter for the string literal
ERROR ERROR
end end