diff --git a/prism/diagnostic.c b/prism/diagnostic.c index df7ae381ba..c718246c80 100644 --- a/prism/diagnostic.c +++ b/prism/diagnostic.c @@ -198,8 +198,10 @@ static const pm_diagnostic_data_t diagnostic_messages[PM_DIAGNOSTIC_ID_LEN] = { [PM_ERR_INVALID_NUMBER_HEXADECIMAL] = { "invalid hexadecimal number", PM_ERROR_LEVEL_FATAL }, [PM_ERR_INVALID_NUMBER_OCTAL] = { "invalid octal number", PM_ERROR_LEVEL_FATAL }, [PM_ERR_INVALID_NUMBER_UNDERSCORE] = { "invalid underscore placement in number", PM_ERROR_LEVEL_FATAL }, + [PM_ERR_INVALID_CHARACTER] = { "invalid character 0x%X", PM_ERROR_LEVEL_FATAL }, + [PM_ERR_INVALID_MULTIBYTE_CHARACTER] = { "invalid multibyte character 0x%X", PM_ERROR_LEVEL_FATAL }, + [PM_ERR_INVALID_PRINTABLE_CHARACTER] = { "invalid character `%c`", PM_ERROR_LEVEL_FATAL }, [PM_ERR_INVALID_PERCENT] = { "invalid `%` token", PM_ERROR_LEVEL_FATAL }, // TODO WHAT? - [PM_ERR_INVALID_TOKEN] = { "invalid token", PM_ERROR_LEVEL_FATAL }, // TODO WHAT? [PM_ERR_INVALID_VARIABLE_GLOBAL] = { "invalid global variable", PM_ERROR_LEVEL_FATAL }, [PM_ERR_IT_NOT_ALLOWED] = { "`it` is not allowed when an ordinary parameter is defined", PM_ERROR_LEVEL_FATAL }, [PM_ERR_LAMBDA_OPEN] = { "expected a `do` keyword or a `{` to open the lambda block", PM_ERROR_LEVEL_FATAL }, diff --git a/prism/diagnostic.h b/prism/diagnostic.h index 35a5c88793..019afb96b3 100644 --- a/prism/diagnostic.h +++ b/prism/diagnostic.h @@ -196,8 +196,10 @@ typedef enum { PM_ERR_INVALID_NUMBER_HEXADECIMAL, PM_ERR_INVALID_NUMBER_OCTAL, PM_ERR_INVALID_NUMBER_UNDERSCORE, + PM_ERR_INVALID_CHARACTER, + PM_ERR_INVALID_MULTIBYTE_CHARACTER, + PM_ERR_INVALID_PRINTABLE_CHARACTER, PM_ERR_INVALID_PERCENT, - PM_ERR_INVALID_TOKEN, PM_ERR_INVALID_VARIABLE_GLOBAL, PM_ERR_IT_NOT_ALLOWED, PM_ERR_LAMBDA_OPEN, diff --git a/prism/prism.c b/prism/prism.c index 3ed55f06d8..22503fd726 100644 --- a/prism/prism.c +++ b/prism/prism.c @@ -9590,11 +9590,21 @@ parser_lex(pm_parser_t *parser) { if (*parser->current.start != '_') { size_t width = char_is_identifier_start(parser, parser->current.start); - // If this isn't the beginning of an identifier, then it's an invalid - // token as we've exhausted all of the other options. We'll skip past - // it and return the next token. + // If this isn't the beginning of an identifier, then + // it's an invalid token as we've exhausted all of the + // other options. We'll skip past it and return the next + // token after adding an appropriate error message. if (!width) { - pm_parser_err_current(parser, PM_ERR_INVALID_TOKEN); + pm_diagnostic_id_t diag_id; + if (*parser->current.start >= 0x80) { + diag_id = PM_ERR_INVALID_MULTIBYTE_CHARACTER; + } else if (char_is_ascii_printable(*parser->current.start) || (*parser->current.start == '\\')) { + diag_id = PM_ERR_INVALID_PRINTABLE_CHARACTER; + } else { + diag_id = PM_ERR_INVALID_CHARACTER; + } + + PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, diag_id, *parser->current.start); goto lex_next_token; } diff --git a/test/prism/format_errors_test.rb b/test/prism/format_errors_test.rb index bc0b26165d..a142e8eee1 100644 --- a/test/prism/format_errors_test.rb +++ b/test/prism/format_errors_test.rb @@ -16,7 +16,7 @@ module Prism assert_equal <<~'ERROR', Debug.format_errors('"%W"\u"', false) > 1 | "%W"\u" | ^ expected a newline or semicolon after the statement - | ^ invalid token + | ^ invalid character `\` | ^ expected a closing delimiter for the string literal ERROR end