[ruby/prism] Optimize context_terminator with a lookup table

483aa89234
This commit is contained in:
Kevin Newton 2025-05-06 08:57:30 -04:00
parent 3ef8d833ab
commit 18e37ac430
2 changed files with 96 additions and 112 deletions

View file

@ -322,13 +322,42 @@ warnings:
- UNUSED_LOCAL_VARIABLE
- VOID_STATEMENT
tokens:
# The order of the tokens at the beginning is important: their values are used
# as bit positions in a 32-bit lookup table, so they must stay below 32.
- name: EOF
value: 1
comment: final token in the file
- name: MISSING
comment: "a token that was expected but not found"
- name: NOT_PROVIDED
comment: "a token that was not present but it is okay"
- name: BRACE_RIGHT
comment: "}"
- name: COMMA
comment: ","
- name: EMBEXPR_END
comment: "}"
- name: KEYWORD_DO
comment: "do"
- name: KEYWORD_ELSE
comment: "else"
- name: KEYWORD_ELSIF
comment: "elsif"
- name: KEYWORD_END
comment: "end"
- name: KEYWORD_ENSURE
comment: "ensure"
- name: KEYWORD_IN
comment: "in"
- name: KEYWORD_RESCUE
comment: "rescue"
- name: KEYWORD_THEN
comment: "then"
- name: KEYWORD_WHEN
comment: "when"
- name: NEWLINE
comment: "a newline character outside of other tokens"
- name: PARENTHESIS_RIGHT
comment: ")"
- name: SEMICOLON
comment: ";"
# Tokens from here on are not used for lookup, and can be in any order.
- name: AMPERSAND
comment: "&"
- name: AMPERSAND_AMPERSAND
@ -351,8 +380,6 @@ tokens:
comment: "!~"
- name: BRACE_LEFT
comment: "{"
- name: BRACE_RIGHT
comment: "}"
- name: BRACKET_LEFT
comment: "["
- name: BRACKET_LEFT_ARRAY
@ -375,8 +402,6 @@ tokens:
comment: ":"
- name: COLON_COLON
comment: "::"
- name: COMMA
comment: ","
- name: COMMENT
comment: "a comment"
- name: CONSTANT
@ -395,8 +420,6 @@ tokens:
comment: "a line inside of embedded documentation"
- name: EMBEXPR_BEGIN
comment: "#{"
- name: EMBEXPR_END
comment: "}"
- name: EMBVAR
comment: "#"
- name: EQUAL
@ -463,20 +486,10 @@ tokens:
comment: "def"
- name: KEYWORD_DEFINED
comment: "defined?"
- name: KEYWORD_DO
comment: "do"
- name: KEYWORD_DO_LOOP
comment: "do keyword for a predicate in a while, until, or for loop"
- name: KEYWORD_ELSE
comment: "else"
- name: KEYWORD_ELSIF
comment: "elsif"
- name: KEYWORD_END
comment: "end"
- name: KEYWORD_END_UPCASE
comment: "END"
- name: KEYWORD_ENSURE
comment: "ensure"
- name: KEYWORD_FALSE
comment: "false"
- name: KEYWORD_FOR
@ -485,8 +498,6 @@ tokens:
comment: "if"
- name: KEYWORD_IF_MODIFIER
comment: "if in the modifier form"
- name: KEYWORD_IN
comment: "in"
- name: KEYWORD_MODULE
comment: "module"
- name: KEYWORD_NEXT
@ -499,8 +510,6 @@ tokens:
comment: "or"
- name: KEYWORD_REDO
comment: "redo"
- name: KEYWORD_RESCUE
comment: "rescue"
- name: KEYWORD_RESCUE_MODIFIER
comment: "rescue in the modifier form"
- name: KEYWORD_RETRY
@ -511,8 +520,6 @@ tokens:
comment: "self"
- name: KEYWORD_SUPER
comment: "super"
- name: KEYWORD_THEN
comment: "then"
- name: KEYWORD_TRUE
comment: "true"
- name: KEYWORD_UNDEF
@ -525,8 +532,6 @@ tokens:
comment: "until"
- name: KEYWORD_UNTIL_MODIFIER
comment: "until in the modifier form"
- name: KEYWORD_WHEN
comment: "when"
- name: KEYWORD_WHILE
comment: "while"
- name: KEYWORD_WHILE_MODIFIER
@ -563,16 +568,12 @@ tokens:
comment: "-="
- name: MINUS_GREATER
comment: "->"
- name: NEWLINE
comment: "a newline character outside of other tokens"
- name: NUMBERED_REFERENCE
comment: "a numbered reference to a capture group in the previous regular expression match"
- name: PARENTHESIS_LEFT
comment: "("
- name: PARENTHESIS_LEFT_PARENTHESES
comment: "( for a parentheses node"
- name: PARENTHESIS_RIGHT
comment: ")"
- name: PERCENT
comment: "%"
- name: PERCENT_EQUAL
@ -605,8 +606,6 @@ tokens:
comment: "the beginning of a regular expression"
- name: REGEXP_END
comment: "the end of a regular expression"
- name: SEMICOLON
comment: ";"
- name: SLASH
comment: "/"
- name: SLASH_EQUAL
@ -651,6 +650,10 @@ tokens:
comment: "a separator between words in a list"
- name: __END__
comment: "marker for the point in the file at which the parser should stop"
- name: MISSING
comment: "a token that was expected but not found"
- name: NOT_PROVIDED
comment: "a token that was not present but it is okay"
flags:
- name: ArgumentsNodeFlags
values:

View file

@ -8586,85 +8586,66 @@ parser_lex_magic_comment(pm_parser_t *parser, bool semantic_token_seen) {
/* Context manipulations */
/******************************************************************************/
static bool
context_terminator(pm_context_t context, pm_token_t *token) {
switch (context) {
case PM_CONTEXT_MAIN:
case PM_CONTEXT_DEF_PARAMS:
case PM_CONTEXT_DEFINED:
case PM_CONTEXT_MULTI_TARGET:
case PM_CONTEXT_TERNARY:
case PM_CONTEXT_RESCUE_MODIFIER:
return token->type == PM_TOKEN_EOF;
case PM_CONTEXT_DEFAULT_PARAMS:
return token->type == PM_TOKEN_COMMA || token->type == PM_TOKEN_PARENTHESIS_RIGHT;
case PM_CONTEXT_PREEXE:
case PM_CONTEXT_POSTEXE:
return token->type == PM_TOKEN_BRACE_RIGHT;
case PM_CONTEXT_MODULE:
case PM_CONTEXT_CLASS:
case PM_CONTEXT_SCLASS:
case PM_CONTEXT_LAMBDA_DO_END:
case PM_CONTEXT_DEF:
case PM_CONTEXT_BLOCK_KEYWORDS:
return token->type == PM_TOKEN_KEYWORD_END || token->type == PM_TOKEN_KEYWORD_RESCUE || token->type == PM_TOKEN_KEYWORD_ENSURE;
case PM_CONTEXT_WHILE:
case PM_CONTEXT_UNTIL:
case PM_CONTEXT_ELSE:
case PM_CONTEXT_FOR:
case PM_CONTEXT_BEGIN_ENSURE:
case PM_CONTEXT_BLOCK_ENSURE:
case PM_CONTEXT_CLASS_ENSURE:
case PM_CONTEXT_DEF_ENSURE:
case PM_CONTEXT_LAMBDA_ENSURE:
case PM_CONTEXT_MODULE_ENSURE:
case PM_CONTEXT_SCLASS_ENSURE:
return token->type == PM_TOKEN_KEYWORD_END;
case PM_CONTEXT_LOOP_PREDICATE:
return token->type == PM_TOKEN_KEYWORD_DO || token->type == PM_TOKEN_KEYWORD_THEN;
case PM_CONTEXT_FOR_INDEX:
return token->type == PM_TOKEN_KEYWORD_IN;
case PM_CONTEXT_CASE_WHEN:
return token->type == PM_TOKEN_KEYWORD_WHEN || token->type == PM_TOKEN_KEYWORD_END || token->type == PM_TOKEN_KEYWORD_ELSE;
case PM_CONTEXT_CASE_IN:
return token->type == PM_TOKEN_KEYWORD_IN || token->type == PM_TOKEN_KEYWORD_END || token->type == PM_TOKEN_KEYWORD_ELSE;
case PM_CONTEXT_IF:
case PM_CONTEXT_ELSIF:
return token->type == PM_TOKEN_KEYWORD_ELSE || token->type == PM_TOKEN_KEYWORD_ELSIF || token->type == PM_TOKEN_KEYWORD_END;
case PM_CONTEXT_UNLESS:
return token->type == PM_TOKEN_KEYWORD_ELSE || token->type == PM_TOKEN_KEYWORD_END;
case PM_CONTEXT_EMBEXPR:
return token->type == PM_TOKEN_EMBEXPR_END;
case PM_CONTEXT_BLOCK_BRACES:
return token->type == PM_TOKEN_BRACE_RIGHT;
case PM_CONTEXT_PARENS:
return token->type == PM_TOKEN_PARENTHESIS_RIGHT;
case PM_CONTEXT_BEGIN:
case PM_CONTEXT_BEGIN_RESCUE:
case PM_CONTEXT_BLOCK_RESCUE:
case PM_CONTEXT_CLASS_RESCUE:
case PM_CONTEXT_DEF_RESCUE:
case PM_CONTEXT_LAMBDA_RESCUE:
case PM_CONTEXT_MODULE_RESCUE:
case PM_CONTEXT_SCLASS_RESCUE:
return token->type == PM_TOKEN_KEYWORD_ENSURE || token->type == PM_TOKEN_KEYWORD_RESCUE || token->type == PM_TOKEN_KEYWORD_ELSE || token->type == PM_TOKEN_KEYWORD_END;
case PM_CONTEXT_BEGIN_ELSE:
case PM_CONTEXT_BLOCK_ELSE:
case PM_CONTEXT_CLASS_ELSE:
case PM_CONTEXT_DEF_ELSE:
case PM_CONTEXT_LAMBDA_ELSE:
case PM_CONTEXT_MODULE_ELSE:
case PM_CONTEXT_SCLASS_ELSE:
return token->type == PM_TOKEN_KEYWORD_ENSURE || token->type == PM_TOKEN_KEYWORD_END;
case PM_CONTEXT_LAMBDA_BRACES:
return token->type == PM_TOKEN_BRACE_RIGHT;
case PM_CONTEXT_PREDICATE:
return token->type == PM_TOKEN_KEYWORD_THEN || token->type == PM_TOKEN_NEWLINE || token->type == PM_TOKEN_SEMICOLON;
case PM_CONTEXT_NONE:
return false;
}
static const uint32_t context_terminators[] = {
[PM_CONTEXT_NONE] = 0,
[PM_CONTEXT_BEGIN] = (1 << PM_TOKEN_KEYWORD_ENSURE) | (1 << PM_TOKEN_KEYWORD_RESCUE) | (1 << PM_TOKEN_KEYWORD_ELSE) | (1 << PM_TOKEN_KEYWORD_END),
[PM_CONTEXT_BEGIN_ENSURE] = (1 << PM_TOKEN_KEYWORD_END),
[PM_CONTEXT_BEGIN_ELSE] = (1 << PM_TOKEN_KEYWORD_ENSURE) | (1 << PM_TOKEN_KEYWORD_END),
[PM_CONTEXT_BEGIN_RESCUE] = (1 << PM_TOKEN_KEYWORD_ENSURE) | (1 << PM_TOKEN_KEYWORD_RESCUE) | (1 << PM_TOKEN_KEYWORD_ELSE) | (1 << PM_TOKEN_KEYWORD_END),
[PM_CONTEXT_BLOCK_BRACES] = (1 << PM_TOKEN_BRACE_RIGHT),
[PM_CONTEXT_BLOCK_KEYWORDS] = (1 << PM_TOKEN_KEYWORD_END) | (1 << PM_TOKEN_KEYWORD_RESCUE) | (1 << PM_TOKEN_KEYWORD_ENSURE),
[PM_CONTEXT_BLOCK_ENSURE] = (1 << PM_TOKEN_KEYWORD_END),
[PM_CONTEXT_BLOCK_ELSE] = (1 << PM_TOKEN_KEYWORD_ENSURE) | (1 << PM_TOKEN_KEYWORD_END),
[PM_CONTEXT_BLOCK_RESCUE] = (1 << PM_TOKEN_KEYWORD_ENSURE) | (1 << PM_TOKEN_KEYWORD_RESCUE) | (1 << PM_TOKEN_KEYWORD_ELSE) | (1 << PM_TOKEN_KEYWORD_END),
[PM_CONTEXT_CASE_WHEN] = (1 << PM_TOKEN_KEYWORD_WHEN) | (1 << PM_TOKEN_KEYWORD_END) | (1 << PM_TOKEN_KEYWORD_ELSE),
[PM_CONTEXT_CASE_IN] = (1 << PM_TOKEN_KEYWORD_IN) | (1 << PM_TOKEN_KEYWORD_END) | (1 << PM_TOKEN_KEYWORD_ELSE),
[PM_CONTEXT_CLASS] = (1 << PM_TOKEN_KEYWORD_END) | (1 << PM_TOKEN_KEYWORD_RESCUE) | (1 << PM_TOKEN_KEYWORD_ENSURE),
[PM_CONTEXT_CLASS_ENSURE] = (1 << PM_TOKEN_KEYWORD_END),
[PM_CONTEXT_CLASS_ELSE] = (1 << PM_TOKEN_KEYWORD_ENSURE) | (1 << PM_TOKEN_KEYWORD_END),
[PM_CONTEXT_CLASS_RESCUE] = (1 << PM_TOKEN_KEYWORD_ENSURE) | (1 << PM_TOKEN_KEYWORD_RESCUE) | (1 << PM_TOKEN_KEYWORD_ELSE) | (1 << PM_TOKEN_KEYWORD_END),
[PM_CONTEXT_DEF] = (1 << PM_TOKEN_KEYWORD_END) | (1 << PM_TOKEN_KEYWORD_RESCUE) | (1 << PM_TOKEN_KEYWORD_ENSURE),
[PM_CONTEXT_DEF_ENSURE] = (1 << PM_TOKEN_KEYWORD_END),
[PM_CONTEXT_DEF_ELSE] = (1 << PM_TOKEN_KEYWORD_ENSURE) | (1 << PM_TOKEN_KEYWORD_END),
[PM_CONTEXT_DEF_RESCUE] = (1 << PM_TOKEN_KEYWORD_ENSURE) | (1 << PM_TOKEN_KEYWORD_RESCUE) | (1 << PM_TOKEN_KEYWORD_ELSE) | (1 << PM_TOKEN_KEYWORD_END),
[PM_CONTEXT_DEF_PARAMS] = (1 << PM_TOKEN_EOF),
[PM_CONTEXT_DEFINED] = (1 << PM_TOKEN_EOF),
[PM_CONTEXT_DEFAULT_PARAMS] = (1 << PM_TOKEN_COMMA) | (1 << PM_TOKEN_PARENTHESIS_RIGHT),
[PM_CONTEXT_ELSE] = (1 << PM_TOKEN_KEYWORD_END),
[PM_CONTEXT_ELSIF] = (1 << PM_TOKEN_KEYWORD_ELSE) | (1 << PM_TOKEN_KEYWORD_ELSIF) | (1 << PM_TOKEN_KEYWORD_END),
[PM_CONTEXT_EMBEXPR] = (1 << PM_TOKEN_EMBEXPR_END),
[PM_CONTEXT_FOR] = (1 << PM_TOKEN_KEYWORD_END),
[PM_CONTEXT_FOR_INDEX] = (1 << PM_TOKEN_KEYWORD_IN),
[PM_CONTEXT_IF] = (1 << PM_TOKEN_KEYWORD_ELSE) | (1 << PM_TOKEN_KEYWORD_ELSIF) | (1 << PM_TOKEN_KEYWORD_END),
[PM_CONTEXT_LAMBDA_BRACES] = (1 << PM_TOKEN_BRACE_RIGHT),
[PM_CONTEXT_LAMBDA_DO_END] = (1 << PM_TOKEN_KEYWORD_END) | (1 << PM_TOKEN_KEYWORD_RESCUE) | (1 << PM_TOKEN_KEYWORD_ENSURE),
[PM_CONTEXT_LAMBDA_ENSURE] = (1 << PM_TOKEN_KEYWORD_END),
[PM_CONTEXT_LAMBDA_ELSE] = (1 << PM_TOKEN_KEYWORD_ENSURE) | (1 << PM_TOKEN_KEYWORD_END),
[PM_CONTEXT_LAMBDA_RESCUE] = (1 << PM_TOKEN_KEYWORD_ENSURE) | (1 << PM_TOKEN_KEYWORD_RESCUE) | (1 << PM_TOKEN_KEYWORD_ELSE) | (1 << PM_TOKEN_KEYWORD_END),
[PM_CONTEXT_LOOP_PREDICATE] = (1 << PM_TOKEN_KEYWORD_DO) | (1 << PM_TOKEN_KEYWORD_THEN),
[PM_CONTEXT_MAIN] = (1 << PM_TOKEN_EOF),
[PM_CONTEXT_MODULE] = (1 << PM_TOKEN_KEYWORD_END) | (1 << PM_TOKEN_KEYWORD_RESCUE) | (1 << PM_TOKEN_KEYWORD_ENSURE),
[PM_CONTEXT_MODULE_ENSURE] = (1 << PM_TOKEN_KEYWORD_END),
[PM_CONTEXT_MODULE_ELSE] = (1 << PM_TOKEN_KEYWORD_ENSURE) | (1 << PM_TOKEN_KEYWORD_END),
[PM_CONTEXT_MODULE_RESCUE] = (1 << PM_TOKEN_KEYWORD_ENSURE) | (1 << PM_TOKEN_KEYWORD_RESCUE) | (1 << PM_TOKEN_KEYWORD_ELSE) | (1 << PM_TOKEN_KEYWORD_END),
[PM_CONTEXT_MULTI_TARGET] = (1 << PM_TOKEN_EOF),
[PM_CONTEXT_PARENS] = (1 << PM_TOKEN_PARENTHESIS_RIGHT),
[PM_CONTEXT_POSTEXE] = (1 << PM_TOKEN_BRACE_RIGHT),
[PM_CONTEXT_PREDICATE] = (1 << PM_TOKEN_KEYWORD_THEN) | (1 << PM_TOKEN_NEWLINE) | (1 << PM_TOKEN_SEMICOLON),
[PM_CONTEXT_PREEXE] = (1 << PM_TOKEN_BRACE_RIGHT),
[PM_CONTEXT_RESCUE_MODIFIER] = (1 << PM_TOKEN_EOF),
[PM_CONTEXT_SCLASS] = (1 << PM_TOKEN_KEYWORD_END) | (1 << PM_TOKEN_KEYWORD_RESCUE) | (1 << PM_TOKEN_KEYWORD_ENSURE),
[PM_CONTEXT_SCLASS_ENSURE] = (1 << PM_TOKEN_KEYWORD_END),
[PM_CONTEXT_SCLASS_ELSE] = (1 << PM_TOKEN_KEYWORD_ENSURE) | (1 << PM_TOKEN_KEYWORD_END),
[PM_CONTEXT_SCLASS_RESCUE] = (1 << PM_TOKEN_KEYWORD_ENSURE) | (1 << PM_TOKEN_KEYWORD_RESCUE) | (1 << PM_TOKEN_KEYWORD_ELSE) | (1 << PM_TOKEN_KEYWORD_END),
[PM_CONTEXT_TERNARY] = (1 << PM_TOKEN_EOF),
[PM_CONTEXT_UNLESS] = (1 << PM_TOKEN_KEYWORD_ELSE) | (1 << PM_TOKEN_KEYWORD_END),
[PM_CONTEXT_UNTIL] = (1 << PM_TOKEN_KEYWORD_END),
[PM_CONTEXT_WHILE] = (1 << PM_TOKEN_KEYWORD_END),
};
return false;
/**
 * Returns true if the given token terminates the given context.
 *
 * The check is a single bit test against the context_terminators table. Token
 * types with a value of 32 or more cannot be represented in a 32-bit mask and
 * therefore never terminate a context.
 *
 * @param context The context to check; used as an index into
 *     context_terminators.
 * @param token The token whose type is tested.
 * @return Whether the token's type is in the context's terminator set.
 */
static inline bool
context_terminator(pm_context_t context, pm_token_t *token) {
    // The shift operand must be unsigned: token types up to 31 reach this
    // expression, and shifting a signed 1 into the sign bit is undefined.
    return token->type < 32 && (context_terminators[context] & (1U << token->type));
}
/**