ruby/yarp/yarp.c
Aaron Patterson abce8583e2 [ruby/yarp] Fix heredocs inside %W and %w lists
The problem was that we were treating heredoc bodies as part of the %W
list because we didn't push the scanning cursor past the heredoc after
lexing out the heredoc. To fix this, we changed the whitespace-scanning
function to stop scanning when it reaches a newline, but only in the
case that a heredoc is present.

Additionally, we need to prevent double counting newlines in the case of
a heredoc.  For example:

```ruby
%W(<<foo 123)
foo
```

The newline after the `)` is counted as part of scanning the heredoc, so
we added logic to prevent double counting the newline when scanning the
rest of the %W list.

eb090d8126

Co-authored-by: Jemma Issroff <jemmaissroff@gmail.com>
2023-07-20 14:58:11 +00:00

12926 lines
550 KiB
C

#include "yarp.h"
#include "yarp/version.h"
// Returns the YARP version string, which doubles as the version of the
// serialization format. The value comes from YP_VERSION in yarp/version.h.
const char *
yp_version(void) {
    return YP_VERSION;
}
// In heredocs, tabs automatically complete up to the next 8 spaces. This is
// defined in CRuby as TAB_WIDTH.
#define YP_TAB_WHITESPACE_SIZE 8
// Debug logging will provide you with additional debugging functions as
// well as automatically replace some functions with their debugging
// counterparts.
#ifndef YP_DEBUG_LOGGING
#define YP_DEBUG_LOGGING 0
#endif
#if YP_DEBUG_LOGGING
/******************************************************************************/
/* Debugging */
/******************************************************************************/
// Translate a parser context enum value into a human-readable name for debug
// output. Returns NULL only if an unknown value is passed (every enum member
// is covered below).
YP_ATTRIBUTE_UNUSED static const char *
debug_context(yp_context_t context) {
    switch (context) {
        case YP_CONTEXT_BEGIN: return "BEGIN";
        case YP_CONTEXT_CLASS: return "CLASS";
        case YP_CONTEXT_CASE_IN: return "CASE_IN";
        case YP_CONTEXT_CASE_WHEN: return "CASE_WHEN";
        case YP_CONTEXT_DEF: return "DEF";
        case YP_CONTEXT_DEF_PARAMS: return "DEF_PARAMS";
        case YP_CONTEXT_DEFAULT_PARAMS: return "DEFAULT_PARAMS";
        case YP_CONTEXT_ENSURE: return "ENSURE";
        case YP_CONTEXT_ELSE: return "ELSE";
        case YP_CONTEXT_ELSIF: return "ELSIF";
        case YP_CONTEXT_EMBEXPR: return "EMBEXPR";
        case YP_CONTEXT_BLOCK_BRACES: return "BLOCK_BRACES";
        case YP_CONTEXT_BLOCK_KEYWORDS: return "BLOCK_KEYWORDS";
        case YP_CONTEXT_FOR: return "FOR";
        case YP_CONTEXT_IF: return "IF";
        case YP_CONTEXT_MAIN: return "MAIN";
        case YP_CONTEXT_MODULE: return "MODULE";
        case YP_CONTEXT_PARENS: return "PARENS";
        case YP_CONTEXT_POSTEXE: return "POSTEXE";
        case YP_CONTEXT_PREDICATE: return "PREDICATE";
        case YP_CONTEXT_PREEXE: return "PREEXE";
        case YP_CONTEXT_RESCUE: return "RESCUE";
        case YP_CONTEXT_RESCUE_ELSE: return "RESCUE_ELSE";
        case YP_CONTEXT_SCLASS: return "SCLASS";
        case YP_CONTEXT_UNLESS: return "UNLESS";
        case YP_CONTEXT_UNTIL: return "UNTIL";
        case YP_CONTEXT_WHILE: return "WHILE";
        case YP_CONTEXT_LAMBDA_BRACES: return "LAMBDA_BRACES";
        case YP_CONTEXT_LAMBDA_DO_END: return "LAMBDA_DO_END";
    }
    return NULL;
}
// Print the parser's current context stack to stderr, outermost context last,
// in the form "CONTEXTS: A <- B <- C" (or "CONTEXTS: NONE" when empty).
YP_ATTRIBUTE_UNUSED static void
debug_contexts(yp_parser_t *parser) {
    fprintf(stderr, "CONTEXTS: ");

    yp_context_node_t *current = parser->current_context;
    if (current == NULL) {
        fprintf(stderr, "NONE");
        fprintf(stderr, "\n");
        return;
    }

    while (current != NULL) {
        fprintf(stderr, "%s", debug_context(current->context));
        current = current->prev;

        // Only print the separator between entries, not after the last one.
        if (current != NULL) fprintf(stderr, " <- ");
    }

    fprintf(stderr, "\n");
}
// Pretty-print the given node to stderr, preceded by the given message.
// Silently does nothing if the temporary buffer cannot be initialized.
YP_ATTRIBUTE_UNUSED static void
debug_node(const char *message, yp_parser_t *parser, yp_node_t *node) {
    yp_buffer_t buffer;
    if (!yp_buffer_init(&buffer)) return;
    yp_prettyprint(parser, node, &buffer);
    // buffer.value is not NUL-terminated, so bound the print by its length.
    fprintf(stderr, "%s\n%.*s\n", message, (int) buffer.length, buffer.value);
    yp_buffer_free(&buffer);
}
// Print the parser's stack of lex modes to stderr, innermost mode first,
// separated by " <- ". Mode-specific details (terminator, interpolation, …)
// are included where the mode carries them.
YP_ATTRIBUTE_UNUSED static void
debug_lex_mode(yp_parser_t *parser) {
    yp_lex_mode_t *lex_mode = parser->lex_modes.current;
    bool first = true;

    while (lex_mode != NULL) {
        // Print the separator before every entry except the first.
        if (first) {
            first = false;
        } else {
            fprintf(stderr, " <- ");
        }

        switch (lex_mode->mode) {
            case YP_LEX_DEFAULT: fprintf(stderr, "DEFAULT"); break;
            case YP_LEX_EMBEXPR: fprintf(stderr, "EMBEXPR"); break;
            case YP_LEX_EMBVAR: fprintf(stderr, "EMBVAR"); break;
            case YP_LEX_HEREDOC: fprintf(stderr, "HEREDOC"); break;
            case YP_LEX_LIST: fprintf(stderr, "LIST (terminator=%c, interpolation=%d)", lex_mode->as.list.terminator, lex_mode->as.list.interpolation); break;
            case YP_LEX_REGEXP: fprintf(stderr, "REGEXP (terminator=%c)", lex_mode->as.regexp.terminator); break;
            case YP_LEX_STRING: fprintf(stderr, "STRING (terminator=%c, interpolation=%d)", lex_mode->as.string.terminator, lex_mode->as.string.interpolation); break;
            case YP_LEX_NUMERIC: fprintf(stderr, "NUMERIC (token_type=%s)", yp_token_type_to_str(lex_mode->as.numeric.type)); break;
        }

        lex_mode = lex_mode->prev;
    }

    fprintf(stderr, "\n");
}
// Print the parser's current lex state to stderr as a '|'-separated list of
// the state bits that are set (or "NONE" when no bits are set).
YP_ATTRIBUTE_UNUSED static void
debug_state(yp_parser_t *parser) {
    fprintf(stderr, "STATE: ");
    bool first = true;

    if (parser->lex_state == YP_LEX_STATE_NONE) {
        fprintf(stderr, "NONE\n");
        return;
    }

// Print the stringified name of each state bit that is present in the lex
// state. The macro argument is parenthesized so that a compound expression
// argument would expand safely (macro-hygiene fix; the current call sites all
// pass single constants, so behavior is unchanged).
#define CHECK_STATE(state) \
    if (parser->lex_state & (state)) { \
        if (!first) fprintf(stderr, "|"); \
        fprintf(stderr, "%s", #state); \
        first = false; \
    }

    CHECK_STATE(YP_LEX_STATE_BEG)
    CHECK_STATE(YP_LEX_STATE_END)
    CHECK_STATE(YP_LEX_STATE_ENDARG)
    CHECK_STATE(YP_LEX_STATE_ENDFN)
    CHECK_STATE(YP_LEX_STATE_ARG)
    CHECK_STATE(YP_LEX_STATE_CMDARG)
    CHECK_STATE(YP_LEX_STATE_MID)
    CHECK_STATE(YP_LEX_STATE_FNAME)
    CHECK_STATE(YP_LEX_STATE_DOT)
    CHECK_STATE(YP_LEX_STATE_CLASS)
    CHECK_STATE(YP_LEX_STATE_LABEL)
    CHECK_STATE(YP_LEX_STATE_LABELED)
    CHECK_STATE(YP_LEX_STATE_FITEM)

#undef CHECK_STATE

    fprintf(stderr, "\n");
}
// Print a single token's type and source text to stderr, e.g.
// IDENTIFIER: "foo". The token text is bounded by its start/end pointers.
YP_ATTRIBUTE_UNUSED static void
debug_token(yp_token_t * token) {
    fprintf(stderr, "%s: \"%.*s\"\n", yp_token_type_to_str(token->type), (int) (token->end - token->start), token->start);
}
#endif
/******************************************************************************/
/* Lex mode manipulations */
/******************************************************************************/
// If the given opening delimiter is one of the bracketing characters that can
// nest (its closing counterpart is a different character), return it so it
// can be used to bump the nesting count. Otherwise return '\0' to indicate
// nesting is not tracked for this delimiter.
static inline char
lex_mode_incrementor(const char start) {
    if (start == '(' || start == '[' || start == '{' || start == '<') {
        return start;
    }
    return '\0';
}
// Map an opening list delimiter to the character that terminates the list.
// The four paired brackets map to their closing counterparts; every other
// delimiter terminates with itself (e.g. %w|a b| closes with '|').
static inline char
lex_mode_terminator(const char start) {
    if (start == '(') return ')';
    if (start == '[') return ']';
    if (start == '{') return '}';
    if (start == '<') return '>';
    return start;
}
// Push a new lex mode onto the stack. While we're still within the
// pre-allocated portion of the stack we just copy into the next slot;
// once that overflows we fall back to heap allocation. Returns false only
// when a required heap allocation fails.
static bool
lex_mode_push(yp_parser_t *parser, yp_lex_mode_t lex_mode) {
    lex_mode.prev = parser->lex_modes.current;
    parser->lex_modes.index++;

    if (parser->lex_modes.index <= YP_LEX_STACK_SIZE - 1) {
        // Still inside the inline stack: store the mode in the next slot.
        parser->lex_modes.stack[parser->lex_modes.index] = lex_mode;
        parser->lex_modes.current = &parser->lex_modes.stack[parser->lex_modes.index];
        return true;
    }

    // Overflowed the inline stack: this entry lives on the heap and will be
    // freed by lex_mode_pop.
    yp_lex_mode_t *allocated = malloc(sizeof(yp_lex_mode_t));
    if (allocated == NULL) return false;

    *allocated = lex_mode;
    parser->lex_modes.current = allocated;
    return true;
}
// Push on a new list lex mode (%w/%W/%i/%I style lists).
static inline bool
lex_mode_push_list(yp_parser_t *parser, bool interpolation, char delimiter) {
    char incrementor = lex_mode_incrementor(delimiter);
    char terminator = lex_mode_terminator(delimiter);

    yp_lex_mode_t lex_mode = {
        .mode = YP_LEX_LIST,
        .as.list = {
            .nesting = 0,
            .interpolation = interpolation,
            .incrementor = incrementor,
            .terminator = terminator
        }
    };

    // Seed the set of characters at which list content must be split; the
    // lexer scans for the first of these with strpbrk.
    char *breakpoints = lex_mode.as.list.breakpoints;
    memcpy(breakpoints, "\\ \t\f\r\v\n\0\0\0", sizeof(lex_mode.as.list.breakpoints));

    // The first free slot always receives the terminator.
    size_t index = 7;
    breakpoints[index++] = terminator;

    // When interpolation is enabled we also need to stop at '#'; otherwise
    // only escapes and the terminator matter.
    if (interpolation) {
        breakpoints[index++] = '#';
    }

    // Nestable delimiters additionally stop at the opening character so the
    // nesting count can be incremented.
    if (incrementor != '\0') {
        breakpoints[index++] = incrementor;
    }

    return lex_mode_push(parser, lex_mode);
}
// Push on a new regexp lex mode.
static inline bool
lex_mode_push_regexp(yp_parser_t *parser, char incrementor, char terminator) {
    yp_lex_mode_t lex_mode = {
        .mode = YP_LEX_REGEXP,
        .as.regexp = {
            .nesting = 0,
            .incrementor = incrementor,
            .terminator = terminator
        }
    };

    // Seed the set of characters at which regexp content must be split; the
    // lexer scans for the first of these with strpbrk.
    char *breakpoints = lex_mode.as.regexp.breakpoints;
    memcpy(breakpoints, "\n\\#\0\0", sizeof(lex_mode.as.regexp.breakpoints));

    // Slot 3 always holds the terminator; slot 4 holds the incrementor when
    // the delimiter pair can nest.
    breakpoints[3] = terminator;
    if (incrementor != '\0') {
        breakpoints[4] = incrementor;
    }

    return lex_mode_push(parser, lex_mode);
}
// Push on a new string lex mode.
static inline bool
lex_mode_push_string(yp_parser_t *parser, bool interpolation, bool label_allowed, char incrementor, char terminator) {
    yp_lex_mode_t lex_mode = {
        .mode = YP_LEX_STRING,
        .as.string = {
            .nesting = 0,
            .interpolation = interpolation,
            .label_allowed = label_allowed,
            .incrementor = incrementor,
            .terminator = terminator
        }
    };

    // Seed the set of characters at which string content must be split; the
    // lexer scans for the first of these with strpbrk.
    char *breakpoints = lex_mode.as.string.breakpoints;
    memcpy(breakpoints, "\n\\\0\0\0", sizeof(lex_mode.as.string.breakpoints));

    // The first free slot always receives the terminator.
    size_t index = 2;
    breakpoints[index++] = terminator;

    // When interpolation is enabled we also need to stop at '#'; otherwise
    // only escapes and the terminator matter.
    if (interpolation) {
        breakpoints[index++] = '#';
    }

    // Nestable delimiters additionally stop at the opening character so the
    // nesting count can be incremented.
    if (incrementor != '\0') {
        breakpoints[index++] = incrementor;
    }

    return lex_mode_push(parser, lex_mode);
}
// Pop the current lex state off the stack. If we're within the pre-allocated
// space of the lex state stack, then we'll just decrement the index. Otherwise
// we'll free the current pointer and use the previous pointer.
static void
lex_mode_pop(yp_parser_t *parser) {
    if (parser->lex_modes.index == 0) {
        // Bottom of the stack: nothing to pop, so reset the root slot back to
        // the default lex mode in place.
        parser->lex_modes.current->mode = YP_LEX_DEFAULT;
    } else if (parser->lex_modes.index < YP_LEX_STACK_SIZE) {
        // Still within the inline stack: step back one slot.
        parser->lex_modes.index--;
        parser->lex_modes.current = &parser->lex_modes.stack[parser->lex_modes.index];
    } else {
        // The current mode was heap-allocated by lex_mode_push; free it and
        // restore the previously linked mode (which may live in the inline
        // stack or on the heap).
        parser->lex_modes.index--;
        yp_lex_mode_t *prev = parser->lex_modes.current->prev;
        free(parser->lex_modes.current);
        parser->lex_modes.current = prev;
    }
}
// Check whether any of the given state bits are set on the parser's current
// lex state. This is the equivalent of IS_lex_state in CRuby.
static inline bool
lex_state_p(yp_parser_t *parser, yp_lex_state_t state) {
    return (parser->lex_state & state) != 0;
}
// The kinds of newline handling the lexer can apply: ignore nothing, ignore
// all newlines, or ignore them only for pattern-matching argument contexts.
typedef enum {
    YP_IGNORED_NEWLINE_NONE = 0,
    YP_IGNORED_NEWLINE_ALL,
    YP_IGNORED_NEWLINE_PATTERN
} yp_ignored_newline_type_t;
// Determine which category of newlines should be ignored given the parser's
// current lex state.
static inline yp_ignored_newline_type_t
lex_state_ignored_p(yp_parser_t *parser) {
    bool ignored = lex_state_p(parser, YP_LEX_STATE_BEG | YP_LEX_STATE_CLASS | YP_LEX_STATE_FNAME | YP_LEX_STATE_DOT) && !lex_state_p(parser, YP_LEX_STATE_LABELED);

    if (ignored) return YP_IGNORED_NEWLINE_ALL;

    // The exact combination ARG|LABELED indicates a pattern context where
    // newlines are only conditionally ignored.
    if (parser->lex_state == (YP_LEX_STATE_ARG | YP_LEX_STATE_LABELED)) {
        return YP_IGNORED_NEWLINE_PATTERN;
    }

    return YP_IGNORED_NEWLINE_NONE;
}
// True when the lexer is at the beginning of an expression (any BEG state, or
// the ARG|LABELED combination used for labeled arguments).
static inline bool
lex_state_beg_p(yp_parser_t *parser) {
    return lex_state_p(parser, YP_LEX_STATE_BEG_ANY) || (parser->lex_state == (YP_LEX_STATE_ARG | YP_LEX_STATE_LABELED));
}
// True when the lexer is in any of the argument states.
static inline bool
lex_state_arg_p(yp_parser_t *parser) {
    return lex_state_p(parser, YP_LEX_STATE_ARG_ANY);
}
// True when we're in an argument state, a space was just seen, and the next
// character is not itself whitespace — i.e. `foo -bar` style spaced arguments.
static inline bool
lex_state_spcarg_p(yp_parser_t *parser, bool space_seen) {
    return lex_state_arg_p(parser) && space_seen && !yp_char_is_whitespace(*parser->current.end);
}
// True when the lexer is in any of the end-of-expression states.
static inline bool
lex_state_end_p(yp_parser_t *parser) {
    return lex_state_p(parser, YP_LEX_STATE_END_ANY);
}
// True when the lexer just saw a method-name or dot position, where an
// operator method name may follow. This is the equivalent of
// IS_AFTER_OPERATOR in CRuby.
static inline bool
lex_state_operator_p(yp_parser_t *parser) {
    return lex_state_p(parser, YP_LEX_STATE_FNAME | YP_LEX_STATE_DOT);
}
// Set the state of the lexer. This is defined as a function to be able to put
// a breakpoint in it (and so debug builds can interpose on it below).
static inline void
lex_state_set(yp_parser_t *parser, yp_lex_state_t state) {
    parser->lex_state = state;
}
#if YP_DEBUG_LOGGING
// Debug wrapper around lex_state_set: logs the caller, the previous state,
// and the new state to stderr on every transition.
static inline void
debug_lex_state_set(yp_parser_t *parser, yp_lex_state_t state, char const * caller_name, int line_number) {
    fprintf(stderr, "Caller: %s:%d\nPrevious: ", caller_name, line_number);
    debug_state(parser);
    lex_state_set(parser, state);
    fprintf(stderr, "Now: ");
    debug_state(parser);
    fprintf(stderr, "\n");
}

// Route all lex_state_set calls through the logging wrapper in debug builds.
#define lex_state_set(parser, state) debug_lex_state_set(parser, state, __func__, __LINE__)
#endif
/******************************************************************************/
/* Node-related functions */
/******************************************************************************/
// Retrieve (inserting if necessary) the constant pool id for the source range
// delimited by the given start/end pointers.
static inline yp_constant_id_t
yp_parser_constant_id_location(yp_parser_t *parser, const char *start, const char *end) {
    return yp_constant_pool_insert(&parser->constant_pool, start, (size_t) (end - start));
}
// Retrieve (inserting if necessary) the constant pool id for the given
// token's source text.
static inline yp_constant_id_t
yp_parser_constant_id_token(yp_parser_t *parser, const yp_token_t *token) {
    return yp_parser_constant_id_location(parser, token->start, token->end);
}
// In a lot of places in the tree you can have tokens that are not provided but
// that do not cause an error. For example, in a method call without
// parentheses. In these cases we set the token to the "not provided" type. For
// example:
//
//     yp_token_t token = not_provided(parser);
//
// (Note: the resulting token's start/end both point at the start of the
// source so that it carries a valid, zero-width location.)
static inline yp_token_t
not_provided(yp_parser_t *parser) {
    return (yp_token_t) { .type = YP_TOKEN_NOT_PROVIDED, .start = parser->start, .end = parser->start };
}
// A shared, empty yp_string_t value.
#define YP_EMPTY_STRING ((yp_string_t) { .type = YP_STRING_SHARED, .as.shared.start = NULL, .as.shared.end = NULL })
// A zero-width location anchored at the start of the source.
#define YP_LOCATION_NULL_VALUE(parser) ((yp_location_t) { .start = parser->start, .end = parser->start })
// The location covering a token.
#define YP_LOCATION_TOKEN_VALUE(token) ((yp_location_t) { .start = (token)->start, .end = (token)->end })
// The location covering a node.
#define YP_LOCATION_NODE_VALUE(node) ((yp_location_t) { .start = (node)->location.start, .end = (node)->location.end })
// The location covering a node accessed through its embedded base struct.
#define YP_LOCATION_NODE_BASE_VALUE(node) ((yp_location_t) { .start = (node)->base.location.start, .end = (node)->base.location.end })
// An optional location that is absent (both bounds NULL).
#define YP_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE ((yp_location_t) { .start = NULL, .end = NULL })
// The token's location, or the absent location when the token wasn't provided.
#define YP_OPTIONAL_LOCATION_TOKEN_VALUE(token) ((token)->type == YP_TOKEN_NOT_PROVIDED ? YP_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE : YP_LOCATION_TOKEN_VALUE(token))
// A zero-width "not provided" token anchored at the start of the source.
#define YP_TOKEN_NOT_PROVIDED_VALUE(parser) ((yp_token_t) { .type = YP_TOKEN_NOT_PROVIDED, .start = (parser)->start, .end = (parser)->start })
// This is a special out parameter to the parse_arguments_list function that
// includes opening and closing parentheses in addition to the arguments since
// it's so common. It is handy to use when passing argument information to one
// of the call node creation functions.
typedef struct {
    yp_location_t opening_loc;      // location of the opening parenthesis, if any
    yp_arguments_node_t *arguments; // the parsed arguments, or NULL
    yp_location_t closing_loc;      // location of the closing parenthesis, if any
    yp_block_node_t *block;         // the attached block, or NULL
} yp_arguments_t;

// An arguments struct with every member absent.
#define YP_EMPTY_ARGUMENTS ((yp_arguments_t) { .opening_loc = YP_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE, .arguments = NULL, .closing_loc = YP_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE, .block = NULL })
/******************************************************************************/
/* Node creation functions */
/******************************************************************************/
// Parse out the option flags trailing a regular expression's closing
// delimiter (e.g. the "mix" in /foo/mix) into a bit set. Tokens that are not
// a proper REGEXP_END (e.g. missing) contribute no flags.
static inline uint32_t
yp_regular_expression_flags_create(const yp_token_t *closing) {
    uint32_t flags = 0;
    if (closing->type != YP_TOKEN_REGEXP_END) return flags;

    // The flag characters start immediately after the closing delimiter.
    for (const char *cursor = closing->start + 1; cursor < closing->end; cursor++) {
        switch (*cursor) {
            case 'i': flags |= YP_REGULAR_EXPRESSION_FLAGS_IGNORE_CASE; break;
            case 'm': flags |= YP_REGULAR_EXPRESSION_FLAGS_MULTI_LINE; break;
            case 'x': flags |= YP_REGULAR_EXPRESSION_FLAGS_EXTENDED; break;
            case 'e': flags |= YP_REGULAR_EXPRESSION_FLAGS_EUC_JP; break;
            case 'n': flags |= YP_REGULAR_EXPRESSION_FLAGS_ASCII_8BIT; break;
            case 's': flags |= YP_REGULAR_EXPRESSION_FLAGS_WINDOWS_31J; break;
            case 'u': flags |= YP_REGULAR_EXPRESSION_FLAGS_UTF_8; break;
            case 'o': flags |= YP_REGULAR_EXPRESSION_FLAGS_ONCE; break;
            // The lexer only emits valid flag characters, so anything else is
            // a programming error.
            default: assert(false && "unreachable");
        }
    }

    return flags;
}
// Allocate and initialize a new StatementsNode node.
static yp_statements_node_t *
yp_statements_node_create(yp_parser_t *parser);
// Append a new node to the given StatementsNode node's body.
static void
yp_statements_node_body_append(yp_statements_node_t *node, yp_node_t *statement);
// This function is here to allow us a place to extend in the future when we
// implement our own arena allocation.
static inline void *
yp_alloc_node(YP_ATTRIBUTE_UNUSED yp_parser_t *parser, size_t size) {
    return malloc(size);
}

// Allocate a node of the given type and bail out of the calling function with
// NULL on allocation failure. NOTE(review): this macro expands to two
// statements, embeds a `return`, and assumes the result is assigned to a
// local literally named `node` — keep all three in mind at call sites.
#define YP_ALLOC_NODE(parser, type) (type *) yp_alloc_node(parser, sizeof(type)); if (node == NULL) return NULL
// Allocate a new MissingNode node covering the given source range. Returns
// NULL on allocation failure.
static yp_missing_node_t *
yp_missing_node_create(yp_parser_t *parser, const char *start, const char *end) {
    yp_missing_node_t *node = yp_alloc_node(parser, sizeof(yp_missing_node_t));
    if (node == NULL) return NULL;

    *node = (yp_missing_node_t) {{ .type = YP_NODE_MISSING_NODE, .location = { .start = start, .end = end } }};
    return node;
}
// Allocate and initialize a new alias node spanning from the `alias` keyword
// through the end of the old name. Returns NULL on allocation failure.
static yp_alias_node_t *
yp_alias_node_create(yp_parser_t *parser, const yp_token_t *keyword, yp_node_t *new_name, yp_node_t *old_name) {
    assert(keyword->type == YP_TOKEN_KEYWORD_ALIAS);
    yp_alias_node_t *node = yp_alloc_node(parser, sizeof(yp_alias_node_t));
    if (node == NULL) return NULL;

    *node = (yp_alias_node_t) {
        {
            .type = YP_NODE_ALIAS_NODE,
            .location = { .start = keyword->start, .end = old_name->location.end },
        },
        .keyword_loc = YP_LOCATION_TOKEN_VALUE(keyword),
        .new_name = new_name,
        .old_name = old_name
    };

    return node;
}
// Allocate a new AlternationPatternNode node (`left | right` in a pattern).
// Returns NULL on allocation failure.
static yp_alternation_pattern_node_t *
yp_alternation_pattern_node_create(yp_parser_t *parser, yp_node_t *left, yp_node_t *right, const yp_token_t *operator) {
    yp_alternation_pattern_node_t *node = yp_alloc_node(parser, sizeof(yp_alternation_pattern_node_t));
    if (node == NULL) return NULL;

    *node = (yp_alternation_pattern_node_t) {
        {
            .type = YP_NODE_ALTERNATION_PATTERN_NODE,
            .location = { .start = left->location.start, .end = right->location.end },
        },
        .operator_loc = YP_LOCATION_TOKEN_VALUE(operator),
        .left = left,
        .right = right
    };

    return node;
}
// Allocate and initialize a new and node (`left and right` / `left && right`).
// Returns NULL on allocation failure.
static yp_and_node_t *
yp_and_node_create(yp_parser_t *parser, yp_node_t *left, const yp_token_t *operator, yp_node_t *right) {
    yp_and_node_t *node = yp_alloc_node(parser, sizeof(yp_and_node_t));
    if (node == NULL) return NULL;

    *node = (yp_and_node_t) {
        {
            .type = YP_NODE_AND_NODE,
            .location = { .start = left->location.start, .end = right->location.end },
        },
        .operator_loc = YP_LOCATION_TOKEN_VALUE(operator),
        .left = left,
        .right = right
    };

    return node;
}
// Allocate and initialize a new, empty arguments node. Its location starts
// out as a zero-width span and is widened as arguments are appended. Returns
// NULL on allocation failure.
static yp_arguments_node_t *
yp_arguments_node_create(yp_parser_t *parser) {
    yp_arguments_node_t *node = yp_alloc_node(parser, sizeof(yp_arguments_node_t));
    if (node == NULL) return NULL;

    *node = (yp_arguments_node_t) {
        {
            .type = YP_NODE_ARGUMENTS_NODE,
            .location = YP_LOCATION_NULL_VALUE(parser)
        },
        .arguments = YP_EMPTY_NODE_LIST
    };

    return node;
}
// Return the number of arguments currently held by the given arguments node.
static size_t
yp_arguments_node_size(yp_arguments_node_t *node) {
    return node->arguments.size;
}
// Append an argument to an arguments node, widening the node's location to
// cover the new argument.
static void
yp_arguments_node_arguments_append(yp_arguments_node_t *node, yp_node_t *argument) {
    // The first argument appended pins the start of the node's location.
    if (yp_arguments_node_size(node) == 0) {
        node->base.location.start = argument->location.start;
    }

    node->base.location.end = argument->location.end;
    yp_node_list_append(&node->arguments, argument);
}
// Allocate and initialize a new ArrayNode node. Both the opening and closing
// locations initially point at the opening token; the closing location is
// corrected later by yp_array_node_close_set. Returns NULL on allocation
// failure.
static yp_array_node_t *
yp_array_node_create(yp_parser_t *parser, const yp_token_t *opening) {
    yp_array_node_t *node = yp_alloc_node(parser, sizeof(yp_array_node_t));
    if (node == NULL) return NULL;

    *node = (yp_array_node_t) {
        {
            .type = YP_NODE_ARRAY_NODE,
            .location = { .start = opening->start, .end = opening->end },
        },
        .elements = YP_EMPTY_NODE_LIST,
        .opening_loc = YP_OPTIONAL_LOCATION_TOKEN_VALUE(opening),
        .closing_loc = YP_OPTIONAL_LOCATION_TOKEN_VALUE(opening)
    };

    return node;
}
// Return the number of elements currently held by the given array node.
static inline size_t
yp_array_node_size(yp_array_node_t *node) {
    return node->elements.size;
}
// Append an element to an array node, widening the node's location to cover
// the new element.
static inline void
yp_array_node_elements_append(yp_array_node_t *node, yp_node_t *element) {
    // If there is no opening token (an implicit array), the first element
    // pins the start of the node's location.
    if (!node->elements.size && !node->opening_loc.start) {
        node->base.location.start = element->location.start;
    }
    yp_node_list_append(&node->elements, element);
    node->base.location.end = element->location.end;
}
// Set the closing token and end location of an array node.
static void
yp_array_node_close_set(yp_array_node_t *node, const yp_token_t *closing) {
    assert(closing->type == YP_TOKEN_BRACKET_RIGHT || closing->type == YP_TOKEN_STRING_END || closing->type == YP_TOKEN_MISSING || closing->type == YP_TOKEN_NOT_PROVIDED);
    node->base.location.end = closing->end;
    node->closing_loc = YP_LOCATION_TOKEN_VALUE(closing);
}
// Allocate and initialize a new array pattern node. The node list given in the
// nodes parameter is guaranteed to have at least two nodes.
static yp_array_pattern_node_t *
yp_array_pattern_node_node_list_create(yp_parser_t *parser, yp_node_list_t *nodes) {
    yp_array_pattern_node_t *node = YP_ALLOC_NODE(parser, yp_array_pattern_node_t);

    *node = (yp_array_pattern_node_t) {
        {
            .type = YP_NODE_ARRAY_PATTERN_NODE,
            // The pattern spans from the first child to the last.
            .location = {
                .start = nodes->nodes[0]->location.start,
                .end = nodes->nodes[nodes->size - 1]->location.end
            },
        },
        .constant = NULL,
        .rest = NULL,
        .requireds = YP_EMPTY_NODE_LIST,
        .posts = YP_EMPTY_NODE_LIST,
        .opening_loc = YP_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
        .closing_loc = YP_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE
    };

    // For now we're going to just copy over each pointer manually. This could be
    // much more efficient, as we could instead resize the node list.
    //
    // Children are partitioned around the first splat: everything before it is
    // required, the splat itself becomes the rest, and everything after it
    // (including any later splats) goes into posts.
    bool found_rest = false;
    for (size_t index = 0; index < nodes->size; index++) {
        yp_node_t *child = nodes->nodes[index];

        if (!found_rest && child->type == YP_NODE_SPLAT_NODE) {
            node->rest = child;
            found_rest = true;
        } else if (found_rest) {
            yp_node_list_append(&node->posts, child);
        } else {
            yp_node_list_append(&node->requireds, child);
        }
    }

    return node;
}
// Allocate and initialize a new array pattern node from a single rest (splat)
// node; the pattern's location is the rest node's location. Returns NULL on
// allocation failure.
static yp_array_pattern_node_t *
yp_array_pattern_node_rest_create(yp_parser_t *parser, yp_node_t *rest) {
    yp_array_pattern_node_t *node = yp_alloc_node(parser, sizeof(yp_array_pattern_node_t));
    if (node == NULL) return NULL;

    *node = (yp_array_pattern_node_t) {
        {
            .type = YP_NODE_ARRAY_PATTERN_NODE,
            .location = rest->location,
        },
        .constant = NULL,
        .rest = rest,
        .opening_loc = YP_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
        .closing_loc = YP_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
        .requireds = YP_EMPTY_NODE_LIST,
        .posts = YP_EMPTY_NODE_LIST
    };

    return node;
}
// Allocate and initialize a new array pattern node from a constant and its
// opening and closing tokens (e.g. `Foo[a, b]`). Returns NULL on allocation
// failure.
static yp_array_pattern_node_t *
yp_array_pattern_node_constant_create(yp_parser_t *parser, yp_node_t *constant, const yp_token_t *opening, const yp_token_t *closing) {
    yp_array_pattern_node_t *node = yp_alloc_node(parser, sizeof(yp_array_pattern_node_t));
    if (node == NULL) return NULL;

    *node = (yp_array_pattern_node_t) {
        {
            .type = YP_NODE_ARRAY_PATTERN_NODE,
            .location = { .start = constant->location.start, .end = closing->end },
        },
        .constant = constant,
        .rest = NULL,
        .opening_loc = YP_LOCATION_TOKEN_VALUE(opening),
        .closing_loc = YP_LOCATION_TOKEN_VALUE(closing),
        .requireds = YP_EMPTY_NODE_LIST,
        .posts = YP_EMPTY_NODE_LIST
    };

    return node;
}
// Allocate and initialize a new, empty array pattern node from its opening
// and closing tokens (e.g. `[]` in a pattern). Returns NULL on allocation
// failure.
static yp_array_pattern_node_t *
yp_array_pattern_node_empty_create(yp_parser_t *parser, const yp_token_t *opening, const yp_token_t *closing) {
    yp_array_pattern_node_t *node = yp_alloc_node(parser, sizeof(yp_array_pattern_node_t));
    if (node == NULL) return NULL;

    *node = (yp_array_pattern_node_t) {
        {
            .type = YP_NODE_ARRAY_PATTERN_NODE,
            .location = { .start = opening->start, .end = closing->end },
        },
        .constant = NULL,
        .rest = NULL,
        .opening_loc = YP_LOCATION_TOKEN_VALUE(opening),
        .closing_loc = YP_LOCATION_TOKEN_VALUE(closing),
        .requireds = YP_EMPTY_NODE_LIST,
        .posts = YP_EMPTY_NODE_LIST
    };

    return node;
}
// Append a required sub-pattern to an array pattern node.
static inline void
yp_array_pattern_node_requireds_append(yp_array_pattern_node_t *node, yp_node_t *inner) {
    yp_node_list_append(&node->requireds, inner);
}
// Allocate and initialize a new assoc node (a single key/value pair, e.g.
// `key => value` or `key:`). Both operator and value may be absent. Returns
// NULL on allocation failure.
static yp_assoc_node_t *
yp_assoc_node_create(yp_parser_t *parser, yp_node_t *key, const yp_token_t *operator, yp_node_t *value) {
    yp_assoc_node_t *node = yp_alloc_node(parser, sizeof(yp_assoc_node_t));
    if (node == NULL) return NULL;

    // The node ends at the value when present, otherwise at the operator,
    // otherwise at the key itself.
    const char *end =
        (value != NULL) ? value->location.end :
        (operator->type != YP_TOKEN_NOT_PROVIDED) ? operator->end :
        key->location.end;

    *node = (yp_assoc_node_t) {
        {
            .type = YP_NODE_ASSOC_NODE,
            .location = { .start = key->location.start, .end = end },
        },
        .operator_loc = YP_OPTIONAL_LOCATION_TOKEN_VALUE(operator),
        .key = key,
        .value = value
    };

    return node;
}
// Allocate and initialize a new assoc splat node (`**value`, where value may
// be absent as in a bare `**`). Returns NULL on allocation failure.
static yp_assoc_splat_node_t *
yp_assoc_splat_node_create(yp_parser_t *parser, yp_node_t *value, const yp_token_t *operator) {
    assert(operator->type == YP_TOKEN_USTAR_STAR);
    yp_assoc_splat_node_t *node = yp_alloc_node(parser, sizeof(yp_assoc_splat_node_t));
    if (node == NULL) return NULL;

    *node = (yp_assoc_splat_node_t) {
        {
            .type = YP_NODE_ASSOC_SPLAT_NODE,
            .location = {
                .start = operator->start,
                // A bare ** ends at the operator itself.
                .end = value == NULL ? operator->end : value->location.end
            },
        },
        .operator_loc = YP_LOCATION_TOKEN_VALUE(operator),
        .value = value
    };

    return node;
}
// Allocate a new BackReferenceReadNode node (e.g. $&, $'). Returns NULL on
// allocation failure.
static yp_back_reference_read_node_t *
yp_back_reference_read_node_create(yp_parser_t *parser, const yp_token_t *name) {
    assert(name->type == YP_TOKEN_BACK_REFERENCE);
    yp_back_reference_read_node_t *node = yp_alloc_node(parser, sizeof(yp_back_reference_read_node_t));
    if (node == NULL) return NULL;

    *node = (yp_back_reference_read_node_t) {
        {
            .type = YP_NODE_BACK_REFERENCE_READ_NODE,
            .location = YP_LOCATION_TOKEN_VALUE(name),
        }
    };

    return node;
}
// Allocate and initialize a new begin node. Its end location is provisional:
// the rescue/else/ensure/end setters below extend it as those clauses are
// parsed. Returns NULL on allocation failure.
static yp_begin_node_t *
yp_begin_node_create(yp_parser_t *parser, const yp_token_t *begin_keyword, yp_statements_node_t *statements) {
    yp_begin_node_t *node = yp_alloc_node(parser, sizeof(yp_begin_node_t));
    if (node == NULL) return NULL;

    *node = (yp_begin_node_t) {
        {
            .type = YP_NODE_BEGIN_NODE,
            .location = {
                .start = begin_keyword->start,
                .end = statements == NULL ? begin_keyword->end : statements->base.location.end
            },
        },
        .begin_keyword_loc = YP_OPTIONAL_LOCATION_TOKEN_VALUE(begin_keyword),
        .end_keyword_loc = YP_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
        .statements = statements
    };

    return node;
}
// Set the rescue clause, optionally start, and end location of a begin node.
static void
yp_begin_node_rescue_clause_set(yp_begin_node_t *node, yp_rescue_node_t *rescue_clause) {
    // If the begin keyword doesn't exist, we set the start on the begin_node
    if (!node->begin_keyword_loc.start) {
        node->base.location.start = rescue_clause->base.location.start;
    }
    node->base.location.end = rescue_clause->base.location.end;
    node->rescue_clause = rescue_clause;
}
// Set the else clause and extend the end location of a begin node.
static void
yp_begin_node_else_clause_set(yp_begin_node_t *node, yp_else_node_t *else_clause) {
    node->base.location.end = else_clause->base.location.end;
    node->else_clause = else_clause;
}
// Set the ensure clause and extend the end location of a begin node.
static void
yp_begin_node_ensure_clause_set(yp_begin_node_t *node, yp_ensure_node_t *ensure_clause) {
    node->base.location.end = ensure_clause->base.location.end;
    node->ensure_clause = ensure_clause;
}
// Set the end keyword and extend the end location of a begin node.
static void
yp_begin_node_end_keyword_set(yp_begin_node_t *node, const yp_token_t *end_keyword) {
    assert(end_keyword->type == YP_TOKEN_KEYWORD_END || end_keyword->type == YP_TOKEN_MISSING);
    node->base.location.end = end_keyword->end;
    node->end_keyword_loc = YP_OPTIONAL_LOCATION_TOKEN_VALUE(end_keyword);
}
// Allocate and initialize a new BlockArgumentNode node (`&expr`, where the
// expression may be absent as in an anonymous block forward). Returns NULL on
// allocation failure.
static yp_block_argument_node_t *
yp_block_argument_node_create(yp_parser_t *parser, const yp_token_t *operator, yp_node_t *expression) {
    yp_block_argument_node_t *node = yp_alloc_node(parser, sizeof(yp_block_argument_node_t));
    if (node == NULL) return NULL;

    *node = (yp_block_argument_node_t) {
        {
            .type = YP_NODE_BLOCK_ARGUMENT_NODE,
            .location = {
                .start = operator->start,
                // A bare & ends at the operator itself.
                .end = expression == NULL ? operator->end : expression->location.end
            },
        },
        .operator_loc = YP_LOCATION_TOKEN_VALUE(operator),
        .expression = expression
    };

    return node;
}
// Allocate and initialize a new BlockNode node, which spans from its opening
// token ({ or do) through its closing token (} or end). Takes ownership of
// the locals list by copying the struct. Returns NULL on allocation failure.
static yp_block_node_t *
yp_block_node_create(yp_parser_t *parser, yp_constant_id_list_t *locals, const yp_token_t *opening, yp_block_parameters_node_t *parameters, yp_node_t *statements, const yp_token_t *closing) {
    yp_block_node_t *node = yp_alloc_node(parser, sizeof(yp_block_node_t));
    if (node == NULL) return NULL;

    *node = (yp_block_node_t) {
        {
            .type = YP_NODE_BLOCK_NODE,
            .location = { .start = opening->start, .end = closing->end },
        },
        .opening_loc = YP_LOCATION_TOKEN_VALUE(opening),
        .closing_loc = YP_LOCATION_TOKEN_VALUE(closing),
        .locals = *locals,
        .parameters = parameters,
        .statements = statements
    };

    return node;
}
// Allocate and initialize a new BlockParameterNode node (`&name` in a
// parameter list, where the name may be absent for an anonymous block
// parameter). Returns NULL on allocation failure.
static yp_block_parameter_node_t *
yp_block_parameter_node_create(yp_parser_t *parser, const yp_token_t *name, const yp_token_t *operator) {
    assert(operator->type == YP_TOKEN_NOT_PROVIDED || operator->type == YP_TOKEN_AMPERSAND);
    yp_block_parameter_node_t *node = yp_alloc_node(parser, sizeof(yp_block_parameter_node_t));
    if (node == NULL) return NULL;

    *node = (yp_block_parameter_node_t) {
        {
            .type = YP_NODE_BLOCK_PARAMETER_NODE,
            .location = {
                .start = operator->start,
                // An anonymous &-parameter ends at the operator itself.
                .end = (name->type == YP_TOKEN_NOT_PROVIDED ? operator->end : name->end)
            },
        },
        .operator_loc = YP_LOCATION_TOKEN_VALUE(operator),
        .name_loc = YP_OPTIONAL_LOCATION_TOKEN_VALUE(name)
    };

    return node;
}
// Allocate and initialize a new BlockParametersNode node. Both the opening
// token and the parameters are optional; when both are absent the location
// bounds start out NULL and are set later as block-locals are appended.
// Returns NULL on allocation failure.
static yp_block_parameters_node_t *
yp_block_parameters_node_create(yp_parser_t *parser, yp_parameters_node_t *parameters, const yp_token_t *opening) {
    yp_block_parameters_node_t *node = yp_alloc_node(parser, sizeof(yp_block_parameters_node_t));
    if (node == NULL) return NULL;

    bool has_opening = (opening->type != YP_TOKEN_NOT_PROVIDED);

    // The node starts at the opening pipe when there is one, otherwise at the
    // parameters when present, otherwise nowhere (yet).
    const char *start =
        has_opening ? opening->start :
        (parameters != NULL) ? parameters->base.location.start :
        NULL;

    // The node ends at the parameters when present, otherwise at the opening
    // pipe when there is one, otherwise nowhere (yet).
    const char *end =
        (parameters != NULL) ? parameters->base.location.end :
        has_opening ? opening->end :
        NULL;

    *node = (yp_block_parameters_node_t) {
        {
            .type = YP_NODE_BLOCK_PARAMETERS_NODE,
            .location = { .start = start, .end = end }
        },
        .parameters = parameters,
        .opening_loc = YP_OPTIONAL_LOCATION_TOKEN_VALUE(opening),
        .closing_loc = { .start = NULL, .end = NULL },
        .locals = YP_EMPTY_LOCATION_LIST
    };

    return node;
}
// Set the closing location of a BlockParametersNode node and extend the
// node's end to cover it.
static void
yp_block_parameters_node_closing_set(yp_block_parameters_node_t *node, const yp_token_t *closing) {
    assert(closing->type == YP_TOKEN_PIPE || closing->type == YP_TOKEN_PARENTHESIS_RIGHT || closing->type == YP_TOKEN_MISSING);
    node->base.location.end = closing->end;
    node->closing_loc = YP_LOCATION_TOKEN_VALUE(closing);
}
// Append a new block-local variable (the names after the `;` in |a; b, c|)
// to a BlockParametersNode node, extending the node's location to cover it.
static void
yp_block_parameters_node_append_local(yp_block_parameters_node_t *node, const yp_token_t *local) {
    assert(local->type == YP_TOKEN_IDENTIFIER);
    yp_location_list_append(&node->locals, local);
    // If neither an opening pipe nor parameters set a start, the first local
    // pins it.
    if (node->base.location.start == NULL) node->base.location.start = local->start;
    node->base.location.end = local->end;
}
// Allocate and initialize a new BreakNode node. The node spans from the
// `break` keyword through the arguments, if any were given.
static yp_break_node_t *
yp_break_node_create(yp_parser_t *parser, const yp_token_t *keyword, yp_arguments_node_t *arguments) {
    assert(keyword->type == YP_TOKEN_KEYWORD_BREAK);
    yp_break_node_t *result = YP_ALLOC_NODE(parser, yp_break_node_t);

    const char *end = keyword->end;
    if (arguments != NULL) {
        end = arguments->base.location.end;
    }

    *result = (yp_break_node_t) {
        {
            .type = YP_NODE_BREAK_NODE,
            .location = { .start = keyword->start, .end = end },
        },
        .arguments = arguments,
        .keyword_loc = YP_LOCATION_TOKEN_VALUE(keyword)
    };

    return result;
}
// Allocate and initialize a new CallNode node. Every field is set to NULL,
// zero, or a "not provided" location so that the specialized constructors
// below (aref/binary/call/fcall/...) only need to override what they use.
static yp_call_node_t *
yp_call_node_create(yp_parser_t *parser) {
    yp_call_node_t *result = YP_ALLOC_NODE(parser, yp_call_node_t);

    *result = (yp_call_node_t) {
        {
            .type = YP_NODE_CALL_NODE,
            .location = YP_LOCATION_NULL_VALUE(parser),
        },
        .receiver = NULL,
        .arguments = NULL,
        .block = NULL,
        .operator_loc = YP_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
        .message_loc = YP_LOCATION_NULL_VALUE(parser),
        .opening_loc = YP_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
        .closing_loc = YP_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
        .flags = 0
    };

    return result;
}
// Allocate and initialize a new CallNode node from an aref or an aset
// expression (`recv[args]`). The message is the full bracketed span and the
// method name is "[]".
static yp_call_node_t *
yp_call_node_aref_create(yp_parser_t *parser, yp_node_t *receiver, yp_arguments_t *arguments) {
    yp_call_node_t *result = yp_call_node_create(parser);

    result->base.location.start = receiver->location.start;
    result->base.location.end = (arguments->block != NULL)
        ? arguments->block->base.location.end
        : arguments->closing_loc.end;

    result->receiver = receiver;
    // The "message" of an index expression is the brackets themselves.
    result->message_loc.start = arguments->opening_loc.start;
    result->message_loc.end = arguments->closing_loc.end;
    result->opening_loc = arguments->opening_loc;
    result->arguments = arguments->arguments;
    result->closing_loc = arguments->closing_loc;
    result->block = arguments->block;

    yp_string_constant_init(&result->name, "[]", 2);
    return result;
}
// Allocate and initialize a new CallNode node from a binary expression
// (`left op right`): the left operand is the receiver, the operator is the
// message, and the right operand becomes the single argument.
static yp_call_node_t *
yp_call_node_binary_create(yp_parser_t *parser, yp_node_t *receiver, yp_token_t *operator, yp_node_t *argument) {
    yp_call_node_t *result = yp_call_node_create(parser);

    result->base.location.start = receiver->location.start;
    result->base.location.end = argument->location.end;
    result->receiver = receiver;
    result->message_loc = YP_OPTIONAL_LOCATION_TOKEN_VALUE(operator);

    yp_arguments_node_t *wrapped = yp_arguments_node_create(parser);
    yp_arguments_node_arguments_append(wrapped, argument);
    result->arguments = wrapped;

    yp_string_shared_init(&result->name, operator->start, operator->end);
    return result;
}
// Allocate and initialize a new CallNode node from a call expression with an
// explicit receiver (`recv.message(args)` or `recv&.message(args)`).
static yp_call_node_t *
yp_call_node_call_create(yp_parser_t *parser, yp_node_t *receiver, yp_token_t *operator, yp_token_t *message, yp_arguments_t *arguments) {
    yp_call_node_t *result = yp_call_node_create(parser);
    result->base.location.start = receiver->location.start;

    // The node ends at the outermost present piece: block, then closing
    // parenthesis, then the arguments, then the message itself.
    const char *end;
    if (arguments->block != NULL) {
        end = arguments->block->base.location.end;
    } else if (arguments->closing_loc.start != NULL) {
        end = arguments->closing_loc.end;
    } else if (arguments->arguments != NULL) {
        end = arguments->arguments->base.location.end;
    } else {
        end = message->end;
    }
    result->base.location.end = end;

    result->receiver = receiver;
    result->operator_loc = YP_OPTIONAL_LOCATION_TOKEN_VALUE(operator);
    result->message_loc = YP_OPTIONAL_LOCATION_TOKEN_VALUE(message);
    result->opening_loc = arguments->opening_loc;
    result->arguments = arguments->arguments;
    result->closing_loc = arguments->closing_loc;
    result->block = arguments->block;

    // `&.` marks this call as safe navigation.
    if (operator->type == YP_TOKEN_AMPERSAND_DOT) {
        result->flags |= YP_CALL_NODE_FLAGS_SAFE_NAVIGATION;
    }

    yp_string_shared_init(&result->name, message->start, message->end);
    return result;
}
// Allocate and initialize a new CallNode node from a call to a method name
// without a receiver that could not have been a local variable read.
static yp_call_node_t *
yp_call_node_fcall_create(yp_parser_t *parser, yp_token_t *message, yp_arguments_t *arguments) {
    yp_call_node_t *node = yp_call_node_create(parser);

    node->base.location.start = message->start;
    // The node ends at the outermost present piece: block, then closing
    // parenthesis, then the arguments, then the message itself.
    if (arguments->block != NULL) {
        node->base.location.end = arguments->block->base.location.end;
    } else if (arguments->closing_loc.start != NULL) {
        node->base.location.end = arguments->closing_loc.end;
    } else if (arguments->arguments != NULL) {
        node->base.location.end = arguments->arguments->base.location.end;
    } else {
        // Previously this fell back to arguments->closing_loc.end, but this
        // branch is only reached when closing_loc was not provided (its
        // start is NULL), so that would yield a NULL end. Fall back to the
        // message token instead, matching yp_call_node_call_create.
        node->base.location.end = message->end;
    }

    node->message_loc = YP_OPTIONAL_LOCATION_TOKEN_VALUE(message);
    node->opening_loc = arguments->opening_loc;
    node->arguments = arguments->arguments;
    node->closing_loc = arguments->closing_loc;
    node->block = arguments->block;

    yp_string_shared_init(&node->name, message->start, message->end);
    return node;
}
// Allocate and initialize a new CallNode node from a `not` expression. The
// receiver is the negated expression and the method name is "!".
static yp_call_node_t *
yp_call_node_not_create(yp_parser_t *parser, yp_node_t *receiver, yp_token_t *message, yp_arguments_t *arguments) {
    yp_call_node_t *result = yp_call_node_create(parser);

    result->base.location.start = message->start;
    result->base.location.end = (arguments->closing_loc.start != NULL)
        ? arguments->closing_loc.end
        : receiver->location.end;

    result->receiver = receiver;
    result->message_loc = YP_OPTIONAL_LOCATION_TOKEN_VALUE(message);
    result->opening_loc = arguments->opening_loc;
    result->arguments = arguments->arguments;
    result->closing_loc = arguments->closing_loc;

    yp_string_constant_init(&result->name, "!", 1);
    return result;
}
// Allocate and initialize a new CallNode node from a call shorthand
// expression (`recv.(args)` or `recv&.(args)`). The method name is "call".
static yp_call_node_t *
yp_call_node_shorthand_create(yp_parser_t *parser, yp_node_t *receiver, yp_token_t *operator, yp_arguments_t *arguments) {
    yp_call_node_t *result = yp_call_node_create(parser);

    result->base.location.start = receiver->location.start;
    result->base.location.end = (arguments->block != NULL)
        ? arguments->block->base.location.end
        : arguments->closing_loc.end;

    result->receiver = receiver;
    result->operator_loc = YP_OPTIONAL_LOCATION_TOKEN_VALUE(operator);
    result->opening_loc = arguments->opening_loc;
    result->arguments = arguments->arguments;
    result->closing_loc = arguments->closing_loc;
    result->block = arguments->block;

    // `&.` marks this call as safe navigation.
    if (operator->type == YP_TOKEN_AMPERSAND_DOT) {
        result->flags |= YP_CALL_NODE_FLAGS_SAFE_NAVIGATION;
    }

    yp_string_constant_init(&result->name, "call", 4);
    return result;
}
// Allocate and initialize a new CallNode node from a unary operator
// expression (`op recv`); the operator token is the message and `name` is
// the canonical method name (e.g. "-@").
static yp_call_node_t *
yp_call_node_unary_create(yp_parser_t *parser, yp_token_t *operator, yp_node_t *receiver, const char *name) {
    yp_call_node_t *result = yp_call_node_create(parser);

    result->base.location.start = operator->start;
    result->base.location.end = receiver->location.end;
    result->receiver = receiver;
    result->message_loc = YP_OPTIONAL_LOCATION_TOKEN_VALUE(operator);

    yp_string_constant_init(&result->name, name, strlen(name));
    return result;
}
// Allocate and initialize a new CallNode node from a call to a method name
// without a receiver that could also have been a local variable read (a
// "vcall"). The node covers exactly the message token.
static yp_call_node_t *
yp_call_node_vcall_create(yp_parser_t *parser, yp_token_t *message) {
    yp_call_node_t *result = yp_call_node_create(parser);

    result->base.location.start = message->start;
    result->base.location.end = message->end;
    result->message_loc = YP_OPTIONAL_LOCATION_TOKEN_VALUE(message);

    yp_string_shared_init(&result->name, message->start, message->end);
    return result;
}
// Returns whether or not this call node is a "vcall" (a call to a method name
// without a receiver that could also have been a local variable read): no
// receiver, no arguments, no block, and no opening parenthesis.
static inline bool
yp_call_node_vcall_p(yp_call_node_t *node) {
    if (node->receiver != NULL) return false;
    if (node->arguments != NULL) return false;
    if (node->block != NULL) return false;
    return node->opening_loc.start == NULL;
}
// Allocate and initialize a new CallOperatorAndWriteNode node
// (`recv.message &&= value`).
static yp_call_operator_and_write_node_t *
yp_call_operator_and_write_node_create(yp_parser_t *parser, yp_call_node_t *target, const yp_token_t *operator, yp_node_t *value) {
    assert(operator->type == YP_TOKEN_AMPERSAND_AMPERSAND_EQUAL);
    yp_call_operator_and_write_node_t *result = YP_ALLOC_NODE(parser, yp_call_operator_and_write_node_t);

    *result = (yp_call_operator_and_write_node_t) {
        {
            .type = YP_NODE_CALL_OPERATOR_AND_WRITE_NODE,
            .location = {
                .start = target->base.location.start,
                .end = value->location.end
            }
        },
        .target = target,
        .operator_loc = YP_LOCATION_TOKEN_VALUE(operator),
        .value = value
    };

    return result;
}
// Allocate and initialize a new CallOperatorWriteNode node
// (`recv.message op= value`). The operator id is the operator token minus
// its trailing `=` (e.g. `+=` interns as `+`).
static yp_call_operator_write_node_t *
yp_call_operator_write_node_create(yp_parser_t *parser, yp_call_node_t *target, const yp_token_t *operator, yp_node_t *value) {
    yp_call_operator_write_node_t *result = YP_ALLOC_NODE(parser, yp_call_operator_write_node_t);

    *result = (yp_call_operator_write_node_t) {
        {
            .type = YP_NODE_CALL_OPERATOR_WRITE_NODE,
            .location = {
                .start = target->base.location.start,
                .end = value->location.end
            }
        },
        .target = target,
        .operator_loc = YP_LOCATION_TOKEN_VALUE(operator),
        .value = value,
        .operator_id = yp_parser_constant_id_location(parser, operator->start, operator->end - 1)
    };

    return result;
}
// Allocate and initialize a new CallOperatorOrWriteNode node
// (`recv.message ||= value`).
static yp_call_operator_or_write_node_t *
yp_call_operator_or_write_node_create(yp_parser_t *parser, yp_call_node_t *target, const yp_token_t *operator, yp_node_t *value) {
    assert(operator->type == YP_TOKEN_PIPE_PIPE_EQUAL);
    yp_call_operator_or_write_node_t *result = YP_ALLOC_NODE(parser, yp_call_operator_or_write_node_t);

    *result = (yp_call_operator_or_write_node_t) {
        {
            .type = YP_NODE_CALL_OPERATOR_OR_WRITE_NODE,
            .location = {
                .start = target->base.location.start,
                .end = value->location.end
            }
        },
        .target = target,
        .operator_loc = YP_LOCATION_TOKEN_VALUE(operator),
        .value = value
    };

    return result;
}
// Allocate and initialize a new CapturePatternNode node
// (`value => target` inside a pattern match).
static yp_capture_pattern_node_t *
yp_capture_pattern_node_create(yp_parser_t *parser, yp_node_t *value, yp_node_t *target, const yp_token_t *operator) {
    yp_capture_pattern_node_t *result = YP_ALLOC_NODE(parser, yp_capture_pattern_node_t);

    *result = (yp_capture_pattern_node_t) {
        {
            .type = YP_NODE_CAPTURE_PATTERN_NODE,
            .location = {
                .start = value->location.start,
                .end = target->location.end
            },
        },
        .value = value,
        .target = target,
        .operator_loc = YP_LOCATION_TOKEN_VALUE(operator)
    };

    return result;
}
// Allocate and initialize a new CaseNode node. Conditions are appended
// afterwards via yp_case_node_condition_append, which also extends the
// node's end position.
static yp_case_node_t *
yp_case_node_create(yp_parser_t *parser, const yp_token_t *case_keyword, yp_node_t *predicate, yp_else_node_t *consequent, const yp_token_t *end_keyword) {
    yp_case_node_t *result = YP_ALLOC_NODE(parser, yp_case_node_t);

    *result = (yp_case_node_t) {
        {
            .type = YP_NODE_CASE_NODE,
            .location = {
                .start = case_keyword->start,
                .end = end_keyword->end
            },
        },
        .predicate = predicate,
        .consequent = consequent,
        .case_keyword_loc = YP_LOCATION_TOKEN_VALUE(case_keyword),
        .end_keyword_loc = YP_LOCATION_TOKEN_VALUE(end_keyword),
        .conditions = YP_EMPTY_NODE_LIST
    };

    return result;
}
// Append a new condition (a `when` or `in` clause) to a CaseNode node,
// extending the node's bounds to cover it.
static void
yp_case_node_condition_append(yp_case_node_t *node, yp_node_t *condition) {
    assert(condition->type == YP_NODE_WHEN_NODE || condition->type == YP_NODE_IN_NODE);

    node->base.location.end = condition->location.end;
    yp_node_list_append(&node->conditions, condition);
}
// Set the consequent (the `else` clause) of a CaseNode node, extending the
// node's bounds to cover it.
static void
yp_case_node_consequent_set(yp_case_node_t *node, yp_else_node_t *consequent) {
    node->base.location.end = consequent->base.location.end;
    node->consequent = consequent;
}
// Record the `end` keyword of a CaseNode node, extending the node's bounds
// to cover it.
static void
yp_case_node_end_keyword_loc_set(yp_case_node_t *node, const yp_token_t *end_keyword) {
    node->end_keyword_loc = YP_LOCATION_TOKEN_VALUE(end_keyword);
    node->base.location.end = end_keyword->end;
}
// Allocate and initialize a new ClassNode node, spanning from the `class`
// keyword through the `end` keyword.
static yp_class_node_t *
yp_class_node_create(yp_parser_t *parser, yp_constant_id_list_t *locals, const yp_token_t *class_keyword, yp_node_t *constant_path, const yp_token_t *inheritance_operator, yp_node_t *superclass, yp_node_t *statements, const yp_token_t *end_keyword) {
    yp_class_node_t *result = YP_ALLOC_NODE(parser, yp_class_node_t);

    *result = (yp_class_node_t) {
        {
            .type = YP_NODE_CLASS_NODE,
            .location = { .start = class_keyword->start, .end = end_keyword->end },
        },
        .locals = *locals,
        .class_keyword_loc = YP_LOCATION_TOKEN_VALUE(class_keyword),
        .constant_path = constant_path,
        .inheritance_operator_loc = YP_OPTIONAL_LOCATION_TOKEN_VALUE(inheritance_operator),
        .superclass = superclass,
        .statements = statements,
        .end_keyword_loc = YP_LOCATION_TOKEN_VALUE(end_keyword)
    };

    return result;
}
// Allocate and initialize a new ClassVariableOperatorAndWriteNode node
// (`@@cvar &&= value`).
static yp_class_variable_operator_and_write_node_t *
yp_class_variable_operator_and_write_node_create(yp_parser_t *parser, yp_node_t *target, const yp_token_t *operator, yp_node_t *value) {
    assert(target->type == YP_NODE_CLASS_VARIABLE_READ_NODE);
    assert(operator->type == YP_TOKEN_AMPERSAND_AMPERSAND_EQUAL);
    yp_class_variable_operator_and_write_node_t *result = YP_ALLOC_NODE(parser, yp_class_variable_operator_and_write_node_t);

    *result = (yp_class_variable_operator_and_write_node_t) {
        {
            .type = YP_NODE_CLASS_VARIABLE_OPERATOR_AND_WRITE_NODE,
            .location = {
                .start = target->location.start,
                .end = value->location.end
            }
        },
        .name_loc = target->location,
        .operator_loc = YP_LOCATION_TOKEN_VALUE(operator),
        .value = value
    };

    return result;
}
// Allocate and initialize a new ClassVariableOperatorWriteNode node
// (`@@cvar op= value`). The interned operator excludes the trailing `=`.
static yp_class_variable_operator_write_node_t *
yp_class_variable_operator_write_node_create(yp_parser_t *parser, yp_node_t *target, const yp_token_t *operator, yp_node_t *value) {
    yp_class_variable_operator_write_node_t *result = YP_ALLOC_NODE(parser, yp_class_variable_operator_write_node_t);

    *result = (yp_class_variable_operator_write_node_t) {
        {
            .type = YP_NODE_CLASS_VARIABLE_OPERATOR_WRITE_NODE,
            .location = {
                .start = target->location.start,
                .end = value->location.end
            }
        },
        .name_loc = target->location,
        .operator_loc = YP_LOCATION_TOKEN_VALUE(operator),
        .value = value,
        .operator = yp_parser_constant_id_location(parser, operator->start, operator->end - 1)
    };

    return result;
}
// Allocate and initialize a new ClassVariableOperatorOrWriteNode node
// (`@@cvar ||= value`).
static yp_class_variable_operator_or_write_node_t *
yp_class_variable_operator_or_write_node_create(yp_parser_t *parser, yp_node_t *target, const yp_token_t *operator, yp_node_t *value) {
    assert(target->type == YP_NODE_CLASS_VARIABLE_READ_NODE);
    assert(operator->type == YP_TOKEN_PIPE_PIPE_EQUAL);
    yp_class_variable_operator_or_write_node_t *result = YP_ALLOC_NODE(parser, yp_class_variable_operator_or_write_node_t);

    *result = (yp_class_variable_operator_or_write_node_t) {
        {
            .type = YP_NODE_CLASS_VARIABLE_OPERATOR_OR_WRITE_NODE,
            .location = {
                .start = target->location.start,
                .end = value->location.end
            }
        },
        .name_loc = target->location,
        .operator_loc = YP_LOCATION_TOKEN_VALUE(operator),
        .value = value
    };

    return result;
}
// Allocate and initialize a new ClassVariableReadNode node covering exactly
// the `@@cvar` token.
static yp_class_variable_read_node_t *
yp_class_variable_read_node_create(yp_parser_t *parser, const yp_token_t *token) {
    assert(token->type == YP_TOKEN_CLASS_VARIABLE);
    yp_class_variable_read_node_t *result = YP_ALLOC_NODE(parser, yp_class_variable_read_node_t);

    *result = (yp_class_variable_read_node_t) {
        { .type = YP_NODE_CLASS_VARIABLE_READ_NODE, .location = YP_LOCATION_TOKEN_VALUE(token) }
    };

    return result;
}
// Allocate and initialize a new ClassVariableWriteNode node from an existing
// ClassVariableReadNode (the target of the assignment). The value may be
// NULL, e.g. for a bare write target in multiple assignment.
static yp_class_variable_write_node_t *
yp_class_variable_read_node_to_class_variable_write_node(yp_parser_t *parser, yp_class_variable_read_node_t *read_node, yp_token_t *operator, yp_node_t *value) {
    yp_class_variable_write_node_t *result = YP_ALLOC_NODE(parser, yp_class_variable_write_node_t);

    const char *end = (value == NULL) ? read_node->base.location.end : value->location.end;

    *result = (yp_class_variable_write_node_t) {
        {
            .type = YP_NODE_CLASS_VARIABLE_WRITE_NODE,
            .location = {
                .start = read_node->base.location.start,
                .end = end
            },
        },
        .name_loc = YP_LOCATION_NODE_VALUE((yp_node_t *)read_node),
        .operator_loc = YP_OPTIONAL_LOCATION_TOKEN_VALUE(operator),
        .value = value
    };

    return result;
}
// Allocate and initialize a new ConstantPathOperatorAndWriteNode node
// (`Parent::Child &&= value`).
static yp_constant_path_operator_and_write_node_t *
yp_constant_path_operator_and_write_node_create(yp_parser_t *parser, yp_constant_path_node_t *target, const yp_token_t *operator, yp_node_t *value) {
    assert(operator->type == YP_TOKEN_AMPERSAND_AMPERSAND_EQUAL);
    yp_constant_path_operator_and_write_node_t *result = YP_ALLOC_NODE(parser, yp_constant_path_operator_and_write_node_t);

    *result = (yp_constant_path_operator_and_write_node_t) {
        {
            .type = YP_NODE_CONSTANT_PATH_OPERATOR_AND_WRITE_NODE,
            .location = {
                .start = target->base.location.start,
                .end = value->location.end
            }
        },
        .target = target,
        .operator_loc = YP_LOCATION_TOKEN_VALUE(operator),
        .value = value
    };

    return result;
}
// Allocate and initialize a new ConstantPathOperatorWriteNode node
// (`Parent::Child op= value`). The interned operator excludes the trailing `=`.
static yp_constant_path_operator_write_node_t *
yp_constant_path_operator_write_node_create(yp_parser_t *parser, yp_constant_path_node_t *target, const yp_token_t *operator, yp_node_t *value) {
    yp_constant_path_operator_write_node_t *result = YP_ALLOC_NODE(parser, yp_constant_path_operator_write_node_t);

    *result = (yp_constant_path_operator_write_node_t) {
        {
            .type = YP_NODE_CONSTANT_PATH_OPERATOR_WRITE_NODE,
            .location = {
                .start = target->base.location.start,
                .end = value->location.end
            }
        },
        .target = target,
        .operator_loc = YP_LOCATION_TOKEN_VALUE(operator),
        .value = value,
        .operator = yp_parser_constant_id_location(parser, operator->start, operator->end - 1)
    };

    return result;
}
// Allocate and initialize a new ConstantPathOperatorOrWriteNode node
// (`Parent::Child ||= value`).
static yp_constant_path_operator_or_write_node_t *
yp_constant_path_operator_or_write_node_create(yp_parser_t *parser, yp_constant_path_node_t *target, const yp_token_t *operator, yp_node_t *value) {
    assert(operator->type == YP_TOKEN_PIPE_PIPE_EQUAL);
    yp_constant_path_operator_or_write_node_t *result = YP_ALLOC_NODE(parser, yp_constant_path_operator_or_write_node_t);

    *result = (yp_constant_path_operator_or_write_node_t) {
        {
            .type = YP_NODE_CONSTANT_PATH_OPERATOR_OR_WRITE_NODE,
            .location = {
                .start = target->base.location.start,
                .end = value->location.end
            }
        },
        .target = target,
        .operator_loc = YP_LOCATION_TOKEN_VALUE(operator),
        .value = value
    };

    return result;
}
// Allocate and initialize a new ConstantPathNode node (`parent::child`). The
// parent may be NULL for a top-level reference (`::Child`), in which case the
// node starts at the `::` delimiter.
static yp_constant_path_node_t *
yp_constant_path_node_create(yp_parser_t *parser, yp_node_t *parent, const yp_token_t *delimiter, yp_node_t *child) {
    yp_constant_path_node_t *result = YP_ALLOC_NODE(parser, yp_constant_path_node_t);

    const char *start = (parent == NULL) ? delimiter->start : parent->location.start;

    *result = (yp_constant_path_node_t) {
        {
            .type = YP_NODE_CONSTANT_PATH_NODE,
            .location = {
                .start = start,
                .end = child->location.end
            },
        },
        .parent = parent,
        .child = child,
        .delimiter_loc = YP_LOCATION_TOKEN_VALUE(delimiter)
    };

    return result;
}
// Allocate and initialize a new ConstantPathWriteNode node. The value may be
// NULL, e.g. for a bare write target in multiple assignment.
static yp_constant_path_write_node_t *
yp_constant_path_write_node_create(yp_parser_t *parser, yp_node_t *target, const yp_token_t *operator, yp_node_t *value) {
    yp_constant_path_write_node_t *result = YP_ALLOC_NODE(parser, yp_constant_path_write_node_t);

    const char *end = (value == NULL) ? target->location.end : value->location.end;

    *result = (yp_constant_path_write_node_t) {
        {
            .type = YP_NODE_CONSTANT_PATH_WRITE_NODE,
            .location = {
                .start = target->location.start,
                .end = end
            },
        },
        .target = target,
        .operator_loc = YP_OPTIONAL_LOCATION_TOKEN_VALUE(operator),
        .value = value
    };

    return result;
}
// Allocate and initialize a new ConstantOperatorAndWriteNode node
// (`CONST &&= value`).
static yp_constant_operator_and_write_node_t *
yp_constant_operator_and_write_node_create(yp_parser_t *parser, yp_node_t *target, const yp_token_t *operator, yp_node_t *value) {
    assert(target->type == YP_NODE_CONSTANT_READ_NODE);
    assert(operator->type == YP_TOKEN_AMPERSAND_AMPERSAND_EQUAL);
    yp_constant_operator_and_write_node_t *result = YP_ALLOC_NODE(parser, yp_constant_operator_and_write_node_t);

    *result = (yp_constant_operator_and_write_node_t) {
        {
            .type = YP_NODE_CONSTANT_OPERATOR_AND_WRITE_NODE,
            .location = {
                .start = target->location.start,
                .end = value->location.end
            }
        },
        .name_loc = target->location,
        .operator_loc = YP_LOCATION_TOKEN_VALUE(operator),
        .value = value
    };

    return result;
}
// Allocate and initialize a new ConstantOperatorWriteNode node
// (`CONST op= value`). The interned operator excludes the trailing `=`.
static yp_constant_operator_write_node_t *
yp_constant_operator_write_node_create(yp_parser_t *parser, yp_node_t *target, const yp_token_t *operator, yp_node_t *value) {
    yp_constant_operator_write_node_t *result = YP_ALLOC_NODE(parser, yp_constant_operator_write_node_t);

    *result = (yp_constant_operator_write_node_t) {
        {
            .type = YP_NODE_CONSTANT_OPERATOR_WRITE_NODE,
            .location = {
                .start = target->location.start,
                .end = value->location.end
            }
        },
        .name_loc = target->location,
        .operator_loc = YP_LOCATION_TOKEN_VALUE(operator),
        .value = value,
        .operator = yp_parser_constant_id_location(parser, operator->start, operator->end - 1)
    };

    return result;
}
// Allocate and initialize a new ConstantOperatorOrWriteNode node
// (`CONST ||= value`).
static yp_constant_operator_or_write_node_t *
yp_constant_operator_or_write_node_create(yp_parser_t *parser, yp_node_t *target, const yp_token_t *operator, yp_node_t *value) {
    assert(target->type == YP_NODE_CONSTANT_READ_NODE);
    assert(operator->type == YP_TOKEN_PIPE_PIPE_EQUAL);
    yp_constant_operator_or_write_node_t *result = YP_ALLOC_NODE(parser, yp_constant_operator_or_write_node_t);

    *result = (yp_constant_operator_or_write_node_t) {
        {
            .type = YP_NODE_CONSTANT_OPERATOR_OR_WRITE_NODE,
            .location = {
                .start = target->location.start,
                .end = value->location.end
            }
        },
        .name_loc = target->location,
        .operator_loc = YP_LOCATION_TOKEN_VALUE(operator),
        .value = value
    };

    return result;
}
// Allocate and initialize a new ConstantReadNode node covering exactly the
// constant token (which may be a MISSING token from error recovery).
static yp_constant_read_node_t *
yp_constant_read_node_create(yp_parser_t *parser, const yp_token_t *name) {
    assert(name->type == YP_TOKEN_CONSTANT || name->type == YP_TOKEN_MISSING);
    yp_constant_read_node_t *result = YP_ALLOC_NODE(parser, yp_constant_read_node_t);

    *result = (yp_constant_read_node_t) {
        { .type = YP_NODE_CONSTANT_READ_NODE, .location = YP_LOCATION_TOKEN_VALUE(name) }
    };

    return result;
}
// Allocate and initialize a new DefNode node. When the `end` keyword is not
// provided (an endless method definition) the node ends at the statements.
static yp_def_node_t *
yp_def_node_create(
    yp_parser_t *parser,
    const yp_token_t *name,
    yp_node_t *receiver,
    yp_parameters_node_t *parameters,
    yp_node_t *statements,
    yp_constant_id_list_t *locals,
    const yp_token_t *def_keyword,
    const yp_token_t *operator,
    const yp_token_t *lparen,
    const yp_token_t *rparen,
    const yp_token_t *equal,
    const yp_token_t *end_keyword
) {
    yp_def_node_t *result = YP_ALLOC_NODE(parser, yp_def_node_t);

    const char *end = (end_keyword->type == YP_TOKEN_NOT_PROVIDED)
        ? statements->location.end
        : end_keyword->end;

    *result = (yp_def_node_t) {
        {
            .type = YP_NODE_DEF_NODE,
            .location = { .start = def_keyword->start, .end = end },
        },
        .name_loc = YP_LOCATION_TOKEN_VALUE(name),
        .receiver = receiver,
        .parameters = parameters,
        .statements = statements,
        .locals = *locals,
        .def_keyword_loc = YP_LOCATION_TOKEN_VALUE(def_keyword),
        .operator_loc = YP_OPTIONAL_LOCATION_TOKEN_VALUE(operator),
        .lparen_loc = YP_OPTIONAL_LOCATION_TOKEN_VALUE(lparen),
        .rparen_loc = YP_OPTIONAL_LOCATION_TOKEN_VALUE(rparen),
        .equal_loc = YP_OPTIONAL_LOCATION_TOKEN_VALUE(equal),
        .end_keyword_loc = YP_OPTIONAL_LOCATION_TOKEN_VALUE(end_keyword)
    };

    return result;
}
// Allocate and initialize a new DefinedNode node (`defined?(value)`). When
// the closing parenthesis is not provided the node ends at the value.
static yp_defined_node_t *
yp_defined_node_create(yp_parser_t *parser, const yp_token_t *lparen, yp_node_t *value, const yp_token_t *rparen, const yp_location_t *keyword_loc) {
    yp_defined_node_t *node = YP_ALLOC_NODE(parser, yp_defined_node_t);

    const char *end = (rparen->type == YP_TOKEN_NOT_PROVIDED) ? value->location.end : rparen->end;

    *node = (yp_defined_node_t) {
        {
            .type = YP_NODE_DEFINED_NODE,
            .location = {
                .start = keyword_loc->start,
                .end = end
            },
        },
        .lparen_loc = YP_OPTIONAL_LOCATION_TOKEN_VALUE(lparen),
        .value = value,
        .rparen_loc = YP_OPTIONAL_LOCATION_TOKEN_VALUE(rparen),
        .keyword_loc = *keyword_loc
    };

    return node;
}
// Allocate and initialize a new ElseNode node. When there is no `end`
// keyword but there are statements, the node ends at the statements.
static yp_else_node_t *
yp_else_node_create(yp_parser_t *parser, const yp_token_t *else_keyword, yp_statements_node_t *statements, const yp_token_t *end_keyword) {
    yp_else_node_t *result = YP_ALLOC_NODE(parser, yp_else_node_t);

    const char *end = end_keyword->end;
    if ((end_keyword->type == YP_TOKEN_NOT_PROVIDED) && (statements != NULL)) {
        end = statements->base.location.end;
    }

    *result = (yp_else_node_t) {
        {
            .type = YP_NODE_ELSE_NODE,
            .location = {
                .start = else_keyword->start,
                .end = end,
            },
        },
        .else_keyword_loc = YP_LOCATION_TOKEN_VALUE(else_keyword),
        .statements = statements,
        .end_keyword_loc = YP_OPTIONAL_LOCATION_TOKEN_VALUE(end_keyword)
    };

    return result;
}
// Allocate and initialize a new EmbeddedStatementsNode node (the `#{...}`
// interpolation inside a string-like literal).
static yp_embedded_statements_node_t *
yp_embedded_statements_node_create(yp_parser_t *parser, const yp_token_t *opening, yp_statements_node_t *statements, const yp_token_t *closing) {
    yp_embedded_statements_node_t *result = YP_ALLOC_NODE(parser, yp_embedded_statements_node_t);

    *result = (yp_embedded_statements_node_t) {
        {
            .type = YP_NODE_EMBEDDED_STATEMENTS_NODE,
            .location = {
                .start = opening->start,
                .end = closing->end
            }
        },
        .opening_loc = YP_LOCATION_TOKEN_VALUE(opening),
        .statements = statements,
        .closing_loc = YP_LOCATION_TOKEN_VALUE(closing)
    };

    return result;
}
// Allocate and initialize a new EmbeddedVariableNode node (the `#@ivar` or
// `#$gvar` shorthand interpolation inside a string-like literal).
static yp_embedded_variable_node_t *
yp_embedded_variable_node_create(yp_parser_t *parser, const yp_token_t *operator, yp_node_t *variable) {
    yp_embedded_variable_node_t *result = YP_ALLOC_NODE(parser, yp_embedded_variable_node_t);

    *result = (yp_embedded_variable_node_t) {
        {
            .type = YP_NODE_EMBEDDED_VARIABLE_NODE,
            .location = {
                .start = operator->start,
                .end = variable->location.end
            }
        },
        .operator_loc = YP_LOCATION_TOKEN_VALUE(operator),
        .variable = variable
    };

    return result;
}
// Allocate and initialize a new EnsureNode node, spanning from the `ensure`
// keyword through the `end` keyword.
static yp_ensure_node_t *
yp_ensure_node_create(yp_parser_t *parser, const yp_token_t *ensure_keyword, yp_statements_node_t *statements, const yp_token_t *end_keyword) {
    yp_ensure_node_t *result = YP_ALLOC_NODE(parser, yp_ensure_node_t);

    *result = (yp_ensure_node_t) {
        {
            .type = YP_NODE_ENSURE_NODE,
            .location = {
                .start = ensure_keyword->start,
                .end = end_keyword->end
            },
        },
        .ensure_keyword_loc = YP_LOCATION_TOKEN_VALUE(ensure_keyword),
        .statements = statements,
        .end_keyword_loc = YP_LOCATION_TOKEN_VALUE(end_keyword)
    };

    return result;
}
// Allocate and initialize a new FalseNode node covering exactly the `false`
// keyword token.
static yp_false_node_t *
yp_false_node_create(yp_parser_t *parser, const yp_token_t *token) {
    assert(token->type == YP_TOKEN_KEYWORD_FALSE);
    yp_false_node_t *result = YP_ALLOC_NODE(parser, yp_false_node_t);

    *result = (yp_false_node_t) {
        { .type = YP_NODE_FALSE_NODE, .location = YP_LOCATION_TOKEN_VALUE(token) }
    };

    return result;
}
// Allocate and initialize a new find pattern node (`[*, a, b, *]`). The
// first and last entries of the node list become the left and right splats;
// everything in between becomes the required entries. If there is only one
// entry, a missing node is synthesized for the right side.
static yp_find_pattern_node_t *
yp_find_pattern_node_create(yp_parser_t *parser, yp_node_list_t *nodes) {
    yp_find_pattern_node_t *result = YP_ALLOC_NODE(parser, yp_find_pattern_node_t);

    yp_node_t *left = nodes->nodes[0];
    yp_node_t *right = (nodes->size == 1)
        ? (yp_node_t *) yp_missing_node_create(parser, left->location.end, left->location.end)
        : nodes->nodes[nodes->size - 1];

    *result = (yp_find_pattern_node_t) {
        {
            .type = YP_NODE_FIND_PATTERN_NODE,
            .location = {
                .start = left->location.start,
                .end = right->location.end,
            },
        },
        .constant = NULL,
        .left = left,
        .right = right,
        .requireds = YP_EMPTY_NODE_LIST,
        .opening_loc = YP_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
        .closing_loc = YP_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE
    };

    // Copy the interior entries (1 .. size-2) into the requireds list. This
    // could be more efficient by resizing the source list instead, but for
    // now each pointer is copied over individually.
    for (size_t position = 1; position < nodes->size - 1; position++) {
        yp_node_list_append(&result->requireds, nodes->nodes[position]);
    }

    return result;
}
// Allocate and initialize a new FloatNode node covering exactly the float
// literal token.
static yp_float_node_t *
yp_float_node_create(yp_parser_t *parser, const yp_token_t *token) {
    assert(token->type == YP_TOKEN_FLOAT);
    yp_float_node_t *result = YP_ALLOC_NODE(parser, yp_float_node_t);

    *result = (yp_float_node_t) {
        { .type = YP_NODE_FLOAT_NODE, .location = YP_LOCATION_TOKEN_VALUE(token) }
    };

    return result;
}
// Allocate and initialize a new ForNode node
// (`for index in collection [do] statements end`).
static yp_for_node_t *
yp_for_node_create(
    yp_parser_t *parser,
    yp_node_t *index,
    yp_node_t *collection,
    yp_statements_node_t *statements,
    const yp_token_t *for_keyword,
    const yp_token_t *in_keyword,
    const yp_token_t *do_keyword,
    const yp_token_t *end_keyword
) {
    yp_for_node_t *result = YP_ALLOC_NODE(parser, yp_for_node_t);

    *result = (yp_for_node_t) {
        {
            .type = YP_NODE_FOR_NODE,
            .location = {
                .start = for_keyword->start,
                .end = end_keyword->end
            },
        },
        .index = index,
        .collection = collection,
        .statements = statements,
        .for_keyword_loc = YP_LOCATION_TOKEN_VALUE(for_keyword),
        .in_keyword_loc = YP_LOCATION_TOKEN_VALUE(in_keyword),
        .do_keyword_loc = YP_OPTIONAL_LOCATION_TOKEN_VALUE(do_keyword),
        .end_keyword_loc = YP_LOCATION_TOKEN_VALUE(end_keyword)
    };

    return result;
}
// Allocate and initialize a new ForwardingArgumentsNode node covering
// exactly the `...` token in an argument list.
static yp_forwarding_arguments_node_t *
yp_forwarding_arguments_node_create(yp_parser_t *parser, const yp_token_t *token) {
    assert(token->type == YP_TOKEN_UDOT_DOT_DOT);
    yp_forwarding_arguments_node_t *result = YP_ALLOC_NODE(parser, yp_forwarding_arguments_node_t);

    *result = (yp_forwarding_arguments_node_t) {
        { .type = YP_NODE_FORWARDING_ARGUMENTS_NODE, .location = YP_LOCATION_TOKEN_VALUE(token) }
    };

    return result;
}
// Allocate and initialize a new ForwardingParameterNode node covering
// exactly the `...` token in a parameter list.
static yp_forwarding_parameter_node_t *
yp_forwarding_parameter_node_create(yp_parser_t *parser, const yp_token_t *token) {
    assert(token->type == YP_TOKEN_UDOT_DOT_DOT);
    yp_forwarding_parameter_node_t *result = YP_ALLOC_NODE(parser, yp_forwarding_parameter_node_t);

    *result = (yp_forwarding_parameter_node_t) {
        { .type = YP_NODE_FORWARDING_PARAMETER_NODE, .location = YP_LOCATION_TOKEN_VALUE(token) }
    };

    return result;
}
// Allocate and initialize a new ForwardingSuperNode node (a bare `super`
// with no argument list, optionally followed by a block).
static yp_forwarding_super_node_t *
yp_forwarding_super_node_create(yp_parser_t *parser, const yp_token_t *token, yp_arguments_t *arguments) {
    assert(token->type == YP_TOKEN_KEYWORD_SUPER);
    yp_forwarding_super_node_t *result = YP_ALLOC_NODE(parser, yp_forwarding_super_node_t);

    const char *end = (arguments->block != NULL) ? arguments->block->base.location.end : token->end;

    *result = (yp_forwarding_super_node_t) {
        {
            .type = YP_NODE_FORWARDING_SUPER_NODE,
            .location = {
                .start = token->start,
                .end = end
            },
        },
        .block = arguments->block
    };

    return result;
}
// Allocate and initialize a new hash pattern node from just an opening and a
// closing token (an empty `{}` pattern); assocs are left empty.
static yp_hash_pattern_node_t *
yp_hash_pattern_node_empty_create(yp_parser_t *parser, const yp_token_t *opening, const yp_token_t *closing) {
    yp_hash_pattern_node_t *result = YP_ALLOC_NODE(parser, yp_hash_pattern_node_t);

    *result = (yp_hash_pattern_node_t) {
        {
            .type = YP_NODE_HASH_PATTERN_NODE,
            .location = {
                .start = opening->start,
                .end = closing->end
            },
        },
        .constant = NULL,
        .kwrest = NULL,
        .opening_loc = YP_LOCATION_TOKEN_VALUE(opening),
        .closing_loc = YP_LOCATION_TOKEN_VALUE(closing),
        .assocs = YP_EMPTY_NODE_LIST
    };

    return result;
}
// Allocate and initialize a new hash pattern node from a list of assocs.
// Assumes `assocs` is non-empty: the node's location is derived from the
// first and last assoc, and there are no delimiter tokens.
static yp_hash_pattern_node_t *
yp_hash_pattern_node_node_list_create(yp_parser_t *parser, yp_node_list_t *assocs) {
    yp_hash_pattern_node_t *node = YP_ALLOC_NODE(parser, yp_hash_pattern_node_t);
    *node = (yp_hash_pattern_node_t) {
        {
            .type = YP_NODE_HASH_PATTERN_NODE,
            .location = {
                .start = assocs->nodes[0]->location.start,
                .end = assocs->nodes[assocs->size - 1]->location.end
            },
        },
        .constant = NULL,
        .kwrest = NULL,
        .assocs = YP_EMPTY_NODE_LIST,
        .opening_loc = YP_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
        .closing_loc = YP_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE
    };
    // Copy the assoc pointers into the node's own list; the caller's list
    // structure is not retained.
    for (size_t index = 0; index < assocs->size; index++) {
        yp_node_t *assoc = assocs->nodes[index];
        yp_node_list_append(&node->assocs, assoc);
    }
    return node;
}
// Allocate and initialize a new GlobalVariableOperatorAndWriteNode node
// (`$foo &&= value`).
static yp_global_variable_operator_and_write_node_t *
yp_global_variable_operator_and_write_node_create(yp_parser_t *parser, yp_node_t *target, const yp_token_t *operator, yp_node_t *value) {
    assert(target->type == YP_NODE_GLOBAL_VARIABLE_READ_NODE);
    assert(operator->type == YP_TOKEN_AMPERSAND_AMPERSAND_EQUAL);
    yp_global_variable_operator_and_write_node_t *node = YP_ALLOC_NODE(parser, yp_global_variable_operator_and_write_node_t);
    *node = (yp_global_variable_operator_and_write_node_t) {
        {
            .type = YP_NODE_GLOBAL_VARIABLE_OPERATOR_AND_WRITE_NODE,
            .location = {
                .start = target->location.start,
                .end = value->location.end
            }
        },
        // Only the target's location is kept; the read node itself is not
        // retained by this node.
        .name_loc = target->location,
        .operator_loc = YP_LOCATION_TOKEN_VALUE(operator),
        .value = value
    };
    return node;
}
// Allocate and initialize a new GlobalVariableOperatorWriteNode node, i.e. a
// compound assignment to a global variable (e.g. `$foo += value`) with any
// operator other than `&&=`/`||=`, which have dedicated node types.
static yp_global_variable_operator_write_node_t *
yp_global_variable_operator_write_node_create(yp_parser_t *parser, yp_node_t *target, const yp_token_t *operator, yp_node_t *value) {
    // Mirror the invariant asserted by the sibling &&=/||= constructors: the
    // target must be a global variable read whose location becomes name_loc.
    assert(target->type == YP_NODE_GLOBAL_VARIABLE_READ_NODE);

    yp_global_variable_operator_write_node_t *node = YP_ALLOC_NODE(parser, yp_global_variable_operator_write_node_t);
    *node = (yp_global_variable_operator_write_node_t) {
        {
            .type = YP_NODE_GLOBAL_VARIABLE_OPERATOR_WRITE_NODE,
            .location = {
                .start = target->location.start,
                .end = value->location.end
            }
        },
        .name_loc = target->location,
        .operator_loc = YP_LOCATION_TOKEN_VALUE(operator),
        .value = value,
        // The operator token includes the trailing `=` (e.g. `+=`), so drop
        // the final character to intern just the binary operator's name.
        .operator = yp_parser_constant_id_location(parser, operator->start, operator->end - 1)
    };
    return node;
}
// Allocate and initialize a new GlobalVariableOperatorOrWriteNode node
// (`$foo ||= value`).
static yp_global_variable_operator_or_write_node_t *
yp_global_variable_operator_or_write_node_create(yp_parser_t *parser, yp_node_t *target, const yp_token_t *operator, yp_node_t *value) {
    assert(target->type == YP_NODE_GLOBAL_VARIABLE_READ_NODE);
    assert(operator->type == YP_TOKEN_PIPE_PIPE_EQUAL);
    yp_global_variable_operator_or_write_node_t *node = YP_ALLOC_NODE(parser, yp_global_variable_operator_or_write_node_t);
    *node = (yp_global_variable_operator_or_write_node_t) {
        {
            .type = YP_NODE_GLOBAL_VARIABLE_OPERATOR_OR_WRITE_NODE,
            .location = {
                .start = target->location.start,
                .end = value->location.end
            }
        },
        // Only the target's location is kept; the read node itself is not
        // retained by this node.
        .name_loc = target->location,
        .operator_loc = YP_LOCATION_TOKEN_VALUE(operator),
        .value = value
    };
    return node;
}
// Allocate and initialize a new GlobalVariableReadNode node from the token
// holding the variable's name.
static yp_global_variable_read_node_t *
yp_global_variable_read_node_create(yp_parser_t *parser, const yp_token_t *name) {
    yp_global_variable_read_node_t *created = YP_ALLOC_NODE(parser, yp_global_variable_read_node_t);

    *created = (yp_global_variable_read_node_t) {
        {
            .type = YP_NODE_GLOBAL_VARIABLE_READ_NODE,
            .location = YP_LOCATION_TOKEN_VALUE(name),
        }
    };

    return created;
}
// Allocate a new GlobalVariableWriteNode node. `value` (and correspondingly
// the operator) may be NULL, in which case the node is a bare write target
// and its location covers only the name.
static yp_global_variable_write_node_t *
yp_global_variable_write_node_create(yp_parser_t *parser, const yp_location_t *name_loc, const yp_token_t *operator, yp_node_t *value) {
    yp_global_variable_write_node_t *node = YP_ALLOC_NODE(parser, yp_global_variable_write_node_t);
    *node = (yp_global_variable_write_node_t) {
        {
            .type = YP_NODE_GLOBAL_VARIABLE_WRITE_NODE,
            .location = {
                .start = name_loc->start,
                .end = (value == NULL ? name_loc->end : value->location.end)
            },
        },
        .name_loc = *name_loc,
        .operator_loc = YP_OPTIONAL_LOCATION_TOKEN_VALUE(operator),
        .value = value
    };
    return node;
}
// Allocate a new HashNode node from its opening brace. The closing location
// starts out null and is filled in later by yp_hash_node_closing_loc_set once
// the closing token has been parsed.
static yp_hash_node_t *
yp_hash_node_create(yp_parser_t *parser, const yp_token_t *opening) {
    assert(opening != NULL);
    yp_hash_node_t *node = YP_ALLOC_NODE(parser, yp_hash_node_t);
    *node = (yp_hash_node_t) {
        {
            .type = YP_NODE_HASH_NODE,
            .location = {
                .start = opening->start,
                .end = opening->end
            },
        },
        .opening_loc = YP_LOCATION_TOKEN_VALUE(opening),
        .closing_loc = YP_LOCATION_NULL_VALUE(parser),
        .elements = YP_EMPTY_NODE_LIST
    };
    return node;
}
// Append an element to the given HashNode node.
static inline void
yp_hash_node_elements_append(yp_hash_node_t *hash, yp_node_t *element) {
    yp_node_list_append(&hash->elements, element);
}
// Set the closing brace of the given HashNode node, extending the node's
// location through it.
static inline void
yp_hash_node_closing_loc_set(yp_hash_node_t *hash, yp_token_t *token) {
    hash->base.location.end = token->end;
    hash->closing_loc = YP_LOCATION_TOKEN_VALUE(token);
}
// Allocate a new IfNode node. Any of end_keyword (as NOT_PROVIDED),
// consequent, or statements may be absent; the node's end is taken from the
// first of those that is present, falling back to the predicate.
static yp_if_node_t *
yp_if_node_create(yp_parser_t *parser,
    const yp_token_t *if_keyword,
    yp_node_t *predicate,
    yp_statements_node_t *statements,
    yp_node_t *consequent,
    const yp_token_t *end_keyword
) {
    yp_if_node_t *node = YP_ALLOC_NODE(parser, yp_if_node_t);
    // Pick the end of the node by priority: explicit `end` keyword, then the
    // elsif/else consequent, then a non-empty statements body, then the
    // predicate itself.
    const char *end;
    if (end_keyword->type != YP_TOKEN_NOT_PROVIDED) {
        end = end_keyword->end;
    } else if (consequent != NULL) {
        end = consequent->location.end;
    } else if ((statements != NULL) && (statements->body.size != 0)) {
        end = statements->base.location.end;
    } else {
        end = predicate->location.end;
    }
    *node = (yp_if_node_t) {
        {
            .type = YP_NODE_IF_NODE,
            .location = {
                .start = if_keyword->start,
                .end = end
            },
        },
        .if_keyword_loc = YP_LOCATION_TOKEN_VALUE(if_keyword),
        .predicate = predicate,
        .statements = statements,
        .consequent = consequent,
        .end_keyword_loc = YP_OPTIONAL_LOCATION_TOKEN_VALUE(end_keyword)
    };
    return node;
}
// Allocate and initialize new IfNode node in the modifier form
// (`statement if predicate`). The single statement is wrapped in a fresh
// statements node, and the node spans from the statement to the predicate.
static yp_if_node_t *
yp_if_node_modifier_create(yp_parser_t *parser, yp_node_t *statement, const yp_token_t *if_keyword, yp_node_t *predicate) {
    yp_if_node_t *node = YP_ALLOC_NODE(parser, yp_if_node_t);
    yp_statements_node_t *statements = yp_statements_node_create(parser);
    yp_statements_node_body_append(statements, statement);
    *node = (yp_if_node_t) {
        {
            .type = YP_NODE_IF_NODE,
            .location = {
                .start = statement->location.start,
                .end = predicate->location.end
            },
        },
        .if_keyword_loc = YP_LOCATION_TOKEN_VALUE(if_keyword),
        .predicate = predicate,
        .statements = statements,
        .consequent = NULL,
        .end_keyword_loc = YP_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE
    };
    return node;
}
// Allocate and initialize an if node from a ternary expression
// (`predicate ? true_expression : false_expression`). Both branches are
// wrapped in statements nodes, and the `:` plays the role of the `else`
// keyword for the consequent ElseNode.
static yp_if_node_t *
yp_if_node_ternary_create(yp_parser_t *parser, yp_node_t *predicate, yp_node_t *true_expression, const yp_token_t *colon, yp_node_t *false_expression) {
    yp_statements_node_t *if_statements = yp_statements_node_create(parser);
    yp_statements_node_body_append(if_statements, true_expression);
    yp_statements_node_t *else_statements = yp_statements_node_create(parser);
    yp_statements_node_body_append(else_statements, false_expression);
    yp_token_t end_keyword = not_provided(parser);
    yp_else_node_t *else_node = yp_else_node_create(parser, colon, else_statements, &end_keyword);
    yp_if_node_t *node = YP_ALLOC_NODE(parser, yp_if_node_t);
    *node = (yp_if_node_t) {
        {
            .type = YP_NODE_IF_NODE,
            .location = {
                .start = predicate->location.start,
                .end = false_expression->location.end,
            },
        },
        .if_keyword_loc = YP_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
        .predicate = predicate,
        .statements = if_statements,
        .consequent = (yp_node_t *)else_node,
        .end_keyword_loc = YP_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE
    };
    return node;
}
// Set the end keyword of the given IfNode node, extending the node's
// location through it.
static inline void
yp_if_node_end_keyword_loc_set(yp_if_node_t *node, const yp_token_t *keyword) {
    node->base.location.end = keyword->end;
    node->end_keyword_loc = YP_LOCATION_TOKEN_VALUE(keyword);
}
// Set the end keyword of the given ElseNode node, extending the node's
// location through it.
static inline void
yp_else_node_end_keyword_loc_set(yp_else_node_t *node, const yp_token_t *keyword) {
    node->base.location.end = keyword->end;
    node->end_keyword_loc = YP_LOCATION_TOKEN_VALUE(keyword);
}
// Allocate and initialize a new IntegerNode node from an integer literal
// token.
static yp_integer_node_t *
yp_integer_node_create(yp_parser_t *parser, const yp_token_t *token) {
    assert(token->type == YP_TOKEN_INTEGER);

    yp_integer_node_t *created = YP_ALLOC_NODE(parser, yp_integer_node_t);
    *created = (yp_integer_node_t) {
        { .type = YP_NODE_INTEGER_NODE, .location = YP_LOCATION_TOKEN_VALUE(token) }
    };

    return created;
}
// Allocate and initialize a new RationalNode node. When the rational token
// was lexed, a YP_LEX_NUMERIC lex mode recording the type and bounds of the
// underlying numeric literal was pushed; that is consumed here to build the
// inner numeric node, and the lex mode is popped on every path.
static yp_rational_node_t *
yp_rational_node_create(yp_parser_t *parser, const yp_token_t *token) {
    assert(token->type == YP_TOKEN_RATIONAL_NUMBER);
    assert(parser->lex_modes.current->mode == YP_LEX_NUMERIC);
    yp_node_t *numeric_node;
    // Reconstruct a token for the numeric portion from the data stashed in
    // the lex mode before popping it.
    yp_token_t numeric_token = {
        .type = parser->lex_modes.current->as.numeric.type,
        .start = parser->lex_modes.current->as.numeric.start,
        .end = parser->lex_modes.current->as.numeric.end
    };
    switch (parser->lex_modes.current->as.numeric.type) {
        case YP_TOKEN_INTEGER: {
            lex_mode_pop(parser);
            numeric_node = (yp_node_t *)yp_integer_node_create(parser, &numeric_token);
            break;
        }
        case YP_TOKEN_FLOAT: {
            lex_mode_pop(parser);
            numeric_node = (yp_node_t *)yp_float_node_create(parser, &numeric_token);
            break;
        }
        default: {
            lex_mode_pop(parser);
            numeric_node = (yp_node_t *)yp_missing_node_create(parser, numeric_token.start, numeric_token.end);
            (void)numeric_node; // Suppress clang-analyzer-deadcode.DeadStores warning
            assert(false && "unreachable");
        }
    }
    yp_rational_node_t *node = YP_ALLOC_NODE(parser, yp_rational_node_t);
    *node = (yp_rational_node_t) {
        { .type = YP_NODE_RATIONAL_NODE, .location = YP_LOCATION_TOKEN_VALUE(token) },
        .numeric = numeric_node,
    };
    // The numeric lex mode must have been popped by the switch above.
    assert(parser->lex_modes.current->mode != YP_LEX_NUMERIC);
    return node;
}
// Allocate and initialize a new ImaginaryNode node. As with RationalNode, the
// YP_LEX_NUMERIC lex mode pushed while lexing the token records the type and
// bounds of the underlying numeric literal (which may itself be a rational);
// it is consumed here to build the inner numeric node and popped on every
// path.
static yp_imaginary_node_t *
yp_imaginary_node_create(yp_parser_t *parser, const yp_token_t *token) {
    assert(token->type == YP_TOKEN_IMAGINARY_NUMBER);
    assert(parser->lex_modes.current->mode == YP_LEX_NUMERIC);
    yp_node_t *numeric_node;
    // Reconstruct a token for the numeric portion from the data stashed in
    // the lex mode before popping it.
    yp_token_t numeric_token = {
        .type = parser->lex_modes.current->as.numeric.type,
        .start = parser->lex_modes.current->as.numeric.start,
        .end = parser->lex_modes.current->as.numeric.end
    };
    switch (parser->lex_modes.current->as.numeric.type) {
        case YP_TOKEN_INTEGER: {
            lex_mode_pop(parser);
            numeric_node = (yp_node_t *)yp_integer_node_create(parser, &numeric_token);
            break;
        }
        case YP_TOKEN_FLOAT: {
            lex_mode_pop(parser);
            numeric_node = (yp_node_t *)yp_float_node_create(parser, &numeric_token);
            break;
        }
        case YP_TOKEN_RATIONAL_NUMBER: {
            lex_mode_pop(parser);
            numeric_node = (yp_node_t *)yp_rational_node_create(parser, &numeric_token);
            break;
        }
        default: {
            lex_mode_pop(parser);
            numeric_node = (yp_node_t *)yp_missing_node_create(parser, numeric_token.start, numeric_token.end);
            (void)numeric_node; // Suppress clang-analyzer-deadcode.DeadStores warning
            assert(false && "unreachable");
        }
    }
    yp_imaginary_node_t *node = YP_ALLOC_NODE(parser, yp_imaginary_node_t);
    *node = (yp_imaginary_node_t) {
        { .type = YP_NODE_IMAGINARY_NODE, .location = YP_LOCATION_TOKEN_VALUE(token) },
        .numeric = numeric_node
    };
    // The numeric lex mode must have been popped by the switch above.
    assert(parser->lex_modes.current->mode != YP_LEX_NUMERIC);
    return node;
}
// Allocate and initialize a new InNode node (an `in` clause of a `case`
// expression). `statements` may be NULL and `then_keyword` may be
// NOT_PROVIDED; the node's end falls back through them to the pattern.
static yp_in_node_t *
yp_in_node_create(yp_parser_t *parser, yp_node_t *pattern, yp_statements_node_t *statements, const yp_token_t *in_keyword, const yp_token_t *then_keyword) {
    yp_in_node_t *node = YP_ALLOC_NODE(parser, yp_in_node_t);
    // Priority for the end of the node: statements body, then the `then`
    // keyword, then the pattern itself.
    const char *end;
    if (statements != NULL) {
        end = statements->base.location.end;
    } else if (then_keyword->type != YP_TOKEN_NOT_PROVIDED) {
        end = then_keyword->end;
    } else {
        end = pattern->location.end;
    }
    *node = (yp_in_node_t) {
        {
            .type = YP_NODE_IN_NODE,
            .location = {
                .start = in_keyword->start,
                .end = end
            },
        },
        .pattern = pattern,
        .statements = statements,
        .in_loc = YP_LOCATION_TOKEN_VALUE(in_keyword),
        .then_loc = YP_OPTIONAL_LOCATION_TOKEN_VALUE(then_keyword)
    };
    return node;
}
// Allocate and initialize a new InstanceVariableOperatorAndWriteNode node
// (`@foo &&= value`).
static yp_instance_variable_operator_and_write_node_t *
yp_instance_variable_operator_and_write_node_create(yp_parser_t *parser, yp_node_t *target, const yp_token_t *operator, yp_node_t *value) {
    assert(target->type == YP_NODE_INSTANCE_VARIABLE_READ_NODE);
    assert(operator->type == YP_TOKEN_AMPERSAND_AMPERSAND_EQUAL);
    yp_instance_variable_operator_and_write_node_t *node = YP_ALLOC_NODE(parser, yp_instance_variable_operator_and_write_node_t);
    *node = (yp_instance_variable_operator_and_write_node_t) {
        {
            .type = YP_NODE_INSTANCE_VARIABLE_OPERATOR_AND_WRITE_NODE,
            .location = {
                .start = target->location.start,
                .end = value->location.end
            }
        },
        // Only the target's location is kept; the read node itself is not
        // retained by this node.
        .name_loc = target->location,
        .operator_loc = YP_LOCATION_TOKEN_VALUE(operator),
        .value = value
    };
    return node;
}
// Allocate and initialize a new InstanceVariableOperatorWriteNode node, i.e.
// a compound assignment to an instance variable (e.g. `@foo += value`) with
// any operator other than `&&=`/`||=`, which have dedicated node types.
static yp_instance_variable_operator_write_node_t *
yp_instance_variable_operator_write_node_create(yp_parser_t *parser, yp_node_t *target, const yp_token_t *operator, yp_node_t *value) {
    // Mirror the invariant asserted by the sibling &&=/||= constructors: the
    // target must be an instance variable read whose location becomes name_loc.
    assert(target->type == YP_NODE_INSTANCE_VARIABLE_READ_NODE);

    yp_instance_variable_operator_write_node_t *node = YP_ALLOC_NODE(parser, yp_instance_variable_operator_write_node_t);
    *node = (yp_instance_variable_operator_write_node_t) {
        {
            .type = YP_NODE_INSTANCE_VARIABLE_OPERATOR_WRITE_NODE,
            .location = {
                .start = target->location.start,
                .end = value->location.end
            }
        },
        .name_loc = target->location,
        .operator_loc = YP_LOCATION_TOKEN_VALUE(operator),
        .value = value,
        // The operator token includes the trailing `=` (e.g. `+=`), so drop
        // the final character to intern just the binary operator's name.
        .operator = yp_parser_constant_id_location(parser, operator->start, operator->end - 1)
    };
    return node;
}
// Allocate and initialize a new InstanceVariableOperatorOrWriteNode node
// (`@foo ||= value`).
static yp_instance_variable_operator_or_write_node_t *
yp_instance_variable_operator_or_write_node_create(yp_parser_t *parser, yp_node_t *target, const yp_token_t *operator, yp_node_t *value) {
    assert(target->type == YP_NODE_INSTANCE_VARIABLE_READ_NODE);
    assert(operator->type == YP_TOKEN_PIPE_PIPE_EQUAL);
    yp_instance_variable_operator_or_write_node_t *node = YP_ALLOC_NODE(parser, yp_instance_variable_operator_or_write_node_t);
    *node = (yp_instance_variable_operator_or_write_node_t) {
        {
            .type = YP_NODE_INSTANCE_VARIABLE_OPERATOR_OR_WRITE_NODE,
            .location = {
                .start = target->location.start,
                .end = value->location.end
            }
        },
        // Only the target's location is kept; the read node itself is not
        // retained by this node.
        .name_loc = target->location,
        .operator_loc = YP_LOCATION_TOKEN_VALUE(operator),
        .value = value
    };
    return node;
}
// Allocate and initialize a new InstanceVariableReadNode node from the token
// holding the variable's name.
static yp_instance_variable_read_node_t *
yp_instance_variable_read_node_create(yp_parser_t *parser, const yp_token_t *token) {
    assert(token->type == YP_TOKEN_INSTANCE_VARIABLE);

    yp_instance_variable_read_node_t *created = YP_ALLOC_NODE(parser, yp_instance_variable_read_node_t);
    *created = (yp_instance_variable_read_node_t) {
        {
            .type = YP_NODE_INSTANCE_VARIABLE_READ_NODE,
            .location = YP_LOCATION_TOKEN_VALUE(token)
        }
    };

    return created;
}
// Initialize a new InstanceVariableWriteNode node from an InstanceVariableRead
// node. `value` (and correspondingly the operator) may be NULL, in which case
// the node is a bare write target covering only the read node's location.
static yp_instance_variable_write_node_t *
yp_instance_variable_write_node_create(yp_parser_t *parser, yp_instance_variable_read_node_t *read_node, yp_token_t *operator, yp_node_t *value) {
    yp_instance_variable_write_node_t *node = YP_ALLOC_NODE(parser, yp_instance_variable_write_node_t);
    *node = (yp_instance_variable_write_node_t) {
        {
            .type = YP_NODE_INSTANCE_VARIABLE_WRITE_NODE,
            .location = {
                .start = read_node->base.location.start,
                .end = value == NULL ? read_node->base.location.end : value->location.end
            }
        },
        .name_loc = YP_LOCATION_NODE_BASE_VALUE(read_node),
        .operator_loc = YP_OPTIONAL_LOCATION_TOKEN_VALUE(operator),
        .value = value
    };
    return node;
}
// Allocate a new InterpolatedRegularExpressionNode node from its opening
// token. The end location is left NULL and the closing location temporarily
// points at the opening token; both are updated as parts are appended and
// when yp_interpolated_regular_expression_node_closing_set is called.
static yp_interpolated_regular_expression_node_t *
yp_interpolated_regular_expression_node_create(yp_parser_t *parser, const yp_token_t *opening) {
    yp_interpolated_regular_expression_node_t *node = YP_ALLOC_NODE(parser, yp_interpolated_regular_expression_node_t);
    *node = (yp_interpolated_regular_expression_node_t) {
        {
            .type = YP_NODE_INTERPOLATED_REGULAR_EXPRESSION_NODE,
            .location = {
                .start = opening->start,
                .end = NULL,
            },
        },
        .opening_loc = YP_LOCATION_TOKEN_VALUE(opening),
        // Placeholder until the real closing token is seen.
        .closing_loc = YP_LOCATION_TOKEN_VALUE(opening),
        .flags = 0,
        .parts = YP_EMPTY_NODE_LIST
    };
    return node;
}
// Append a part to an InterpolatedRegularExpressionNode node, extending the
// node's location through it.
static inline void
yp_interpolated_regular_expression_node_append(yp_interpolated_regular_expression_node_t *node, yp_node_t *part) {
    yp_node_list_append(&node->parts, part);
    node->base.location.end = part->location.end;
}
// Set the closing token of an InterpolatedRegularExpressionNode node. The
// closing token also carries the regexp flags (e.g. the characters after the
// final delimiter), which are decoded here.
static inline void
yp_interpolated_regular_expression_node_closing_set(yp_interpolated_regular_expression_node_t *node, const yp_token_t *closing) {
    node->closing_loc = YP_LOCATION_TOKEN_VALUE(closing);
    node->base.location.end = closing->end;
    node->flags = yp_regular_expression_flags_create(closing);
}
// Allocate and initialize a new InterpolatedStringNode node. `parts` may be
// NULL for an initially-empty list; when provided, the list is copied by
// value (the node takes over its storage).
static yp_interpolated_string_node_t *
yp_interpolated_string_node_create(yp_parser_t *parser, const yp_token_t *opening, const yp_node_list_t *parts, const yp_token_t *closing) {
    yp_interpolated_string_node_t *node = YP_ALLOC_NODE(parser, yp_interpolated_string_node_t);
    *node = (yp_interpolated_string_node_t) {
        {
            .type = YP_NODE_INTERPOLATED_STRING_NODE,
            .location = {
                .start = opening->start,
                .end = closing->end,
            },
        },
        .opening_loc = YP_OPTIONAL_LOCATION_TOKEN_VALUE(opening),
        .closing_loc = YP_OPTIONAL_LOCATION_TOKEN_VALUE(closing),
        .parts = parts == NULL ? YP_EMPTY_NODE_LIST : *parts
    };
    return node;
}
// Append a part to an InterpolatedStringNode node, extending the node's
// location through it.
static inline void
yp_interpolated_string_node_append(yp_interpolated_string_node_t *node, yp_node_t *part) {
    yp_node_list_append(&node->parts, part);
    node->base.location.end = part->location.end;
}
// Set the closing token of the given InterpolatedStringNode node, extending
// the node's location through it.
static void
yp_interpolated_string_node_closing_set(yp_interpolated_string_node_t *node, const yp_token_t *closing) {
    node->closing_loc = YP_OPTIONAL_LOCATION_TOKEN_VALUE(closing);
    node->base.location.end = closing->end;
}
// Allocate and initialize a new InterpolatedSymbolNode node. `parts` may be
// NULL for an initially-empty list; when provided, the list is copied by
// value (the node takes over its storage).
static yp_interpolated_symbol_node_t *
yp_interpolated_symbol_node_create(yp_parser_t *parser, const yp_token_t *opening, const yp_node_list_t *parts, const yp_token_t *closing) {
    yp_interpolated_symbol_node_t *node = YP_ALLOC_NODE(parser, yp_interpolated_symbol_node_t);
    *node = (yp_interpolated_symbol_node_t) {
        {
            .type = YP_NODE_INTERPOLATED_SYMBOL_NODE,
            .location = {
                .start = opening->start,
                .end = closing->end,
            },
        },
        .opening_loc = YP_OPTIONAL_LOCATION_TOKEN_VALUE(opening),
        .closing_loc = YP_OPTIONAL_LOCATION_TOKEN_VALUE(closing),
        .parts = parts == NULL ? YP_EMPTY_NODE_LIST : *parts
    };
    return node;
}
// Append a part to an InterpolatedSymbolNode node. If the node has no start
// location yet (e.g. it was created without an opening token), the first
// appended part supplies it.
static inline void
yp_interpolated_symbol_node_append(yp_interpolated_symbol_node_t *node, yp_node_t *part) {
    yp_node_list_append(&node->parts, part);
    if (!node->base.location.start) {
        node->base.location.start = part->location.start;
    }
    node->base.location.end = part->location.end;
}
// Set the closing token of the given InterpolatedSymbolNode node, extending
// the node's location through it.
static inline void
yp_interpolated_symbol_node_closing_set(yp_interpolated_symbol_node_t *node, const yp_token_t *closing) {
    node->closing_loc = YP_OPTIONAL_LOCATION_TOKEN_VALUE(closing);
    node->base.location.end = closing->end;
}
// Allocate a new InterpolatedXStringNode node from its opening and closing
// tokens. The parts list starts out empty and is filled in via
// yp_interpolated_xstring_node_append.
static yp_interpolated_x_string_node_t *
yp_interpolated_xstring_node_create(yp_parser_t *parser, const yp_token_t *opening, const yp_token_t *closing) {
    yp_interpolated_x_string_node_t *node = YP_ALLOC_NODE(parser, yp_interpolated_x_string_node_t);
    *node = (yp_interpolated_x_string_node_t) {
        {
            .type = YP_NODE_INTERPOLATED_X_STRING_NODE,
            .location = {
                .start = opening->start,
                .end = closing->end
            },
        },
        .opening_loc = YP_OPTIONAL_LOCATION_TOKEN_VALUE(opening),
        .closing_loc = YP_OPTIONAL_LOCATION_TOKEN_VALUE(closing),
        .parts = YP_EMPTY_NODE_LIST
    };
    return node;
}
// Append a part to an InterpolatedXStringNode node, extending the node's
// location through it.
static inline void
yp_interpolated_xstring_node_append(yp_interpolated_x_string_node_t *node, yp_node_t *part) {
    yp_node_list_append(&node->parts, part);
    node->base.location.end = part->location.end;
}
// Set the closing token of the given InterpolatedXStringNode node, extending
// the node's location through it.
static inline void
yp_interpolated_xstring_node_closing_set(yp_interpolated_x_string_node_t *node, const yp_token_t *closing) {
    node->closing_loc = YP_OPTIONAL_LOCATION_TOKEN_VALUE(closing);
    node->base.location.end = closing->end;
}
// Allocate and initialize a new KeywordHashNode node. The location starts out
// null in both directions and is grown as elements are appended.
static yp_keyword_hash_node_t *
yp_keyword_hash_node_create(yp_parser_t *parser) {
    yp_keyword_hash_node_t *created = YP_ALLOC_NODE(parser, yp_keyword_hash_node_t);

    *created = (yp_keyword_hash_node_t) {
        .base = {
            .type = YP_NODE_KEYWORD_HASH_NODE,
            .location = { .start = NULL, .end = NULL }
        },
        .elements = YP_EMPTY_NODE_LIST
    };

    return created;
}
// Append an element to a KeywordHashNode node. The first element supplies the
// node's start location, and each element pushes its end location forward.
static void
yp_keyword_hash_node_elements_append(yp_keyword_hash_node_t *hash, yp_node_t *element) {
    yp_node_list_append(&hash->elements, element);
    if (hash->base.location.start == NULL) {
        hash->base.location.start = element->location.start;
    }
    hash->base.location.end = element->location.end;
}
// Allocate and initialize a new KeywordParameterNode node. `value` may be
// NULL for a required keyword parameter with no default, in which case the
// node covers only the name token.
static yp_keyword_parameter_node_t *
yp_keyword_parameter_node_create(yp_parser_t *parser, const yp_token_t *name, yp_node_t *value) {
    yp_keyword_parameter_node_t *created = YP_ALLOC_NODE(parser, yp_keyword_parameter_node_t);
    const char *end = (value == NULL) ? name->end : value->location.end;

    *created = (yp_keyword_parameter_node_t) {
        {
            .type = YP_NODE_KEYWORD_PARAMETER_NODE,
            .location = { .start = name->start, .end = end },
        },
        .name_loc = YP_LOCATION_TOKEN_VALUE(name),
        .value = value
    };

    return created;
}
// Allocate a new KeywordRestParameterNode node (`**rest`). The name may be
// NOT_PROVIDED for an anonymous `**`, in which case the node covers only the
// operator.
static yp_keyword_rest_parameter_node_t *
yp_keyword_rest_parameter_node_create(yp_parser_t *parser, const yp_token_t *operator, const yp_token_t *name) {
    yp_keyword_rest_parameter_node_t *node = YP_ALLOC_NODE(parser, yp_keyword_rest_parameter_node_t);
    *node = (yp_keyword_rest_parameter_node_t) {
        {
            .type = YP_NODE_KEYWORD_REST_PARAMETER_NODE,
            .location = {
                .start = operator->start,
                .end = (name->type == YP_TOKEN_NOT_PROVIDED ? operator->end : name->end)
            },
        },
        .operator_loc = YP_LOCATION_TOKEN_VALUE(operator),
        .name_loc = YP_OPTIONAL_LOCATION_TOKEN_VALUE(name)
    };
    return node;
}
// Allocate a new LambdaNode node. The locals list is copied by value (the
// node takes over its storage); the closing token contributes only the end
// of the node's location.
static yp_lambda_node_t *
yp_lambda_node_create(
    yp_parser_t *parser,
    yp_constant_id_list_t *locals,
    const yp_token_t *opening,
    yp_block_parameters_node_t *parameters,
    yp_node_t *statements,
    const yp_token_t *closing
) {
    yp_lambda_node_t *node = YP_ALLOC_NODE(parser, yp_lambda_node_t);
    *node = (yp_lambda_node_t) {
        {
            .type = YP_NODE_LAMBDA_NODE,
            .location = {
                .start = opening->start,
                .end = closing->end
            },
        },
        .locals = *locals,
        .opening_loc = YP_LOCATION_TOKEN_VALUE(opening),
        .parameters = parameters,
        .statements = statements
    };
    return node;
}
// Allocate and initialize a new LocalVariableOperatorAndWriteNode node
// (`foo &&= value`). The target may also be a CallNode (a bare method call
// that is being treated as a variable).
static yp_local_variable_operator_and_write_node_t *
yp_local_variable_operator_and_write_node_create(yp_parser_t *parser, yp_node_t *target, const yp_token_t *operator, yp_node_t *value, yp_constant_id_t constant_id) {
    assert(target->type == YP_NODE_LOCAL_VARIABLE_READ_NODE || target->type == YP_NODE_CALL_NODE);
    assert(operator->type == YP_TOKEN_AMPERSAND_AMPERSAND_EQUAL);
    yp_local_variable_operator_and_write_node_t *node = YP_ALLOC_NODE(parser, yp_local_variable_operator_and_write_node_t);
    *node = (yp_local_variable_operator_and_write_node_t) {
        {
            .type = YP_NODE_LOCAL_VARIABLE_OPERATOR_AND_WRITE_NODE,
            .location = {
                .start = target->location.start,
                .end = value->location.end
            }
        },
        .name_loc = target->location,
        .operator_loc = YP_LOCATION_TOKEN_VALUE(operator),
        .value = value,
        .constant_id = constant_id
    };
    return node;
}
// Allocate and initialize a new LocalVariableOperatorWriteNode node, i.e. a
// compound assignment to a local (e.g. `foo += value`) with any operator
// other than `&&=`/`||=`, which have dedicated node types.
static yp_local_variable_operator_write_node_t *
yp_local_variable_operator_write_node_create(yp_parser_t *parser, yp_node_t *target, const yp_token_t *operator, yp_node_t *value, yp_constant_id_t constant_id) {
    // Mirror the invariant asserted by the sibling &&=/||= constructors: the
    // target is a local variable read, or a bare CallNode being treated as a
    // variable. Its location becomes name_loc.
    assert(target->type == YP_NODE_LOCAL_VARIABLE_READ_NODE || target->type == YP_NODE_CALL_NODE);

    yp_local_variable_operator_write_node_t *node = YP_ALLOC_NODE(parser, yp_local_variable_operator_write_node_t);
    *node = (yp_local_variable_operator_write_node_t) {
        {
            .type = YP_NODE_LOCAL_VARIABLE_OPERATOR_WRITE_NODE,
            .location = {
                .start = target->location.start,
                .end = value->location.end
            }
        },
        .name_loc = target->location,
        .operator_loc = YP_LOCATION_TOKEN_VALUE(operator),
        .value = value,
        .constant_id = constant_id,
        // The operator token includes the trailing `=` (e.g. `+=`), so drop
        // the final character to intern just the binary operator's name.
        .operator_id = yp_parser_constant_id_location(parser, operator->start, operator->end - 1)
    };
    return node;
}
// Allocate and initialize a new LocalVariableOperatorOrWriteNode node
// (`foo ||= value`). The target may also be a CallNode (a bare method call
// that is being treated as a variable).
static yp_local_variable_operator_or_write_node_t *
yp_local_variable_operator_or_write_node_create(yp_parser_t *parser, yp_node_t *target, const yp_token_t *operator, yp_node_t *value, yp_constant_id_t constant_id) {
    assert(target->type == YP_NODE_LOCAL_VARIABLE_READ_NODE || target->type == YP_NODE_CALL_NODE);
    assert(operator->type == YP_TOKEN_PIPE_PIPE_EQUAL);
    yp_local_variable_operator_or_write_node_t *node = YP_ALLOC_NODE(parser, yp_local_variable_operator_or_write_node_t);
    *node = (yp_local_variable_operator_or_write_node_t) {
        {
            .type = YP_NODE_LOCAL_VARIABLE_OPERATOR_OR_WRITE_NODE,
            .location = {
                .start = target->location.start,
                .end = value->location.end
            }
        },
        .name_loc = target->location,
        .operator_loc = YP_LOCATION_TOKEN_VALUE(operator),
        .value = value,
        .constant_id = constant_id
    };
    return node;
}
// Allocate and initialize a new LocalVariableReadNode node from the token
// holding the variable's name. The name is interned into the parser's
// constant pool, and the given depth is stored verbatim.
static yp_local_variable_read_node_t *
yp_local_variable_read_node_create(yp_parser_t *parser, const yp_token_t *name, uint32_t depth) {
    yp_local_variable_read_node_t *created = YP_ALLOC_NODE(parser, yp_local_variable_read_node_t);

    *created = (yp_local_variable_read_node_t) {
        {
            .type = YP_NODE_LOCAL_VARIABLE_READ_NODE,
            .location = YP_LOCATION_TOKEN_VALUE(name)
        },
        .depth = depth,
        .constant_id = yp_parser_constant_id_token(parser, name)
    };

    return created;
}
// Allocate and initialize a new LocalVariableWriteNode node. `value` (and
// correspondingly the operator) may be NULL, in which case the node is a bare
// write target and its location covers only the name.
static yp_local_variable_write_node_t *
yp_local_variable_write_node_create(yp_parser_t *parser, yp_constant_id_t constant_id, uint32_t depth, yp_node_t *value, const yp_location_t *name_loc, const yp_token_t *operator) {
    yp_local_variable_write_node_t *node = YP_ALLOC_NODE(parser, yp_local_variable_write_node_t);
    *node = (yp_local_variable_write_node_t) {
        {
            .type = YP_NODE_LOCAL_VARIABLE_WRITE_NODE,
            .location = {
                .start = name_loc->start,
                .end = value == NULL ? name_loc->end : value->location.end
            }
        },
        .constant_id = constant_id,
        .depth = depth,
        .value = value,
        .name_loc = *name_loc,
        .operator_loc = YP_OPTIONAL_LOCATION_TOKEN_VALUE(operator)
    };
    return node;
}
// Allocate and initialize a new LocalVariableWriteNode node without an
// operator or value, i.e. a bare write target. The name is interned into the
// parser's constant pool and depth is fixed at 0.
static yp_local_variable_write_node_t *
yp_local_variable_target_node_create(yp_parser_t *parser, const yp_token_t *name) {
    yp_local_variable_write_node_t *node = YP_ALLOC_NODE(parser, yp_local_variable_write_node_t);
    *node = (yp_local_variable_write_node_t) {
        {
            .type = YP_NODE_LOCAL_VARIABLE_WRITE_NODE,
            .location = YP_LOCATION_TOKEN_VALUE(name)
        },
        .constant_id = yp_parser_constant_id_token(parser, name),
        .depth = 0,
        .value = NULL,
        .name_loc = YP_LOCATION_TOKEN_VALUE(name),
        .operator_loc = { .start = NULL, .end = NULL }
    };
    return node;
}
// Allocate and initialize a new MatchPredicateNode node from a value, a
// pattern, and the operator token between them. The node spans from the
// value's start to the pattern's end.
static yp_match_predicate_node_t *
yp_match_predicate_node_create(yp_parser_t *parser, yp_node_t *value, yp_node_t *pattern, const yp_token_t *operator) {
    yp_match_predicate_node_t *created = YP_ALLOC_NODE(parser, yp_match_predicate_node_t);

    *created = (yp_match_predicate_node_t) {
        {
            .type = YP_NODE_MATCH_PREDICATE_NODE,
            .location = { .start = value->location.start, .end = pattern->location.end }
        },
        .operator_loc = YP_LOCATION_TOKEN_VALUE(operator),
        .value = value,
        .pattern = pattern
    };

    return created;
}
// Allocate and initialize a new MatchRequiredNode node from a value, a
// pattern, and the operator token between them. The node spans from the
// value's start to the pattern's end.
static yp_match_required_node_t *
yp_match_required_node_create(yp_parser_t *parser, yp_node_t *value, yp_node_t *pattern, const yp_token_t *operator) {
    yp_match_required_node_t *node = YP_ALLOC_NODE(parser, yp_match_required_node_t);
    *node = (yp_match_required_node_t) {
        {
            .type = YP_NODE_MATCH_REQUIRED_NODE,
            .location = {
                .start = value->location.start,
                .end = pattern->location.end
            }
        },
        .value = value,
        .pattern = pattern,
        .operator_loc = YP_LOCATION_TOKEN_VALUE(operator)
    };
    return node;
}
// Allocate a new ModuleNode node. `locals` may be NULL, in which case an
// empty constant id list is stored; otherwise the list is copied by value
// (the node takes over its storage).
static yp_module_node_t *
yp_module_node_create(yp_parser_t *parser, yp_constant_id_list_t *locals, const yp_token_t *module_keyword, yp_node_t *constant_path, yp_node_t *statements, const yp_token_t *end_keyword) {
    yp_module_node_t *node = YP_ALLOC_NODE(parser, yp_module_node_t);
    *node = (yp_module_node_t) {
        {
            .type = YP_NODE_MODULE_NODE,
            .location = {
                .start = module_keyword->start,
                .end = end_keyword->end
            }
        },
        .locals = (locals == NULL ? ((yp_constant_id_list_t) { .ids = NULL, .size = 0, .capacity = 0 }) : *locals),
        .module_keyword_loc = YP_LOCATION_TOKEN_VALUE(module_keyword),
        .constant_path = constant_path,
        .statements = statements,
        .end_keyword_loc = YP_LOCATION_TOKEN_VALUE(end_keyword)
    };
    return node;
}
// Allocate a new MultiWriteNode node. The location starts out null in both
// directions and is grown as targets are appended via
// yp_multi_write_node_targets_append.
static yp_multi_write_node_t *
yp_multi_write_node_create(yp_parser_t *parser, const yp_token_t *operator, yp_node_t *value, const yp_location_t *lparen_loc, const yp_location_t *rparen_loc) {
    yp_multi_write_node_t *node = YP_ALLOC_NODE(parser, yp_multi_write_node_t);
    *node = (yp_multi_write_node_t) {
        {
            .type = YP_NODE_MULTI_WRITE_NODE,
            .location = { .start = NULL, .end = NULL },
        },
        .operator_loc = YP_OPTIONAL_LOCATION_TOKEN_VALUE(operator),
        .value = value,
        .lparen_loc = *lparen_loc,
        .rparen_loc = *rparen_loc,
        .targets = YP_EMPTY_NODE_LIST
    };
    return node;
}
// Append a target to a MultiWriteNode node, widening the node's location to
// cover the target if it lies outside the current bounds.
static void
yp_multi_write_node_targets_append(yp_multi_write_node_t *node, yp_node_t *target) {
    yp_node_list_append(&node->targets, target);
    if (node->base.location.start == NULL || (node->base.location.start > target->location.start)) {
        node->base.location.start = target->location.start;
    }
    if (node->base.location.end == NULL || (node->base.location.end < target->location.end)) {
        node->base.location.end = target->location.end;
    }
}
// Set the operator location of the given MultiWriteNode node.
static inline void
yp_multi_write_node_operator_loc_set(yp_multi_write_node_t *node, const yp_token_t *operator) {
    node->operator_loc = YP_OPTIONAL_LOCATION_TOKEN_VALUE(operator);
}
// Allocate and initialize a new NextNode node from the `next` keyword and its
// optional arguments. With no arguments, the node covers only the keyword.
static yp_next_node_t *
yp_next_node_create(yp_parser_t *parser, const yp_token_t *keyword, yp_arguments_node_t *arguments) {
    assert(keyword->type == YP_TOKEN_KEYWORD_NEXT);

    yp_next_node_t *created = YP_ALLOC_NODE(parser, yp_next_node_t);
    const char *end = (arguments == NULL) ? keyword->end : arguments->base.location.end;

    *created = (yp_next_node_t) {
        {
            .type = YP_NODE_NEXT_NODE,
            .location = { .start = keyword->start, .end = end }
        },
        .keyword_loc = YP_LOCATION_TOKEN_VALUE(keyword),
        .arguments = arguments
    };

    return created;
}
// Allocate and initialize a new NilNode node from a `nil` keyword token.
static yp_nil_node_t *
yp_nil_node_create(yp_parser_t *parser, const yp_token_t *token) {
    assert(token->type == YP_TOKEN_KEYWORD_NIL);

    yp_nil_node_t *created = YP_ALLOC_NODE(parser, yp_nil_node_t);
    *created = (yp_nil_node_t) {
        { .type = YP_NODE_NIL_NODE, .location = YP_LOCATION_TOKEN_VALUE(token) }
    };

    return created;
}
// Allocate and initialize a new NoKeywordsParameterNode node (`**nil` in a
// parameter list), spanning from the `**` operator through the `nil` keyword.
static yp_no_keywords_parameter_node_t *
yp_no_keywords_parameter_node_create(yp_parser_t *parser, const yp_token_t *operator, const yp_token_t *keyword) {
    assert(operator->type == YP_TOKEN_USTAR_STAR);
    assert(keyword->type == YP_TOKEN_KEYWORD_NIL);
    yp_no_keywords_parameter_node_t *node = YP_ALLOC_NODE(parser, yp_no_keywords_parameter_node_t);
    *node = (yp_no_keywords_parameter_node_t) {
        {
            .type = YP_NODE_NO_KEYWORDS_PARAMETER_NODE,
            .location = {
                .start = operator->start,
                .end = keyword->end
            }
        },
        .operator_loc = YP_LOCATION_TOKEN_VALUE(operator),
        .keyword_loc = YP_LOCATION_TOKEN_VALUE(keyword)
    };
    return node;
}
// Allocate a new NthReferenceReadNode node.
static yp_numbered_reference_read_node_t *
yp_numbered_reference_read_node_create(yp_parser_t *parser, const yp_token_t *name) {
assert(name->type == YP_TOKEN_NUMBERED_REFERENCE);
yp_numbered_reference_read_node_t *node = YP_ALLOC_NODE(parser, yp_numbered_reference_read_node_t);
*node = (yp_numbered_reference_read_node_t) {
{
.type = YP_NODE_NUMBERED_REFERENCE_READ_NODE,
.location = YP_LOCATION_TOKEN_VALUE(name),
}
};
return node;
}
// Allocate a new OptionalParameterNode node (a `name = value` entry in a
// parameter list); the node spans from the name through the default value.
static yp_optional_parameter_node_t *
yp_optional_parameter_node_create(yp_parser_t *parser, const yp_token_t *name, const yp_token_t *operator, yp_node_t *value) {
    yp_optional_parameter_node_t *node = YP_ALLOC_NODE(parser, yp_optional_parameter_node_t);

    *node = (yp_optional_parameter_node_t) {
        {
            .type = YP_NODE_OPTIONAL_PARAMETER_NODE,
            .location = {
                .start = name->start,
                .end = value->location.end
            }
        },
        // Interning the name gives later lookups a cheap integer comparison.
        .constant_id = yp_parser_constant_id_token(parser, name),
        .name_loc = YP_LOCATION_TOKEN_VALUE(name),
        .operator_loc = YP_LOCATION_TOKEN_VALUE(operator),
        .value = value
    };

    return node;
}

// Allocate and initialize a new OrNode node (`left || right` / `left or right`).
static yp_or_node_t *
yp_or_node_create(yp_parser_t *parser, yp_node_t *left, const yp_token_t *operator, yp_node_t *right) {
    yp_or_node_t *node = YP_ALLOC_NODE(parser, yp_or_node_t);

    *node = (yp_or_node_t) {
        {
            .type = YP_NODE_OR_NODE,
            .location = {
                .start = left->location.start,
                .end = right->location.end
            }
        },
        .left = left,
        .right = right,
        .operator_loc = YP_LOCATION_TOKEN_VALUE(operator)
    };

    return node;
}

// Allocate and initialize a new, empty ParametersNode node. Its location is
// initially a zero-width span at the current token; it grows as parameters
// are appended via yp_parameters_node_location_set.
static yp_parameters_node_t *
yp_parameters_node_create(yp_parser_t *parser) {
    yp_parameters_node_t *node = YP_ALLOC_NODE(parser, yp_parameters_node_t);

    *node = (yp_parameters_node_t) {
        {
            .type = YP_NODE_PARAMETERS_NODE,
            .location = { .start = parser->current.start, .end = parser->current.start },
        },
        .rest = NULL,
        .keyword_rest = NULL,
        .block = NULL,
        .requireds = YP_EMPTY_NODE_LIST,
        .optionals = YP_EMPTY_NODE_LIST,
        .posts = YP_EMPTY_NODE_LIST,
        .keywords = YP_EMPTY_NODE_LIST
    };

    return node;
}
// Set the location properly for the parameters node: widen the node's span so
// it covers `param`, in whichever direction (parameters are not guaranteed to
// arrive in source order).
static void
yp_parameters_node_location_set(yp_parameters_node_t *params, yp_node_t *param) {
    if (params->base.location.start == NULL) {
        params->base.location.start = param->location.start;
    } else {
        params->base.location.start = params->base.location.start < param->location.start ? params->base.location.start : param->location.start;
    }

    if (params->base.location.end == NULL) {
        params->base.location.end = param->location.end;
    } else {
        params->base.location.end = params->base.location.end > param->location.end ? params->base.location.end : param->location.end;
    }
}

// Append a required parameter to a ParametersNode node.
static void
yp_parameters_node_requireds_append(yp_parameters_node_t *params, yp_node_t *param) {
    yp_parameters_node_location_set(params, param);
    yp_node_list_append(&params->requireds, param);
}

// Append an optional (defaulted) parameter to a ParametersNode node.
static void
yp_parameters_node_optionals_append(yp_parameters_node_t *params, yp_optional_parameter_node_t *param) {
    yp_parameters_node_location_set(params, (yp_node_t *) param);
    yp_node_list_append(&params->optionals, (yp_node_t *) param);
}

// Append a post-rest required parameter to a ParametersNode node.
static void
yp_parameters_node_posts_append(yp_parameters_node_t *params, yp_node_t *param) {
    yp_parameters_node_location_set(params, param);
    yp_node_list_append(&params->posts, param);
}

// Set the rest parameter (`*args`) on a ParametersNode node.
static void
yp_parameters_node_rest_set(yp_parameters_node_t *params, yp_rest_parameter_node_t *param) {
    yp_parameters_node_location_set(params, (yp_node_t *) param);
    params->rest = param;
}

// Append a keyword parameter to a ParametersNode node.
static void
yp_parameters_node_keywords_append(yp_parameters_node_t *params, yp_node_t *param) {
    yp_parameters_node_location_set(params, param);
    yp_node_list_append(&params->keywords, param);
}

// Set the keyword rest parameter (`**kwargs` or `**nil`) on a ParametersNode node.
static void
yp_parameters_node_keyword_rest_set(yp_parameters_node_t *params, yp_node_t *param) {
    yp_parameters_node_location_set(params, param);
    params->keyword_rest = param;
}

// Set the block parameter (`&block`) on a ParametersNode node.
static void
yp_parameters_node_block_set(yp_parameters_node_t *params, yp_block_parameter_node_t *param) {
    yp_parameters_node_location_set(params, (yp_node_t *) param);
    params->block = param;
}
// Allocate a new ProgramNode node, the root of the AST. With no statements
// (an empty program) the node spans the whole source buffer.
static yp_program_node_t *
yp_program_node_create(yp_parser_t *parser, yp_constant_id_list_t *locals, yp_statements_node_t *statements) {
    yp_program_node_t *node = YP_ALLOC_NODE(parser, yp_program_node_t);

    *node = (yp_program_node_t) {
        {
            .type = YP_NODE_PROGRAM_NODE,
            .location = {
                .start = statements == NULL ? parser->start : statements->base.location.start,
                .end = statements == NULL ? parser->end : statements->base.location.end
            }
        },
        // Note: takes a shallow copy of the locals list; the node now owns it.
        .locals = *locals,
        .statements = statements
    };

    return node;
}

// Allocate and initialize new ParenthesesNode node (`( statements )`).
// `statements` may be NULL for empty parentheses.
static yp_parentheses_node_t *
yp_parentheses_node_create(yp_parser_t *parser, const yp_token_t *opening, yp_node_t *statements, const yp_token_t *closing) {
    yp_parentheses_node_t *node = YP_ALLOC_NODE(parser, yp_parentheses_node_t);

    *node = (yp_parentheses_node_t) {
        {
            .type = YP_NODE_PARENTHESES_NODE,
            .location = {
                .start = opening->start,
                .end = closing->end
            }
        },
        .statements = statements,
        .opening_loc = YP_LOCATION_TOKEN_VALUE(opening),
        .closing_loc = YP_LOCATION_TOKEN_VALUE(closing)
    };

    return node;
}

// Allocate and initialize a new PinnedExpressionNode node, a pattern-matching
// pin of a parenthesized expression: `^( expression )`.
static yp_pinned_expression_node_t *
yp_pinned_expression_node_create(yp_parser_t *parser, yp_node_t *expression, const yp_token_t *operator, const yp_token_t *lparen, const yp_token_t *rparen) {
    yp_pinned_expression_node_t *node = YP_ALLOC_NODE(parser, yp_pinned_expression_node_t);

    *node = (yp_pinned_expression_node_t) {
        {
            .type = YP_NODE_PINNED_EXPRESSION_NODE,
            .location = {
                .start = operator->start,
                .end = rparen->end
            }
        },
        .expression = expression,
        .operator_loc = YP_LOCATION_TOKEN_VALUE(operator),
        .lparen_loc = YP_LOCATION_TOKEN_VALUE(lparen),
        .rparen_loc = YP_LOCATION_TOKEN_VALUE(rparen)
    };

    return node;
}

// Allocate and initialize a new PinnedVariableNode node, a pattern-matching
// pin of a variable: `^variable`.
static yp_pinned_variable_node_t *
yp_pinned_variable_node_create(yp_parser_t *parser, const yp_token_t *operator, yp_node_t *variable) {
    yp_pinned_variable_node_t *node = YP_ALLOC_NODE(parser, yp_pinned_variable_node_t);

    *node = (yp_pinned_variable_node_t) {
        {
            .type = YP_NODE_PINNED_VARIABLE_NODE,
            .location = {
                .start = operator->start,
                .end = variable->location.end
            }
        },
        .variable = variable,
        .operator_loc = YP_LOCATION_TOKEN_VALUE(operator)
    };

    return node;
}
// Allocate and initialize a new PostExecutionNode node (`END { statements }`).
static yp_post_execution_node_t *
yp_post_execution_node_create(yp_parser_t *parser, const yp_token_t *keyword, const yp_token_t *opening, yp_statements_node_t *statements, const yp_token_t *closing) {
    yp_post_execution_node_t *node = YP_ALLOC_NODE(parser, yp_post_execution_node_t);

    *node = (yp_post_execution_node_t) {
        {
            .type = YP_NODE_POST_EXECUTION_NODE,
            .location = {
                .start = keyword->start,
                .end = closing->end
            }
        },
        .statements = statements,
        .keyword_loc = YP_LOCATION_TOKEN_VALUE(keyword),
        .opening_loc = YP_LOCATION_TOKEN_VALUE(opening),
        .closing_loc = YP_LOCATION_TOKEN_VALUE(closing)
    };

    return node;
}

// Allocate and initialize a new PreExecutionNode node (`BEGIN { statements }`).
static yp_pre_execution_node_t *
yp_pre_execution_node_create(yp_parser_t *parser, const yp_token_t *keyword, const yp_token_t *opening, yp_statements_node_t *statements, const yp_token_t *closing) {
    yp_pre_execution_node_t *node = YP_ALLOC_NODE(parser, yp_pre_execution_node_t);

    *node = (yp_pre_execution_node_t) {
        {
            .type = YP_NODE_PRE_EXECUTION_NODE,
            .location = {
                .start = keyword->start,
                .end = closing->end
            }
        },
        .statements = statements,
        .keyword_loc = YP_LOCATION_TOKEN_VALUE(keyword),
        .opening_loc = YP_LOCATION_TOKEN_VALUE(opening),
        .closing_loc = YP_LOCATION_TOKEN_VALUE(closing)
    };

    return node;
}
// Allocate and initialize new RangeNode node. Either bound may be NULL for a
// beginless (`..2`) or endless (`1..`) range, in which case the operator
// token supplies that side of the location.
static yp_range_node_t *
yp_range_node_create(yp_parser_t *parser, yp_node_t *left, const yp_token_t *operator, yp_node_t *right) {
    yp_range_node_t *node = YP_ALLOC_NODE(parser, yp_range_node_t);

    const char *start = (left == NULL) ? operator->start : left->location.start;
    const char *end = (right == NULL) ? operator->end : right->location.end;

    *node = (yp_range_node_t) {
        {
            .type = YP_NODE_RANGE_NODE,
            .location = { .start = start, .end = end }
        },
        .left = left,
        .right = right,
        .operator_loc = YP_LOCATION_TOKEN_VALUE(operator),
        .flags = 0,
    };

    // A `...` operator (binary or beginless/unary form) excludes the end
    // value from the range.
    if ((operator->type == YP_TOKEN_DOT_DOT_DOT) || (operator->type == YP_TOKEN_UDOT_DOT_DOT)) {
        node->flags |= YP_RANGE_NODE_FLAGS_EXCLUDE_END;
    }

    return node;
}
// Allocate and initialize a new RedoNode node covering the `redo` keyword.
static yp_redo_node_t *
yp_redo_node_create(yp_parser_t *parser, const yp_token_t *token) {
    assert(token->type == YP_TOKEN_KEYWORD_REDO);
    yp_redo_node_t *node = YP_ALLOC_NODE(parser, yp_redo_node_t);

    *node = (yp_redo_node_t) {{ .type = YP_NODE_REDO_NODE, .location = YP_LOCATION_TOKEN_VALUE(token) }};
    return node;
}

// Allocate a new RegularExpressionNode node. The regexp flags (e.g. `i`, `m`)
// are derived from the closing token's trailing characters.
static yp_regular_expression_node_t *
yp_regular_expression_node_create(yp_parser_t *parser, const yp_token_t *opening, const yp_token_t *content, const yp_token_t *closing) {
    yp_regular_expression_node_t *node = YP_ALLOC_NODE(parser, yp_regular_expression_node_t);

    *node = (yp_regular_expression_node_t) {
        {
            .type = YP_NODE_REGULAR_EXPRESSION_NODE,
            .location = {
                .start = opening->start,
                .end = closing->end
            }
        },
        .opening_loc = YP_LOCATION_TOKEN_VALUE(opening),
        .content_loc = YP_LOCATION_TOKEN_VALUE(content),
        .closing_loc = YP_LOCATION_TOKEN_VALUE(closing),
        .flags = yp_regular_expression_flags_create(closing)
    };

    return node;
}

// Allocate a new RequiredDestructuredParameterNode node (a parenthesized
// destructuring in a parameter list, e.g. the `(a, b)` in `def f((a, b))`).
// The closing location is filled in later by _closing_set.
static yp_required_destructured_parameter_node_t *
yp_required_destructured_parameter_node_create(yp_parser_t *parser, const yp_token_t *opening) {
    yp_required_destructured_parameter_node_t *node = YP_ALLOC_NODE(parser, yp_required_destructured_parameter_node_t);

    *node = (yp_required_destructured_parameter_node_t) {
        {
            .type = YP_NODE_REQUIRED_DESTRUCTURED_PARAMETER_NODE,
            .location = YP_LOCATION_TOKEN_VALUE(opening)
        },
        .opening_loc = YP_LOCATION_TOKEN_VALUE(opening),
        .closing_loc = { .start = NULL, .end = NULL },
        .parameters = YP_EMPTY_NODE_LIST
    };

    return node;
}

// Append a new parameter to the given RequiredDestructuredParameterNode node.
static void
yp_required_destructured_parameter_node_append_parameter(yp_required_destructured_parameter_node_t *node, yp_node_t *parameter) {
    yp_node_list_append(&node->parameters, parameter);
}

// Set the closing token of the given RequiredDestructuredParameterNode node
// and extend the node's location to cover it.
static void
yp_required_destructured_parameter_node_closing_set(yp_required_destructured_parameter_node_t *node, const yp_token_t *closing) {
    node->closing_loc = YP_LOCATION_TOKEN_VALUE(closing);
    node->base.location.end = closing->end;
}

// Allocate a new RequiredParameterNode node; the name is interned as a
// constant id for fast lookups.
static yp_required_parameter_node_t *
yp_required_parameter_node_create(yp_parser_t *parser, const yp_token_t *token) {
    yp_required_parameter_node_t *node = YP_ALLOC_NODE(parser, yp_required_parameter_node_t);

    *node = (yp_required_parameter_node_t) {
        {
            .type = YP_NODE_REQUIRED_PARAMETER_NODE,
            .location = YP_LOCATION_TOKEN_VALUE(token)
        },
        .constant_id = yp_parser_constant_id_token(parser, token)
    };

    return node;
}
// Allocate a new RescueModifierNode node (`expression rescue rescue_expression`).
static yp_rescue_modifier_node_t *
yp_rescue_modifier_node_create(yp_parser_t *parser, yp_node_t *expression, const yp_token_t *keyword, yp_node_t *rescue_expression) {
    yp_rescue_modifier_node_t *node = YP_ALLOC_NODE(parser, yp_rescue_modifier_node_t);

    *node = (yp_rescue_modifier_node_t) {
        {
            .type = YP_NODE_RESCUE_MODIFIER_NODE,
            .location = {
                .start = expression->location.start,
                .end = rescue_expression->location.end
            }
        },
        .expression = expression,
        .keyword_loc = YP_LOCATION_TOKEN_VALUE(keyword),
        .rescue_expression = rescue_expression
    };

    return node;
}

// Allocate and initialize a new RescueNode node. It starts as just the
// `rescue` keyword; exceptions, statements, and the consequent extend its
// location via the setters below.
static yp_rescue_node_t *
yp_rescue_node_create(yp_parser_t *parser, const yp_token_t *keyword) {
    yp_rescue_node_t *node = YP_ALLOC_NODE(parser, yp_rescue_node_t);

    *node = (yp_rescue_node_t) {
        {
            .type = YP_NODE_RESCUE_NODE,
            .location = {
                .start = keyword->start,
                .end = keyword->end
            }
        },
        .keyword_loc = YP_LOCATION_TOKEN_VALUE(keyword),
        .operator_loc = YP_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
        .exception = NULL,
        .statements = NULL,
        .consequent = NULL,
        .exceptions = YP_EMPTY_NODE_LIST
    };

    return node;
}

// Set the `=>` operator location on a rescue node.
static inline void
yp_rescue_node_operator_set(yp_rescue_node_t *node, const yp_token_t *operator) {
    node->operator_loc = YP_OPTIONAL_LOCATION_TOKEN_VALUE(operator);
}

// Set the exception of a rescue node, and update the location of the node.
static void
yp_rescue_node_exception_set(yp_rescue_node_t *node, yp_node_t *exception) {
    node->exception = exception;
    node->base.location.end = exception->location.end;
}

// Set the statements of a rescue node, and update the location of the node.
// An empty statements list does not extend the location.
static void
yp_rescue_node_statements_set(yp_rescue_node_t *node, yp_statements_node_t *statements) {
    node->statements = statements;

    if ((statements != NULL) && (statements->body.size > 0)) {
        node->base.location.end = statements->base.location.end;
    }
}

// Set the consequent (the next `rescue` clause) of a rescue node, and update
// the location.
static void
yp_rescue_node_consequent_set(yp_rescue_node_t *node, yp_rescue_node_t *consequent) {
    node->consequent = consequent;
    node->base.location.end = consequent->base.location.end;
}

// Append an exception class node to a rescue node, and update the location.
static void
yp_rescue_node_exceptions_append(yp_rescue_node_t *node, yp_node_t *exception) {
    yp_node_list_append(&node->exceptions, exception);
    node->base.location.end = exception->location.end;
}
// Allocate a new RestParameterNode node (`*name`). The name may be absent
// (an anonymous `*`), in which case the node ends at the operator.
static yp_rest_parameter_node_t *
yp_rest_parameter_node_create(yp_parser_t *parser, const yp_token_t *operator, const yp_token_t *name) {
    yp_rest_parameter_node_t *node = YP_ALLOC_NODE(parser, yp_rest_parameter_node_t);

    *node = (yp_rest_parameter_node_t) {
        {
            .type = YP_NODE_REST_PARAMETER_NODE,
            .location = {
                .start = operator->start,
                .end = (name->type == YP_TOKEN_NOT_PROVIDED ? operator->end : name->end)
            }
        },
        .operator_loc = YP_LOCATION_TOKEN_VALUE(operator),
        .name_loc = YP_OPTIONAL_LOCATION_TOKEN_VALUE(name)
    };

    return node;
}

// Allocate and initialize a new RetryNode node covering the `retry` keyword.
static yp_retry_node_t *
yp_retry_node_create(yp_parser_t *parser, const yp_token_t *token) {
    assert(token->type == YP_TOKEN_KEYWORD_RETRY);
    yp_retry_node_t *node = YP_ALLOC_NODE(parser, yp_retry_node_t);

    *node = (yp_retry_node_t) {{ .type = YP_NODE_RETRY_NODE, .location = YP_LOCATION_TOKEN_VALUE(token) }};
    return node;
}

// Allocate a new ReturnNode node. `arguments` may be NULL for a bare `return`.
static yp_return_node_t *
yp_return_node_create(yp_parser_t *parser, const yp_token_t *keyword, yp_arguments_node_t *arguments) {
    yp_return_node_t *node = YP_ALLOC_NODE(parser, yp_return_node_t);

    *node = (yp_return_node_t) {
        {
            .type = YP_NODE_RETURN_NODE,
            .location = {
                .start = keyword->start,
                .end = (arguments == NULL ? keyword->end : arguments->base.location.end)
            }
        },
        .keyword_loc = YP_LOCATION_TOKEN_VALUE(keyword),
        .arguments = arguments
    };

    return node;
}

// Allocate and initialize a new SelfNode node covering the `self` keyword.
static yp_self_node_t *
yp_self_node_create(yp_parser_t *parser, const yp_token_t *token) {
    assert(token->type == YP_TOKEN_KEYWORD_SELF);
    yp_self_node_t *node = YP_ALLOC_NODE(parser, yp_self_node_t);

    *node = (yp_self_node_t) {{ .type = YP_NODE_SELF_NODE, .location = YP_LOCATION_TOKEN_VALUE(token) }};
    return node;
}
// Allocate a new SingletonClassNode node (`class << expression ... end`).
static yp_singleton_class_node_t *
yp_singleton_class_node_create(yp_parser_t *parser, yp_constant_id_list_t *locals, const yp_token_t *class_keyword, const yp_token_t *operator, yp_node_t *expression, yp_node_t *statements, const yp_token_t *end_keyword) {
    yp_singleton_class_node_t *node = YP_ALLOC_NODE(parser, yp_singleton_class_node_t);

    *node = (yp_singleton_class_node_t) {
        {
            .type = YP_NODE_SINGLETON_CLASS_NODE,
            .location = {
                .start = class_keyword->start,
                .end = end_keyword->end
            }
        },
        // Note: takes a shallow copy of the locals list; the node now owns it.
        .locals = *locals,
        .class_keyword_loc = YP_LOCATION_TOKEN_VALUE(class_keyword),
        .operator_loc = YP_LOCATION_TOKEN_VALUE(operator),
        .expression = expression,
        .statements = statements,
        .end_keyword_loc = YP_LOCATION_TOKEN_VALUE(end_keyword)
    };

    return node;
}

// Allocate and initialize a new SourceEncodingNode node (`__ENCODING__`).
static yp_source_encoding_node_t *
yp_source_encoding_node_create(yp_parser_t *parser, const yp_token_t *token) {
    assert(token->type == YP_TOKEN_KEYWORD___ENCODING__);
    yp_source_encoding_node_t *node = YP_ALLOC_NODE(parser, yp_source_encoding_node_t);

    *node = (yp_source_encoding_node_t) {{ .type = YP_NODE_SOURCE_ENCODING_NODE, .location = YP_LOCATION_TOKEN_VALUE(token) }};
    return node;
}

// Allocate and initialize a new SourceFileNode node (`__FILE__`); it carries
// the parser's filepath string.
static yp_source_file_node_t*
yp_source_file_node_create(yp_parser_t *parser, const yp_token_t *file_keyword) {
    yp_source_file_node_t *node = YP_ALLOC_NODE(parser, yp_source_file_node_t);
    assert(file_keyword->type == YP_TOKEN_KEYWORD___FILE__);

    *node = (yp_source_file_node_t) {
        {
            .type = YP_NODE_SOURCE_FILE_NODE,
            .location = YP_LOCATION_TOKEN_VALUE(file_keyword),
        },
        .filepath = parser->filepath_string,
    };

    return node;
}

// Allocate and initialize a new SourceLineNode node (`__LINE__`).
static yp_source_line_node_t *
yp_source_line_node_create(yp_parser_t *parser, const yp_token_t *token) {
    assert(token->type == YP_TOKEN_KEYWORD___LINE__);
    yp_source_line_node_t *node = YP_ALLOC_NODE(parser, yp_source_line_node_t);

    *node = (yp_source_line_node_t) {{ .type = YP_NODE_SOURCE_LINE_NODE, .location = YP_LOCATION_TOKEN_VALUE(token) }};
    return node;
}

// Allocate a new SplatNode node (`*expression`). The expression may be NULL
// for an anonymous splat, in which case the node ends at the operator.
static yp_splat_node_t *
yp_splat_node_create(yp_parser_t *parser, const yp_token_t *operator, yp_node_t *expression) {
    yp_splat_node_t *node = YP_ALLOC_NODE(parser, yp_splat_node_t);

    *node = (yp_splat_node_t) {
        {
            .type = YP_NODE_SPLAT_NODE,
            .location = {
                .start = operator->start,
                .end = (expression == NULL ? operator->end : expression->location.end)
            }
        },
        .operator_loc = YP_LOCATION_TOKEN_VALUE(operator),
        .expression = expression
    };

    return node;
}
// Allocate and initialize a new, empty StatementsNode node. Its location is
// established by the first appended statement (see _body_append).
static yp_statements_node_t *
yp_statements_node_create(yp_parser_t *parser) {
    yp_statements_node_t *node = YP_ALLOC_NODE(parser, yp_statements_node_t);

    *node = (yp_statements_node_t) {
        {
            .type = YP_NODE_STATEMENTS_NODE,
            .location = YP_LOCATION_NULL_VALUE(parser)
        },
        .body = YP_EMPTY_NODE_LIST
    };

    return node;
}
// Get the length of the given StatementsNode node's body, treating a NULL
// node as empty.
//
// The previous implementation returned `node && node->body.size`, which the
// `&&` operator collapses to 0 or 1 — wrong for any body longer than one
// statement. Callers that only compare the result against 0 behaved
// correctly, but the function now returns the actual count as its name and
// `size_t` return type promise.
static size_t
yp_statements_node_body_length(yp_statements_node_t *node) {
    return node == NULL ? 0 : node->body.size;
}
// Set the location of the given StatementsNode.
static void
yp_statements_node_location_set(yp_statements_node_t *node, const char *start, const char *end) {
    node->base.location = (yp_location_t) { .start = start, .end = end };
}

// Append a new node to the given StatementsNode node's body. The first
// statement establishes the node's start; every append advances its end.
static void
yp_statements_node_body_append(yp_statements_node_t *node, yp_node_t *statement) {
    if (yp_statements_node_body_length(node) == 0) {
        node->base.location.start = statement->location.start;
    }

    yp_node_list_append(&node->body, statement);
    node->base.location.end = statement->location.end;
}
// Allocate a new StringConcatNode node (adjacent string literals, e.g.
// `"a" "b"`).
static yp_string_concat_node_t *
yp_string_concat_node_create(yp_parser_t *parser, yp_node_t *left, yp_node_t *right) {
    yp_string_concat_node_t *node = YP_ALLOC_NODE(parser, yp_string_concat_node_t);

    *node = (yp_string_concat_node_t) {
        {
            .type = YP_NODE_STRING_CONCAT_NODE,
            .location = {
                .start = left->location.start,
                .end = right->location.end
            }
        },
        .left = left,
        .right = right
    };

    return node;
}

// Allocate a new StringNode node. Opening/closing tokens may be
// YP_TOKEN_NOT_PROVIDED (e.g. bare words in a %w list), in which case the
// content token bounds the node. The unescaped string is filled in later.
static yp_string_node_t *
yp_string_node_create(yp_parser_t *parser, const yp_token_t *opening, const yp_token_t *content, const yp_token_t *closing) {
    yp_string_node_t *node = YP_ALLOC_NODE(parser, yp_string_node_t);

    *node = (yp_string_node_t) {
        {
            .type = YP_NODE_STRING_NODE,
            .location = {
                .start = (opening->type == YP_TOKEN_NOT_PROVIDED ? content->start : opening->start),
                .end = (closing->type == YP_TOKEN_NOT_PROVIDED ? content->end : closing->end)
            }
        },
        .opening_loc = YP_OPTIONAL_LOCATION_TOKEN_VALUE(opening),
        .content_loc = YP_LOCATION_TOKEN_VALUE(content),
        .closing_loc = YP_OPTIONAL_LOCATION_TOKEN_VALUE(closing),
        .unescaped = YP_EMPTY_STRING
    };

    return node;
}
// Allocate and initialize a new SuperNode node (`super` with explicit
// arguments; a bare `super` is a different node type). The end of the node is
// the first of: the block, the closing paren, or the last argument — a bare
// `super` with no arguments, parens, or block never reaches this function.
static yp_super_node_t *
yp_super_node_create(yp_parser_t *parser, const yp_token_t *keyword, yp_arguments_t *arguments) {
    assert(keyword->type == YP_TOKEN_KEYWORD_SUPER);
    yp_super_node_t *node = YP_ALLOC_NODE(parser, yp_super_node_t);

    const char *end;
    if (arguments->block != NULL) {
        end = arguments->block->base.location.end;
    } else if (arguments->closing_loc.start != NULL) {
        end = arguments->closing_loc.end;
    } else if (arguments->arguments != NULL) {
        end = arguments->arguments->base.location.end;
    } else {
        assert(false && "unreachable");
        end = NULL; // keep the node well-formed when asserts are compiled out
    }

    *node = (yp_super_node_t) {
        {
            .type = YP_NODE_SUPER_NODE,
            .location = {
                .start = keyword->start,
                .end = end,
            }
        },
        .keyword_loc = YP_LOCATION_TOKEN_VALUE(keyword),
        .lparen_loc = arguments->opening_loc,
        .arguments = arguments->arguments,
        .rparen_loc = arguments->closing_loc,
        .block = arguments->block
    };

    return node;
}
// Allocate a new SymbolNode node. Opening/closing tokens may be
// YP_TOKEN_NOT_PROVIDED; the unescaped string is filled in separately.
static yp_symbol_node_t *
yp_symbol_node_create(yp_parser_t *parser, const yp_token_t *opening, const yp_token_t *value, const yp_token_t *closing) {
    yp_symbol_node_t *node = YP_ALLOC_NODE(parser, yp_symbol_node_t);

    *node = (yp_symbol_node_t) {
        {
            .type = YP_NODE_SYMBOL_NODE,
            .location = {
                .start = (opening->type == YP_TOKEN_NOT_PROVIDED ? value->start : opening->start),
                .end = (closing->type == YP_TOKEN_NOT_PROVIDED ? value->end : closing->end)
            }
        },
        .opening_loc = YP_OPTIONAL_LOCATION_TOKEN_VALUE(opening),
        .value_loc = YP_LOCATION_TOKEN_VALUE(value),
        .closing_loc = YP_OPTIONAL_LOCATION_TOKEN_VALUE(closing),
        .unescaped = YP_EMPTY_STRING
    };

    return node;
}

// Allocate and initialize a new SymbolNode node from a label (`name:` in a
// hash or keyword argument). The trailing `:` becomes the closing location
// and the name before it becomes the value.
static yp_symbol_node_t *
yp_symbol_node_label_create(yp_parser_t *parser, const yp_token_t *token) {
    yp_symbol_node_t *node;

    switch (token->type) {
        case YP_TOKEN_LABEL: {
            yp_token_t opening = not_provided(parser);
            // Split the label token: everything but the final `:` is the
            // value, the `:` itself serves as the closing.
            yp_token_t closing = { .type = YP_TOKEN_LABEL_END, .start = token->end - 1, .end = token->end };

            yp_token_t label = { .type = YP_TOKEN_LABEL, .start = token->start, .end = token->end - 1 };
            node = yp_symbol_node_create(parser, &opening, &label, &closing);

            ptrdiff_t length = label.end - label.start;
            assert(length >= 0);

            yp_unescape_manipulate_string(parser, label.start, (size_t) length, &node->unescaped, YP_UNESCAPE_ALL, &parser->error_list);
            break;
        }
        case YP_TOKEN_MISSING: {
            // A missing token (error recovery): build an empty-ish symbol so
            // parsing can continue; no unescaping is performed.
            yp_token_t opening = not_provided(parser);
            yp_token_t closing = not_provided(parser);

            yp_token_t label = { .type = YP_TOKEN_LABEL, .start = token->start, .end = token->end };
            node = yp_symbol_node_create(parser, &opening, &label, &closing);
            break;
        }
        default:
            assert(false && "unreachable");
            node = NULL;
            break;
    }

    return node;
}
// Check if the given node is a label in a hash: a (possibly interpolated)
// symbol whose closing delimiter ends with `:`.
static bool
yp_symbol_node_label_p(yp_node_t *node) {
    const char *closing_end = NULL;

    if (node->type == YP_NODE_SYMBOL_NODE) {
        closing_end = ((yp_symbol_node_t *) node)->closing_loc.end;
    } else if (node->type == YP_NODE_INTERPOLATED_SYMBOL_NODE) {
        closing_end = ((yp_interpolated_symbol_node_t *) node)->closing_loc.end;
    } else {
        // Any other node type cannot be a label.
        return false;
    }

    return (closing_end != NULL) && (closing_end[-1] == ':');
}
// Convert the given SymbolNode node to a StringNode node, transferring
// ownership of the unescaped string. The symbol node itself is freed.
static yp_string_node_t *
yp_symbol_node_to_string_node(yp_parser_t *parser, yp_symbol_node_t *node) {
    yp_string_node_t *new_node = YP_ALLOC_NODE(parser, yp_string_node_t);

    *new_node = (yp_string_node_t) {
        {
            .type = YP_NODE_STRING_NODE,
            .location = node->base.location
        },
        .opening_loc = node->opening_loc,
        .content_loc = node->value_loc,
        .closing_loc = node->closing_loc,
        .unescaped = node->unescaped
    };

    // We are explicitly _not_ using yp_node_destroy here because we don't want
    // to trash the unescaped string. We could instead copy the string if we
    // know that it is owned, but we're taking the fast path for now.
    free(node);

    return new_node;
}
// Allocate and initialize a new TrueNode node covering the `true` keyword.
static yp_true_node_t *
yp_true_node_create(yp_parser_t *parser, const yp_token_t *token) {
    assert(token->type == YP_TOKEN_KEYWORD_TRUE);
    yp_true_node_t *node = YP_ALLOC_NODE(parser, yp_true_node_t);

    *node = (yp_true_node_t) {{ .type = YP_NODE_TRUE_NODE, .location = YP_LOCATION_TOKEN_VALUE(token) }};
    return node;
}

// Allocate and initialize a new UndefNode node. It starts as just the `undef`
// keyword; names appended later extend the location.
static yp_undef_node_t *
yp_undef_node_create(yp_parser_t *parser, const yp_token_t *token) {
    assert(token->type == YP_TOKEN_KEYWORD_UNDEF);
    yp_undef_node_t *node = YP_ALLOC_NODE(parser, yp_undef_node_t);

    *node = (yp_undef_node_t) {
        {
            .type = YP_NODE_UNDEF_NODE,
            .location = YP_LOCATION_TOKEN_VALUE(token),
        },
        .keyword_loc = YP_LOCATION_TOKEN_VALUE(token),
        .names = YP_EMPTY_NODE_LIST
    };

    return node;
}

// Append a method name to an undef node, extending its location.
static void
yp_undef_node_append(yp_undef_node_t *node, yp_node_t *name) {
    node->base.location.end = name->location.end;
    yp_node_list_append(&node->names, name);
}
// Allocate a new UnlessNode node in its keyword form
// (`unless predicate ... end`). `statements` may be NULL, in which case the
// node provisionally ends at the predicate (the `end` keyword is attached
// later via yp_unless_node_end_keyword_loc_set).
static yp_unless_node_t *
yp_unless_node_create(yp_parser_t *parser, const yp_token_t *keyword, yp_node_t *predicate, yp_statements_node_t *statements) {
    yp_unless_node_t *node = YP_ALLOC_NODE(parser, yp_unless_node_t);

    const char *end;
    if (statements != NULL) {
        end = statements->base.location.end;
    } else {
        end = predicate->location.end;
    }

    *node = (yp_unless_node_t) {
        {
            .type = YP_NODE_UNLESS_NODE,
            .location = {
                .start = keyword->start,
                .end = end
            },
        },
        .keyword_loc = YP_LOCATION_TOKEN_VALUE(keyword),
        .predicate = predicate,
        .statements = statements,
        .consequent = NULL,
        .end_keyword_loc = YP_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE
    };

    return node;
}

// Allocate and initialize new UnlessNode node in the modifier form
// (`statement unless predicate`). The statement is wrapped in a fresh
// StatementsNode so both forms share the same shape.
static yp_unless_node_t *
yp_unless_node_modifier_create(yp_parser_t *parser, yp_node_t *statement, const yp_token_t *unless_keyword, yp_node_t *predicate) {
    yp_unless_node_t *node = YP_ALLOC_NODE(parser, yp_unless_node_t);

    yp_statements_node_t *statements = yp_statements_node_create(parser);
    yp_statements_node_body_append(statements, statement);

    *node = (yp_unless_node_t) {
        {
            .type = YP_NODE_UNLESS_NODE,
            .location = {
                .start = statement->location.start,
                .end = predicate->location.end
            },
        },
        .keyword_loc = YP_LOCATION_TOKEN_VALUE(unless_keyword),
        .predicate = predicate,
        .statements = statements,
        .consequent = NULL,
        .end_keyword_loc = YP_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE
    };

    return node;
}

// Attach the `end` keyword to an unless node and extend its location.
static inline void
yp_unless_node_end_keyword_loc_set(yp_unless_node_t *node, const yp_token_t *end_keyword) {
    node->end_keyword_loc = YP_LOCATION_TOKEN_VALUE(end_keyword);
    node->base.location.end = end_keyword->end;
}
// Allocate a new UntilNode node. In the modifier form
// (`begin ... end until cond`) the statements physically precede the keyword
// and the predicate, so both bounds are chosen as the outermost of the
// keyword/predicate and the (non-empty) statements.
static yp_until_node_t *
yp_until_node_create(yp_parser_t *parser, const yp_token_t *keyword, yp_node_t *predicate, yp_statements_node_t *statements) {
    yp_until_node_t *node = YP_ALLOC_NODE(parser, yp_until_node_t);
    bool has_statements = (statements != NULL) && (statements->body.size != 0);

    const char *start = keyword->start;
    if (has_statements && (keyword->start > statements->base.location.start)) {
        start = statements->base.location.start;
    }

    const char *end = predicate->location.end;
    if (has_statements && (predicate->location.end < statements->base.location.end)) {
        end = statements->base.location.end;
    }

    *node = (yp_until_node_t) {
        {
            .type = YP_NODE_UNTIL_NODE,
            .location = {
                .start = start,
                .end = end,
            },
        },
        .keyword_loc = YP_LOCATION_TOKEN_VALUE(keyword),
        .predicate = predicate,
        .statements = statements
    };

    return node;
}
// Allocate and initialize a new WhenNode node. The end of the location is
// filled in as conditions and statements are attached.
static yp_when_node_t *
yp_when_node_create(yp_parser_t *parser, const yp_token_t *keyword) {
    yp_when_node_t *node = YP_ALLOC_NODE(parser, yp_when_node_t);

    *node = (yp_when_node_t) {
        {
            .type = YP_NODE_WHEN_NODE,
            .location = {
                .start = keyword->start,
                .end = NULL
            }
        },
        .keyword_loc = YP_LOCATION_TOKEN_VALUE(keyword),
        .statements = NULL,
        .conditions = YP_EMPTY_NODE_LIST
    };

    return node;
}

// Append a new condition to a when node, extending its location.
static void
yp_when_node_conditions_append(yp_when_node_t *node, yp_node_t *condition) {
    node->base.location.end = condition->location.end;
    yp_node_list_append(&node->conditions, condition);
}

// Set the statements list of a when node, extending the location only if the
// statements reach past the current end.
static void
yp_when_node_statements_set(yp_when_node_t *node, yp_statements_node_t *statements) {
    if (statements->base.location.end > node->base.location.end) {
        node->base.location.end = statements->base.location.end;
    }

    node->statements = statements;
}
// Allocate a new WhileNode node. In the modifier form
// (`begin ... end while cond`) the statements physically precede the keyword
// and the predicate, so both bounds are chosen as the outermost of the
// keyword/predicate and the (non-empty) statements.
static yp_while_node_t *
yp_while_node_create(yp_parser_t *parser, const yp_token_t *keyword, yp_node_t *predicate, yp_statements_node_t *statements) {
    yp_while_node_t *node = YP_ALLOC_NODE(parser, yp_while_node_t);
    bool has_statements = (statements != NULL) && (statements->body.size != 0);

    const char *start = keyword->start;
    if (has_statements && (keyword->start > statements->base.location.start)) {
        start = statements->base.location.start;
    }

    const char *end = predicate->location.end;
    if (has_statements && (predicate->location.end < statements->base.location.end)) {
        end = statements->base.location.end;
    }

    *node = (yp_while_node_t) {
        {
            .type = YP_NODE_WHILE_NODE,
            .location = {
                .start = start,
                .end = end,
            },
        },
        .keyword_loc = YP_LOCATION_TOKEN_VALUE(keyword),
        .predicate = predicate,
        .statements = statements
    };

    return node;
}
// Allocate and initialize a new XStringNode node (a backtick command string,
// e.g. `` `ls` ``). The unescaped string is filled in later.
static yp_x_string_node_t *
yp_xstring_node_create(yp_parser_t *parser, const yp_token_t *opening, const yp_token_t *content, const yp_token_t *closing) {
    yp_x_string_node_t *node = YP_ALLOC_NODE(parser, yp_x_string_node_t);

    *node = (yp_x_string_node_t) {
        {
            .type = YP_NODE_X_STRING_NODE,
            .location = {
                .start = opening->start,
                .end = closing->end
            },
        },
        .opening_loc = YP_LOCATION_TOKEN_VALUE(opening),
        .content_loc = YP_LOCATION_TOKEN_VALUE(content),
        .closing_loc = YP_LOCATION_TOKEN_VALUE(closing),
        .unescaped = YP_EMPTY_STRING
    };

    return node;
}
// Allocate a new YieldNode node. The node ends at the first present piece,
// checked from the outside in: closing paren, then arguments, then opening
// paren (covers `yield(` with a missing `)`), then the keyword itself.
static yp_yield_node_t *
yp_yield_node_create(yp_parser_t *parser, const yp_token_t *keyword, const yp_location_t *lparen_loc, yp_arguments_node_t *arguments, const yp_location_t *rparen_loc) {
    yp_yield_node_t *node = YP_ALLOC_NODE(parser, yp_yield_node_t);

    const char *end =
        (rparen_loc->start != NULL) ? rparen_loc->end :
        (arguments != NULL) ? arguments->base.location.end :
        (lparen_loc->start != NULL) ? lparen_loc->end :
        keyword->end;

    *node = (yp_yield_node_t) {
        {
            .type = YP_NODE_YIELD_NODE,
            .location = {
                .start = keyword->start,
                .end = end
            },
        },
        .keyword_loc = YP_LOCATION_TOKEN_VALUE(keyword),
        .lparen_loc = *lparen_loc,
        .arguments = arguments,
        .rparen_loc = *rparen_loc
    };

    return node;
}
// The node constructors above are the only users of these convenience macros,
// so undefine them here to keep them from leaking into the rest of the file.
#undef YP_EMPTY_STRING
#undef YP_LOCATION_NULL_VALUE
#undef YP_LOCATION_TOKEN_VALUE
#undef YP_LOCATION_NODE_VALUE
#undef YP_LOCATION_NODE_BASE_VALUE
#undef YP_TOKEN_NOT_PROVIDED_VALUE
#undef YP_ALLOC_NODE
/******************************************************************************/
/* Scope-related functions */
/******************************************************************************/
// Allocate and initialize a new scope, then push it onto the scope stack.
// Returns false if the allocation fails, true otherwise. A "closed" scope is
// one that does not allow lookups to continue into its parent.
static bool
yp_parser_scope_push(yp_parser_t *parser, bool closed) {
    yp_scope_t *scope = (yp_scope_t *) malloc(sizeof(yp_scope_t));
    if (scope == NULL) return false;

    scope->closed = closed;
    scope->previous = parser->current_scope;
    yp_constant_id_list_init(&scope->locals);

    parser->current_scope = scope;
    return true;
}
// Returns the depth at which a local variable matching the given token is
// found on the scope stack, or -1 if it is not visible. The search stops at
// the first closed scope.
static int
yp_parser_local_depth(yp_parser_t *parser, yp_token_t *token) {
    yp_constant_id_t constant_id = yp_parser_constant_id_token(parser, token);
    int depth = 0;

    for (yp_scope_t *scope = parser->current_scope; scope != NULL; scope = scope->previous) {
        if (yp_constant_id_list_includes(&scope->locals, constant_id)) return depth;
        if (scope->closed) break;
        depth++;
    }

    return -1;
}
// Add a local variable named by the [start, end) source range to the current
// scope, unless it is already present.
static void
yp_parser_local_add_location(yp_parser_t *parser, const char *start, const char *end) {
    yp_constant_id_t constant_id = yp_parser_constant_id_location(parser, start, end);
    yp_constant_id_list_t *locals = &parser->current_scope->locals;

    if (yp_constant_id_list_includes(locals, constant_id)) return;
    yp_constant_id_list_append(locals, constant_id);
}
// Add a local variable from a token to the current scope. Thin convenience
// wrapper around yp_parser_local_add_location using the token's bounds.
static inline void
yp_parser_local_add_token(yp_parser_t *parser, yp_token_t *token) {
    yp_parser_local_add_location(parser, token->start, token->end);
}
// Check whether the name of the given parameter token is unique within the
// current scope, appending a "Duplicated parameter name." diagnostic if it is
// not. Note that despite the wording of the comment below, this function only
// checks the name — it does not itself add it to the scope.
static void
yp_parser_parameter_name_check(yp_parser_t *parser, yp_token_t *name) {
    // We want to ignore any parameter name that starts with an underscore.
    if ((*name->start == '_')) return;

    // Otherwise we'll fetch the constant id for the parameter name and check
    // whether it's already in the current scope.
    yp_constant_id_t constant_id = yp_parser_constant_id_token(parser, name);

    if (yp_constant_id_list_includes(&parser->current_scope->locals, constant_id)) {
        yp_diagnostic_list_append(&parser->error_list, name->start, name->end, "Duplicated parameter name.");
    }
}
// Pop the current scope off the scope stack and release its memory.
static void
yp_parser_scope_pop(yp_parser_t *parser) {
    yp_scope_t *popped = parser->current_scope;
    parser->current_scope = popped->previous;
    free(popped);
}
/******************************************************************************/
/* Basic character checks                                                     */
/******************************************************************************/

// This function is used extremely frequently to lex all of the identifiers in
// a source file, so it's important that it be as fast as possible. When the
// encoding has not been changed from the default we can use the UTF-8 table
// directly instead of going through the encoding function pointer. Returns 1
// when the character can start an identifier and 0 otherwise.
static inline size_t
char_is_identifier_start(yp_parser_t *parser, const char *c) {
    const unsigned char uc = (unsigned char) *c;

    // Underscores and all non-ASCII bytes always count as identifier starts.
    if (uc == '_' || uc >= 0x80) return 1;

    if (parser->encoding_changed) {
        return parser->encoding.alpha_char(c) ? 1 : 0;
    }

    // Default (UTF-8) path: uc < 0x80 here, so the table lookup suffices.
    return (yp_encoding_unicode_table[uc] & YP_ENCODING_ALPHABETIC_BIT) ? 1 : 0;
}
// Like the above, this function is also used extremely frequently to lex all
// of the identifiers in a source file once the first character has been
// found. Returns 1 when the character can continue an identifier and 0
// otherwise.
static inline size_t
char_is_identifier(yp_parser_t *parser, const char *c) {
    const unsigned char uc = (unsigned char) *c;

    // Underscores and all non-ASCII bytes always continue an identifier.
    if (uc == '_' || uc >= 0x80) return 1;

    if (parser->encoding_changed) {
        return parser->encoding.alnum_char(c) ? 1 : 0;
    }

    // Default (UTF-8) path: uc < 0x80 here, so the table lookup suffices.
    return (yp_encoding_unicode_table[uc] & YP_ENCODING_ALPHANUMERIC_BIT) ? 1 : 0;
}
// Here we're defining a perfect hash for the characters that are allowed in
// global names. This is used to quickly check the next character after a $ to
// see if it's a valid character for a global name.
//
// BIT(c, idx) produces a single-bit mask for character c when c falls into
// the 32-character bucket numbered idx (bucket 0 starts just past 0x20), and
// 0 otherwise. PUNCT(idx) ORs together the masks of every punctuation
// character that is valid after a $, yielding one 32-bit word of the table.
#define BIT(c, idx) (((c) / 32 - 1 == idx) ? (1U << ((c) % 32)) : 0)
#define PUNCT(idx) ( \
        BIT('~', idx) | BIT('*', idx) | BIT('$', idx) | BIT('?', idx) | \
        BIT('!', idx) | BIT('@', idx) | BIT('/', idx) | BIT('\\', idx) | \
        BIT(';', idx) | BIT(',', idx) | BIT('.', idx) | BIT('=', idx) | \
        BIT(':', idx) | BIT('<', idx) | BIT('>', idx) | BIT('\"', idx) | \
        BIT('&', idx) | BIT('`', idx) | BIT('\'', idx) | BIT('+', idx) | \
        BIT('0', idx))

// Three 32-bit words covering the printable ASCII range 0x20..0x7e.
const unsigned int yp_global_name_punctuation_hash[(0x7e - 0x20 + 31) / 32] = { PUNCT(0), PUNCT(1), PUNCT(2) };

#undef BIT
#undef PUNCT
// Returns true if the given character is a punctuation character that may
// directly follow a $ to form a global variable name (e.g. $~, $!, $0).
static inline bool
char_is_global_name_punctuation(const char c) {
    const unsigned int codepoint = (const unsigned int) c;
    if ((codepoint <= 0x20) || (codepoint > 0x7e)) return false;

    const unsigned int word = yp_global_name_punctuation_hash[(codepoint - 0x20) / 32];
    return (word >> (codepoint % 32)) & 1;
}
// Returns true when the [start, end) range spells a numbered parameter: an
// underscore followed by a single non-zero decimal digit (_1 through _9).
static inline bool
token_is_numbered_parameter(const char *start, const char *end) {
    if (end - start != 2) return false;
    if (start[0] != '_') return false;
    return (start[1] != '0') && yp_char_is_decimal_digit(start[1]);
}
// Returns true when the token is a setter name: an identifier of at least two
// characters whose final character is '='.
static inline bool
token_is_setter_name(yp_token_t *token) {
    if (token->type != YP_TOKEN_IDENTIFIER) return false;
    if (token->end - token->start < 2) return false;
    return token->end[-1] == '=';
}
/******************************************************************************/
/* Stack helpers */
/******************************************************************************/

// Push onto the stack that tracks whether the current call site can accept a
// block argument.
static inline void
yp_accepts_block_stack_push(yp_parser_t *parser, bool value) {
    // Use the negation of the value to prevent stack overflow.
    // NOTE(review): presumably bits shifted off a full yp_state_stack read
    // back as 0, which the negation turns into "accepts a block" — confirm
    // against the yp_state_stack implementation.
    yp_state_stack_push(&parser->accepts_block_stack, !value);
}

// Pop the top entry off the accepts-block stack.
static inline void
yp_accepts_block_stack_pop(yp_parser_t *parser) {
    yp_state_stack_pop(&parser->accepts_block_stack);
}

// Query the top of the accepts-block stack, undoing the negation applied in
// yp_accepts_block_stack_push.
static inline bool
yp_accepts_block_stack_p(yp_parser_t *parser) {
    return !yp_state_stack_p(&parser->accepts_block_stack);
}

// Push onto the stack that tracks whether a `do` keyword at the current point
// belongs to a loop construct (see YP_TOKEN_KEYWORD_DO_LOOP in
// lex_identifier).
static inline void
yp_do_loop_stack_push(yp_parser_t *parser, bool value) {
    yp_state_stack_push(&parser->do_loop_stack, value);
}

// Pop the top entry off the do-loop stack.
static inline void
yp_do_loop_stack_pop(yp_parser_t *parser) {
    yp_state_stack_pop(&parser->do_loop_stack);
}

// Query the top of the do-loop stack.
static inline bool
yp_do_loop_stack_p(yp_parser_t *parser) {
    return yp_state_stack_p(&parser->do_loop_stack);
}
/******************************************************************************/
/* Lexer check helpers */
/******************************************************************************/

// Get the character in the source at parser->current.end plus the given
// offset. If that position is beyond the end of the source then return '\0'.
static inline char
peek_at(yp_parser_t *parser, size_t offset) {
    const char *position = parser->current.end + offset;
    return (position < parser->end) ? *position : '\0';
}
// Get the next character in the source starting from parser->current.end. If
// that position is beyond the end of the source then return '\0'.
static inline char
peek(yp_parser_t *parser) {
    return (parser->current.end < parser->end) ? *parser->current.end : '\0';
}
// Get the next string of length len in the source starting from
// parser->current.end. If the string would extend beyond the end of the
// source, return the empty string "" instead.
static inline const char*
peek_string(yp_parser_t *parser, size_t len) {
    return (parser->current.end + len <= parser->end) ? parser->current.end : "";
}
// If the character to be read matches the given value, advance the current
// pointer past it and return true. Otherwise leave the cursor alone and
// return false.
static inline bool
match(yp_parser_t *parser, char value) {
    if (peek(parser) != value) return false;
    parser->current.end++;
    return true;
}
// Return a pointer to the next '\n' within the first `length` bytes starting
// at `cursor`, or NULL if there is none.
static inline const char *
next_newline(const char *cursor, ptrdiff_t length) {
    assert(length >= 0);

    // memchr is safe here because none of the encodings that we support use
    // '\n' as a trailing byte of a multi-byte character.
    const char *result = memchr(cursor, '\n', (size_t) length);
    return result;
}
// Find the start of the encoding comment. This is effectively an inlined
// version of strnstr with some modifications. Returns a pointer just past the
// "coding:" or "coding=" key, or NULL if no key is found in the window.
static inline const char *
parser_lex_encoding_comment_start(yp_parser_t *parser, const char *cursor, ptrdiff_t remaining) {
    assert(remaining >= 0);
    size_t length = (size_t) remaining;

    // "coding:" and "coding=" are the same length, so one key length covers
    // both forms.
    size_t key_length = strlen("coding:");
    if (key_length > length) return NULL;

    // The last position at which a full key could still fit in the window.
    const char *cursor_limit = cursor + length - key_length + 1;
    while ((cursor = yp_memchr(parser, cursor, 'c', (size_t) (cursor_limit - cursor))) != NULL) {
        // Compare the six characters of "coding", then accept either ':' or
        // '=' as the terminating character.
        if (
            (strncmp(cursor, "coding", key_length - 1) == 0) &&
            (cursor[key_length - 1] == ':' || cursor[key_length - 1] == '=')
        ) {
            return cursor + key_length;
        }

        cursor++;
    }

    return NULL;
}
// Here we're going to check if this is a "magic" comment, and perform whatever
// actions are necessary for it here. On entry parser->current points at the
// comment token (starting with '#'); on a successful match parser->encoding
// is replaced and the relevant callbacks are invoked.
static void
parser_lex_encoding_comment(yp_parser_t *parser) {
    // The comment body runs from just past the '#' to the end of the line (or
    // the end of the source if there is no trailing newline).
    const char *start = parser->current.start + 1;
    const char *end = next_newline(start, parser->end - start);
    if (end == NULL) end = parser->end;

    // These are the patterns we're going to match to find the encoding comment.
    // This is definitely not complete or even really correct.
    const char *encoding_start = parser_lex_encoding_comment_start(parser, start, end - start);

    // If we didn't find anything that matched our patterns, then return. Note
    // that this does a _very_ poor job of actually finding the encoding, and
    // there is a lot of work to do here to better reflect actual magic comment
    // parsing from CRuby, but this at least gets us part of the way there.
    if (encoding_start == NULL) return;

    // Skip any non-newline whitespace after the "coding:" or "coding=".
    encoding_start += yp_strspn_inline_whitespace(encoding_start, end - encoding_start);

    // Now determine the end of the encoding string. This is either the end of
    // the line, the first whitespace character, or a punctuation mark.
    const char *encoding_end = yp_strpbrk(parser, encoding_start, " \t\f\r\v\n;,", end - encoding_start);
    encoding_end = encoding_end == NULL ? end : encoding_end;

    // Finally, we can determine the width of the encoding string.
    size_t width = (size_t) (encoding_end - encoding_start);

    // First, we're going to call out to a user-defined callback if one was
    // provided. If they return an encoding struct that we can use, then we'll
    // use that here.
    if (parser->encoding_decode_callback != NULL) {
        yp_encoding_t *encoding = parser->encoding_decode_callback(parser, encoding_start, width);

        if (encoding != NULL) {
            parser->encoding = *encoding;
            return;
        }
    }

    // Next, we're going to check for UTF-8. This is the most common encoding.
    // Extensions like utf-8 can contain extra encoding details like,
    // utf-8-dos, utf-8-linux, utf-8-mac. We treat these all as utf-8 should
    // treat any encoding starting utf-8 as utf-8.
    if ((encoding_start + 5 <= parser->end) && (yp_strncasecmp(encoding_start, "utf-8", 5) == 0)) {
        // We don't need to do anything here because the default encoding is
        // already UTF-8. We'll just return.
        return;
    }

    // Next, we're going to loop through each of the encodings that we handle
    // explicitly. If we found one that we understand, we'll use that value.
    // Note that the macro sets the encoding, flags the parser as having a
    // changed encoding, fires the changed callback, and returns on a match.
#define ENCODING(value, prebuilt) \
    if (width == sizeof(value) - 1 && encoding_start + width <= parser->end && yp_strncasecmp(encoding_start, value, width) == 0) { \
        parser->encoding = prebuilt; \
        parser->encoding_changed |= true; \
        if (parser->encoding_changed_callback != NULL) parser->encoding_changed_callback(parser); \
        return; \
    }

    // Check most common first. (This is pretty arbitrary.)
    ENCODING("ascii", yp_encoding_ascii);
    ENCODING("ascii-8bit", yp_encoding_ascii_8bit);
    ENCODING("us-ascii", yp_encoding_ascii);
    ENCODING("binary", yp_encoding_ascii_8bit);
    ENCODING("shift_jis", yp_encoding_shift_jis);
    ENCODING("euc-jp", yp_encoding_euc_jp);

    // Then check all the others.
    ENCODING("big5", yp_encoding_big5);
    ENCODING("gbk", yp_encoding_gbk);
    ENCODING("iso-8859-1", yp_encoding_iso_8859_1);
    ENCODING("iso-8859-2", yp_encoding_iso_8859_2);
    ENCODING("iso-8859-3", yp_encoding_iso_8859_3);
    ENCODING("iso-8859-4", yp_encoding_iso_8859_4);
    ENCODING("iso-8859-5", yp_encoding_iso_8859_5);
    ENCODING("iso-8859-6", yp_encoding_iso_8859_6);
    ENCODING("iso-8859-7", yp_encoding_iso_8859_7);
    ENCODING("iso-8859-8", yp_encoding_iso_8859_8);
    ENCODING("iso-8859-9", yp_encoding_iso_8859_9);
    ENCODING("iso-8859-10", yp_encoding_iso_8859_10);
    ENCODING("iso-8859-11", yp_encoding_iso_8859_11);
    ENCODING("iso-8859-13", yp_encoding_iso_8859_13);
    ENCODING("iso-8859-14", yp_encoding_iso_8859_14);
    ENCODING("iso-8859-15", yp_encoding_iso_8859_15);
    ENCODING("iso-8859-16", yp_encoding_iso_8859_16);
    ENCODING("koi8-r", yp_encoding_koi8_r);
    ENCODING("windows-31j", yp_encoding_windows_31j);
    ENCODING("windows-1251", yp_encoding_windows_1251);
    ENCODING("windows-1252", yp_encoding_windows_1252);
    ENCODING("cp1251", yp_encoding_windows_1251);
    ENCODING("cp1252", yp_encoding_windows_1252);
    ENCODING("cp932", yp_encoding_windows_31j);
    ENCODING("sjis", yp_encoding_windows_31j);

#undef ENCODING

    // If nothing was returned by this point, then we've got an issue because we
    // didn't understand the encoding that the user was trying to use. In this
    // case we'll keep using the default encoding but add an error to the
    // parser to indicate an unsuccessful parse.
    yp_diagnostic_list_append(&parser->error_list, encoding_start, encoding_end, "Could not understand the encoding specified in the magic comment.");
}
/******************************************************************************/
/* Context manipulations */
/******************************************************************************/

// Returns true if the given token type is one that terminates the given
// parsing context (i.e., a token at which the parser should stop parsing the
// body of that context).
static bool
context_terminator(yp_context_t context, yp_token_t *token) {
    switch (context) {
        case YP_CONTEXT_MAIN:
        case YP_CONTEXT_DEF_PARAMS:
            return token->type == YP_TOKEN_EOF;
        case YP_CONTEXT_DEFAULT_PARAMS:
            return token->type == YP_TOKEN_COMMA || token->type == YP_TOKEN_PARENTHESIS_RIGHT;
        case YP_CONTEXT_PREEXE:
        case YP_CONTEXT_POSTEXE:
            return token->type == YP_TOKEN_BRACE_RIGHT;
        case YP_CONTEXT_MODULE:
        case YP_CONTEXT_CLASS:
        case YP_CONTEXT_SCLASS:
        case YP_CONTEXT_LAMBDA_DO_END:
        case YP_CONTEXT_DEF:
        case YP_CONTEXT_BLOCK_KEYWORDS:
            return token->type == YP_TOKEN_KEYWORD_END || token->type == YP_TOKEN_KEYWORD_RESCUE || token->type == YP_TOKEN_KEYWORD_ENSURE;
        case YP_CONTEXT_WHILE:
        case YP_CONTEXT_UNTIL:
        case YP_CONTEXT_ELSE:
        case YP_CONTEXT_FOR:
        case YP_CONTEXT_ENSURE:
            return token->type == YP_TOKEN_KEYWORD_END;
        case YP_CONTEXT_CASE_WHEN:
            return token->type == YP_TOKEN_KEYWORD_WHEN || token->type == YP_TOKEN_KEYWORD_END || token->type == YP_TOKEN_KEYWORD_ELSE;
        case YP_CONTEXT_CASE_IN:
            return token->type == YP_TOKEN_KEYWORD_IN || token->type == YP_TOKEN_KEYWORD_END || token->type == YP_TOKEN_KEYWORD_ELSE;
        case YP_CONTEXT_IF:
        case YP_CONTEXT_ELSIF:
            return token->type == YP_TOKEN_KEYWORD_ELSE || token->type == YP_TOKEN_KEYWORD_ELSIF || token->type == YP_TOKEN_KEYWORD_END;
        case YP_CONTEXT_UNLESS:
            return token->type == YP_TOKEN_KEYWORD_ELSE || token->type == YP_TOKEN_KEYWORD_END;
        case YP_CONTEXT_EMBEXPR:
            return token->type == YP_TOKEN_EMBEXPR_END;
        case YP_CONTEXT_BLOCK_BRACES:
            return token->type == YP_TOKEN_BRACE_RIGHT;
        case YP_CONTEXT_PARENS:
            return token->type == YP_TOKEN_PARENTHESIS_RIGHT;
        case YP_CONTEXT_BEGIN:
        case YP_CONTEXT_RESCUE:
            return token->type == YP_TOKEN_KEYWORD_ENSURE || token->type == YP_TOKEN_KEYWORD_RESCUE || token->type == YP_TOKEN_KEYWORD_ELSE || token->type == YP_TOKEN_KEYWORD_END;
        case YP_CONTEXT_RESCUE_ELSE:
            return token->type == YP_TOKEN_KEYWORD_ENSURE || token->type == YP_TOKEN_KEYWORD_END;
        case YP_CONTEXT_LAMBDA_BRACES:
            return token->type == YP_TOKEN_BRACE_RIGHT;
        case YP_CONTEXT_PREDICATE:
            return token->type == YP_TOKEN_KEYWORD_THEN || token->type == YP_TOKEN_NEWLINE || token->type == YP_TOKEN_SEMICOLON;
    }

    return false;
}
// Returns true if the given token would terminate any context currently on
// the context stack, meaning the parser can recover by popping back to that
// context.
static bool
context_recoverable(yp_parser_t *parser, yp_token_t *token) {
    for (yp_context_node_t *node = parser->current_context; node != NULL; node = node->prev) {
        if (context_terminator(node->context, token)) return true;
    }

    return false;
}
// Push a new context onto the context stack. Returns false if the allocation
// fails, true otherwise.
static bool
context_push(yp_parser_t *parser, yp_context_t context) {
    yp_context_node_t *context_node = (yp_context_node_t *) malloc(sizeof(yp_context_node_t));
    if (context_node == NULL) return false;

    // Link the new node in front of whatever is currently on top (which may
    // be NULL when the stack is empty).
    context_node->context = context;
    context_node->prev = parser->current_context;
    parser->current_context = context_node;

    return true;
}
// Pop the top context off the context stack and free it. The stack becomes
// empty (NULL) when the last context is popped.
static void
context_pop(yp_parser_t *parser) {
    yp_context_node_t *prev = parser->current_context->prev;
    free(parser->current_context);
    parser->current_context = prev;
}
// Returns true if the given context appears anywhere on the context stack.
static bool
context_p(yp_parser_t *parser, yp_context_t context) {
    for (yp_context_node_t *node = parser->current_context; node != NULL; node = node->prev) {
        if (node->context == context) return true;
    }

    return false;
}
// Returns true if the parser is currently inside a method definition (a DEF
// context) without an intervening class/module/singleton-class body.
static bool
context_def_p(yp_parser_t *parser) {
    for (yp_context_node_t *node = parser->current_context; node != NULL; node = node->prev) {
        switch (node->context) {
            case YP_CONTEXT_DEF:
                return true;
            case YP_CONTEXT_CLASS:
            case YP_CONTEXT_MODULE:
            case YP_CONTEXT_SCLASS:
                // A class/module/sclass boundary hides any enclosing def.
                return false;
            default:
                break;
        }
    }

    return false;
}
/******************************************************************************/
/* Specific token lexers */
/******************************************************************************/

// Lex an optional fractional part (.123) and/or exponent part (e+12) onto the
// number currently being lexed, advancing parser->current.end past whatever
// is consumed. Returns YP_TOKEN_FLOAT if either part was found and
// YP_TOKEN_INTEGER otherwise.
static yp_token_type_t
lex_optional_float_suffix(yp_parser_t *parser) {
    yp_token_type_t type = YP_TOKEN_INTEGER;

    // Here we're going to attempt to parse the optional decimal portion of a
    // float. If it's not there, then it's okay and we'll just continue on.
    if (peek(parser) == '.') {
        if (yp_char_is_decimal_digit(peek_at(parser, 1))) {
            // Consume the '.' and the first digit, then the rest of the
            // digits (and underscores) in the fraction.
            parser->current.end += 2;
            parser->current.end += yp_strspn_decimal_number(parser->current.end, parser->end - parser->current.end);
            type = YP_TOKEN_FLOAT;
        } else {
            // If we had a . and then something else, then it's not a float suffix on
            // a number it's a method call or something else.
            return type;
        }
    }

    // Here we're going to attempt to parse the optional exponent portion of a
    // float. If it's not there, it's okay and we'll just continue on.
    if (match(parser, 'e') || match(parser, 'E')) {
        (void) (match(parser, '+') || match(parser, '-'));

        if (yp_char_is_decimal_digit(*parser->current.end)) {
            parser->current.end++;
            parser->current.end += yp_strspn_decimal_number(parser->current.end, parser->end - parser->current.end);
            type = YP_TOKEN_FLOAT;
        } else {
            // A bare exponent marker with no digits is an error, but we still
            // classify the token as a float so downstream code is consistent.
            yp_diagnostic_list_append(&parser->error_list, parser->current.start, parser->current.end, "Missing exponent.");
            type = YP_TOKEN_FLOAT;
        }
    }

    return type;
}
// Lex the integer-or-float portion of a numeric literal (everything except
// the trailing r/i suffixes). The first character of the number has already
// been consumed: parser->current.end[-1] is inspected to detect a leading 0,
// which introduces the radix-prefixed forms (0d/0b/0o/0x and bare octal).
static yp_token_type_t
lex_numeric_prefix(yp_parser_t *parser) {
    yp_token_type_t type = YP_TOKEN_INTEGER;

    if (parser->current.end[-1] == '0') {
        switch (*parser->current.end) {
            // 0d1111 is a decimal number
            case 'd':
            case 'D':
                // Note that *++ consumes the radix character before checking
                // the first digit; the same pattern is used in each case.
                if (yp_char_is_decimal_digit(*++parser->current.end)) {
                    parser->current.end += yp_strspn_decimal_number(parser->current.end, parser->end - parser->current.end);
                } else {
                    yp_diagnostic_list_append(&parser->error_list, parser->current.start, parser->current.end, "Invalid decimal number.");
                }

                break;

            // 0b1111 is a binary number
            case 'b':
            case 'B':
                if (yp_char_is_binary_digit(*++parser->current.end)) {
                    parser->current.end += yp_strspn_binary_number(parser->current.end, parser->end - parser->current.end);
                } else {
                    yp_diagnostic_list_append(&parser->error_list, parser->current.start, parser->current.end, "Invalid binary number.");
                }

                break;

            // 0o1111 is an octal number
            case 'o':
            case 'O':
                if (yp_char_is_octal_digit(*++parser->current.end)) {
                    parser->current.end += yp_strspn_octal_number(parser->current.end, parser->end - parser->current.end);
                } else {
                    yp_diagnostic_list_append(&parser->error_list, parser->current.start, parser->current.end, "Invalid octal number.");
                }

                break;

            // 01111 is an octal number
            case '_':
            case '0':
            case '1':
            case '2':
            case '3':
            case '4':
            case '5':
            case '6':
            case '7':
                parser->current.end += yp_strspn_octal_number(parser->current.end, parser->end - parser->current.end);
                break;

            // 0x1111 is a hexadecimal number
            case 'x':
            case 'X':
                if (yp_char_is_hexadecimal_digit(*++parser->current.end)) {
                    parser->current.end += yp_strspn_hexadecimal_number(parser->current.end, parser->end - parser->current.end);
                } else {
                    yp_diagnostic_list_append(&parser->error_list, parser->current.start, parser->current.end, "Invalid hexadecimal number.");
                }

                break;

            // 0.xxx is a float
            case '.': {
                type = lex_optional_float_suffix(parser);
                break;
            }

            // 0exxx is a float
            case 'e':
            case 'E': {
                type = lex_optional_float_suffix(parser);
                break;
            }
        }
    } else {
        // If it didn't start with a 0, then we'll lex as far as we can into a
        // decimal number.
        parser->current.end += yp_strspn_decimal_number(parser->current.end, parser->end - parser->current.end);

        // Afterward, we'll lex as far as we can into an optional float suffix.
        type = lex_optional_float_suffix(parser);
    }

    // If the last character that we consumed was an underscore, then this is
    // actually an invalid integer value, and we should return an invalid token.
    if (parser->current.end[-1] == '_') {
        yp_diagnostic_list_append(&parser->error_list, parser->current.start, parser->current.end, "Number literal cannot end with a `_`.");
    }

    return type;
}
// Finish classifying a numeric literal once the optional `r` (rational) and
// `i` (imaginary) suffixes have been measured. For suffixed numbers we push
// YP_LEX_NUMERIC lex modes recording the extent of each inner literal: first
// the bare numeric portion, then (for `ri`) the rational portion on top.
static yp_token_type_t
lex_finalize_numeric_type(yp_parser_t *parser, yp_token_type_t numeric_type, const char *numeric_end, const char *rational_end, const char *imaginary_end) {
    bool is_rational = (rational_end != NULL);
    bool is_imaginary = (imaginary_end != NULL);

    if (is_rational || is_imaginary) {
        lex_mode_push(parser, (yp_lex_mode_t) {
            .mode = YP_LEX_NUMERIC,
            .as.numeric.type = numeric_type,
            .as.numeric.start = parser->current.start,
            .as.numeric.end = numeric_end
        });
    }

    if (is_rational && is_imaginary) {
        lex_mode_push(parser, (yp_lex_mode_t) {
            .mode = YP_LEX_NUMERIC,
            .as.numeric.type = YP_TOKEN_RATIONAL_NUMBER,
            .as.numeric.start = parser->current.start,
            .as.numeric.end = rational_end
        });
    }

    if (is_imaginary) return YP_TOKEN_IMAGINARY_NUMBER;
    if (is_rational) return YP_TOKEN_RATIONAL_NUMBER;
    return numeric_type;
}
// Lex a numeric literal: the integer/float prefix plus the optional trailing
// `r` (rational) and `i` (imaginary) suffixes.
static yp_token_type_t
lex_numeric(yp_parser_t *parser) {
    yp_token_type_t type = YP_TOKEN_INTEGER;

    if (parser->current.end < parser->end) {
        type = lex_numeric_prefix(parser);

        // Remember where the bare number ended so we can rewind if the r/i
        // suffixes turn out not to belong to the number.
        const char *end = parser->current.end;
        const char *rational_end = NULL;
        const char *imaginary_end = NULL;

        if (match(parser, 'r')) {
            rational_end = parser->current.end;
        }

        if (match(parser, 'i')) {
            imaginary_end = parser->current.end;
        }

        // If the character following the suffixes could continue an
        // identifier (ASCII letter, underscore, or any non-ASCII byte), then
        // the r/i characters are part of the next token, so rewind to the end
        // of the bare number instead of treating them as suffixes.
        const unsigned char uc = (const unsigned char) peek(parser);
        if (uc != '\0' && (uc >= 0x80 || ((uc >= 'a' && uc <= 'z') || (uc >= 'A' && uc <= 'Z')) || uc == '_')) {
            parser->current.end = end;
        } else {
            type = lex_finalize_numeric_type(parser, type, end, rational_end, imaginary_end);
        }
    }

    return type;
}
// Lex the remainder of a global variable after the leading `$` has been
// consumed; parser->current.end points just past the `$` on entry. Special
// globals ($~, $1, $&, ...) and named globals ($foo, $-w) are all handled
// here, with back/numbered references distinguished by lex state.
static yp_token_type_t
lex_global_variable(yp_parser_t *parser) {
    switch (*parser->current.end) {
        case '~':  // $~: match-data
        case '*':  // $*: argv
        case '$':  // $$: pid
        case '?':  // $?: last status
        case '!':  // $!: error string
        case '@':  // $@: error position
        case '/':  // $/: input record separator
        case '\\': // $\: output record separator
        case ';':  // $;: field separator
        case ',':  // $,: output field separator
        case '.':  // $.: last read line number
        case '=':  // $=: ignorecase
        case ':':  // $:: load path
        case '<':  // $<: reading filename
        case '>':  // $>: default output handle
        case '\"': // $": already loaded files
            parser->current.end++;
            return YP_TOKEN_GLOBAL_VARIABLE;

        case '&':  // $&: last match
        case '`':  // $`: string before last match
        case '\'': // $': string after last match
        case '+':  // $+: string matches last paren.
            parser->current.end++;
            // In FNAME position (e.g. after `def`) these lex as plain global
            // variables rather than back-references.
            return lex_state_p(parser, YP_LEX_STATE_FNAME) ? YP_TOKEN_GLOBAL_VARIABLE : YP_TOKEN_BACK_REFERENCE;

        case '0': {
            parser->current.end++;
            size_t width;

            if (parser->current.end < parser->end && (width = char_is_identifier(parser, parser->current.end)) > 0) {
                do {
                    parser->current.end += width;
                } while (parser->current.end < parser->end && (width = char_is_identifier(parser, parser->current.end)) > 0);

                // $0 isn't allowed to be followed by anything.
                yp_diagnostic_list_append(&parser->error_list, parser->current.start, parser->current.end, "Invalid global variable.");
            }

            return YP_TOKEN_GLOBAL_VARIABLE;
        }

        case '1':
        case '2':
        case '3':
        case '4':
        case '5':
        case '6':
        case '7':
        case '8':
        case '9':
            // $1..$NNN: consume all of the digits of the reference.
            parser->current.end += yp_strspn_decimal_digit(parser->current.end, parser->end - parser->current.end);
            return lex_state_p(parser, YP_LEX_STATE_FNAME) ? YP_TOKEN_GLOBAL_VARIABLE : YP_TOKEN_NUMBERED_REFERENCE;

        case '-':
            // $-w and friends: consume the '-' and fall through to lex the
            // identifier character that follows it.
            parser->current.end++;
            /* fallthrough */
        default: {
            size_t width;

            if ((width = char_is_identifier(parser, parser->current.end)) > 0) {
                do {
                    parser->current.end += width;
                } while (parser->current.end < parser->end && (width = char_is_identifier(parser, parser->current.end)) > 0);
            } else {
                // If we get here, then we have a $ followed by something that isn't
                // recognized as a global variable.
                yp_diagnostic_list_append(&parser->error_list, parser->current.start, parser->current.end, "Invalid global variable.");
            }

            return YP_TOKEN_GLOBAL_VARIABLE;
        }
    }
}
// This function checks if the current token matches a keyword. If it does, it
// returns the appropriate token type (which may be the modifier variant).
// Otherwise it returns YP_TOKEN_EOF. The arguments are as follows:
//
// * `value` - the literal keyword string that we're checking for
// * `state` - the lex state that we should transition to if the token matches
// * `type` - the token type to return when the keyword matches
// * `modifier_type` - the token type to return when the keyword matches in a
//   position where it acts as a modifier (e.g. trailing `if`); pass
//   YP_TOKEN_EOF for keywords that have no modifier form
//
static yp_token_type_t
lex_keyword(yp_parser_t *parser, const char *value, yp_lex_state_t state, yp_token_type_t type, yp_token_type_t modifier_type) {
    // Capture the state before any transition so the modifier check below
    // sees the state the keyword was lexed in.
    yp_lex_state_t last_state = parser->lex_state;

    const size_t vlen = strlen(value);
    if (parser->current.start + vlen <= parser->end && strncmp(parser->current.start, value, vlen) == 0) {
        if (parser->lex_state & YP_LEX_STATE_FNAME) {
            // In FNAME position the keyword is being used as a method name,
            // so transition to ENDFN instead of the keyword's own state.
            lex_state_set(parser, YP_LEX_STATE_ENDFN);
        } else {
            lex_state_set(parser, state);
            if (state == YP_LEX_STATE_BEG) {
                parser->command_start = true;
            }

            // Keywords found mid-expression (not at BEG/LABELED/CLASS) act as
            // modifiers when a modifier form exists.
            if ((modifier_type != YP_TOKEN_EOF) && !(last_state & (YP_LEX_STATE_BEG | YP_LEX_STATE_LABELED | YP_LEX_STATE_CLASS))) {
                lex_state_set(parser, YP_LEX_STATE_BEG | YP_LEX_STATE_LABEL);
                return modifier_type;
            }
        }

        return type;
    }

    return YP_TOKEN_EOF;
}
// Lex an identifier starting at parser->current.start, classifying it as a
// keyword, label, constant, or plain identifier. Also handles the trailing
// `!`/`?`/`=` characters that may extend a method name and the trailing `:`
// that turns a name into a label.
static yp_token_type_t
lex_identifier(yp_parser_t *parser, bool previous_command_start) {
    // Lex as far as we can into the current identifier.
    size_t width;
    while (parser->current.end < parser->end && (width = char_is_identifier(parser, parser->current.end)) > 0) {
        parser->current.end += width;
    }

    // Now cache the length of the identifier so that we can quickly compare it
    // against known keywords.
    width = (size_t) (parser->current.end - parser->current.start);

    if (parser->current.end < parser->end) {
        // A trailing ! or ? joins the identifier unless it is actually the
        // start of a != or ?= style operator (checked via the next byte).
        if (((parser->current.end + 1 >= parser->end) || (parser->current.end[1] != '=')) && (match(parser, '!') || match(parser, '?'))) {
            // First we'll attempt to extend the identifier by a ! or ?. Then we'll
            // check if we're returning the defined? keyword or just an identifier.
            width++;

            if (
                ((lex_state_p(parser, YP_LEX_STATE_LABEL | YP_LEX_STATE_ENDFN) && !previous_command_start) || lex_state_arg_p(parser)) &&
                (peek(parser) == ':') && (peek_at(parser, 1) != ':')
            ) {
                // If we're in a position where we can accept a : at the end of an
                // identifier, then we'll optionally accept it.
                lex_state_set(parser, YP_LEX_STATE_ARG | YP_LEX_STATE_LABELED);
                (void) match(parser, ':');
                return YP_TOKEN_LABEL;
            }

            if (parser->lex_state != YP_LEX_STATE_DOT) {
                if (width == 8 && (lex_keyword(parser, "defined?", YP_LEX_STATE_ARG, YP_TOKEN_KEYWORD_DEFINED, YP_TOKEN_EOF) != YP_TOKEN_EOF)) {
                    return YP_TOKEN_KEYWORD_DEFINED;
                }
            }

            return YP_TOKEN_IDENTIFIER;
        } else if (lex_state_p(parser, YP_LEX_STATE_FNAME) && peek_at(parser, 1) != '~' && peek_at(parser, 1) != '>' && (peek_at(parser, 1) != '=' || peek_at(parser, 2) == '>') && match(parser, '=')) {
            // If we're in a position where we can accept a = at the end of an
            // identifier, then we'll optionally accept it.
            return YP_TOKEN_IDENTIFIER;
        }

        if (
            ((lex_state_p(parser, YP_LEX_STATE_LABEL | YP_LEX_STATE_ENDFN) && !previous_command_start) || lex_state_arg_p(parser)) &&
            peek(parser) == ':' && peek_at(parser, 1) != ':'
        ) {
            // If we're in a position where we can accept a : at the end of an
            // identifier, then we'll optionally accept it.
            lex_state_set(parser, YP_LEX_STATE_ARG | YP_LEX_STATE_LABELED);
            (void) match(parser, ':');
            return YP_TOKEN_LABEL;
        }
    }

    if (parser->lex_state != YP_LEX_STATE_DOT) {
        yp_token_type_t type;

        // Compare the identifier against the keyword set, grouped by length
        // so that only candidates of the right size are checked.
        switch (width) {
            case 2:
                if (lex_keyword(parser, "do", YP_LEX_STATE_BEG, YP_TOKEN_KEYWORD_DO, YP_TOKEN_EOF) != YP_TOKEN_EOF) {
                    // `do` binds to an enclosing while/until/for when the
                    // do-loop stack says so.
                    if (yp_do_loop_stack_p(parser)) {
                        return YP_TOKEN_KEYWORD_DO_LOOP;
                    }
                    return YP_TOKEN_KEYWORD_DO;
                }

                if ((type = lex_keyword(parser, "if", YP_LEX_STATE_BEG, YP_TOKEN_KEYWORD_IF, YP_TOKEN_KEYWORD_IF_MODIFIER)) != YP_TOKEN_EOF) return type;
                if ((type = lex_keyword(parser, "in", YP_LEX_STATE_BEG, YP_TOKEN_KEYWORD_IN, YP_TOKEN_EOF)) != YP_TOKEN_EOF) return type;
                if ((type = lex_keyword(parser, "or", YP_LEX_STATE_BEG, YP_TOKEN_KEYWORD_OR, YP_TOKEN_EOF)) != YP_TOKEN_EOF) return type;
                break;
            case 3:
                if ((type = lex_keyword(parser, "and", YP_LEX_STATE_BEG, YP_TOKEN_KEYWORD_AND, YP_TOKEN_EOF)) != YP_TOKEN_EOF) return type;
                if ((type = lex_keyword(parser, "def", YP_LEX_STATE_FNAME, YP_TOKEN_KEYWORD_DEF, YP_TOKEN_EOF)) != YP_TOKEN_EOF) return type;
                if ((type = lex_keyword(parser, "end", YP_LEX_STATE_END, YP_TOKEN_KEYWORD_END, YP_TOKEN_EOF)) != YP_TOKEN_EOF) return type;
                if ((type = lex_keyword(parser, "END", YP_LEX_STATE_END, YP_TOKEN_KEYWORD_END_UPCASE, YP_TOKEN_EOF)) != YP_TOKEN_EOF) return type;
                if ((type = lex_keyword(parser, "for", YP_LEX_STATE_BEG, YP_TOKEN_KEYWORD_FOR, YP_TOKEN_EOF)) != YP_TOKEN_EOF) return type;
                if ((type = lex_keyword(parser, "nil", YP_LEX_STATE_END, YP_TOKEN_KEYWORD_NIL, YP_TOKEN_EOF)) != YP_TOKEN_EOF) return type;
                if ((type = lex_keyword(parser, "not", YP_LEX_STATE_ARG, YP_TOKEN_KEYWORD_NOT, YP_TOKEN_EOF)) != YP_TOKEN_EOF) return type;
                break;
            case 4:
                if ((type = lex_keyword(parser, "case", YP_LEX_STATE_BEG, YP_TOKEN_KEYWORD_CASE, YP_TOKEN_EOF)) != YP_TOKEN_EOF) return type;
                if ((type = lex_keyword(parser, "else", YP_LEX_STATE_BEG, YP_TOKEN_KEYWORD_ELSE, YP_TOKEN_EOF)) != YP_TOKEN_EOF) return type;
                if ((type = lex_keyword(parser, "next", YP_LEX_STATE_MID, YP_TOKEN_KEYWORD_NEXT, YP_TOKEN_EOF)) != YP_TOKEN_EOF) return type;
                if ((type = lex_keyword(parser, "redo", YP_LEX_STATE_END, YP_TOKEN_KEYWORD_REDO, YP_TOKEN_EOF)) != YP_TOKEN_EOF) return type;
                if ((type = lex_keyword(parser, "self", YP_LEX_STATE_END, YP_TOKEN_KEYWORD_SELF, YP_TOKEN_EOF)) != YP_TOKEN_EOF) return type;
                if ((type = lex_keyword(parser, "then", YP_LEX_STATE_BEG, YP_TOKEN_KEYWORD_THEN, YP_TOKEN_EOF)) != YP_TOKEN_EOF) return type;
                if ((type = lex_keyword(parser, "true", YP_LEX_STATE_END, YP_TOKEN_KEYWORD_TRUE, YP_TOKEN_EOF)) != YP_TOKEN_EOF) return type;
                if ((type = lex_keyword(parser, "when", YP_LEX_STATE_BEG, YP_TOKEN_KEYWORD_WHEN, YP_TOKEN_EOF)) != YP_TOKEN_EOF) return type;
                break;
            case 5:
                if ((type = lex_keyword(parser, "alias", YP_LEX_STATE_FNAME | YP_LEX_STATE_FITEM, YP_TOKEN_KEYWORD_ALIAS, YP_TOKEN_EOF)) != YP_TOKEN_EOF) return type;
                if ((type = lex_keyword(parser, "begin", YP_LEX_STATE_BEG, YP_TOKEN_KEYWORD_BEGIN, YP_TOKEN_EOF)) != YP_TOKEN_EOF) return type;
                if ((type = lex_keyword(parser, "BEGIN", YP_LEX_STATE_END, YP_TOKEN_KEYWORD_BEGIN_UPCASE, YP_TOKEN_EOF)) != YP_TOKEN_EOF) return type;
                if ((type = lex_keyword(parser, "break", YP_LEX_STATE_MID, YP_TOKEN_KEYWORD_BREAK, YP_TOKEN_EOF)) != YP_TOKEN_EOF) return type;
                if ((type = lex_keyword(parser, "class", YP_LEX_STATE_CLASS, YP_TOKEN_KEYWORD_CLASS, YP_TOKEN_EOF)) != YP_TOKEN_EOF) return type;
                if ((type = lex_keyword(parser, "elsif", YP_LEX_STATE_BEG, YP_TOKEN_KEYWORD_ELSIF, YP_TOKEN_EOF)) != YP_TOKEN_EOF) return type;
                if ((type = lex_keyword(parser, "false", YP_LEX_STATE_END, YP_TOKEN_KEYWORD_FALSE, YP_TOKEN_EOF)) != YP_TOKEN_EOF) return type;
                if ((type = lex_keyword(parser, "retry", YP_LEX_STATE_END, YP_TOKEN_KEYWORD_RETRY, YP_TOKEN_EOF)) != YP_TOKEN_EOF) return type;
                if ((type = lex_keyword(parser, "super", YP_LEX_STATE_ARG, YP_TOKEN_KEYWORD_SUPER, YP_TOKEN_EOF)) != YP_TOKEN_EOF) return type;
                if ((type = lex_keyword(parser, "undef", YP_LEX_STATE_FNAME | YP_LEX_STATE_FITEM, YP_TOKEN_KEYWORD_UNDEF, YP_TOKEN_EOF)) != YP_TOKEN_EOF) return type;
                if ((type = lex_keyword(parser, "until", YP_LEX_STATE_BEG, YP_TOKEN_KEYWORD_UNTIL, YP_TOKEN_KEYWORD_UNTIL_MODIFIER)) != YP_TOKEN_EOF) return type;
                if ((type = lex_keyword(parser, "while", YP_LEX_STATE_BEG, YP_TOKEN_KEYWORD_WHILE, YP_TOKEN_KEYWORD_WHILE_MODIFIER)) != YP_TOKEN_EOF) return type;
                if ((type = lex_keyword(parser, "yield", YP_LEX_STATE_ARG, YP_TOKEN_KEYWORD_YIELD, YP_TOKEN_EOF)) != YP_TOKEN_EOF) return type;
                break;
            case 6:
                if ((type = lex_keyword(parser, "ensure", YP_LEX_STATE_BEG, YP_TOKEN_KEYWORD_ENSURE, YP_TOKEN_EOF)) != YP_TOKEN_EOF) return type;
                if ((type = lex_keyword(parser, "module", YP_LEX_STATE_BEG, YP_TOKEN_KEYWORD_MODULE, YP_TOKEN_EOF)) != YP_TOKEN_EOF) return type;
                if ((type = lex_keyword(parser, "rescue", YP_LEX_STATE_MID, YP_TOKEN_KEYWORD_RESCUE, YP_TOKEN_KEYWORD_RESCUE_MODIFIER)) != YP_TOKEN_EOF) return type;
                if ((type = lex_keyword(parser, "return", YP_LEX_STATE_MID, YP_TOKEN_KEYWORD_RETURN, YP_TOKEN_EOF)) != YP_TOKEN_EOF) return type;
                if ((type = lex_keyword(parser, "unless", YP_LEX_STATE_BEG, YP_TOKEN_KEYWORD_UNLESS, YP_TOKEN_KEYWORD_UNLESS_MODIFIER)) != YP_TOKEN_EOF) return type;
                break;
            case 8:
                if ((type = lex_keyword(parser, "__LINE__", YP_LEX_STATE_END, YP_TOKEN_KEYWORD___LINE__, YP_TOKEN_EOF)) != YP_TOKEN_EOF) return type;
                if ((type = lex_keyword(parser, "__FILE__", YP_LEX_STATE_END, YP_TOKEN_KEYWORD___FILE__, YP_TOKEN_EOF)) != YP_TOKEN_EOF) return type;
                break;
            case 12:
                if ((type = lex_keyword(parser, "__ENCODING__", YP_LEX_STATE_END, YP_TOKEN_KEYWORD___ENCODING__, YP_TOKEN_EOF)) != YP_TOKEN_EOF) return type;
                break;
        }
    }

    // Anything that isn't a keyword is a constant if it begins with an
    // uppercase character and a plain identifier otherwise.
    return parser->encoding.isupper_char(parser->current.start) ? YP_TOKEN_CONSTANT : YP_TOKEN_IDENTIFIER;
}
// Returns true if the token currently being lexed begins either the source as
// a whole or a fresh line within it (i.e. the previous byte is a newline).
static bool
current_token_starts_line(yp_parser_t *parser) {
    const char *start = parser->current.start;
    if (start == parser->start) return true;
    return start[-1] == '\n';
}
// When we hit a # while lexing something like a string, we need to potentially
// handle interpolation. This function performs that check. It returns a token
// type representing what it found. Those cases are:
//
// * YP_TOKEN_NOT_PROVIDED - No interpolation was found at this point. The
// caller should keep lexing.
// * YP_TOKEN_STRING_CONTENT - No interpolation was found at this point. The
// caller should return this token type.
// * YP_TOKEN_EMBEXPR_BEGIN - An embedded expression was found. The caller
// should return this token type.
// * YP_TOKEN_EMBVAR - An embedded variable was found. The caller should return
// this token type.
//
static yp_token_type_t
lex_interpolation(yp_parser_t *parser, const char *pound) {
    // If there is no content following this #, then we're at the end of
    // the string and we can safely return string content.
    if (pound + 1 >= parser->end) {
        parser->current.end = pound + 1;
        return YP_TOKEN_STRING_CONTENT;
    }

    // Now we'll check against the character that follows the #. If it
    // constitutes valid interpolation, we'll handle that, otherwise we'll
    // return YP_TOKEN_NOT_PROVIDED.
    switch (pound[1]) {
        case '@': {
            // In this case we may have hit an embedded instance or class
            // variable (e.g. "#@foo" or "#@@foo").
            if (pound + 2 >= parser->end) {
                parser->current.end = pound + 1;
                return YP_TOKEN_STRING_CONTENT;
            }

            // If we're looking at a @ and there's another @, then we'll skip
            // past the second @ so that `variable` points at the character
            // that must begin the identifier.
            const char *variable = pound + 2;
            if (*variable == '@' && pound + 3 < parser->end) variable++;

            if (char_is_identifier_start(parser, variable)) {
                // At this point we're sure that we've either hit an embedded
                // instance or class variable. In this case we'll first need to
                // check if we've already consumed content; if so, that content
                // has to be emitted before the embedded variable token.
                if (pound > parser->current.start) {
                    parser->current.end = pound;
                    return YP_TOKEN_STRING_CONTENT;
                }

                // Otherwise we need to return the embedded variable token
                // and then switch to the embedded variable lex mode.
                lex_mode_push(parser, (yp_lex_mode_t) { .mode = YP_LEX_EMBVAR });
                parser->current.end = pound + 1;
                return YP_TOKEN_EMBVAR;
            }

            // If we didn't get a valid interpolation, then this is just
            // regular string content. This is like if we get "#@-". In this
            // case the caller should keep lexing.
            parser->current.end = variable;
            return YP_TOKEN_NOT_PROVIDED;
        }
        case '$':
            // In this case we may have hit an embedded global variable. If
            // there's not enough room, then we'll just return string content.
            if (pound + 2 >= parser->end) {
                parser->current.end = pound + 1;
                return YP_TOKEN_STRING_CONTENT;
            }

            // This is the character that we're going to check to see if it is
            // the start of an identifier that would indicate that this is a
            // global variable.
            const char *check = pound + 2;

            // A leading '-' (as in "#$-w") means the actual name character is
            // one position further along.
            if (pound[2] == '-') {
                if (pound + 3 >= parser->end) {
                    parser->current.end = pound + 2;
                    return YP_TOKEN_STRING_CONTENT;
                }

                check++;
            }

            // If the character that we're going to check is the start of an
            // identifier, or we don't have a - and the character is a decimal
            // number or a global name punctuation character, then we've hit an
            // embedded global variable.
            if (
                char_is_identifier_start(parser, check) ||
                (pound[2] != '-' && (yp_char_is_decimal_digit(pound[2]) || char_is_global_name_punctuation(pound[2])))
            ) {
                // In this case we've hit an embedded global variable. First
                // check to see if we've already consumed content. If we have,
                // then we need to return that content as string content first.
                if (pound > parser->current.start) {
                    parser->current.end = pound;
                    return YP_TOKEN_STRING_CONTENT;
                }

                // Otherwise, we need to return the embedded variable token and
                // switch to the embedded variable lex mode.
                lex_mode_push(parser, (yp_lex_mode_t) { .mode = YP_LEX_EMBVAR });
                parser->current.end = pound + 1;
                return YP_TOKEN_EMBVAR;
            }

            // In this case we've hit a #$ that does not indicate a global
            // variable. In this case we'll continue lexing past it.
            parser->current.end = pound + 1;
            return YP_TOKEN_NOT_PROVIDED;
        case '{':
            // In this case it's the start of an embedded expression. If we
            // have already consumed content, then we need to return that
            // content as string content first.
            if (pound > parser->current.start) {
                parser->current.end = pound;
                return YP_TOKEN_STRING_CONTENT;
            }

            parser->enclosure_nesting++;

            // Otherwise we'll skip past the #{ and begin lexing the embedded
            // expression.
            lex_mode_push(parser, (yp_lex_mode_t) { .mode = YP_LEX_EMBEXPR });
            parser->current.end = pound + 2;
            parser->command_start = true;
            yp_do_loop_stack_push(parser, false);
            return YP_TOKEN_EMBEXPR_BEGIN;
        default:
            // In this case we've hit a # that doesn't constitute
            // interpolation. We'll mark that by returning the not provided
            // token type. This tells the consumer to keep lexing forward.
            parser->current.end = pound + 1;
            return YP_TOKEN_NOT_PROVIDED;
    }
}
// This function is responsible for lexing either a character literal or the ?
// operator. The supported character literals are described below.
//
// \a bell, ASCII 07h (BEL)
// \b backspace, ASCII 08h (BS)
// \t horizontal tab, ASCII 09h (TAB)
// \n newline (line feed), ASCII 0Ah (LF)
// \v vertical tab, ASCII 0Bh (VT)
// \f form feed, ASCII 0Ch (FF)
// \r carriage return, ASCII 0Dh (CR)
// \e escape, ASCII 1Bh (ESC)
// \s space, ASCII 20h (SPC)
// \\ backslash
// \nnn octal bit pattern, where nnn is 1-3 octal digits ([0-7])
// \xnn hexadecimal bit pattern, where nn is 1-2 hexadecimal digits ([0-9a-fA-F])
// \unnnn Unicode character, where nnnn is exactly 4 hexadecimal digits ([0-9a-fA-F])
// \u{nnnn ...} Unicode character(s), where each nnnn is 1-6 hexadecimal digits ([0-9a-fA-F])
// \cx or \C-x control character, where x is an ASCII printable character
// \M-x meta character, where x is an ASCII printable character
// \M-\C-x meta control character, where x is an ASCII printable character
// \M-\cx same as above
// \c\M-x same as above
// \c? or \C-? delete, ASCII 7Fh (DEL)
//
static yp_token_type_t
lex_question_mark(yp_parser_t *parser) {
    // When the lexer is in an "end" state, a ? can only be the ternary
    // operator, never a character literal.
    if (lex_state_end_p(parser)) {
        lex_state_set(parser, YP_LEX_STATE_BEG);
        return YP_TOKEN_QUESTION_MARK;
    }

    // A ? at the very end of the source has no character to quote, so report
    // the incomplete character syntax but still return a character literal.
    if (parser->current.end >= parser->end) {
        yp_diagnostic_list_append(&parser->error_list, parser->current.start, parser->current.end, "Incomplete character syntax.");
        return YP_TOKEN_CHARACTER_LITERAL;
    }

    // A ? followed by whitespace is the ternary operator (e.g. "a ? b : c").
    if (yp_char_is_whitespace(*parser->current.end)) {
        lex_state_set(parser, YP_LEX_STATE_BEG);
        return YP_TOKEN_QUESTION_MARK;
    }

    lex_state_set(parser, YP_LEX_STATE_BEG);

    if (parser->current.start[1] == '\\') {
        // This is an escaped character literal such as ?\n — consume the
        // entire escape sequence (see the escape table above this function).
        lex_state_set(parser, YP_LEX_STATE_END);
        parser->current.end += yp_unescape_calculate_difference(parser->current.start + 1, parser->end, YP_UNESCAPE_ALL, true, &parser->error_list);
        return YP_TOKEN_CHARACTER_LITERAL;
    } else {
        size_t encoding_width = parser->encoding.char_width(parser->current.end);

        // We only want to return a character literal if there's exactly one
        // alphanumeric character right after the `?`
        //
        // NOTE(review): the second alnum_char probe below may point at or past
        // parser->end when the character is the last one in the source —
        // assumes the encoding callbacks tolerate that; confirm.
        if (
            !parser->encoding.alnum_char(parser->current.end) ||
            !parser->encoding.alnum_char(parser->current.end + encoding_width)
        ) {
            lex_state_set(parser, YP_LEX_STATE_END);
            parser->current.end += encoding_width;
            return YP_TOKEN_CHARACTER_LITERAL;
        }
    }

    // Two or more identifier characters follow, so this ? is an operator
    // (e.g. "a ?b : c" would be ambiguous; treat as ternary).
    return YP_TOKEN_QUESTION_MARK;
}
// Lex a variable that starts with an @ sign (either an instance or class
// variable).
static yp_token_type_t
lex_at_variable(yp_parser_t *parser) {
    // A second @ immediately after the first means this is a class variable
    // rather than an instance variable.
    yp_token_type_t type = match(parser, '@') ? YP_TOKEN_CLASS_VARIABLE : YP_TOKEN_INSTANCE_VARIABLE;

    // The sigil must be followed by a valid identifier-start character; when
    // it is, consume it and then every subsequent identifier character.
    size_t width = (parser->current.end < parser->end) ? char_is_identifier_start(parser, parser->current.end) : 0;

    if (width > 0) {
        do {
            parser->current.end += width;
            width = (parser->current.end < parser->end) ? char_is_identifier(parser, parser->current.end) : 0;
        } while (width > 0);
    } else {
        // No identifier followed the sigil(s), so record the appropriate
        // incomplete-variable diagnostic.
        const char *message = (type == YP_TOKEN_CLASS_VARIABLE) ? "Incomplete class variable." : "Incomplete instance variable.";
        yp_diagnostic_list_append(&parser->error_list, parser->current.start, parser->current.end, message);
    }

    // If we're lexing an embedded variable, then we need to pop back into the
    // parent lex context.
    if (parser->lex_modes.current->mode == YP_LEX_EMBVAR) {
        lex_mode_pop(parser);
    }

    return type;
}
// Optionally call out to the lex callback if one is provided. The callback
// receives its own opaque data pointer, the parser, and the token that was
// just lexed into parser->current. A NULL callback is silently skipped.
static inline void
parser_lex_callback(yp_parser_t *parser) {
    if (parser->lex_callback) {
        parser->lex_callback->callback(parser->lex_callback->data, parser, &parser->current);
    }
}
// Return a newly allocated comment node of the specified type spanning the
// parser's current token. Returns NULL when allocation fails; callers must
// check for that before using or appending the node.
static inline yp_comment_t *
parser_comment(yp_parser_t *parser, yp_comment_type_t type) {
    yp_comment_t *comment = (yp_comment_t *) malloc(sizeof(yp_comment_t));
    if (comment == NULL) return NULL;

    // The compound-literal assignment zero-initializes every member not named
    // here (C11 6.7.9), including the intrusive list link used when the node
    // is later appended to the parser's comment list.
    *comment = (yp_comment_t) {
        .type = type,
        .start = parser->current.start,
        .end = parser->current.end
    };

    return comment;
}
// Advance parser->current.end to just past the end of the current line (or to
// the end of the source if there is no trailing newline), recording any
// newline that was crossed in the parser's newline list.
static void
lex_embdoc_advance_line(yp_parser_t *parser) {
    const char *newline = next_newline(parser->current.end, parser->end - parser->current.end);

    if (newline == NULL) {
        parser->current.end = parser->end;
    } else {
        yp_newline_list_append(&parser->newline_list, newline);
        parser->current.end = newline + 1;
    }
}

// Lex out embedded documentation, and return when we have either hit the end of
// the file or the end of the embedded documentation. This calls the callback
// manually because only the lexer should see these tokens, not the parser.
static yp_token_type_t
lex_embdoc(yp_parser_t *parser) {
    // First, lex out the EMBDOC_BEGIN token (the "=begin" line itself).
    lex_embdoc_advance_line(parser);
    parser->current.type = YP_TOKEN_EMBDOC_BEGIN;
    parser_lex_callback(parser);

    // Now, create a comment that is going to be attached to the parser. If the
    // allocation fails we bail out by reporting end-of-file.
    yp_comment_t *comment = parser_comment(parser, YP_COMMENT_EMBDOC);
    if (comment == NULL) return YP_TOKEN_EOF;

    // Now, loop until we find the end of the embedded documentation or the end
    // of the file. The + 4 guarantees there is room for an "=end" marker.
    while (parser->current.end + 4 <= parser->end) {
        parser->current.start = parser->current.end;

        // If we've hit the end of the embedded documentation then we'll return
        // that token here. "=end" must be followed by whitespace or be the
        // final bytes of the source.
        if (strncmp(parser->current.end, "=end", 4) == 0 &&
            (parser->current.end + 4 == parser->end || yp_char_is_whitespace(parser->current.end[4]))) {
            lex_embdoc_advance_line(parser);
            parser->current.type = YP_TOKEN_EMBDOC_END;
            parser_lex_callback(parser);

            comment->end = parser->current.end;
            yp_list_append(&parser->comment_list, (yp_list_node_t *) comment);

            return YP_TOKEN_EMBDOC_END;
        }

        // Otherwise, we'll parse until the end of the line and return a line of
        // embedded documentation.
        lex_embdoc_advance_line(parser);
        parser->current.type = YP_TOKEN_EMBDOC_LINE;
        parser_lex_callback(parser);
    }

    // We ran out of source without seeing "=end": report the unterminated
    // embdoc but still record the comment that spans what we did consume.
    yp_diagnostic_list_append(&parser->error_list, parser->current.start, parser->current.end, "Unterminated embdoc");

    comment->end = parser->current.end;
    yp_list_append(&parser->comment_list, (yp_list_node_t *) comment);
    return YP_TOKEN_EOF;
}
// Set the current type to an ignored newline and then call the lex callback.
// This happens in a couple places depending on whether or not we have already
// lexed a comment. Note that this only retags the current token and notifies
// the callback — it does not advance the lexer.
static inline void
parser_lex_ignored_newline(yp_parser_t *parser) {
    parser->current.type = YP_TOKEN_IGNORED_NEWLINE;
    parser_lex_callback(parser);
}
// This function will be called when a newline is encountered. In some newlines,
// we need to check if there is a heredoc or heredocs that we have already lexed
// the body of that we need to now skip past. That will be indicated by the
// heredoc_end field on the parser.
//
// If it is set, then we need to skip past the heredoc body and then clear the
// heredoc_end field.
static inline void
parser_flush_heredoc_end(yp_parser_t *parser) {
    assert(parser->heredoc_end <= parser->end);
    // Setting next_start makes the next call to the lexer resume scanning at
    // the first byte after the already-lexed heredoc body.
    parser->next_start = parser->heredoc_end;
    parser->heredoc_end = NULL;
}
// This is a convenience macro that will set the current token type, call the
// lex callback, and then return from the parser_lex function. The expansion is
// wrapped in do/while (0) so that it behaves as a single statement (and is
// therefore safe under an unbraced if/else), and the argument is parenthesized
// so that compound expressions such as ternaries are assigned as a unit.
#define LEX(token_type) do { parser->current.type = (token_type); parser_lex_callback(parser); return; } while (0)
// Called when the parser requires a new token. The parser maintains a moving
// window of two tokens at a time: parser.previous and parser.current. This
// function will move the current token into the previous token and then
// lex a new token into the current token.
static void
parser_lex(yp_parser_t *parser) {
assert(parser->current.end <= parser->end);
parser->previous = parser->current;
// This value mirrors cmd_state from CRuby.
bool previous_command_start = parser->command_start;
parser->command_start = false;
// This is used to communicate to the newline lexing function that we've
// already seen a comment.
bool lexed_comment = false;
switch (parser->lex_modes.current->mode) {
case YP_LEX_DEFAULT:
case YP_LEX_EMBEXPR:
case YP_LEX_EMBVAR:
case YP_LEX_NUMERIC:
// We have a specific named label here because we are going to jump back to
// this location in the event that we have lexed a token that should not be
// returned to the parser. This includes comments, ignored newlines, and
// invalid tokens of some form.
lex_next_token: {
// If we have the special next_start pointer set, then we're going to jump
// to that location and start lexing from there.
if (parser->next_start != NULL) {
parser->current.end = parser->next_start;
parser->next_start = NULL;
}
// This value mirrors space_seen from CRuby. It tracks whether or not
// space has been eaten before the start of the next token.
bool space_seen = false;
// First, we're going to skip past any whitespace at the front of the next
// token.
bool chomping = true;
while (parser->current.end < parser->end && chomping) {
switch (*parser->current.end) {
case ' ':
case '\t':
case '\f':
case '\v':
parser->current.end++;
space_seen = true;
break;
case '\r':
if (peek_at(parser, 1) == '\n') {
chomping = false;
} else {
parser->current.end++;
space_seen = true;
}
break;
case '\\':
if (peek_at(parser, 1) == '\n') {
yp_newline_list_append(&parser->newline_list, parser->current.end + 1);
parser->current.end += 2;
space_seen = true;
} else if (parser->current.end + 2 < parser->end && peek_at(parser, 1) == '\r' && peek_at(parser, 2) == '\n') {
yp_newline_list_append(&parser->newline_list, parser->current.end + 2);
parser->current.end += 3;
space_seen = true;
} else if (yp_char_is_inline_whitespace(*parser->current.end)) {
parser->current.end += 2;
} else {
chomping = false;
}
break;
default:
chomping = false;
break;
}
}
// Next, we'll set to start of this token to be the current end.
parser->current.start = parser->current.end;
// We'll check if we're at the end of the file. If we are, then we need to
// return the EOF token.
if (parser->current.end >= parser->end) {
LEX(YP_TOKEN_EOF);
}
// Finally, we'll check the current character to determine the next token.
switch (*parser->current.end++) {
case '\0': // NUL or end of script
case '\004': // ^D
case '\032': // ^Z
parser->current.end--;
LEX(YP_TOKEN_EOF);
case '#': { // comments
const char *ending = next_newline(parser->current.end, parser->end - parser->current.end);
while (ending && ending < parser->end && *ending != '\n') {
ending = next_newline(ending + 1, parser->end - ending);
}
parser->current.end = ending == NULL ? parser->end : ending + 1;
parser->current.type = YP_TOKEN_COMMENT;
parser_lex_callback(parser);
// If we found a comment while lexing, then we're going to add it to the
// list of comments in the file and keep lexing.
yp_comment_t *comment = parser_comment(parser, YP_COMMENT_INLINE);
yp_list_append(&parser->comment_list, (yp_list_node_t *) comment);
if (parser->current.start == parser->encoding_comment_start) {
parser_lex_encoding_comment(parser);
}
lexed_comment = true;
}
/* fallthrough */
case '\r': {
// The only way you can have carriage returns in this particular loop
// is if you have a carriage return followed by a newline. In that
// case we'll just skip over the carriage return and continue lexing,
// in order to make it so that the newline token encapsulates both the
// carriage return and the newline. Note that we need to check that
// we haven't already lexed a comment here because that falls through
// into here as well.
if (!lexed_comment) parser->current.end++;
}
/* fallthrough */
case '\n': {
if (parser->heredoc_end == NULL) {
yp_newline_list_append(&parser->newline_list, parser->current.end - 1);
} else {
parser_flush_heredoc_end(parser);
}
// If this is an ignored newline, then we can continue lexing after
// calling the callback with the ignored newline token.
switch (lex_state_ignored_p(parser)) {
case YP_IGNORED_NEWLINE_NONE:
break;
case YP_IGNORED_NEWLINE_PATTERN:
if (parser->pattern_matching_newlines || parser->in_keyword_arg) {
if (!lexed_comment) parser_lex_ignored_newline(parser);
lex_state_set(parser, YP_LEX_STATE_BEG);
parser->command_start = true;
parser->current.type = YP_TOKEN_NEWLINE;
return;
}
/* fallthrough */
case YP_IGNORED_NEWLINE_ALL:
if (!lexed_comment) parser_lex_ignored_newline(parser);
lexed_comment = false;
goto lex_next_token;
}
// Here we need to look ahead and see if there is a call operator
// (either . or &.) that starts the next line. If there is, then this
// is going to become an ignored newline and we're going to instead
// return the call operator.
const char *next_content = parser->next_start == NULL ? parser->current.end : parser->next_start;
next_content += yp_strspn_inline_whitespace(next_content, parser->end - next_content);
if (next_content < parser->end) {
// If we hit a comment after a newline, then we're going to check
// if it's ignored or if it's followed by a method call ('.').
// If it is, then we're going to call the
// callback with an ignored newline and then continue lexing.
// Otherwise we'll return a regular newline.
if (next_content[0] == '#') {
// Here we look for a "." or "&." following a "\n".
const char *following = next_newline(next_content, parser->end - next_content);
while (following && (following < parser->end)) {
following++;
following += yp_strspn_inline_whitespace(following, parser->end - following);
// If this is not followed by a comment, then we can break out
// of this loop.
if (*following != '#') break;
// If there is a comment, then we need to find the end of the
// comment and continue searching from there.
following = next_newline(following, parser->end - following);
}
// If the lex state was ignored, or we hit a '.' or a '&.',
// we will lex the ignored newline
if (lex_state_ignored_p(parser) || (following && ((following[0] == '.') || (following + 1 < parser->end && following[0] == '&' && following[1] == '.')))) {
if (!lexed_comment) parser_lex_ignored_newline(parser);
lexed_comment = false;
goto lex_next_token;
}
}
// If we hit a . after a newline, then we're in a call chain and
// we need to return the call operator.
if (next_content[0] == '.') {
// To match ripper, we need to emit an ignored newline even though
// its a real newline in the case that we have a beginless range
// on a subsequent line.
if ((next_content + 1 < parser->end) && (next_content[1] == '.')) {
if (!lexed_comment) parser_lex_ignored_newline(parser);
lex_state_set(parser, YP_LEX_STATE_BEG);
parser->command_start = true;
parser->current.type = YP_TOKEN_NEWLINE;
return;
}
if (!lexed_comment) parser_lex_ignored_newline(parser);
lex_state_set(parser, YP_LEX_STATE_DOT);
parser->current.start = next_content;
parser->current.end = next_content + 1;
parser->next_start = NULL;
LEX(YP_TOKEN_DOT);
}
// If we hit a &. after a newline, then we're in a call chain and
// we need to return the call operator.
if (next_content + 1 < parser->end && next_content[0] == '&' && next_content[1] == '.') {
if (!lexed_comment) parser_lex_ignored_newline(parser);
lex_state_set(parser, YP_LEX_STATE_DOT);
parser->current.start = next_content;
parser->current.end = next_content + 2;
parser->next_start = NULL;
LEX(YP_TOKEN_AMPERSAND_DOT);
}
}
// At this point we know this is a regular newline, and we can set the
// necessary state and return the token.
lex_state_set(parser, YP_LEX_STATE_BEG);
parser->command_start = true;
parser->current.type = YP_TOKEN_NEWLINE;
if (!lexed_comment) parser_lex_callback(parser);
return;
}
// ,
case ',':
lex_state_set(parser, YP_LEX_STATE_BEG | YP_LEX_STATE_LABEL);
LEX(YP_TOKEN_COMMA);
// (
case '(': {
yp_token_type_t type = YP_TOKEN_PARENTHESIS_LEFT;
if (space_seen && (lex_state_arg_p(parser) || parser->lex_state == (YP_LEX_STATE_END | YP_LEX_STATE_LABEL))) {
type = YP_TOKEN_PARENTHESIS_LEFT_PARENTHESES;
}
parser->enclosure_nesting++;
lex_state_set(parser, YP_LEX_STATE_BEG | YP_LEX_STATE_LABEL);
yp_do_loop_stack_push(parser, false);
LEX(type);
}
// )
case ')':
parser->enclosure_nesting--;
lex_state_set(parser, YP_LEX_STATE_ENDFN);
yp_do_loop_stack_pop(parser);
LEX(YP_TOKEN_PARENTHESIS_RIGHT);
// ;
case ';':
lex_state_set(parser, YP_LEX_STATE_BEG);
parser->command_start = true;
LEX(YP_TOKEN_SEMICOLON);
// [ [] []=
case '[':
parser->enclosure_nesting++;
yp_token_type_t type = YP_TOKEN_BRACKET_LEFT;
if (lex_state_operator_p(parser)) {
if (match(parser, ']')) {
parser->enclosure_nesting--;
lex_state_set(parser, YP_LEX_STATE_ARG);
LEX(match(parser, '=') ? YP_TOKEN_BRACKET_LEFT_RIGHT_EQUAL : YP_TOKEN_BRACKET_LEFT_RIGHT);
}
lex_state_set(parser, YP_LEX_STATE_ARG | YP_LEX_STATE_LABEL);
LEX(type);
}
if (lex_state_beg_p(parser) || (lex_state_arg_p(parser) && (space_seen || lex_state_p(parser, YP_LEX_STATE_LABELED)))) {
type = YP_TOKEN_BRACKET_LEFT_ARRAY;
}
lex_state_set(parser, YP_LEX_STATE_BEG | YP_LEX_STATE_LABEL);
yp_do_loop_stack_push(parser, false);
LEX(type);
// ]
case ']':
parser->enclosure_nesting--;
lex_state_set(parser, YP_LEX_STATE_END);
yp_do_loop_stack_pop(parser);
LEX(YP_TOKEN_BRACKET_RIGHT);
// {
case '{': {
yp_token_type_t type = YP_TOKEN_BRACE_LEFT;
if (parser->enclosure_nesting == parser->lambda_enclosure_nesting) {
// This { begins a lambda
parser->command_start = true;
lex_state_set(parser, YP_LEX_STATE_BEG);
type = YP_TOKEN_LAMBDA_BEGIN;
} else if (lex_state_p(parser, YP_LEX_STATE_LABELED)) {
// This { begins a hash literal
lex_state_set(parser, YP_LEX_STATE_BEG | YP_LEX_STATE_LABEL);
} else if (lex_state_p(parser, YP_LEX_STATE_ARG_ANY | YP_LEX_STATE_END | YP_LEX_STATE_ENDFN)) {
// This { begins a block
parser->command_start = true;
lex_state_set(parser, YP_LEX_STATE_BEG);
} else if (lex_state_p(parser, YP_LEX_STATE_ENDARG)) {
// This { begins a block on a command
parser->command_start = true;
lex_state_set(parser, YP_LEX_STATE_BEG);
} else {
// This { begins a hash literal
lex_state_set(parser, YP_LEX_STATE_BEG | YP_LEX_STATE_LABEL);
}
parser->enclosure_nesting++;
parser->brace_nesting++;
yp_do_loop_stack_push(parser, false);
LEX(type);
}
// }
case '}':
parser->enclosure_nesting--;
yp_do_loop_stack_pop(parser);
if ((parser->lex_modes.current->mode == YP_LEX_EMBEXPR) && (parser->brace_nesting == 0)) {
lex_mode_pop(parser);
LEX(YP_TOKEN_EMBEXPR_END);
}
parser->brace_nesting--;
lex_state_set(parser, YP_LEX_STATE_END);
LEX(YP_TOKEN_BRACE_RIGHT);
// * ** **= *=
case '*': {
if (match(parser, '*')) {
if (match(parser, '=')) {
lex_state_set(parser, YP_LEX_STATE_BEG);
LEX(YP_TOKEN_STAR_STAR_EQUAL);
}
yp_token_type_t type = YP_TOKEN_STAR_STAR;
if (lex_state_spcarg_p(parser, space_seen) || lex_state_beg_p(parser)) {
type = YP_TOKEN_USTAR_STAR;
}
if (lex_state_operator_p(parser)) {
lex_state_set(parser, YP_LEX_STATE_ARG);
} else {
lex_state_set(parser, YP_LEX_STATE_BEG);
}
LEX(type);
}
if (match(parser, '=')) {
lex_state_set(parser, YP_LEX_STATE_BEG);
LEX(YP_TOKEN_STAR_EQUAL);
}
yp_token_type_t type = YP_TOKEN_STAR;
if (lex_state_spcarg_p(parser, space_seen)) {
yp_diagnostic_list_append(&parser->warning_list, parser->current.start, parser->current.end, "`*' interpreted as argument prefix");
type = YP_TOKEN_USTAR;
} else if (lex_state_beg_p(parser)) {
type = YP_TOKEN_USTAR;
}
if (lex_state_operator_p(parser)) {
lex_state_set(parser, YP_LEX_STATE_ARG);
} else {
lex_state_set(parser, YP_LEX_STATE_BEG);
}
LEX(type);
}
// ! != !~ !@
case '!':
if (lex_state_operator_p(parser)) {
lex_state_set(parser, YP_LEX_STATE_ARG);
if (match(parser, '@')) {
LEX(YP_TOKEN_BANG);
}
} else {
lex_state_set(parser, YP_LEX_STATE_BEG);
}
if (match(parser, '=')) {
LEX(YP_TOKEN_BANG_EQUAL);
}
if (match(parser, '~')) {
LEX(YP_TOKEN_BANG_TILDE);
}
LEX(YP_TOKEN_BANG);
// = => =~ == === =begin
case '=':
if (current_token_starts_line(parser) && strncmp(peek_string(parser, 5), "begin", 5) == 0 && yp_char_is_whitespace(peek_at(parser, 5))) {
yp_token_type_t type = lex_embdoc(parser);
if (type == YP_TOKEN_EOF) {
LEX(type);
}
goto lex_next_token;
}
if (lex_state_operator_p(parser)) {
lex_state_set(parser, YP_LEX_STATE_ARG);
} else {
lex_state_set(parser, YP_LEX_STATE_BEG);
}
if (match(parser, '>')) {
LEX(YP_TOKEN_EQUAL_GREATER);
}
if (match(parser, '~')) {
LEX(YP_TOKEN_EQUAL_TILDE);
}
if (match(parser, '=')) {
LEX(match(parser, '=') ? YP_TOKEN_EQUAL_EQUAL_EQUAL : YP_TOKEN_EQUAL_EQUAL);
}
LEX(YP_TOKEN_EQUAL);
// < << <<= <= <=>
case '<':
if (match(parser, '<')) {
if (
!lex_state_p(parser, YP_LEX_STATE_DOT | YP_LEX_STATE_CLASS) &&
!lex_state_end_p(parser) &&
(!lex_state_p(parser, YP_LEX_STATE_ARG_ANY) || lex_state_p(parser, YP_LEX_STATE_LABELED) || space_seen)
) {
const char *end = parser->current.end;
yp_heredoc_quote_t quote = YP_HEREDOC_QUOTE_NONE;
yp_heredoc_indent_t indent = YP_HEREDOC_INDENT_NONE;
if (match(parser, '-')) {
indent = YP_HEREDOC_INDENT_DASH;
}
else if (match(parser, '~')) {
indent = YP_HEREDOC_INDENT_TILDE;
}
if (match(parser, '`')) {
quote = YP_HEREDOC_QUOTE_BACKTICK;
}
else if (match(parser, '"')) {
quote = YP_HEREDOC_QUOTE_DOUBLE;
}
else if (match(parser, '\'')) {
quote = YP_HEREDOC_QUOTE_SINGLE;
}
const char *ident_start = parser->current.end;
size_t width = 0;
if (parser->current.end >= parser->end) {
parser->current.end = end;
} else if (quote == YP_HEREDOC_QUOTE_NONE && (width = char_is_identifier(parser, parser->current.end)) == 0) {
parser->current.end = end;
} else {
if (quote == YP_HEREDOC_QUOTE_NONE) {
parser->current.end += width;
while ((parser->current.end < parser->end) && (width = char_is_identifier(parser, parser->current.end))) {
parser->current.end += width;
}
} else {
// If we have quotes, then we're going to go until we find the
// end quote.
while ((parser->current.end < parser->end) && quote != (yp_heredoc_quote_t) (*parser->current.end)) {
parser->current.end++;
}
}
size_t ident_length = (size_t) (parser->current.end - ident_start);
if (quote != YP_HEREDOC_QUOTE_NONE && !match(parser, quote)) {
// TODO: handle unterminated heredoc
}
lex_mode_push(parser, (yp_lex_mode_t) {
.mode = YP_LEX_HEREDOC,
.as.heredoc = {
.ident_start = ident_start,
.ident_length = ident_length,
.next_start = parser->current.end,
.quote = quote,
.indent = indent
}
});
if (parser->heredoc_end == NULL) {
const char *body_start = next_newline(parser->current.end, parser->end - parser->current.end);
if (body_start == NULL) {
// If there is no newline after the heredoc identifier, then
// this is not a valid heredoc declaration. In this case we
// will add an error, but we will still return a heredoc
// start.
yp_diagnostic_list_append(&parser->error_list, parser->current.start, parser->current.end, "Unterminated heredoc.");
body_start = parser->end;
} else {
// Otherwise, we want to indicate that the body of the
// heredoc starts on the character after the next newline.
yp_newline_list_append(&parser->newline_list, body_start);
body_start++;
}
parser->next_start = body_start;
} else {
parser->next_start = parser->heredoc_end;
}
LEX(YP_TOKEN_HEREDOC_START);
}
}
if (match(parser, '=')) {
lex_state_set(parser, YP_LEX_STATE_BEG);
LEX(YP_TOKEN_LESS_LESS_EQUAL);
}
if (lex_state_operator_p(parser)) {
lex_state_set(parser, YP_LEX_STATE_ARG);
} else {
if (lex_state_p(parser, YP_LEX_STATE_CLASS)) parser->command_start = true;
lex_state_set(parser, YP_LEX_STATE_BEG);
}
LEX(YP_TOKEN_LESS_LESS);
}
if (lex_state_operator_p(parser)) {
lex_state_set(parser, YP_LEX_STATE_ARG);
} else {
if (lex_state_p(parser, YP_LEX_STATE_CLASS)) parser->command_start = true;
lex_state_set(parser, YP_LEX_STATE_BEG);
}
if (match(parser, '=')) {
if (match(parser, '>')) {
LEX(YP_TOKEN_LESS_EQUAL_GREATER);
}
LEX(YP_TOKEN_LESS_EQUAL);
}
LEX(YP_TOKEN_LESS);
// > >> >>= >=
case '>':
if (match(parser, '>')) {
if (lex_state_operator_p(parser)) {
lex_state_set(parser, YP_LEX_STATE_ARG);
} else {
lex_state_set(parser, YP_LEX_STATE_BEG);
}
LEX(match(parser, '=') ? YP_TOKEN_GREATER_GREATER_EQUAL : YP_TOKEN_GREATER_GREATER);
}
if (lex_state_operator_p(parser)) {
lex_state_set(parser, YP_LEX_STATE_ARG);
} else {
lex_state_set(parser, YP_LEX_STATE_BEG);
}
LEX(match(parser, '=') ? YP_TOKEN_GREATER_EQUAL : YP_TOKEN_GREATER);
// double-quoted string literal
case '"': {
bool label_allowed = (lex_state_p(parser, YP_LEX_STATE_LABEL | YP_LEX_STATE_ENDFN) && !previous_command_start) || lex_state_arg_p(parser);
lex_mode_push_string(parser, true, label_allowed, '\0', '"');
LEX(YP_TOKEN_STRING_BEGIN);
}
// xstring literal
case '`': {
if (lex_state_p(parser, YP_LEX_STATE_FNAME)) {
lex_state_set(parser, YP_LEX_STATE_ENDFN);
LEX(YP_TOKEN_BACKTICK);
}
if (lex_state_p(parser, YP_LEX_STATE_DOT)) {
if (previous_command_start) {
lex_state_set(parser, YP_LEX_STATE_CMDARG);
} else {
lex_state_set(parser, YP_LEX_STATE_ARG);
}
LEX(YP_TOKEN_BACKTICK);
}
lex_mode_push_string(parser, true, false, '\0', '`');
LEX(YP_TOKEN_BACKTICK);
}
// single-quoted string literal
case '\'': {
bool label_allowed = (lex_state_p(parser, YP_LEX_STATE_LABEL | YP_LEX_STATE_ENDFN) && !previous_command_start) || lex_state_arg_p(parser);
lex_mode_push_string(parser, false, label_allowed, '\0', '\'');
LEX(YP_TOKEN_STRING_BEGIN);
}
// ? character literal
case '?':
LEX(lex_question_mark(parser));
// & && &&= &=
case '&':
if (match(parser, '&')) {
lex_state_set(parser, YP_LEX_STATE_BEG);
if (match(parser, '=')) {
LEX(YP_TOKEN_AMPERSAND_AMPERSAND_EQUAL);
}
LEX(YP_TOKEN_AMPERSAND_AMPERSAND);
}
if (match(parser, '=')) {
lex_state_set(parser, YP_LEX_STATE_BEG);
LEX(YP_TOKEN_AMPERSAND_EQUAL);
}
if (match(parser, '.')) {
lex_state_set(parser, YP_LEX_STATE_DOT);
LEX(YP_TOKEN_AMPERSAND_DOT);
}
if (lex_state_operator_p(parser)) {
lex_state_set(parser, YP_LEX_STATE_ARG);
} else {
lex_state_set(parser, YP_LEX_STATE_BEG);
}
LEX(YP_TOKEN_AMPERSAND);
// | || ||= |=
case '|':
if (match(parser, '|')) {
if (match(parser, '=')) {
lex_state_set(parser, YP_LEX_STATE_BEG);
LEX(YP_TOKEN_PIPE_PIPE_EQUAL);
}
if (lex_state_p(parser, YP_LEX_STATE_BEG)) {
parser->current.end--;
LEX(YP_TOKEN_PIPE);
}
lex_state_set(parser, YP_LEX_STATE_BEG);
LEX(YP_TOKEN_PIPE_PIPE);
}
if (match(parser, '=')) {
lex_state_set(parser, YP_LEX_STATE_BEG);
LEX(YP_TOKEN_PIPE_EQUAL);
}
if (lex_state_operator_p(parser)) {
lex_state_set(parser, YP_LEX_STATE_ARG);
} else {
lex_state_set(parser, YP_LEX_STATE_BEG | YP_LEX_STATE_LABEL);
}
LEX(YP_TOKEN_PIPE);
// + += +@
case '+': {
if (lex_state_operator_p(parser)) {
lex_state_set(parser, YP_LEX_STATE_ARG);
if (match(parser, '@')) {
LEX(YP_TOKEN_UPLUS);
}
LEX(YP_TOKEN_PLUS);
}
if (match(parser, '=')) {
lex_state_set(parser, YP_LEX_STATE_BEG);
LEX(YP_TOKEN_PLUS_EQUAL);
}
bool spcarg = lex_state_spcarg_p(parser, space_seen);
if (spcarg) {
yp_diagnostic_list_append(
&parser->warning_list,
parser->current.start,
parser->current.end,
"ambiguous first argument; put parentheses or a space even after `+` operator"
);
}
if (lex_state_beg_p(parser) || spcarg) {
lex_state_set(parser, YP_LEX_STATE_BEG);
if (yp_char_is_decimal_digit(peek(parser))) {
parser->current.end++;
yp_token_type_t type = lex_numeric(parser);
lex_state_set(parser, YP_LEX_STATE_END);
LEX(type);
}
LEX(YP_TOKEN_UPLUS);
}
lex_state_set(parser, YP_LEX_STATE_BEG);
LEX(YP_TOKEN_PLUS);
}
// - -= -@
case '-': {
if (lex_state_operator_p(parser)) {
lex_state_set(parser, YP_LEX_STATE_ARG);
if (match(parser, '@')) {
LEX(YP_TOKEN_UMINUS);
}
LEX(YP_TOKEN_MINUS);
}
if (match(parser, '=')) {
lex_state_set(parser, YP_LEX_STATE_BEG);
LEX(YP_TOKEN_MINUS_EQUAL);
}
if (match(parser, '>')) {
lex_state_set(parser, YP_LEX_STATE_ENDFN);
LEX(YP_TOKEN_MINUS_GREATER);
}
bool spcarg = lex_state_spcarg_p(parser, space_seen);
if (spcarg) {
yp_diagnostic_list_append(
&parser->warning_list,
parser->current.start,
parser->current.end,
"ambiguous first argument; put parentheses or a space even after `-` operator"
);
}
if (lex_state_beg_p(parser) || spcarg) {
lex_state_set(parser, YP_LEX_STATE_BEG);
LEX(yp_char_is_decimal_digit(peek(parser)) ? YP_TOKEN_UMINUS_NUM : YP_TOKEN_UMINUS);
}
lex_state_set(parser, YP_LEX_STATE_BEG);
LEX(YP_TOKEN_MINUS);
}
// . .. ...
case '.': {
bool beg_p = lex_state_beg_p(parser);
if (match(parser, '.')) {
if (match(parser, '.')) {
// If we're _not_ inside a range within default parameters
if (
!context_p(parser, YP_CONTEXT_DEFAULT_PARAMS) &&
context_p(parser, YP_CONTEXT_DEF_PARAMS)
) {
if (lex_state_p(parser, YP_LEX_STATE_END)) {
lex_state_set(parser, YP_LEX_STATE_BEG);
} else {
lex_state_set(parser, YP_LEX_STATE_ENDARG);
}
LEX(YP_TOKEN_UDOT_DOT_DOT);
}
lex_state_set(parser, YP_LEX_STATE_BEG);
LEX(beg_p ? YP_TOKEN_UDOT_DOT_DOT : YP_TOKEN_DOT_DOT_DOT);
}
lex_state_set(parser, YP_LEX_STATE_BEG);
LEX(beg_p ? YP_TOKEN_UDOT_DOT : YP_TOKEN_DOT_DOT);
}
lex_state_set(parser, YP_LEX_STATE_DOT);
LEX(YP_TOKEN_DOT);
}
// integer
case '0':
case '1':
case '2':
case '3':
case '4':
case '5':
case '6':
case '7':
case '8':
case '9': {
yp_token_type_t type = lex_numeric(parser);
lex_state_set(parser, YP_LEX_STATE_END);
LEX(type);
}
// :: symbol
case ':':
if (match(parser, ':')) {
if (lex_state_beg_p(parser) || lex_state_p(parser, YP_LEX_STATE_CLASS) || (lex_state_p(parser, YP_LEX_STATE_ARG_ANY) && space_seen)) {
lex_state_set(parser, YP_LEX_STATE_BEG);
LEX(YP_TOKEN_UCOLON_COLON);
}
lex_state_set(parser, YP_LEX_STATE_DOT);
LEX(YP_TOKEN_COLON_COLON);
}
if (lex_state_end_p(parser) || yp_char_is_whitespace(*parser->current.end) || (*parser->current.end == '#')) {
lex_state_set(parser, YP_LEX_STATE_BEG);
LEX(YP_TOKEN_COLON);
}
if ((*parser->current.end == '"') || (*parser->current.end == '\'')) {
lex_mode_push_string(parser, *parser->current.end == '"', false, '\0', *parser->current.end);
parser->current.end++;
}
lex_state_set(parser, YP_LEX_STATE_FNAME);
LEX(YP_TOKEN_SYMBOL_BEGIN);
// / /=
case '/':
if (lex_state_beg_p(parser)) {
lex_mode_push_regexp(parser, '\0', '/');
LEX(YP_TOKEN_REGEXP_BEGIN);
}
if (match(parser, '=')) {
lex_state_set(parser, YP_LEX_STATE_BEG);
LEX(YP_TOKEN_SLASH_EQUAL);
}
if (lex_state_spcarg_p(parser, space_seen)) {
yp_diagnostic_list_append(&parser->warning_list, parser->current.start, parser->current.end, "ambiguity between regexp and two divisions: wrap regexp in parentheses or add a space after `/' operator");
lex_mode_push_regexp(parser, '\0', '/');
LEX(YP_TOKEN_REGEXP_BEGIN);
}
if (lex_state_operator_p(parser)) {
lex_state_set(parser, YP_LEX_STATE_ARG);
} else {
lex_state_set(parser, YP_LEX_STATE_BEG);
}
LEX(YP_TOKEN_SLASH);
// ^ ^=
case '^':
if (lex_state_operator_p(parser)) {
lex_state_set(parser, YP_LEX_STATE_ARG);
} else {
lex_state_set(parser, YP_LEX_STATE_BEG);
}
LEX(match(parser, '=') ? YP_TOKEN_CARET_EQUAL : YP_TOKEN_CARET);
// ~ ~@
case '~':
if (lex_state_operator_p(parser)) {
(void) match(parser, '@');
lex_state_set(parser, YP_LEX_STATE_ARG);
} else {
lex_state_set(parser, YP_LEX_STATE_BEG);
}
LEX(YP_TOKEN_TILDE);
// % %= %i %I %q %Q %w %W
case '%': {
// In a BEG state, if you encounter a % then you must be starting
// something. In this case if there is no subsequent character then
// we have an invalid token.
if (lex_state_beg_p(parser) && (parser->current.end >= parser->end)) {
yp_diagnostic_list_append(&parser->error_list, parser->current.start, parser->current.end, "unexpected end of input");
LEX(YP_TOKEN_STRING_BEGIN);
}
if (!lex_state_beg_p(parser) && match(parser, '=')) {
lex_state_set(parser, YP_LEX_STATE_BEG);
LEX(YP_TOKEN_PERCENT_EQUAL);
}
else if(
lex_state_beg_p(parser) ||
(lex_state_p(parser, YP_LEX_STATE_FITEM) && (*parser->current.end == 's')) ||
lex_state_spcarg_p(parser, space_seen)
) {
if (!parser->encoding.alnum_char(parser->current.end)) {
lex_mode_push_string(parser, true, false, lex_mode_incrementor(*parser->current.end), lex_mode_terminator(*parser->current.end));
if (*parser->current.end == '\r') {
parser->current.end++;
}
if (*parser->current.end == '\n') {
yp_newline_list_append(&parser->newline_list, parser->current.end);
}
parser->current.end++;
LEX(YP_TOKEN_STRING_BEGIN);
}
switch (*parser->current.end) {
case 'i': {
parser->current.end++;
lex_mode_push_list(parser, false, *parser->current.end++);
LEX(YP_TOKEN_PERCENT_LOWER_I);
}
case 'I': {
parser->current.end++;
lex_mode_push_list(parser, true, *parser->current.end++);
LEX(YP_TOKEN_PERCENT_UPPER_I);
}
case 'r': {
parser->current.end++;
lex_mode_push_regexp(parser, lex_mode_incrementor(*parser->current.end), lex_mode_terminator(*parser->current.end));
parser->current.end++;
LEX(YP_TOKEN_REGEXP_BEGIN);
}
case 'q': {
parser->current.end++;
lex_mode_push_string(parser, false, false, lex_mode_incrementor(*parser->current.end), lex_mode_terminator(*parser->current.end));
parser->current.end++;
LEX(YP_TOKEN_STRING_BEGIN);
}
case 'Q': {
parser->current.end++;
lex_mode_push_string(parser, true, false, lex_mode_incrementor(*parser->current.end), lex_mode_terminator(*parser->current.end));
parser->current.end++;
LEX(YP_TOKEN_STRING_BEGIN);
}
case 's': {
parser->current.end++;
lex_mode_push_string(parser, false, false, lex_mode_incrementor(*parser->current.end), lex_mode_terminator(*parser->current.end));
lex_state_set(parser, YP_LEX_STATE_FNAME | YP_LEX_STATE_FITEM);
parser->current.end++;
LEX(YP_TOKEN_SYMBOL_BEGIN);
}
case 'w': {
parser->current.end++;
lex_mode_push_list(parser, false, *parser->current.end++);
LEX(YP_TOKEN_PERCENT_LOWER_W);
}
case 'W': {
parser->current.end++;
lex_mode_push_list(parser, true, *parser->current.end++);
LEX(YP_TOKEN_PERCENT_UPPER_W);
}
case 'x': {
parser->current.end++;
lex_mode_push_string(parser, true, false, lex_mode_incrementor(*parser->current.end), lex_mode_terminator(*parser->current.end));
parser->current.end++;
LEX(YP_TOKEN_PERCENT_LOWER_X);
}
default:
// If we get to this point, then we have a % that is completely
// unparseable. In this case we'll just drop it from the parser
// and skip past it and hope that the next token is something
// that we can parse.
yp_diagnostic_list_append(&parser->error_list, parser->current.start, parser->current.end, "invalid %% token");
goto lex_next_token;
}
}
lex_state_set(parser, lex_state_operator_p(parser) ? YP_LEX_STATE_ARG : YP_LEX_STATE_BEG);
LEX(YP_TOKEN_PERCENT);
}
// global variable
case '$': {
yp_token_type_t type = lex_global_variable(parser);
// If we're lexing an embedded variable, then we need to pop back into
// the parent lex context.
if (parser->lex_modes.current->mode == YP_LEX_EMBVAR) {
lex_mode_pop(parser);
}
lex_state_set(parser, YP_LEX_STATE_END);
LEX(type);
}
// instance variable, class variable
case '@':
lex_state_set(parser, parser->lex_state & YP_LEX_STATE_FNAME ? YP_LEX_STATE_ENDFN : YP_LEX_STATE_END);
LEX(lex_at_variable(parser));
default: {
if (*parser->current.start != '_') {
size_t width = char_is_identifier_start(parser, parser->current.start);
// If this isn't the beginning of an identifier, then it's an invalid
// token as we've exhausted all of the other options. We'll skip past
// it and return the next token.
if (!width) {
yp_diagnostic_list_append(&parser->error_list, parser->current.start, parser->current.end, "Invalid token.");
goto lex_next_token;
}
parser->current.end = parser->current.start + width;
}
yp_token_type_t type = lex_identifier(parser, previous_command_start);
// If we've hit a __END__ and it was at the start of the line or the
// start of the file and it is followed by either a \n or a \r\n, then
// this is the last token of the file.
if (
((parser->current.end - parser->current.start) == 7) &&
current_token_starts_line(parser) &&
(strncmp(parser->current.start, "__END__", 7) == 0) &&
(*parser->current.end == '\n' || (*parser->current.end == '\r' && parser->current.end[1] == '\n'))
) {
parser->current.end = parser->end;
parser->current.type = YP_TOKEN___END__;
parser_lex_callback(parser);
yp_comment_t *comment = parser_comment(parser, YP_COMMENT___END__);
yp_list_append(&parser->comment_list, (yp_list_node_t *) comment);
LEX(YP_TOKEN_EOF);
}
yp_lex_state_t last_state = parser->lex_state;
if (type == YP_TOKEN_IDENTIFIER || type == YP_TOKEN_CONSTANT) {
if (lex_state_p(parser, YP_LEX_STATE_BEG_ANY | YP_LEX_STATE_ARG_ANY | YP_LEX_STATE_DOT)) {
if (previous_command_start) {
lex_state_set(parser, YP_LEX_STATE_CMDARG);
} else {
lex_state_set(parser, YP_LEX_STATE_ARG);
}
} else if (parser->lex_state == YP_LEX_STATE_FNAME) {
lex_state_set(parser, YP_LEX_STATE_ENDFN);
} else {
lex_state_set(parser, YP_LEX_STATE_END);
}
}
if (
!(last_state & (YP_LEX_STATE_DOT | YP_LEX_STATE_FNAME)) &&
(type == YP_TOKEN_IDENTIFIER) &&
((yp_parser_local_depth(parser, &parser->current) != -1) ||
token_is_numbered_parameter(parser->current.start, parser->current.end))
) {
lex_state_set(parser, YP_LEX_STATE_END | YP_LEX_STATE_LABEL);
}
LEX(type);
}
}
}
case YP_LEX_LIST:
if (parser->next_start != NULL) {
parser->current.end = parser->next_start;
parser->next_start = NULL;
}
// First we'll set the beginning of the token.
parser->current.start = parser->current.end;
// If there's any whitespace at the start of the list, then we're
// going to trim it off the beginning and create a new token.
size_t whitespace;
bool should_stop = parser->heredoc_end;
if ((whitespace = yp_strspn_whitespace_newlines(parser->current.end, parser->end - parser->current.end, &parser->newline_list, should_stop)) > 0) {
parser->current.end += whitespace;
if (parser->current.end[-1] == '\n') {
// mutates next_start
parser_flush_heredoc_end(parser);
}
LEX(YP_TOKEN_WORDS_SEP);
}
// We'll check if we're at the end of the file. If we are, then we
// need to return the EOF token.
if (parser->current.end >= parser->end) {
LEX(YP_TOKEN_EOF);
}
// Here we'll get a list of the places where strpbrk should break,
// and then find the first one.
const char *breakpoints = parser->lex_modes.current->as.list.breakpoints;
const char *breakpoint = yp_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
while (breakpoint != NULL) {
switch (*breakpoint) {
case '\0':
// If we hit a null byte, skip directly past it.
breakpoint = yp_strpbrk(parser, breakpoint + 1, breakpoints, parser->end - (breakpoint + 1));
break;
case '\\': {
// If we hit escapes, then we need to treat the next token
// literally. In this case we'll skip past the next character and
// find the next breakpoint.
yp_unescape_type_t unescape_type;
if (parser->lex_modes.current->as.list.interpolation) {
unescape_type = YP_UNESCAPE_ALL;
} else {
unescape_type = YP_UNESCAPE_MINIMAL;
}
size_t difference = yp_unescape_calculate_difference(breakpoint, parser->end, unescape_type, false, &parser->error_list);
// If the result is an escaped newline, then we need to
// track that newline.
if (breakpoint[difference - 1] == '\n') {
yp_newline_list_append(&parser->newline_list, breakpoint + difference - 1);
}
breakpoint = yp_strpbrk(parser, breakpoint + difference, breakpoints, parser->end - (breakpoint + difference));
break;
}
case ' ':
case '\t':
case '\f':
case '\r':
case '\v':
case '\n':
// If we've hit whitespace, then we must have received content by
// now, so we can return an element of the list.
parser->current.end = breakpoint;
LEX(YP_TOKEN_STRING_CONTENT);
case '#': {
// if # is the terminator, we need to fall into the default case
if (parser->lex_modes.current->as.list.terminator != '#') {
yp_token_type_t type = lex_interpolation(parser, breakpoint);
if (type != YP_TOKEN_NOT_PROVIDED) {
LEX(type);
}
// If we haven't returned at this point then we had something
// that looked like an interpolated class or instance variable
// like "#@" but wasn't actually. In this case we'll just skip
// to the next breakpoint.
breakpoint = yp_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
break;
}
}
/* fallthrough */
default:
if (*breakpoint == parser->lex_modes.current->as.list.incrementor) {
// If we've hit the incrementor, then we need to skip past it and
// find the next breakpoint.
breakpoint = yp_strpbrk(parser, breakpoint + 1, breakpoints, parser->end - (breakpoint + 1));
parser->lex_modes.current->as.list.nesting++;
break;
}
// In this case we've hit the terminator.
assert(*breakpoint == parser->lex_modes.current->as.list.terminator);
// If this terminator doesn't actually close the list, then we need
// to continue on past it.
if (parser->lex_modes.current->as.list.nesting > 0) {
breakpoint = yp_strpbrk(parser, breakpoint + 1, breakpoints, parser->end - (breakpoint + 1));
parser->lex_modes.current->as.list.nesting--;
break;
}
// If we've hit the terminator and we've already skipped past
// content, then we can return a list node.
if (breakpoint > parser->current.start) {
parser->current.end = breakpoint;
LEX(YP_TOKEN_STRING_CONTENT);
}
// Otherwise, switch back to the default state and return the end of
// the list.
parser->current.end = breakpoint + 1;
lex_mode_pop(parser);
lex_state_set(parser, YP_LEX_STATE_END);
LEX(YP_TOKEN_STRING_END);
}
}
// If we were unable to find a breakpoint, then this token hits the end of
// the file.
LEX(YP_TOKEN_EOF);
case YP_LEX_REGEXP: {
// First, we'll set to start of this token to be the current end.
parser->current.start = parser->current.end;
// We'll check if we're at the end of the file. If we are, then we need to
// return the EOF token.
if (parser->current.end >= parser->end) {
LEX(YP_TOKEN_EOF);
}
// These are the places where we need to split up the content of the
// regular expression. We'll use strpbrk to find the first of these
// characters.
const char *breakpoints = parser->lex_modes.current->as.regexp.breakpoints;
const char *breakpoint = yp_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
while (breakpoint != NULL) {
switch (*breakpoint) {
case '\0':
// If we hit a null byte, skip directly past it.
breakpoint = yp_strpbrk(parser, breakpoint + 1, breakpoints, parser->end - (breakpoint + 1));
break;
case '\\': {
// If we hit escapes, then we need to treat the next token
// literally. In this case we'll skip past the next character and
// find the next breakpoint.
size_t difference = yp_unescape_calculate_difference(breakpoint, parser->end, YP_UNESCAPE_ALL, false, &parser->error_list);
// If the result is an escaped newline, then we need to
// track that newline.
if (breakpoint[difference - 1] == '\n') {
yp_newline_list_append(&parser->newline_list, breakpoint + difference - 1);
}
breakpoint = yp_strpbrk(parser, breakpoint + difference, breakpoints, parser->end - (breakpoint + difference));
break;
}
case '#': {
yp_token_type_t type = lex_interpolation(parser, breakpoint);
if (type != YP_TOKEN_NOT_PROVIDED) {
LEX(type);
}
// We need to check if the terminator was # before skipping over
// to the next breakpoint
if (parser->lex_modes.current->as.regexp.terminator != '#') {
// If we haven't returned at this point then we had something
// that looked like an interpolated class or instance variable
// like "#@" but wasn't actually. In this case we'll just skip
// to the next breakpoint.
breakpoint = yp_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
break;
}
}
/* fallthrough */
default: {
if (*breakpoint == parser->lex_modes.current->as.regexp.incrementor) {
// If we've hit the incrementor, then we need to skip past it and
// find the next breakpoint.
breakpoint = yp_strpbrk(parser, breakpoint + 1, breakpoints, parser->end - (breakpoint + 1));
parser->lex_modes.current->as.regexp.nesting++;
break;
}
if (*breakpoint == '\n') {
// If we've hit a newline, then we need to track
// that in the list of newlines.
yp_newline_list_append(&parser->newline_list, breakpoint);
if (parser->lex_modes.current->as.regexp.terminator != '\n') {
// If the terminator is not a newline, then we
// can set the next breakpoint and continue.
breakpoint = yp_strpbrk(parser, breakpoint + 1, breakpoints, parser->end - (breakpoint + 1));
break;
}
// Otherwise, the newline character is the
// terminator so we need to continue on.
}
assert(*breakpoint == parser->lex_modes.current->as.regexp.terminator);
if (parser->lex_modes.current->as.regexp.nesting > 0) {
breakpoint = yp_strpbrk(parser, breakpoint + 1, breakpoints, parser->end - (breakpoint + 1));
parser->lex_modes.current->as.regexp.nesting--;
break;
}
// Here we've hit the terminator. If we have already consumed
// content then we need to return that content as string content
// first.
if (breakpoint > parser->current.start) {
parser->current.end = breakpoint;
LEX(YP_TOKEN_STRING_CONTENT);
}
// Since we've hit the terminator of the regular expression, we now
// need to parse the options.
parser->current.end = breakpoint + 1;
parser->current.end += yp_strspn_regexp_option(parser->current.end, parser->end - parser->current.end);
lex_mode_pop(parser);
lex_state_set(parser, YP_LEX_STATE_END);
LEX(YP_TOKEN_REGEXP_END);
}
}
}
// At this point, the breakpoint is NULL which means we were unable to
// find anything before the end of the file.
LEX(YP_TOKEN_EOF);
}
case YP_LEX_STRING: {
// First, we'll set to start of this token to be the current end.
if (parser->next_start == NULL) {
parser->current.start = parser->current.end;
} else {
parser->current.start = parser->next_start;
parser->current.end = parser->next_start;
parser->next_start = NULL;
}
// We'll check if we're at the end of the file. If we are, then we need to
// return the EOF token.
if (parser->current.end >= parser->end) {
LEX(YP_TOKEN_EOF);
}
// These are the places where we need to split up the content of the
// string. We'll use strpbrk to find the first of these characters.
const char *breakpoints = parser->lex_modes.current->as.string.breakpoints;
const char *breakpoint = yp_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
while (breakpoint != NULL) {
// If we hit the incrementor, then we'll increment then nesting and
// continue lexing.
if (
parser->lex_modes.current->as.string.incrementor != '\0' &&
*breakpoint == parser->lex_modes.current->as.string.incrementor
) {
parser->lex_modes.current->as.string.nesting++;
breakpoint = yp_strpbrk(parser, breakpoint + 1, breakpoints, parser->end - (breakpoint + 1));
continue;
}
// Note that we have to check the terminator here first because we could
// potentially be parsing a % string that has a # character as the
// terminator.
if (*breakpoint == parser->lex_modes.current->as.string.terminator) {
// If this terminator doesn't actually close the string, then we need
// to continue on past it.
if (parser->lex_modes.current->as.string.nesting > 0) {
breakpoint = yp_strpbrk(parser, breakpoint + 1, breakpoints, parser->end - (breakpoint + 1));
parser->lex_modes.current->as.string.nesting--;
continue;
}
// Here we've hit the terminator. If we have already consumed content
// then we need to return that content as string content first.
if (breakpoint > parser->current.start) {
parser->current.end = breakpoint;
LEX(YP_TOKEN_STRING_CONTENT);
}
// Otherwise we need to switch back to the parent lex mode and
// return the end of the string.
if (*parser->current.end == '\r' && parser->current.end + 1 < parser->end && parser->current.end[1] == '\n') {
parser->current.end = breakpoint + 2;
yp_newline_list_append(&parser->newline_list, breakpoint + 1);
} else {
if (*parser->current.end == '\n') {
yp_newline_list_append(&parser->newline_list, parser->current.end);
}
parser->current.end = breakpoint + 1;
}
if (
parser->lex_modes.current->as.string.label_allowed &&
(peek(parser) == ':') &&
(peek_at(parser, 1) != ':')
) {
parser->current.end++;
lex_state_set(parser, YP_LEX_STATE_ARG | YP_LEX_STATE_LABELED);
lex_mode_pop(parser);
LEX(YP_TOKEN_LABEL_END);
}
lex_state_set(parser, YP_LEX_STATE_END);
lex_mode_pop(parser);
LEX(YP_TOKEN_STRING_END);
}
// When we hit a newline, we need to flush any potential heredocs. Note
// that this has to happen after we check for the terminator in case the
// terminator is a newline character.
if (*breakpoint == '\n') {
if (parser->heredoc_end == NULL) {
yp_newline_list_append(&parser->newline_list, breakpoint);
breakpoint = yp_strpbrk(parser, breakpoint + 1, breakpoints, parser->end - (breakpoint + 1));
continue;
} else {
parser->current.end = breakpoint + 1;
parser_flush_heredoc_end(parser);
LEX(YP_TOKEN_STRING_CONTENT);
}
}
switch (*breakpoint) {
case '\0':
// Skip directly past the null character.
breakpoint = yp_strpbrk(parser, breakpoint + 1, breakpoints, parser->end - (breakpoint + 1));
break;
case '\\': {
// If we hit escapes, then we need to treat the next token
// literally. In this case we'll skip past the next character and
// find the next breakpoint.
yp_unescape_type_t unescape_type = parser->lex_modes.current->as.string.interpolation ? YP_UNESCAPE_ALL : YP_UNESCAPE_MINIMAL;
size_t difference = yp_unescape_calculate_difference(breakpoint, parser->end, unescape_type, false, &parser->error_list);
// If the result is an escaped newline, then we need to
// track that newline.
if (breakpoint[difference - 1] == '\n') {
yp_newline_list_append(&parser->newline_list, breakpoint + difference - 1);
}
breakpoint = yp_strpbrk(parser, breakpoint + difference, breakpoints, parser->end - (breakpoint + difference));
break;
}
case '#': {
yp_token_type_t type = lex_interpolation(parser, breakpoint);
if (type != YP_TOKEN_NOT_PROVIDED) {
LEX(type);
}
// If we haven't returned at this point then we had something that
// looked like an interpolated class or instance variable like "#@"
// but wasn't actually. In this case we'll just skip to the next
// breakpoint.
breakpoint = yp_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
break;
}
default:
assert(false && "unreachable");
}
}
// If we've hit the end of the string, then this is an unterminated
// string. In that case we'll return the EOF token.
parser->current.end = parser->end;
LEX(YP_TOKEN_EOF);
}
case YP_LEX_HEREDOC: {
// First, we'll set to start of this token.
if (parser->next_start == NULL) {
parser->current.start = parser->current.end;
} else {
parser->current.start = parser->next_start;
parser->current.end = parser->next_start;
parser->next_start = NULL;
}
// We'll check if we're at the end of the file. If we are, then we need to
// return the EOF token.
if (parser->current.end >= parser->end) {
LEX(YP_TOKEN_EOF);
}
// Now let's grab the information about the identifier off of the current
// lex mode.
const char *ident_start = parser->lex_modes.current->as.heredoc.ident_start;
size_t ident_length = parser->lex_modes.current->as.heredoc.ident_length;
// If we are immediately following a newline and we have hit the
// terminator, then we need to return the ending of the heredoc.
if (parser->current.start[-1] == '\n') {
const char *start = parser->current.start;
if (parser->lex_modes.current->as.heredoc.indent != YP_HEREDOC_INDENT_NONE) {
start += yp_strspn_inline_whitespace(start, parser->end - start);
}
if ((start + ident_length <= parser->end) && (strncmp(start, ident_start, ident_length) == 0)) {
bool matched = true;
bool at_end = false;
if ((start + ident_length < parser->end) && (start[ident_length] == '\n')) {
parser->current.end = start + ident_length + 1;
yp_newline_list_append(&parser->newline_list, start + ident_length);
} else if ((start + ident_length + 1 < parser->end) && (start[ident_length] == '\r') && (start[ident_length + 1] == '\n')) {
parser->current.end = start + ident_length + 2;
yp_newline_list_append(&parser->newline_list, start + ident_length + 1);
} else if (parser->end == (start + ident_length)) {
parser->current.end = start + ident_length;
at_end = true;
} else {
matched = false;
}
if (matched) {
if (*parser->lex_modes.current->as.heredoc.next_start == '\\') {
parser->next_start = NULL;
} else {
parser->next_start = parser->lex_modes.current->as.heredoc.next_start;
parser->heredoc_end = parser->current.end;
}
lex_mode_pop(parser);
if (!at_end) {
lex_state_set(parser, YP_LEX_STATE_END);
}
LEX(YP_TOKEN_HEREDOC_END);
}
}
}
// Otherwise we'll be parsing string content. These are the places where
// we need to split up the content of the heredoc. We'll use strpbrk to
// find the first of these characters.
char breakpoints[] = "\n\\#";
yp_heredoc_quote_t quote = parser->lex_modes.current->as.heredoc.quote;
if (quote == YP_HEREDOC_QUOTE_SINGLE) {
breakpoints[2] = '\0';
}
const char *breakpoint = yp_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
while (breakpoint != NULL) {
switch (*breakpoint) {
case '\0':
// Skip directly past the null character.
breakpoint = yp_strpbrk(parser, breakpoint + 1, breakpoints, parser->end - (breakpoint + 1));
break;
case '\n': {
yp_newline_list_append(&parser->newline_list, breakpoint);
if (parser->heredoc_end != NULL && (parser->heredoc_end > breakpoint)) {
parser_flush_heredoc_end(parser);
parser->current.end = breakpoint + 1;
LEX(YP_TOKEN_STRING_CONTENT);
}
const char *start = breakpoint + 1;
if (parser->lex_modes.current->as.heredoc.indent != YP_HEREDOC_INDENT_NONE) {
start += yp_strspn_inline_whitespace(start, parser->end - start);
}
// If we have hit a newline that is followed by a valid terminator,
// then we need to return the content of the heredoc here as string
// content. Then, the next time a token is lexed, it will match
// again and return the end of the heredoc.
if (
(start + ident_length <= parser->end) &&
(strncmp(start, ident_start, ident_length) == 0)
) {
// Heredoc terminators must be followed by a newline or EOF to be valid.
if (start + ident_length == parser->end || start[ident_length] == '\n') {
parser->current.end = breakpoint + 1;
LEX(YP_TOKEN_STRING_CONTENT);
}
// They can also be followed by a carriage return and then a
// newline. Be sure here that we don't accidentally read off the
// end.
if (
(start + ident_length + 1 < parser->end) &&
(start[ident_length] == '\r') &&
(start[ident_length + 1] == '\n')
) {
parser->current.end = breakpoint + 1;
LEX(YP_TOKEN_STRING_CONTENT);
}
}
// Otherwise we hit a newline and it wasn't followed by a
// terminator, so we can continue parsing.
breakpoint = yp_strpbrk(parser, breakpoint + 1, breakpoints, parser->end - (breakpoint + 1));
break;
}
case '\\': {
// If we hit escapes, then we need to treat the next token
// literally. In this case we'll skip past the next character and
// find the next breakpoint.
if (breakpoint[1] == '\n') {
breakpoint++;
} else {
yp_unescape_type_t unescape_type = (quote == YP_HEREDOC_QUOTE_SINGLE) ? YP_UNESCAPE_MINIMAL : YP_UNESCAPE_ALL;
size_t difference = yp_unescape_calculate_difference(breakpoint, parser->end, unescape_type, false, &parser->error_list);
if (breakpoint[difference - 1] == '\n') {
yp_newline_list_append(&parser->newline_list, breakpoint + difference - 1);
}
breakpoint = yp_strpbrk(parser, breakpoint + difference, breakpoints, parser->end - (breakpoint + difference));
}
break;
}
case '#': {
yp_token_type_t type = lex_interpolation(parser, breakpoint);
if (type != YP_TOKEN_NOT_PROVIDED) {
LEX(type);
}
// If we haven't returned at this point then we had something
// that looked like an interpolated class or instance variable
// like "#@" but wasn't actually. In this case we'll just skip
// to the next breakpoint.
breakpoint = yp_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
break;
}
default:
assert(false && "unreachable");
}
}
// If we've hit the end of the string, then this is an unterminated
// heredoc. In that case we'll return the EOF token.
parser->current.end = parser->end;
LEX(YP_TOKEN_EOF);
}
}
assert(false && "unreachable");
}
#undef LEX
/******************************************************************************/
/* Parse functions */
/******************************************************************************/
// When we are parsing certain content, we need to unescape the content to
// provide to the consumers of the parser. The following functions accept a range
// of characters from the source and unescapes into the provided type.
//
// We have functions for unescaping regular expression nodes, string nodes,
// symbol nodes, and xstring nodes
// Build a regular expression node from the given tokens, then unescape the
// raw content into the node's unescaped string using the requested unescape
// type. Any unescape errors are appended to the parser's error list.
static yp_regular_expression_node_t *
yp_regular_expression_node_create_and_unescape(yp_parser_t *parser, const yp_token_t *opening, const yp_token_t *content, const yp_token_t *closing, yp_unescape_type_t unescape_type) {
    yp_regular_expression_node_t *node = yp_regular_expression_node_create(parser, opening, content, closing);

    // The content token comes straight from the source, so its end can never
    // precede its start.
    ptrdiff_t content_length = content->end - content->start;
    assert(content_length >= 0);

    yp_unescape_manipulate_string(parser, content->start, (size_t) content_length, &node->unescaped, unescape_type, &parser->error_list);
    return node;
}
// Build a symbol node from the given tokens, then unescape the raw content
// into the node's unescaped string using the requested unescape type. Any
// unescape errors are appended to the parser's error list.
static yp_symbol_node_t *
yp_symbol_node_create_and_unescape(yp_parser_t *parser, const yp_token_t *opening, const yp_token_t *content, const yp_token_t *closing, yp_unescape_type_t unescape_type) {
    yp_symbol_node_t *node = yp_symbol_node_create(parser, opening, content, closing);

    // The content token comes straight from the source, so its end can never
    // precede its start.
    ptrdiff_t content_length = content->end - content->start;
    assert(content_length >= 0);

    yp_unescape_manipulate_string(parser, content->start, (size_t) content_length, &node->unescaped, unescape_type, &parser->error_list);
    return node;
}
// Build a string node from the given tokens, then unescape the raw content
// into the node's unescaped string using the requested unescape type. Any
// unescape errors are appended to the parser's error list.
static yp_string_node_t *
yp_string_node_create_and_unescape(yp_parser_t *parser, const yp_token_t *opening, const yp_token_t *content, const yp_token_t *closing, yp_unescape_type_t unescape_type) {
    yp_string_node_t *node = yp_string_node_create(parser, opening, content, closing);

    // The content token comes straight from the source, so its end can never
    // precede its start.
    ptrdiff_t content_length = content->end - content->start;
    assert(content_length >= 0);

    yp_unescape_manipulate_string(parser, content->start, (size_t) content_length, &node->unescaped, unescape_type, &parser->error_list);
    return node;
}
// Build an xstring (backtick command string) node from the given tokens, then
// unescape the raw content into the node's unescaped string. Unlike the other
// *_create_and_unescape helpers, xstrings always use YP_UNESCAPE_ALL.
static yp_x_string_node_t *
yp_xstring_node_create_and_unescape(yp_parser_t *parser, const yp_token_t *opening, const yp_token_t *content, const yp_token_t *closing) {
    yp_x_string_node_t *node = yp_xstring_node_create(parser, opening, content, closing);

    // The content token comes straight from the source, so its end can never
    // precede its start.
    ptrdiff_t content_length = content->end - content->start;
    assert(content_length >= 0);

    yp_unescape_manipulate_string(parser, content->start, (size_t) content_length, &node->unescaped, YP_UNESCAPE_ALL, &parser->error_list);
    return node;
}
// Returns true if the current token is of the specified type.
// Returns true if the token currently being considered by the parser is of
// the specified type.
static inline bool
match_type_p(yp_parser_t *parser, yp_token_type_t type) {
    yp_token_type_t current_type = parser->current.type;
    return current_type == type;
}
// Returns true if the current token is of any of the specified types.
// Returns true if the current token matches any of the `count` token types
// passed as variadic arguments. The variadic arguments must all be of type
// yp_token_type_t.
static bool
match_any_type_p(yp_parser_t *parser, size_t count, ...) {
    va_list types;
    va_start(types, count);

    // Walk the variadic list until we either find a match or exhaust it.
    bool found = false;
    for (size_t index = 0; !found && (index < count); index++) {
        found = match_type_p(parser, va_arg(types, yp_token_type_t));
    }

    va_end(types);
    return found;
}
// These are the various precedence rules. Because we are using a Pratt parser,
// they are named binding power to represent the manner in which nodes are bound
// together in the stack.
//
// We increment by 2 because we want to leave room for the infix operators to
// specify their associativity by adding or subtracting one.
//
// Entries are ordered from loosest binding (statements) to tightest (call
// operators); a larger value binds more tightly.
typedef enum {
    YP_BINDING_POWER_UNSET = 0, // used to indicate this token cannot be used as an infix operator
    YP_BINDING_POWER_STATEMENT = 2,
    YP_BINDING_POWER_MODIFIER = 4, // if unless until while in
    YP_BINDING_POWER_MODIFIER_RESCUE = 6, // rescue
    YP_BINDING_POWER_COMPOSITION = 8, // and or
    YP_BINDING_POWER_NOT = 10, // not
    YP_BINDING_POWER_MATCH = 12, // =>
    YP_BINDING_POWER_DEFINED = 14, // defined?
    YP_BINDING_POWER_ASSIGNMENT = 16, // = += -= *= /= %= &= |= ^= &&= ||= <<= >>= **=
    YP_BINDING_POWER_TERNARY = 18, // ?:
    YP_BINDING_POWER_RANGE = 20, // .. ...
    YP_BINDING_POWER_LOGICAL_OR = 22, // ||
    YP_BINDING_POWER_LOGICAL_AND = 24, // &&
    YP_BINDING_POWER_EQUALITY = 26, // <=> == === != =~ !~
    YP_BINDING_POWER_COMPARISON = 28, // > >= < <=
    YP_BINDING_POWER_BITWISE_OR = 30, // | ^
    YP_BINDING_POWER_BITWISE_AND = 32, // &
    YP_BINDING_POWER_SHIFT = 34, // << >>
    YP_BINDING_POWER_TERM = 36, // + -
    YP_BINDING_POWER_FACTOR = 38, // * / %
    YP_BINDING_POWER_UMINUS = 40, // -@
    YP_BINDING_POWER_EXPONENT = 42, // **
    YP_BINDING_POWER_UNARY = 44, // ! ~ +@
    YP_BINDING_POWER_INDEX = 46, // [] []=
    YP_BINDING_POWER_CALL = 48, // :: .
    YP_BINDING_POWER_MAX = 50
} yp_binding_power_t;
// This struct represents a set of binding powers used for a given token. They
// are combined in this way to make it easier to represent associativity.
typedef struct {
    yp_binding_power_t left;  // power required on the left for this token to bind as an infix operator
    yp_binding_power_t right; // power used when parsing the right-hand operand
    bool binary;              // true if this token can act as a binary (infix) operator
} yp_binding_powers_t;
// Helper macros for populating the binding powers table below. They are
// #undef'd immediately after the table since they are only needed here.
//
// LEFT_ASSOCIATIVE makes the right power one higher than the left so that an
// operator of the same precedence on the right does NOT continue the current
// expression; RIGHT_ASSOCIATIVE keeps them equal so that it does.
#define BINDING_POWER_ASSIGNMENT { YP_BINDING_POWER_UNARY, YP_BINDING_POWER_ASSIGNMENT, true }
#define LEFT_ASSOCIATIVE(precedence) { precedence, precedence + 1, true }
#define RIGHT_ASSOCIATIVE(precedence) { precedence, precedence, true }
#define RIGHT_ASSOCIATIVE_UNARY(precedence) { precedence, precedence, false }

// Table mapping every token type to its binding powers. Tokens not listed here
// are zero-initialized, i.e. YP_BINDING_POWER_UNSET.
yp_binding_powers_t yp_binding_powers[YP_TOKEN_MAXIMUM] = {
    // if unless until while in rescue
    [YP_TOKEN_KEYWORD_IF_MODIFIER] = LEFT_ASSOCIATIVE(YP_BINDING_POWER_MODIFIER),
    [YP_TOKEN_KEYWORD_UNLESS_MODIFIER] = LEFT_ASSOCIATIVE(YP_BINDING_POWER_MODIFIER),
    [YP_TOKEN_KEYWORD_UNTIL_MODIFIER] = LEFT_ASSOCIATIVE(YP_BINDING_POWER_MODIFIER),
    [YP_TOKEN_KEYWORD_WHILE_MODIFIER] = LEFT_ASSOCIATIVE(YP_BINDING_POWER_MODIFIER),
    [YP_TOKEN_KEYWORD_IN] = LEFT_ASSOCIATIVE(YP_BINDING_POWER_MODIFIER),

    // rescue modifier
    [YP_TOKEN_KEYWORD_RESCUE_MODIFIER] = {
        YP_BINDING_POWER_ASSIGNMENT,
        YP_BINDING_POWER_MODIFIER_RESCUE + 1,
        true
    },

    // and or
    [YP_TOKEN_KEYWORD_AND] = LEFT_ASSOCIATIVE(YP_BINDING_POWER_COMPOSITION),
    [YP_TOKEN_KEYWORD_OR] = LEFT_ASSOCIATIVE(YP_BINDING_POWER_COMPOSITION),

    // =>
    [YP_TOKEN_EQUAL_GREATER] = LEFT_ASSOCIATIVE(YP_BINDING_POWER_MATCH),

    // &&= &= ^= = >>= <<= -= %= |= += /= *= **=
    [YP_TOKEN_AMPERSAND_AMPERSAND_EQUAL] = BINDING_POWER_ASSIGNMENT,
    [YP_TOKEN_AMPERSAND_EQUAL] = BINDING_POWER_ASSIGNMENT,
    [YP_TOKEN_CARET_EQUAL] = BINDING_POWER_ASSIGNMENT,
    [YP_TOKEN_EQUAL] = BINDING_POWER_ASSIGNMENT,
    [YP_TOKEN_GREATER_GREATER_EQUAL] = BINDING_POWER_ASSIGNMENT,
    [YP_TOKEN_LESS_LESS_EQUAL] = BINDING_POWER_ASSIGNMENT,
    [YP_TOKEN_MINUS_EQUAL] = BINDING_POWER_ASSIGNMENT,
    [YP_TOKEN_PERCENT_EQUAL] = BINDING_POWER_ASSIGNMENT,
    [YP_TOKEN_PIPE_EQUAL] = BINDING_POWER_ASSIGNMENT,
    [YP_TOKEN_PIPE_PIPE_EQUAL] = BINDING_POWER_ASSIGNMENT,
    [YP_TOKEN_PLUS_EQUAL] = BINDING_POWER_ASSIGNMENT,
    [YP_TOKEN_SLASH_EQUAL] = BINDING_POWER_ASSIGNMENT,
    [YP_TOKEN_STAR_EQUAL] = BINDING_POWER_ASSIGNMENT,
    [YP_TOKEN_STAR_STAR_EQUAL] = BINDING_POWER_ASSIGNMENT,

    // ?:
    [YP_TOKEN_QUESTION_MARK] = RIGHT_ASSOCIATIVE(YP_BINDING_POWER_TERNARY),

    // .. ...
    [YP_TOKEN_DOT_DOT] = LEFT_ASSOCIATIVE(YP_BINDING_POWER_RANGE),
    [YP_TOKEN_DOT_DOT_DOT] = LEFT_ASSOCIATIVE(YP_BINDING_POWER_RANGE),

    // ||
    [YP_TOKEN_PIPE_PIPE] = LEFT_ASSOCIATIVE(YP_BINDING_POWER_LOGICAL_OR),

    // &&
    [YP_TOKEN_AMPERSAND_AMPERSAND] = LEFT_ASSOCIATIVE(YP_BINDING_POWER_LOGICAL_AND),

    // != !~ == === =~ <=>
    [YP_TOKEN_BANG_EQUAL] = RIGHT_ASSOCIATIVE(YP_BINDING_POWER_EQUALITY),
    [YP_TOKEN_BANG_TILDE] = RIGHT_ASSOCIATIVE(YP_BINDING_POWER_EQUALITY),
    [YP_TOKEN_EQUAL_EQUAL] = RIGHT_ASSOCIATIVE(YP_BINDING_POWER_EQUALITY),
    [YP_TOKEN_EQUAL_EQUAL_EQUAL] = RIGHT_ASSOCIATIVE(YP_BINDING_POWER_EQUALITY),
    [YP_TOKEN_EQUAL_TILDE] = RIGHT_ASSOCIATIVE(YP_BINDING_POWER_EQUALITY),
    [YP_TOKEN_LESS_EQUAL_GREATER] = RIGHT_ASSOCIATIVE(YP_BINDING_POWER_EQUALITY),

    // > >= < <=
    [YP_TOKEN_GREATER] = RIGHT_ASSOCIATIVE(YP_BINDING_POWER_COMPARISON),
    [YP_TOKEN_GREATER_EQUAL] = RIGHT_ASSOCIATIVE(YP_BINDING_POWER_COMPARISON),
    [YP_TOKEN_LESS] = RIGHT_ASSOCIATIVE(YP_BINDING_POWER_COMPARISON),
    [YP_TOKEN_LESS_EQUAL] = RIGHT_ASSOCIATIVE(YP_BINDING_POWER_COMPARISON),

    // ^ |
    [YP_TOKEN_CARET] = RIGHT_ASSOCIATIVE(YP_BINDING_POWER_BITWISE_OR),
    [YP_TOKEN_PIPE] = RIGHT_ASSOCIATIVE(YP_BINDING_POWER_BITWISE_OR),

    // &
    [YP_TOKEN_AMPERSAND] = RIGHT_ASSOCIATIVE(YP_BINDING_POWER_BITWISE_AND),

    // >> <<
    [YP_TOKEN_GREATER_GREATER] = RIGHT_ASSOCIATIVE(YP_BINDING_POWER_SHIFT),
    [YP_TOKEN_LESS_LESS] = RIGHT_ASSOCIATIVE(YP_BINDING_POWER_SHIFT),

    // - +
    [YP_TOKEN_MINUS] = LEFT_ASSOCIATIVE(YP_BINDING_POWER_TERM),
    [YP_TOKEN_PLUS] = LEFT_ASSOCIATIVE(YP_BINDING_POWER_TERM),

    // % / *
    [YP_TOKEN_PERCENT] = LEFT_ASSOCIATIVE(YP_BINDING_POWER_FACTOR),
    [YP_TOKEN_SLASH] = LEFT_ASSOCIATIVE(YP_BINDING_POWER_FACTOR),
    [YP_TOKEN_STAR] = LEFT_ASSOCIATIVE(YP_BINDING_POWER_FACTOR),
    [YP_TOKEN_USTAR] = LEFT_ASSOCIATIVE(YP_BINDING_POWER_FACTOR),

    // -@
    [YP_TOKEN_UMINUS] = RIGHT_ASSOCIATIVE_UNARY(YP_BINDING_POWER_UMINUS),
    [YP_TOKEN_UMINUS_NUM] = RIGHT_ASSOCIATIVE_UNARY(YP_BINDING_POWER_UMINUS),

    // **
    [YP_TOKEN_STAR_STAR] = RIGHT_ASSOCIATIVE(YP_BINDING_POWER_EXPONENT),
    // NOTE(review): USTAR_STAR (unary **) uses UNARY power rather than
    // EXPONENT — presumably intentional for the prefix form; confirm.
    [YP_TOKEN_USTAR_STAR] = RIGHT_ASSOCIATIVE_UNARY(YP_BINDING_POWER_UNARY),

    // ! ~ +@
    [YP_TOKEN_BANG] = RIGHT_ASSOCIATIVE_UNARY(YP_BINDING_POWER_UNARY),
    [YP_TOKEN_TILDE] = RIGHT_ASSOCIATIVE_UNARY(YP_BINDING_POWER_UNARY),
    [YP_TOKEN_UPLUS] = RIGHT_ASSOCIATIVE_UNARY(YP_BINDING_POWER_UNARY),

    // [
    [YP_TOKEN_BRACKET_LEFT] = LEFT_ASSOCIATIVE(YP_BINDING_POWER_INDEX),

    // :: . &.
    [YP_TOKEN_COLON_COLON] = RIGHT_ASSOCIATIVE(YP_BINDING_POWER_CALL),
    [YP_TOKEN_DOT] = RIGHT_ASSOCIATIVE(YP_BINDING_POWER_CALL),
    [YP_TOKEN_AMPERSAND_DOT] = RIGHT_ASSOCIATIVE(YP_BINDING_POWER_CALL)
};

#undef BINDING_POWER_ASSIGNMENT
#undef LEFT_ASSOCIATIVE
#undef RIGHT_ASSOCIATIVE
#undef RIGHT_ASSOCIATIVE_UNARY
// If the current token is of the specified type, lex forward by one token and
// return true. Otherwise, return false. For example:
//
//     if (accept(parser, YP_TOKEN_COLON)) { ... }
//
static bool
accept(yp_parser_t *parser, yp_token_type_t type) {
    if (!match_type_p(parser, type)) return false;

    parser_lex(parser);
    return true;
}
// If the current token is of any of the specified types, lex forward by one
// token and return true. Otherwise, return false. For example:
//
//     if (accept_any(parser, 2, YP_TOKEN_COLON, YP_TOKEN_SEMICOLON)) { ... }
//
static bool
accept_any(yp_parser_t *parser, size_t count, ...) {
    va_list types;
    va_start(types, count);

    bool accepted = false;
    size_t index = 0;

    while (!accepted && (index < count)) {
        if (match_type_p(parser, va_arg(types, yp_token_type_t))) {
            parser_lex(parser);
            accepted = true;
        }
        index++;
    }

    va_end(types);
    return accepted;
}
// This function indicates that the parser expects a token in a specific
// position. For example, if you're parsing a BEGIN block, you know that a { is
// expected immediately after the keyword. In that case you would call this
// function to indicate that that token should be found.
//
// If we didn't find the token that we were expecting, then we're going to add
// an error to the parser's list of errors (to indicate that the tree is not
// valid) and create an artificial token instead. This allows us to recover from
// the fact that the token isn't present and continue parsing.
static void
expect(yp_parser_t *parser, yp_token_type_t type, const char *message) {
    if (!accept(parser, type)) {
        // Error recovery: report the problem at the end of the previous token
        // and synthesize a zero-width MISSING token there.
        const char *location = parser->previous.end;
        yp_diagnostic_list_append(&parser->error_list, location, location, message);

        parser->previous = (yp_token_t) {
            .type = YP_TOKEN_MISSING,
            .start = location,
            .end = location
        };
    }
}
// Variadic form of expect: accepts any one of `count` token types. If none of
// them matches, an error is appended and a zero-width MISSING token is
// synthesized in place of parser->previous, exactly as expect does.
static void
expect_any(yp_parser_t *parser, const char*message, size_t count, ...) {
    va_list types;
    va_start(types, count);

    bool accepted = false;
    for (size_t index = 0; !accepted && (index < count); index++) {
        accepted = accept(parser, va_arg(types, yp_token_type_t));
    }

    va_end(types);
    if (accepted) return;

    const char *location = parser->previous.end;
    yp_diagnostic_list_append(&parser->error_list, location, location, message);

    parser->previous = (yp_token_t) {
        .type = YP_TOKEN_MISSING,
        .start = location,
        .end = location
    };
}
// Forward declaration: the core Pratt-parser entry point, defined later in
// this file. Needed here because many of the following helpers recurse into
// expression parsing.
static yp_node_t *
parse_expression(yp_parser_t *parser, yp_binding_power_t binding_power, const char *message);
// This function controls whether or not we will attempt to parse an expression
// beginning at the subsequent token. It is used when we are in a context where
// an expression is optional.
//
// For example, looking at a range object when we've already lexed the operator,
// we need to know if we should attempt to parse an expression on the right.
//
// For another example, if we've parsed an identifier or a method call and we do
// not have parentheses, then the next token may be the start of an argument or
// it may not.
//
// CRuby parsers that are generated would resolve this by using a lookahead and
// potentially backtracking. We attempt to do this by just looking at the next
// token and making a decision based on that. I am not sure if this is going to
// work in all cases, it may need to be refactored later. But it appears to work
// for now.
//
// Returns true if a token of the given type can begin an expression.
static inline bool
token_begins_expression_p(yp_token_type_t type) {
    switch (type) {
        case YP_TOKEN_EQUAL_GREATER:
        case YP_TOKEN_KEYWORD_IN:
            // We need to special case this because it is a binary operator that
            // should not be marked as beginning an expression.
            return false;
        case YP_TOKEN_BRACE_RIGHT:
        case YP_TOKEN_BRACKET_RIGHT:
        case YP_TOKEN_COLON:
        case YP_TOKEN_COMMA:
        case YP_TOKEN_EMBEXPR_END:
        case YP_TOKEN_EOF:
        case YP_TOKEN_LAMBDA_BEGIN:
        case YP_TOKEN_KEYWORD_DO:
        case YP_TOKEN_KEYWORD_DO_LOOP:
        case YP_TOKEN_KEYWORD_END:
        case YP_TOKEN_KEYWORD_ELSE:
        case YP_TOKEN_KEYWORD_ELSIF:
        case YP_TOKEN_KEYWORD_THEN:
        case YP_TOKEN_KEYWORD_RESCUE:
        case YP_TOKEN_KEYWORD_WHEN:
        case YP_TOKEN_NEWLINE:
        case YP_TOKEN_PARENTHESIS_RIGHT:
        case YP_TOKEN_SEMICOLON:
            // The reason we need this short-circuit is because we're using the
            // binding powers table to tell us if the subsequent token could
            // potentially be the start of an expression . If there _is_ a binding
            // power for one of these tokens, then we should remove it from this list
            // and let it be handled by the default case below.
            assert(yp_binding_powers[type].left == YP_BINDING_POWER_UNSET);
            return false;
        case YP_TOKEN_UCOLON_COLON:
        case YP_TOKEN_UMINUS:
        case YP_TOKEN_UMINUS_NUM:
        case YP_TOKEN_UPLUS:
        case YP_TOKEN_BANG:
        case YP_TOKEN_TILDE:
        case YP_TOKEN_UDOT_DOT:
        case YP_TOKEN_UDOT_DOT_DOT:
            // These unary tokens actually do have binding power associated with them
            // so that we can correctly place them into the precedence order. But we
            // want them to be marked as beginning an expression, so we need to
            // special case them here.
            return true;
        default:
            // Anything with no infix binding power is assumed to be able to
            // begin an expression.
            return yp_binding_powers[type].left == YP_BINDING_POWER_UNSET;
    }
}
// Parse an expression with the given binding power that may be optionally
// prefixed by the * operator. When the * is present the parsed expression is
// wrapped in a splat node.
static yp_node_t *
parse_starred_expression(yp_parser_t *parser, yp_binding_power_t binding_power, const char *message) {
    if (!accept(parser, YP_TOKEN_USTAR)) {
        return parse_expression(parser, binding_power, message);
    }

    yp_token_t star = parser->previous;
    yp_node_t *operand = parse_expression(parser, binding_power, "Expected expression after `*'.");
    return (yp_node_t *) yp_splat_node_create(parser, &star, operand);
}
// Convert the given node into a valid target node for an assignment.
//
// The `target` node was originally parsed in a read position (e.g. a local
// variable read, constant read, or method call) and is converted in place or
// replaced by the corresponding write node. Ownership of `target` transfers to
// this function: several branches destroy it and return a freshly-created
// write node instead. `operator` is the assignment operator token; `value` is
// the right-hand side and may be NULL (e.g. when collecting multi-assignment
// targets before the `=` has been seen).
static yp_node_t *
parse_target(yp_parser_t *parser, yp_node_t *target, yp_token_t *operator, yp_node_t *value) {
    switch (target->type) {
        case YP_NODE_MISSING_NODE:
            // Already an error; pass it through unchanged.
            return target;
        case YP_NODE_CLASS_VARIABLE_READ_NODE: {
            yp_class_variable_write_node_t *write_node = yp_class_variable_read_node_to_class_variable_write_node(parser, (yp_class_variable_read_node_t *) target, operator, value);
            yp_node_destroy(parser, target);
            return (yp_node_t *) write_node;
        }
        case YP_NODE_CONSTANT_PATH_NODE:
        case YP_NODE_CONSTANT_READ_NODE:
            return (yp_node_t *) yp_constant_path_write_node_create(parser, target, operator, value);
        case YP_NODE_BACK_REFERENCE_READ_NODE:
        case YP_NODE_NUMBERED_REFERENCE_READ_NODE:
            // $1, $&, etc. are read-only; report the error but still build a
            // global variable write node so parsing can continue.
            yp_diagnostic_list_append(&parser->error_list, target->location.start, target->location.end, "Can't set variable");
        /* fallthrough */
        case YP_NODE_GLOBAL_VARIABLE_READ_NODE: {
            yp_global_variable_write_node_t *result = yp_global_variable_write_node_create(parser, &target->location, operator, value);
            yp_node_destroy(parser, target);
            return (yp_node_t *) result;
        }
        case YP_NODE_LOCAL_VARIABLE_READ_NODE: {
            // Copy out the fields we need before destroying the read node.
            yp_local_variable_read_node_t *local_read = (yp_local_variable_read_node_t *) target;

            yp_constant_id_t constant_id = local_read->constant_id;
            uint32_t depth = local_read->depth;

            yp_location_t name_loc = target->location;
            yp_node_destroy(parser, target);

            return (yp_node_t *) yp_local_variable_write_node_create(parser, constant_id, depth, value, &name_loc, operator);
        }
        case YP_NODE_INSTANCE_VARIABLE_READ_NODE: {
            yp_node_t *write_node = (yp_node_t *) yp_instance_variable_write_node_create(parser, (yp_instance_variable_read_node_t *) target, operator, value);
            yp_node_destroy(parser, target);
            return write_node;
        }
        case YP_NODE_MULTI_WRITE_NODE: {
            // An existing multi-write just gets the operator and (optional)
            // value attached in place.
            yp_multi_write_node_t *multi_write = (yp_multi_write_node_t *) target;
            yp_multi_write_node_operator_loc_set(multi_write, operator);

            if (value != NULL) {
                multi_write->value = value;
                multi_write->base.location.end = value->location.end;
            }

            return (yp_node_t *) multi_write;
        }
        case YP_NODE_SPLAT_NODE: {
            // A lone splat target becomes a multi-write containing just the
            // splat; its inner expression (if any) is converted recursively.
            yp_splat_node_t *splat = (yp_splat_node_t *) target;

            if (splat->expression != NULL) {
                splat->expression = parse_target(parser, splat->expression, operator, value);
            }

            yp_location_t location = { .start = NULL, .end = NULL };
            yp_multi_write_node_t *multi_write = yp_multi_write_node_create(parser, operator, value, &location, &location);
            yp_multi_write_node_targets_append(multi_write, (yp_node_t *) splat);

            return (yp_node_t *) multi_write;
        }
        case YP_NODE_CALL_NODE: {
            yp_call_node_t *call = (yp_call_node_t *) target;

            // If we have no arguments to the call node and we need this to be a
            // target then this is either a method call or a local variable write.
            if (
                (call->opening_loc.start == NULL) &&
                (call->arguments == NULL) &&
                (call->block == NULL)
            ) {
                if (call->receiver == NULL) {
                    // When we get here, we have a local variable write, because it
                    // was previously marked as a method call but now we have an =.
                    // This looks like:
                    //
                    //     foo = 1
                    //
                    // When it was parsed in the prefix position, foo was seen as a
                    // method call with no receiver and no arguments. Now we have an
                    // =, so we know it's a local variable write.
                    const yp_location_t message = call->message_loc;

                    yp_parser_local_add_location(parser, message.start, message.end);
                    yp_node_destroy(parser, target);

                    yp_constant_id_t constant_id = yp_parser_constant_id_location(parser, message.start, message.end);
                    target = (yp_node_t *) yp_local_variable_write_node_create(parser, constant_id, 0, value, &message, operator);

                    if (token_is_numbered_parameter(message.start, message.end)) {
                        yp_diagnostic_list_append(&parser->error_list, message.start, message.end, "reserved for numbered parameter");
                    }

                    return target;
                }

                // When we get here, we have a method call, because it was
                // previously marked as a method call but now we have an =. This
                // looks like:
                //
                //     foo.bar = 1
                //
                // When it was parsed in the prefix position, foo.bar was seen as a
                // method call with no arguments. Now we have an =, so we know it's
                // a method call with an argument. In this case we will create the
                // arguments node, parse the argument, and add it to the list.
                if (value) {
                    yp_arguments_node_t *arguments = yp_arguments_node_create(parser);
                    call->arguments = arguments;

                    yp_arguments_node_arguments_append(arguments, value);
                    target->location.end = arguments->base.location.end;
                }

                // The method name needs to change. If we previously had foo, we now
                // need foo=. In this case we'll allocate a new owned string, copy
                // the previous method name in, and append an =.
                size_t length = yp_string_length(&call->name);
                char *name = calloc(length + 2, sizeof(char));
                // NOTE(review): on allocation failure this returns NULL while the
                // rest of the function returns a valid node — verify callers
                // tolerate a NULL result here.
                if (name == NULL) return NULL;

                yp_snprintf(name, length + 2, "%.*s=", (int) length, yp_string_source(&call->name));

                // Now switch the name to the new string.
                yp_string_free(&call->name);
                yp_string_owned_init(&call->name, name, length + 1);

                return target;
            }

            // If there is no call operator and the message is "[]" then this is
            // an aref expression, and we can transform it into an aset
            // expression.
            if (
                (call->operator_loc.start == NULL) &&
                (call->message_loc.start[0] == '[') &&
                (call->message_loc.end[-1] == ']') &&
                (call->block == NULL)
            ) {
                if (value != NULL) {
                    if (call->arguments == NULL) {
                        call->arguments = yp_arguments_node_create(parser);
                    }

                    yp_arguments_node_arguments_append(call->arguments, value);
                    target->location.end = value->location.end;
                }

                // Free the previous name and replace it with "[]=".
                yp_string_free(&call->name);
                yp_string_constant_init(&call->name, "[]=", 3);
                return target;
            }

            // If there are arguments on the call node, then it can't be a method
            // call ending with = or a local variable write, so it must be a
            // syntax error. In this case we'll fall through to our default
            // handling. We need to free the value that we parsed because there
            // is no way for us to attach it to the tree at this point.
            if (value != NULL) {
                yp_node_destroy(parser, value);
            }
        }
        /* fallthrough */
        default:
            // In this case we have a node that we don't know how to convert into a
            // target. We need to treat it as an error. For now, we'll mark it as an
            // error and just skip right past it.
            yp_diagnostic_list_append(&parser->error_list, operator->start, operator->end, "Unexpected `='.");
            return target;
    }
}
// Parse a list of targets for assignment. This is used in the case of a for
// loop or a multi-assignment. For example, in the following code:
//
//     for foo, bar in baz
//         ^^^^^^^^
//
// The targets are `foo` and `bar`. This function will either return a single
// target node or a multi-target node.
//
// `first_target` is an already-parsed expression for the first target, or NULL
// when the caller knows a multi-write is required (e.g. a for loop).
// `binding_power` bounds the expressions parsed for subsequent targets.
static yp_node_t *
parse_targets(yp_parser_t *parser, yp_node_t *first_target, yp_binding_power_t binding_power) {
    yp_token_t operator = not_provided(parser);

    // The first_target parameter can be NULL in the case that we're parsing a
    // location that we know requires a multi write, as in the case of a for loop.
    // In this case we will set up the parsing loop slightly differently.
    if (first_target != NULL) {
        first_target = parse_target(parser, first_target, &operator, NULL);

        // A single target with no trailing comma is returned directly, without
        // a wrapping multi-write node.
        if (!match_type_p(parser, YP_TOKEN_COMMA)) {
            return first_target;
        }
    }

    yp_location_t lparen_loc = { .start = NULL, .end = NULL };
    yp_multi_write_node_t *result = yp_multi_write_node_create(parser, &operator, NULL, &lparen_loc, &lparen_loc);

    if (first_target != NULL) {
        yp_multi_write_node_targets_append(result, first_target);
    }

    bool has_splat = false;

    if (first_target == NULL || accept(parser, YP_TOKEN_COMMA)) {
        do {
            if (accept(parser, YP_TOKEN_USTAR)) {
                // Here we have a splat operator. It can have a name or be anonymous. It
                // can be the final target or be in the middle if there haven't been any
                // others yet.
                if (has_splat) {
                    yp_diagnostic_list_append(&parser->error_list, parser->previous.start, parser->previous.end, "Multiple splats in multi-assignment.");
                }

                yp_token_t star_operator = parser->previous;
                yp_node_t *name = NULL;

                if (token_begins_expression_p(parser->current.type)) {
                    yp_token_t operator = not_provided(parser);
                    name = parse_expression(parser, binding_power, "Expected an expression after '*'.");
                    name = parse_target(parser, name, &operator, NULL);
                }

                yp_node_t *splat = (yp_node_t *) yp_splat_node_create(parser, &star_operator, name);
                yp_multi_write_node_targets_append(result, splat);
                has_splat = true;
            } else if (accept(parser, YP_TOKEN_PARENTHESIS_LEFT)) {
                // Here we have a parenthesized list of targets. We'll recurse down into
                // the parentheses by calling parse_targets again and then finish out
                // the node when it returns.
                yp_token_t lparen = parser->previous;
                yp_node_t *first_child_target = parse_expression(parser, YP_BINDING_POWER_STATEMENT, "Expected an expression after '('.");
                yp_node_t *child_target = parse_targets(parser, first_child_target, YP_BINDING_POWER_STATEMENT);

                expect(parser, YP_TOKEN_PARENTHESIS_RIGHT, "Expected an ')' after multi-assignment.");
                yp_token_t rparen = parser->previous;

                if (child_target->type == YP_NODE_MULTI_WRITE_NODE && first_target == NULL && result->targets.size == 0) {
                    // If the parenthesized group was the very first thing we
                    // parsed, reuse its multi-write node as the result rather
                    // than nesting it.
                    yp_node_destroy(parser, (yp_node_t *) result);
                    result = (yp_multi_write_node_t *) child_target;
                    result->base.location.start = lparen.start;
                    result->base.location.end = rparen.end;
                    result->lparen_loc = (yp_location_t) { .start = lparen.start, .end = lparen.end };
                    result->rparen_loc = (yp_location_t) { .start = rparen.start, .end = rparen.end };
                } else {
                    yp_multi_write_node_t *target;

                    if (child_target->type == YP_NODE_MULTI_WRITE_NODE) {
                        target = (yp_multi_write_node_t *) child_target;
                        target->lparen_loc = (yp_location_t) { .start = lparen.start, .end = lparen.end };
                        target->rparen_loc = (yp_location_t) { .start = rparen.start, .end = rparen.end };
                    } else {
                        // A single child target still gets wrapped in a
                        // multi-write so the parentheses are represented.
                        yp_token_t operator = not_provided(parser);

                        target = yp_multi_write_node_create(
                            parser,
                            &operator,
                            NULL,
                            &(yp_location_t) { .start = lparen.start, .end = lparen.end },
                            &(yp_location_t) { .start = rparen.start, .end = rparen.end }
                        );

                        yp_multi_write_node_targets_append(target, child_target);
                    }

                    target->base.location.end = rparen.end;
                    yp_multi_write_node_targets_append(result, (yp_node_t *) target);
                }
            } else {
                if (!token_begins_expression_p(parser->current.type) && !match_type_p(parser, YP_TOKEN_USTAR)) {
                    if (first_target == NULL && result->targets.size == 0) {
                        // If we get here, then we weren't able to parse anything at all, so
                        // we need to return a missing node.
                        yp_node_destroy(parser, (yp_node_t *) result);
                        yp_diagnostic_list_append(&parser->error_list, operator.start, operator.end, "Expected index after for.");
                        return (yp_node_t *) yp_missing_node_create(parser, operator.start, operator.end);
                    }

                    // If we get here, then we have a trailing , in a multi write node.
                    // We need to indicate this somehow in the tree, so we'll add an
                    // anonymous splat.
                    yp_node_t *splat = (yp_node_t *) yp_splat_node_create(parser, &parser->previous, NULL);
                    yp_multi_write_node_targets_append(result, splat);
                    return (yp_node_t *) result;
                }

                yp_node_t *target = parse_expression(parser, binding_power, "Expected another expression after ','.");
                target = parse_target(parser, target, &operator, NULL);

                yp_multi_write_node_targets_append(result, target);
            }
        } while (accept(parser, YP_TOKEN_COMMA));
    }

    return (yp_node_t *) result;
}
// Parse a list of statements separated by newlines or semicolons, stopping at
// the terminator token for the given context. Returns NULL if the context's
// terminator is found immediately (i.e. the statement list is empty).
static yp_statements_node_t *
parse_statements(yp_parser_t *parser, yp_context_t context) {
    // First, skip past any optional terminators that might be at the beginning of
    // the statements.
    while (accept_any(parser, 2, YP_TOKEN_SEMICOLON, YP_TOKEN_NEWLINE));

    // If we have a terminator, then we can just return NULL.
    if (context_terminator(context, &parser->current)) return NULL;

    yp_statements_node_t *statements = yp_statements_node_create(parser);

    // At this point we know we have at least one statement, and that it
    // immediately follows the current token.
    context_push(parser, context);

    while (true) {
        yp_node_t *node = parse_expression(parser, YP_BINDING_POWER_STATEMENT, "Expected to be able to parse an expression.");
        yp_statements_node_body_append(statements, node);

        // If we're recovering from a syntax error, then we need to stop parsing the
        // statements now.
        if (parser->recovering) {
            // If this is the level of context where the recovery has happened, then
            // we can mark the parser as done recovering.
            if (context_terminator(context, &parser->current)) parser->recovering = false;
            break;
        }

        // If we have a terminator, then we will parse all consecutive terminators
        // and then continue parsing the statements list.
        if (accept_any(parser, 2, YP_TOKEN_NEWLINE, YP_TOKEN_SEMICOLON)) {
            // If we have a terminator, then we will continue parsing the statements
            // list.
            while (accept_any(parser, 2, YP_TOKEN_NEWLINE, YP_TOKEN_SEMICOLON));
            if (context_terminator(context, &parser->current)) break;

            // Now we can continue parsing the list of statements.
            continue;
        }

        // At this point we have a list of statements that are not terminated by a
        // newline or semicolon. At this point we need to check if we're at the end
        // of the statements list. If we are, then we should break out of the loop.
        if (context_terminator(context, &parser->current)) break;

        // At this point, we have a syntax error, because the statement was not
        // terminated by a newline or semicolon, and we're not at the end of the
        // statements list. Ideally we should scan forward to determine if we should
        // insert a missing terminator or break out of parsing the statements list
        // at this point.
        //
        // We don't have that yet, so instead we'll do a more naive approach. If we
        // were unable to parse an expression, then we will skip past this token and
        // continue parsing the statements list. Otherwise we'll add an error and
        // continue parsing the statements list.
        if (node->type == YP_NODE_MISSING_NODE) {
            parser_lex(parser);

            while (accept_any(parser, 2, YP_TOKEN_NEWLINE, YP_TOKEN_SEMICOLON));
            if (context_terminator(context, &parser->current)) break;
        } else {
            expect(parser, YP_TOKEN_NEWLINE, "Expected a newline or semicolon after statement.");
        }
    }

    context_pop(parser);
    return statements;
}
// Parse all of the elements of a hash and append them to the given node.
//
// `node` must be either a HASH_NODE or a KEYWORD_HASH_NODE (asserted below);
// the same element-parsing logic serves both literal hashes and bare keyword
// hashes in argument position. Stops after the last element, leaving the
// closing token (if any) for the caller to consume.
static void
parse_assocs(yp_parser_t *parser, yp_node_t *node) {
    assert((node->type == YP_NODE_HASH_NODE) || (node->type == YP_NODE_KEYWORD_HASH_NODE));

    while (true) {
        yp_node_t *element;

        switch (parser->current.type) {
            case YP_TOKEN_USTAR_STAR: {
                // A double-splat entry: `**value` or a bare `**` forwarding an
                // enclosing method's keyword rest parameter.
                parser_lex(parser);

                yp_token_t operator = parser->previous;
                yp_node_t *value = NULL;

                if (token_begins_expression_p(parser->current.type)) {
                    value = parse_expression(parser, YP_BINDING_POWER_DEFINED, "Expected an expression after ** in hash.");
                } else if (yp_parser_local_depth(parser, &operator) == -1) {
                    // Bare ** is only valid when the enclosing method declared
                    // a ** parameter to forward.
                    yp_diagnostic_list_append(&parser->error_list, operator.start, operator.end, "Expected an expression after ** in hash.");
                }

                element = (yp_node_t *) yp_assoc_splat_node_create(parser, value, &operator);
                break;
            }
            case YP_TOKEN_LABEL: {
                // A `key:` label entry; the value is optional (shorthand hash
                // syntax leaves it NULL).
                parser_lex(parser);

                yp_node_t *key = (yp_node_t *) yp_symbol_node_label_create(parser, &parser->previous);
                yp_token_t operator = not_provided(parser);
                yp_node_t *value = NULL;

                if (token_begins_expression_p(parser->current.type)) {
                    value = parse_expression(parser, YP_BINDING_POWER_DEFINED, "Expected an expression after the label in hash.");
                }

                element = (yp_node_t *) yp_assoc_node_create(parser, key, &operator, value);
                break;
            }
            default: {
                // A general `key => value` entry. If the key parsed as a label
                // symbol the => is not required.
                yp_node_t *key = parse_expression(parser, YP_BINDING_POWER_DEFINED, "Expected a key in the hash literal.");
                yp_token_t operator;

                if (yp_symbol_node_label_p(key)) {
                    operator = not_provided(parser);
                } else {
                    expect(parser, YP_TOKEN_EQUAL_GREATER, "Expected a => between the key and the value in the hash.");
                    operator = parser->previous;
                }

                yp_node_t *value = parse_expression(parser, YP_BINDING_POWER_DEFINED, "Expected a value in the hash literal.");
                element = (yp_node_t *) yp_assoc_node_create(parser, key, &operator, value);
                break;
            }
        }

        if (node->type == YP_NODE_HASH_NODE) {
            yp_hash_node_elements_append((yp_hash_node_t *) node, element);
        } else {
            yp_keyword_hash_node_elements_append((yp_keyword_hash_node_t *) node, element);
        }

        // If there's no comma after the element, then we're done.
        if (!accept(parser, YP_TOKEN_COMMA)) return;

        // If the next element starts with a label or a **, then we know we have
        // another element in the hash, so we'll continue parsing.
        if (match_any_type_p(parser, 2, YP_TOKEN_USTAR_STAR, YP_TOKEN_LABEL)) continue;

        // Otherwise we need to check if the subsequent token begins an expression.
        // If it does, then we'll continue parsing.
        if (token_begins_expression_p(parser->current.type)) continue;

        // Otherwise by default we will exit out of this loop.
        return;
    }
}
// Parse a list of arguments.
static void
parse_arguments(yp_parser_t *parser, yp_arguments_node_t *arguments, bool accepts_forwarding, yp_token_type_t terminator) {
yp_binding_power_t binding_power = yp_binding_powers[parser->current.type].left;
// First we need to check if the next token is one that could be the start of
// an argument. If it's not, then we can just return.
if (
match_any_type_p(parser, 2, terminator, YP_TOKEN_EOF) ||
(binding_power != YP_BINDING_POWER_UNSET && binding_power < YP_BINDING_POWER_RANGE) ||
context_terminator(parser->current_context->context, &parser->current)
) {
return;
}
bool parsed_bare_hash = false;
bool parsed_block_argument = false;
while (!match_type_p(parser, YP_TOKEN_EOF)) {
if (parsed_block_argument) {
yp_diagnostic_list_append(&parser->error_list, parser->current.start, parser->current.end, "Unexpected argument after block argument.");
}
yp_node_t *argument = NULL;
switch (parser->current.type) {
case YP_TOKEN_USTAR_STAR:
case YP_TOKEN_LABEL: {
if (parsed_bare_hash) {
yp_diagnostic_list_append(&parser->error_list, parser->current.start, parser->current.end, "Unexpected bare hash.");
}
yp_keyword_hash_node_t *hash = yp_keyword_hash_node_create(parser);
argument = (yp_node_t *)hash;
if (!match_any_type_p(parser, 7, terminator, YP_TOKEN_NEWLINE, YP_TOKEN_SEMICOLON, YP_TOKEN_EOF, YP_TOKEN_BRACE_RIGHT, YP_TOKEN_KEYWORD_DO, YP_TOKEN_PARENTHESIS_RIGHT)) {
parse_assocs(parser, (yp_node_t *) hash);
}
parsed_bare_hash = true;
break;
}
case YP_TOKEN_AMPERSAND: {
parser_lex(parser);
yp_token_t operator = parser->previous;
yp_node_t *expression = NULL;
if (token_begins_expression_p(parser->current.type)) {
expression = parse_expression(parser, YP_BINDING_POWER_DEFINED, "Expected to be able to parse an argument.");
} else if (yp_parser_local_depth(parser, &operator) == -1) {
yp_diagnostic_list_append(&parser->error_list, operator.start, operator.end, "unexpected & when parent method is not forwarding.");
}
argument = (yp_node_t *)yp_block_argument_node_create(parser, &operator, expression);
parsed_block_argument = true;
break;
}
case YP_TOKEN_USTAR: {
parser_lex(parser);
yp_token_t operator = parser->previous;
if (match_any_type_p(parser, 2, YP_TOKEN_PARENTHESIS_RIGHT, YP_TOKEN_COMMA)) {
if (yp_parser_local_depth(parser, &parser->previous) == -1) {
yp_diagnostic_list_append(&parser->error_list, operator.start, operator.end, "unexpected * when parent method is not forwarding.");
}
argument = (yp_node_t *) yp_splat_node_create(parser, &operator, NULL);
} else {
yp_node_t *expression = parse_expression(parser, YP_BINDING_POWER_DEFINED, "Expected an expression after '*' in argument.");
if (parsed_bare_hash) {
yp_diagnostic_list_append(&parser->error_list, operator.start, expression->location.end, "Unexpected splat argument after double splat.");
}
argument = (yp_node_t *) yp_splat_node_create(parser, &operator, expression);
}
break;
}
case YP_TOKEN_UDOT_DOT_DOT: {
if (accepts_forwarding) {
parser_lex(parser);
if (token_begins_expression_p(parser->current.type)) {
// If the token begins an expression then this ... was not actually
// argument forwarding but was instead a range.
yp_token_t operator = parser->previous;
yp_node_t *right = parse_expression(parser, YP_BINDING_POWER_RANGE, "Expected a value after the operator.");
argument = (yp_node_t *) yp_range_node_create(parser, NULL, &operator, right);
} else {
if (yp_parser_local_depth(parser, &parser->previous) == -1) {
yp_diagnostic_list_append(&parser->error_list, parser->previous.start, parser->previous.end, "unexpected ... when parent method is not forwarding.");
}
argument = (yp_node_t *)yp_forwarding_arguments_node_create(parser, &parser->previous);
break;
}
}
}
/* fallthrough */
default: {
if (argument == NULL) {
argument = parse_expression(parser, YP_BINDING_POWER_DEFINED, "Expected to be able to parse an argument.");
}
if (yp_symbol_node_label_p(argument) || accept(parser, YP_TOKEN_EQUAL_GREATER)) {
if (parsed_bare_hash) {
yp_diagnostic_list_append(&parser->error_list, parser->previous.start, parser->previous.end, "Unexpected bare hash argument.");
}
yp_token_t operator;
if (parser->previous.type == YP_TOKEN_EQUAL_GREATER) {
operator = parser->previous;
} else {
operator = not_provided(parser);
}
yp_keyword_hash_node_t *bare_hash = yp_keyword_hash_node_create(parser);
// Finish parsing the one we are part way through
yp_node_t *value = parse_expression(parser, YP_BINDING_POWER_DEFINED, "Expected a value in the hash literal.");
argument = (yp_node_t *) yp_assoc_node_create(parser, argument, &operator, value);
yp_keyword_hash_node_elements_append(bare_hash, argument);
argument = (yp_node_t *) bare_hash;
// Then parse more if we have a comma
if (accept(parser, YP_TOKEN_COMMA) && (
token_begins_expression_p(parser->current.type) ||
match_any_type_p(parser, 2, YP_TOKEN_USTAR_STAR, YP_TOKEN_LABEL)
)) {
parse_assocs(parser, (yp_node_t *) bare_hash);
}
parsed_bare_hash = true;
}
break;
}
}
yp_arguments_node_arguments_append(arguments, argument);
// If parsing the argument failed, we need to stop parsing arguments.
if (argument->type == YP_NODE_MISSING_NODE || parser->recovering) break;
// If the terminator of these arguments is not EOF, then we have a specific
// token we're looking for. In that case we can accept a newline here
// because it is not functioning as a statement terminator.
if (terminator != YP_TOKEN_EOF) accept(parser, YP_TOKEN_NEWLINE);
if (parser->previous.type == YP_TOKEN_COMMA && parsed_bare_hash) {
// If we previously were on a comma and we just parsed a bare hash, then
// we want to continue parsing arguments. This is because the comma was
// grabbed up by the hash parser.
} else {
// If there is no comma at the end of the argument list then we're done
// parsing arguments and can break out of this loop.
if (!accept(parser, YP_TOKEN_COMMA)) break;
}
// If we hit the terminator, then that means we have a trailing comma so we
// can accept that output as well.
if (match_type_p(parser, terminator)) break;
}
}
// Required parameters on method, block, and lambda declarations can be
// destructured using parentheses. This looks like:
//
//     def foo((bar, baz))
//     end
//
// Nesting can recurse arbitrarily deep, and a single splat may group the
// remaining values at any position.
static yp_required_destructured_parameter_node_t *
parse_required_destructured_parameter(yp_parser_t *parser) {
    expect(parser, YP_TOKEN_PARENTHESIS_LEFT, "Expected '(' to start a required parameter.");

    yp_token_t opening = parser->previous;
    yp_required_destructured_parameter_node_t *destructured = yp_required_destructured_parameter_node_create(parser, &opening);
    bool splat_seen = false;

    do {
        yp_node_t *parameter;

        // A trailing comma immediately before the closing parenthesis acts as
        // an anonymous splat that groups the remaining values.
        if (destructured->parameters.size > 0 && match_type_p(parser, YP_TOKEN_PARENTHESIS_RIGHT)) {
            if (splat_seen) {
                yp_diagnostic_list_append(&parser->error_list, parser->previous.start, parser->previous.end, "Unexpected splat after splat.");
            }

            parameter = (yp_node_t *) yp_splat_node_create(parser, &parser->previous, NULL);
            yp_required_destructured_parameter_node_append_parameter(destructured, parameter);
            break;
        }

        if (match_type_p(parser, YP_TOKEN_PARENTHESIS_LEFT)) {
            // A nested set of parentheses destructures recursively.
            parameter = (yp_node_t *) parse_required_destructured_parameter(parser);
        } else if (accept(parser, YP_TOKEN_USTAR)) {
            // An explicit splat, optionally followed by a name. Only a single
            // splat is permitted per destructuring group.
            if (splat_seen) {
                yp_diagnostic_list_append(&parser->error_list, parser->previous.start, parser->previous.end, "Unexpected splat after splat.");
            }

            yp_token_t operator = parser->previous;
            yp_node_t *named = NULL;

            if (accept(parser, YP_TOKEN_IDENTIFIER)) {
                yp_token_t name = parser->previous;
                named = (yp_node_t *) yp_required_parameter_node_create(parser, &name);
                yp_parser_local_add_token(parser, &name);
            }

            parameter = (yp_node_t *) yp_splat_node_create(parser, &operator, named);
            splat_seen = true;
        } else {
            // Otherwise we require a plain identifier naming the parameter,
            // which also becomes a local in the current scope.
            expect(parser, YP_TOKEN_IDENTIFIER, "Expected an identifier for a required parameter.");
            yp_token_t name = parser->previous;

            parameter = (yp_node_t *) yp_required_parameter_node_create(parser, &name);
            yp_parser_local_add_token(parser, &name);
        }

        yp_required_destructured_parameter_node_append_parameter(destructured, parameter);
    } while (accept(parser, YP_TOKEN_COMMA));

    expect(parser, YP_TOKEN_PARENTHESIS_RIGHT, "Expected ')' to end a required parameter.");
    yp_required_destructured_parameter_node_closing_set(destructured, &parser->previous);

    return destructured;
}
// This represents the different order states we can be in when parsing
// method parameters.
//
// The states are ordered from most to least restrictive: while consuming a
// parameter list, update_parameter_state only lets the state move toward
// lower values, and a token that would move it upward is reported as an
// ordering error.
typedef enum {
    YP_PARAMETERS_NO_CHANGE = 0, // Extra state for tokens that should not change the state
    YP_PARAMETERS_ORDER_NOTHING_AFTER = 1, // `&` / `...` — no further parameters may follow
    YP_PARAMETERS_ORDER_KEYWORDS_REST, // `**` keyword rest
    YP_PARAMETERS_ORDER_KEYWORDS, // `name:` keyword parameters
    YP_PARAMETERS_ORDER_REST, // rest parameters
    YP_PARAMETERS_ORDER_AFTER_OPTIONAL, // `*`, or required parameters seen after optionals
    YP_PARAMETERS_ORDER_OPTIONAL, // `name = default` optional parameters
    YP_PARAMETERS_ORDER_NAMED, // plain identifiers and destructured groups
    YP_PARAMETERS_ORDER_NONE, // initial state before any parameter is seen
} yp_parameters_order_t;
// This matches parameters tokens with parameters state.
//
// Any token type not listed here zero-initializes to YP_PARAMETERS_NO_CHANGE
// and therefore leaves the ordering state untouched.
static yp_parameters_order_t parameters_ordering[YP_TOKEN_MAXIMUM] = {
    [0] = YP_PARAMETERS_NO_CHANGE,
    [YP_TOKEN_AMPERSAND] = YP_PARAMETERS_ORDER_NOTHING_AFTER, // block parameter
    [YP_TOKEN_UDOT_DOT_DOT] = YP_PARAMETERS_ORDER_NOTHING_AFTER, // forwarding parameter
    [YP_TOKEN_IDENTIFIER] = YP_PARAMETERS_ORDER_NAMED,
    [YP_TOKEN_PARENTHESIS_LEFT] = YP_PARAMETERS_ORDER_NAMED, // destructured parameter
    [YP_TOKEN_EQUAL] = YP_PARAMETERS_ORDER_OPTIONAL, // default value
    [YP_TOKEN_LABEL] = YP_PARAMETERS_ORDER_KEYWORDS,
    [YP_TOKEN_USTAR] = YP_PARAMETERS_ORDER_AFTER_OPTIONAL,
    [YP_TOKEN_STAR] = YP_PARAMETERS_ORDER_AFTER_OPTIONAL,
    [YP_TOKEN_USTAR_STAR] = YP_PARAMETERS_ORDER_KEYWORDS_REST,
    [YP_TOKEN_STAR_STAR] = YP_PARAMETERS_ORDER_KEYWORDS_REST
};
// Check whether the current parameter token respects the required parameter
// ordering. On a violation an error is appended to the diagnostics list (but
// parsing continues); otherwise the tracked state is advanced to the state
// corresponding to this token.
static void
update_parameter_state(yp_parser_t *parser, yp_token_t *token, yp_parameters_order_t *current) {
    yp_parameters_order_t next = parameters_ordering[token->type];
    if (next == YP_PARAMETERS_NO_CHANGE) return;

    // Once an optional parameter has been seen, subsequent plain (named)
    // parameters are treated as post parameters: move to AFTER_OPTIONAL and
    // keep accepting named parameters in that state.
    if (next == YP_PARAMETERS_ORDER_NAMED) {
        if (*current == YP_PARAMETERS_ORDER_OPTIONAL) {
            *current = YP_PARAMETERS_ORDER_AFTER_OPTIONAL;
            return;
        }
        if (*current == YP_PARAMETERS_ORDER_AFTER_OPTIONAL) {
            return;
        }
    }

    if (*current == YP_PARAMETERS_ORDER_NOTHING_AFTER || next > *current) {
        // We know what transition we failed on, so we can provide a better error here.
        yp_diagnostic_list_append(&parser->error_list, token->start, token->end, "Unexpected parameter order");
    } else if (next < *current) {
        *current = next;
    }
}
// Parse a list of parameters on a method definition.
//
// binding_power is the power used for parsing default values (it differs
// between methods, blocks, and lambda literals). uses_parentheses indicates
// whether the list is wrapped in parentheses, which changes how newlines and
// bare labels terminate the list. allows_trailing_comma permits a dangling
// comma (creating an anonymous rest parameter), and allows_forwarding_parameter
// permits the `...` parameter.
//
// Returns NULL instead of an empty node when no parameters were parsed.
static yp_parameters_node_t *
parse_parameters(
    yp_parser_t *parser,
    yp_binding_power_t binding_power,
    bool uses_parentheses,
    bool allows_trailing_comma,
    bool allows_forwarding_parameter
) {
    yp_parameters_node_t *params = yp_parameters_node_create(parser);
    bool looping = true;

    yp_do_loop_stack_push(parser, false);

    // Tracks the ordering state so out-of-order parameters (e.g. a required
    // parameter after a keyword rest) are reported as errors.
    yp_parameters_order_t order = YP_PARAMETERS_ORDER_NONE;

    do {
        switch (parser->current.type) {
            case YP_TOKEN_PARENTHESIS_LEFT: {
                // A parenthesized group is a destructured required parameter.
                update_parameter_state(parser, &parser->current, &order);
                yp_node_t *param = (yp_node_t *) parse_required_destructured_parameter(parser);

                // Before any optional parameter it is a required parameter;
                // after one it becomes a post parameter.
                if (order > YP_PARAMETERS_ORDER_AFTER_OPTIONAL) {
                    yp_parameters_node_requireds_append(params, param);
                } else {
                    yp_parameters_node_posts_append(params, param);
                }
                break;
            }
            case YP_TOKEN_AMPERSAND: {
                // A block parameter (&block), optionally anonymous.
                update_parameter_state(parser, &parser->current, &order);
                parser_lex(parser);

                yp_token_t operator = parser->previous;
                yp_token_t name;

                if (accept(parser, YP_TOKEN_IDENTIFIER)) {
                    name = parser->previous;
                    yp_parser_parameter_name_check(parser, &name);
                    yp_parser_local_add_token(parser, &name);
                } else {
                    name = not_provided(parser);
                    // Anonymous &: register the operator token itself as a
                    // local so it can be forwarded later.
                    yp_parser_local_add_token(parser, &operator);
                }

                yp_block_parameter_node_t *param = yp_block_parameter_node_create(parser, &name, &operator);
                yp_parameters_node_block_set(params, param);
                break;
            }
            case YP_TOKEN_UDOT_DOT_DOT: {
                // The `...` forwarding parameter.
                if (!allows_forwarding_parameter) {
                    yp_diagnostic_list_append(&parser->error_list, parser->current.start, parser->current.end, "Unexpected ...");
                }
                if (order > YP_PARAMETERS_ORDER_NOTHING_AFTER) {
                    update_parameter_state(parser, &parser->current, &order);
                    parser_lex(parser);

                    yp_parser_local_add_token(parser, &parser->previous);
                    yp_forwarding_parameter_node_t *param = yp_forwarding_parameter_node_create(parser, &parser->previous);
                    yp_parameters_node_keyword_rest_set(params, (yp_node_t *)param);
                } else {
                    // Out of order: update_parameter_state reports the error;
                    // we still consume the token but build no node.
                    update_parameter_state(parser, &parser->current, &order);
                    parser_lex(parser);
                }
                break;
            }
            case YP_TOKEN_CLASS_VARIABLE:
            case YP_TOKEN_IDENTIFIER:
            case YP_TOKEN_CONSTANT:
            case YP_TOKEN_INSTANCE_VARIABLE:
            case YP_TOKEN_GLOBAL_VARIABLE: {
                // A plain (required or optional) parameter. Only identifiers
                // are valid names; the other token types produce errors but
                // are still consumed so parsing can continue.
                parser_lex(parser);
                switch (parser->previous.type) {
                    case YP_TOKEN_CONSTANT:
                        yp_diagnostic_list_append(&parser->error_list, parser->previous.start, parser->previous.end, "Formal argument cannot be a constant");
                        break;
                    case YP_TOKEN_INSTANCE_VARIABLE:
                        yp_diagnostic_list_append(&parser->error_list, parser->previous.start, parser->previous.end, "Formal argument cannot be an instance variable");
                        break;
                    case YP_TOKEN_GLOBAL_VARIABLE:
                        yp_diagnostic_list_append(&parser->error_list, parser->previous.start, parser->previous.end, "Formal argument cannot be a global variable");
                        break;
                    case YP_TOKEN_CLASS_VARIABLE:
                        yp_diagnostic_list_append(&parser->error_list, parser->previous.start, parser->previous.end, "Formal argument cannot be a class variable");
                        break;
                    default: break;
                }

                // If an `=` follows then this is an optional parameter, which
                // occupies a different slot in the ordering.
                if (parser->current.type == YP_TOKEN_EQUAL) {
                    update_parameter_state(parser, &parser->current, &order);
                } else {
                    update_parameter_state(parser, &parser->previous, &order);
                }

                yp_token_t name = parser->previous;
                yp_parser_parameter_name_check(parser, &name);
                yp_parser_local_add_token(parser, &name);

                if (accept(parser, YP_TOKEN_EQUAL)) {
                    yp_token_t operator = parser->previous;
                    context_push(parser, YP_CONTEXT_DEFAULT_PARAMS);
                    yp_node_t *value = parse_expression(parser, binding_power, "Expected to find a default value for the parameter.");

                    yp_optional_parameter_node_t *param = yp_optional_parameter_node_create(parser, &name, &operator, value);
                    yp_parameters_node_optionals_append(params, param);
                    context_pop(parser);

                    // If parsing the value of the parameter resulted in error recovery,
                    // then we can put a missing node in its place and stop parsing the
                    // parameters entirely now.
                    if (parser->recovering) {
                        looping = false;
                        break;
                    }
                } else if (order > YP_PARAMETERS_ORDER_AFTER_OPTIONAL) {
                    yp_required_parameter_node_t *param = yp_required_parameter_node_create(parser, &name);
                    yp_parameters_node_requireds_append(params, (yp_node_t *) param);
                } else {
                    // Required parameters appearing after optionals are post
                    // parameters.
                    yp_required_parameter_node_t *param = yp_required_parameter_node_create(parser, &name);
                    yp_parameters_node_posts_append(params, (yp_node_t *) param);
                }

                break;
            }
            case YP_TOKEN_LABEL: {
                // A keyword parameter (`name:`), with or without a default.
                if (!uses_parentheses) parser->in_keyword_arg = true;
                update_parameter_state(parser, &parser->current, &order);
                parser_lex(parser);

                yp_token_t name = parser->previous;
                yp_token_t local = name;
                local.end -= 1; // drop the trailing ':' for the local's name

                yp_parser_parameter_name_check(parser, &local);
                yp_parser_local_add_token(parser, &local);

                switch (parser->current.type) {
                    case YP_TOKEN_COMMA:
                    case YP_TOKEN_PARENTHESIS_RIGHT:
                    case YP_TOKEN_PIPE: {
                        // No default value follows.
                        yp_node_t *param = (yp_node_t *) yp_keyword_parameter_node_create(parser, &name, NULL);
                        yp_parameters_node_keywords_append(params, param);
                        break;
                    }
                    case YP_TOKEN_SEMICOLON:
                    case YP_TOKEN_NEWLINE: {
                        // With parentheses a terminator here ends the list;
                        // without, it's a keyword parameter with no default.
                        if (uses_parentheses) {
                            looping = false;
                            break;
                        }

                        yp_node_t *param = (yp_node_t *) yp_keyword_parameter_node_create(parser, &name, NULL);
                        yp_parameters_node_keywords_append(params, param);
                        break;
                    }
                    default: {
                        // Anything else that can begin an expression is the
                        // keyword's default value.
                        yp_node_t *value = NULL;
                        if (token_begins_expression_p(parser->current.type)) {
                            context_push(parser, YP_CONTEXT_DEFAULT_PARAMS);
                            value = parse_expression(parser, binding_power, "Expected to find a default value for the keyword parameter.");
                            context_pop(parser);
                        }

                        yp_node_t *param = (yp_node_t *) yp_keyword_parameter_node_create(parser, &name, value);
                        yp_parameters_node_keywords_append(params, param);

                        // If parsing the value of the parameter resulted in error recovery,
                        // then we can put a missing node in its place and stop parsing the
                        // parameters entirely now.
                        if (parser->recovering) {
                            looping = false;
                            break;
                        }
                    }
                }

                parser->in_keyword_arg = false;
                break;
            }
            case YP_TOKEN_USTAR:
            case YP_TOKEN_STAR: {
                // A rest parameter (*rest), optionally anonymous.
                update_parameter_state(parser, &parser->current, &order);
                parser_lex(parser);

                yp_token_t operator = parser->previous;
                yp_token_t name;

                if (accept(parser, YP_TOKEN_IDENTIFIER)) {
                    name = parser->previous;
                    yp_parser_parameter_name_check(parser, &name);
                    yp_parser_local_add_token(parser, &name);
                } else {
                    name = not_provided(parser);
                    // Anonymous *: register the operator as a local so it can
                    // be forwarded later.
                    yp_parser_local_add_token(parser, &operator);
                }

                yp_rest_parameter_node_t *param = yp_rest_parameter_node_create(parser, &operator, &name);
                yp_parameters_node_rest_set(params, param);
                break;
            }
            case YP_TOKEN_STAR_STAR:
            case YP_TOKEN_USTAR_STAR: {
                // A keyword rest parameter (**kwargs), `**nil`, or an
                // anonymous **.
                update_parameter_state(parser, &parser->current, &order);
                parser_lex(parser);

                yp_token_t operator = parser->previous;
                yp_node_t *param;

                if (accept(parser, YP_TOKEN_KEYWORD_NIL)) {
                    // `**nil` explicitly forbids keyword arguments.
                    param = (yp_node_t *) yp_no_keywords_parameter_node_create(parser, &operator, &parser->previous);
                } else {
                    yp_token_t name;

                    if (accept(parser, YP_TOKEN_IDENTIFIER)) {
                        name = parser->previous;
                        yp_parser_parameter_name_check(parser, &name);
                        yp_parser_local_add_token(parser, &name);
                    } else {
                        name = not_provided(parser);
                        yp_parser_local_add_token(parser, &operator);
                    }

                    param = (yp_node_t *) yp_keyword_rest_parameter_node_create(parser, &operator, &name);
                }

                yp_parameters_node_keyword_rest_set(params, param);
                break;
            }
            default:
                if (parser->previous.type == YP_TOKEN_COMMA) {
                    if (allows_trailing_comma) {
                        // If we get here, then we have a trailing comma in a block
                        // parameter list. We need to create an anonymous rest parameter to
                        // represent it.
                        yp_token_t name = not_provided(parser);
                        yp_rest_parameter_node_t *param = yp_rest_parameter_node_create(parser, &parser->previous, &name);
                        yp_parameters_node_rest_set(params, param);
                    } else {
                        yp_diagnostic_list_append(&parser->error_list, parser->previous.start, parser->previous.end, "Unexpected ','.");
                    }
                }

                looping = false;
                break;
        }

        // Inside parentheses, a newline after a parameter is insignificant.
        if (looping && uses_parentheses) {
            accept(parser, YP_TOKEN_NEWLINE);
        }
    } while (looping && accept(parser, YP_TOKEN_COMMA));

    yp_do_loop_stack_pop(parser);

    // If we don't have any parameters, return `NULL` instead of an empty `ParametersNode`.
    if (params->base.location.start == params->base.location.end) {
        yp_node_destroy(parser, (yp_node_t *) params);
        return NULL;
    }

    return params;
}
// Parse any number of rescue clauses, followed by optional else and ensure
// clauses, attaching them all to parent_node. Successive rescue clauses form
// a linked list of rescue nodes chained through their consequent pointers.
static inline void
parse_rescues(yp_parser_t *parser, yp_begin_node_t *parent_node) {
    yp_rescue_node_t *current = NULL;

    while (accept(parser, YP_TOKEN_KEYWORD_RESCUE)) {
        yp_rescue_node_t *rescue = yp_rescue_node_create(parser, &parser->previous);

        switch (parser->current.type) {
            case YP_TOKEN_EQUAL_GREATER: {
                // Here we have an immediate => after the rescue keyword, in which case
                // we're going to have an empty list of exceptions to rescue (which
                // implies StandardError).
                parser_lex(parser);
                yp_rescue_node_operator_set(rescue, &parser->previous);

                yp_node_t *node = parse_expression(parser, YP_BINDING_POWER_INDEX, "Expected an exception variable after `=>` in rescue statement.");
                yp_token_t operator = not_provided(parser);
                node = parse_target(parser, node, &operator, NULL);

                rescue->exception = node;
                break;
            }
            case YP_TOKEN_NEWLINE:
            case YP_TOKEN_SEMICOLON:
            case YP_TOKEN_KEYWORD_THEN:
                // Here we have a terminator for the rescue keyword, in which case we're
                // going to just continue on.
                break;
            default: {
                if (token_begins_expression_p(parser->current.type) || match_type_p(parser, YP_TOKEN_USTAR)) {
                    // Here we have something that could be an exception expression, so
                    // we'll attempt to parse it here and any others delimited by commas.
                    do {
                        yp_node_t *expression = parse_starred_expression(parser, YP_BINDING_POWER_DEFINED, "Expected to find a rescued expression.");
                        yp_rescue_node_exceptions_append(rescue, expression);

                        // If we hit a newline, then this is the end of the rescue expression. We
                        // can continue on to parse the statements.
                        if (match_any_type_p(parser, 3, YP_TOKEN_NEWLINE, YP_TOKEN_SEMICOLON, YP_TOKEN_KEYWORD_THEN)) break;

                        // If we hit a `=>` then we're going to parse the exception variable. Once
                        // we've done that, we'll break out of the loop and parse the statements.
                        if (accept(parser, YP_TOKEN_EQUAL_GREATER)) {
                            yp_rescue_node_operator_set(rescue, &parser->previous);

                            yp_node_t *node = parse_expression(parser, YP_BINDING_POWER_INDEX, "Expected an exception variable after `=>` in rescue statement.");
                            yp_token_t operator = not_provided(parser);
                            node = parse_target(parser, node, &operator, NULL);

                            yp_rescue_node_exception_set(rescue, node);
                            break;
                        }
                    } while (accept(parser, YP_TOKEN_COMMA));
                }
            }
        }

        // A rescue clause is terminated by a newline/semicolon (optionally
        // followed by `then`) or by `then` on its own.
        if (accept_any(parser, 2, YP_TOKEN_NEWLINE, YP_TOKEN_SEMICOLON)) {
            accept(parser, YP_TOKEN_KEYWORD_THEN);
        } else {
            expect(parser, YP_TOKEN_KEYWORD_THEN, "Expected a terminator after rescue clause.");
        }

        // Parse the clause's statements unless the body is empty (the next
        // token already starts an else/ensure/end).
        if (!match_any_type_p(parser, 3, YP_TOKEN_KEYWORD_ELSE, YP_TOKEN_KEYWORD_ENSURE, YP_TOKEN_KEYWORD_END)) {
            yp_statements_node_t *statements = parse_statements(parser, YP_CONTEXT_RESCUE);
            if (statements) {
                yp_rescue_node_statements_set(rescue, statements);
            }
            accept_any(parser, 2, YP_TOKEN_NEWLINE, YP_TOKEN_SEMICOLON);
        }

        // The first clause hangs off the begin node; subsequent clauses chain
        // onto the previous clause's consequent.
        if (current == NULL) {
            yp_begin_node_rescue_clause_set(parent_node, rescue);
        } else {
            yp_rescue_node_consequent_set(current, rescue);
        }

        current = rescue;
    }

    // The end node locations on rescue nodes will not be set correctly
    // since we won't know the end until we've found all consequent
    // clauses. This sets the end location on all rescues once we know it
    if (current) {
        const char *end_to_set = current->base.location.end;
        current = parent_node->rescue_clause;
        while (current) {
            current->base.location.end = end_to_set;
            current = current->consequent;
        }
    }

    if (accept(parser, YP_TOKEN_KEYWORD_ELSE)) {
        yp_token_t else_keyword = parser->previous;
        accept_any(parser, 2, YP_TOKEN_NEWLINE, YP_TOKEN_SEMICOLON);

        yp_statements_node_t *else_statements = NULL;
        if (!match_any_type_p(parser, 2, YP_TOKEN_KEYWORD_END, YP_TOKEN_KEYWORD_ENSURE)) {
            else_statements = parse_statements(parser, YP_CONTEXT_RESCUE_ELSE);
            accept_any(parser, 2, YP_TOKEN_NEWLINE, YP_TOKEN_SEMICOLON);
        }

        yp_else_node_t *else_clause = yp_else_node_create(parser, &else_keyword, else_statements, &parser->current);
        yp_begin_node_else_clause_set(parent_node, else_clause);
    }

    if (accept(parser, YP_TOKEN_KEYWORD_ENSURE)) {
        yp_token_t ensure_keyword = parser->previous;
        accept_any(parser, 2, YP_TOKEN_NEWLINE, YP_TOKEN_SEMICOLON);

        yp_statements_node_t *ensure_statements = NULL;
        if (!match_type_p(parser, YP_TOKEN_KEYWORD_END)) {
            ensure_statements = parse_statements(parser, YP_CONTEXT_ENSURE);
            accept_any(parser, 2, YP_TOKEN_NEWLINE, YP_TOKEN_SEMICOLON);
        }

        yp_ensure_node_t *ensure_clause = yp_ensure_node_create(parser, &ensure_keyword, ensure_statements, &parser->current);
        yp_begin_node_ensure_clause_set(parent_node, ensure_clause);
    }

    // Record the `end` keyword location; if it's absent, fall back to a
    // missing token (the caller is responsible for consuming/reporting it).
    if (parser->current.type == YP_TOKEN_KEYWORD_END) {
        yp_begin_node_end_keyword_set(parent_node, &parser->current);
    } else {
        yp_token_t end_keyword = (yp_token_t) { .type = YP_TOKEN_MISSING, .start = parser->previous.end, .end = parser->previous.end };
        yp_begin_node_end_keyword_set(parent_node, &end_keyword);
    }
}
// Wrap the given statements in an implicit begin node (one with no `begin`
// keyword) and parse the trailing rescue/else/ensure clauses onto it.
static inline yp_begin_node_t *
parse_rescues_as_begin(yp_parser_t *parser, yp_statements_node_t *statements) {
    yp_token_t missing = not_provided(parser);
    yp_begin_node_t *node = yp_begin_node_create(parser, &missing, statements);
    parse_rescues(parser, node);

    // Every child of a begin node is optional, so use the start location of
    // the earliest child that is actually present as the node's start,
    // falling back to the location the node was created with.
    const char *start;
    if (node->statements != NULL) {
        start = node->statements->base.location.start;
    } else if (node->rescue_clause != NULL) {
        start = node->rescue_clause->base.location.start;
    } else if (node->else_clause != NULL) {
        start = node->else_clause->base.location.start;
    } else if (node->ensure_clause != NULL) {
        start = node->ensure_clause->base.location.start;
    } else {
        start = node->base.location.start;
    }

    node->base.location.start = start;
    return node;
}
// Parse the parameter list of a block or lambda literal, including any
// block-local variables introduced after a semicolon.
static yp_block_parameters_node_t *
parse_block_parameters(
    yp_parser_t *parser,
    bool allows_trailing_comma,
    const yp_token_t *opening,
    bool is_lambda_literal
) {
    // Unless the list starts immediately with the block-local separator (;),
    // parse the ordinary parameters first. Lambda literals parse default
    // values with a different binding power than blocks.
    yp_parameters_node_t *parameters = NULL;
    if (!match_type_p(parser, YP_TOKEN_SEMICOLON)) {
        yp_binding_power_t binding_power = is_lambda_literal ? YP_BINDING_POWER_DEFINED : YP_BINDING_POWER_INDEX;
        parameters = parse_parameters(parser, binding_power, false, allows_trailing_comma, false);
    }

    yp_block_parameters_node_t *node = yp_block_parameters_node_create(parser, parameters, opening);

    // A semicolon introduces a comma-separated list of block-local variable
    // names, each of which is added to the current scope.
    if (accept(parser, YP_TOKEN_SEMICOLON)) {
        do {
            expect(parser, YP_TOKEN_IDENTIFIER, "Expected a local variable name.");
            yp_parser_local_add_token(parser, &parser->previous);
            yp_block_parameters_node_append_local(node, &parser->previous);
        } while (accept(parser, YP_TOKEN_COMMA));
    }

    return node;
}
// Parse a block, either brace-delimited ({ ... }) or keyword-delimited
// (do ... end). The opening { or do token must already be in
// parser->previous. Opens a new scope for the block's locals.
static yp_block_node_t *
parse_block(yp_parser_t *parser) {
    yp_token_t opening = parser->previous;
    accept(parser, YP_TOKEN_NEWLINE);

    yp_accepts_block_stack_push(parser, true);
    yp_parser_scope_push(parser, false);
    yp_block_parameters_node_t *parameters = NULL;

    if (accept(parser, YP_TOKEN_PIPE)) {
        yp_token_t block_parameters_opening = parser->previous;

        if (match_type_p(parser, YP_TOKEN_PIPE)) {
            // An empty parameter list: ||.
            parameters = yp_block_parameters_node_create(parser, NULL, &block_parameters_opening);
            parser->command_start = true;
            parser_lex(parser);
        } else {
            parameters = parse_block_parameters(parser, true, &block_parameters_opening, false);
            accept(parser, YP_TOKEN_NEWLINE);
            parser->command_start = true;
            expect(parser, YP_TOKEN_PIPE, "Expected block parameters to end with '|'.");
        }

        yp_block_parameters_node_closing_set(parameters, &parser->previous);
    }

    accept(parser, YP_TOKEN_NEWLINE);
    yp_node_t *statements = NULL;

    if (opening.type == YP_TOKEN_BRACE_LEFT) {
        if (!match_type_p(parser, YP_TOKEN_BRACE_RIGHT)) {
            statements = (yp_node_t *) parse_statements(parser, YP_CONTEXT_BLOCK_BRACES);
        }

        expect(parser, YP_TOKEN_BRACE_RIGHT, "Expected block beginning with '{' to end with '}'.");
    } else {
        if (!match_type_p(parser, YP_TOKEN_KEYWORD_END)) {
            if (!match_any_type_p(parser, 3, YP_TOKEN_KEYWORD_RESCUE, YP_TOKEN_KEYWORD_ELSE, YP_TOKEN_KEYWORD_ENSURE)) {
                statements = (yp_node_t *) parse_statements(parser, YP_CONTEXT_BLOCK_KEYWORDS);
            }

            // do ... end blocks can carry rescue/ensure clauses, which are
            // wrapped up with the statements into an implicit begin node.
            if (match_any_type_p(parser, 2, YP_TOKEN_KEYWORD_RESCUE, YP_TOKEN_KEYWORD_ENSURE)) {
                assert(statements == NULL || statements->type == YP_NODE_STATEMENTS_NODE);
                statements = (yp_node_t *) parse_rescues_as_begin(parser, (yp_statements_node_t *) statements);
            }
        }

        expect(parser, YP_TOKEN_KEYWORD_END, "Expected block beginning with 'do' to end with 'end'.");
    }

    // Capture the block's locals before popping its scope.
    yp_constant_id_list_t locals = parser->current_scope->locals;
    yp_parser_scope_pop(parser);
    yp_accepts_block_stack_pop(parser);
    return yp_block_node_create(parser, &locals, &opening, parameters, statements, &parser->previous);
}
// Parse a list of arguments and their surrounding parentheses if they are
// present, then an attached block ({ ... } or do ... end) when the caller
// accepts one. Results are written into the given arguments struct.
static void
parse_arguments_list(yp_parser_t *parser, yp_arguments_t *arguments, bool accepts_block) {
    if (accept(parser, YP_TOKEN_PARENTHESIS_LEFT)) {
        arguments->opening_loc = ((yp_location_t) { .start = parser->previous.start, .end = parser->previous.end });

        if (accept(parser, YP_TOKEN_PARENTHESIS_RIGHT)) {
            // An empty pair of parentheses: nothing to parse.
            arguments->closing_loc = ((yp_location_t) { .start = parser->previous.start, .end = parser->previous.end });
        } else {
            arguments->arguments = yp_arguments_node_create(parser);

            yp_accepts_block_stack_push(parser, true);
            parse_arguments(parser, arguments->arguments, true, YP_TOKEN_PARENTHESIS_RIGHT);
            expect(parser, YP_TOKEN_PARENTHESIS_RIGHT, "Expected a ')' to close the argument list.");
            yp_accepts_block_stack_pop(parser);

            arguments->closing_loc = ((yp_location_t) { .start = parser->previous.start, .end = parser->previous.end });
        }
    } else {
        bool starts_argument =
            token_begins_expression_p(parser->current.type) ||
            match_any_type_p(parser, 2, YP_TOKEN_USTAR, YP_TOKEN_USTAR_STAR);

        if (starts_argument && !match_type_p(parser, YP_TOKEN_BRACE_LEFT)) {
            // If we get here, then the subsequent token cannot be used as an
            // infix operator. In this case we assume the subsequent token is
            // part of an argument to this method call (a paren-less call).
            yp_accepts_block_stack_push(parser, false);
            arguments->arguments = yp_arguments_node_create(parser);
            parse_arguments(parser, arguments->arguments, true, YP_TOKEN_EOF);
            yp_accepts_block_stack_pop(parser);
        }
    }

    // With the arguments consumed, check for an attached block. A brace block
    // is always allowed here; a do block only when the surrounding construct
    // accepts one.
    if (accepts_block) {
        if (accept(parser, YP_TOKEN_BRACE_LEFT) || (yp_accepts_block_stack_p(parser) && accept(parser, YP_TOKEN_KEYWORD_DO))) {
            arguments->block = parse_block(parser);
        }
    }
}
// Parse an `if` or `unless` expression whose keyword is already in
// parser->previous, including elsif/else clauses and the closing `end`.
// context must be YP_CONTEXT_IF or YP_CONTEXT_UNLESS.
static inline yp_node_t *
parse_conditional(yp_parser_t *parser, yp_context_t context) {
    yp_token_t keyword = parser->previous;

    context_push(parser, YP_CONTEXT_PREDICATE);
    yp_node_t *predicate = parse_expression(parser, YP_BINDING_POWER_MODIFIER, "Expected to find a predicate for the conditional.");

    // Predicates are closed by a term, a "then", or a term and then a "then".
    accept_any(parser, 2, YP_TOKEN_NEWLINE, YP_TOKEN_SEMICOLON);
    accept(parser, YP_TOKEN_KEYWORD_THEN);

    context_pop(parser);

    yp_statements_node_t *statements = NULL;

    // Parse the body unless the next token already starts an
    // elsif/else/end clause.
    if (!match_any_type_p(parser, 3, YP_TOKEN_KEYWORD_ELSIF, YP_TOKEN_KEYWORD_ELSE, YP_TOKEN_KEYWORD_END)) {
        yp_accepts_block_stack_push(parser, true);
        statements = parse_statements(parser, context);
        yp_accepts_block_stack_pop(parser);
        accept_any(parser, 2, YP_TOKEN_NEWLINE, YP_TOKEN_SEMICOLON);
    }

    // The end keyword location is patched in later, once it is known.
    yp_token_t end_keyword = not_provided(parser);
    yp_node_t *parent;

    switch (context) {
        case YP_CONTEXT_IF:
            parent = (yp_node_t *) yp_if_node_create(parser, &keyword, predicate, statements, NULL, &end_keyword);
            break;
        case YP_CONTEXT_UNLESS:
            parent = (yp_node_t *) yp_unless_node_create(parser, &keyword, predicate, statements);
            break;
        default:
            parent = NULL;
            assert(false && "unreachable");
            break;
    }

    yp_node_t *current = parent;

    // Parse any number of elsif clauses. This will form a linked list of if
    // nodes pointing to each other from the top.
    if (context == YP_CONTEXT_IF) {
        while (accept(parser, YP_TOKEN_KEYWORD_ELSIF)) {
            yp_token_t elsif_keyword = parser->previous;
            yp_node_t *predicate = parse_expression(parser, YP_BINDING_POWER_MODIFIER, "Expected to find a predicate for the elsif clause.");

            // Predicates are closed by a term, a "then", or a term and then a "then".
            accept_any(parser, 2, YP_TOKEN_NEWLINE, YP_TOKEN_SEMICOLON);
            accept(parser, YP_TOKEN_KEYWORD_THEN);

            yp_accepts_block_stack_push(parser, true);
            yp_statements_node_t *statements = parse_statements(parser, YP_CONTEXT_ELSIF);
            yp_accepts_block_stack_pop(parser);

            accept_any(parser, 2, YP_TOKEN_NEWLINE, YP_TOKEN_SEMICOLON);

            // Each elsif becomes the consequent of the previous clause.
            yp_node_t *elsif = (yp_node_t *) yp_if_node_create(parser, &elsif_keyword, predicate, statements, NULL, &end_keyword);
            ((yp_if_node_t *) current)->consequent = elsif;
            current = elsif;
        }
    }

    if (match_type_p(parser, YP_TOKEN_KEYWORD_ELSE)) {
        parser_lex(parser);
        yp_token_t else_keyword = parser->previous;

        yp_accepts_block_stack_push(parser, true);
        yp_statements_node_t *else_statements = parse_statements(parser, YP_CONTEXT_ELSE);
        yp_accepts_block_stack_pop(parser);

        accept_any(parser, 2, YP_TOKEN_NEWLINE, YP_TOKEN_SEMICOLON);
        expect(parser, YP_TOKEN_KEYWORD_END, "Expected `end` to close `else` clause.");

        yp_else_node_t *else_node = yp_else_node_create(parser, &else_keyword, else_statements, &parser->previous);

        switch (context) {
            case YP_CONTEXT_IF:
                ((yp_if_node_t *) current)->consequent = (yp_node_t *) else_node;

                // Recurse down if nodes setting the appropriate end location in all cases
                yp_node_t * recursing_node = parent;
                bool recursing = true;

                while (recursing) {
                    switch (recursing_node->type) {
                        case YP_NODE_IF_NODE:
                            yp_if_node_end_keyword_loc_set((yp_if_node_t *) recursing_node, &parser->previous);
                            recursing_node = ((yp_if_node_t *) recursing_node)->consequent;
                            break;
                        case YP_NODE_ELSE_NODE:
                            yp_else_node_end_keyword_loc_set((yp_else_node_t *) recursing_node, &parser->previous);
                            recursing = false;
                            break;
                        default: {
                            recursing = false;
                            break;
                        }
                    }
                }
                break;
            case YP_CONTEXT_UNLESS:
                ((yp_unless_node_t *) parent)->consequent = else_node;
                yp_unless_node_end_keyword_loc_set((yp_unless_node_t *) parent, &parser->previous);
                break;
            default:
                assert(false && "unreachable");
                break;
        }
    } else {
        // No else clause: the conditional is closed directly by `end`.
        expect(parser, YP_TOKEN_KEYWORD_END, "Expected `end` to close `if` statement.");

        switch (context) {
            case YP_CONTEXT_IF:
                yp_if_node_end_keyword_loc_set((yp_if_node_t *) parent, &parser->previous);
                break;
            case YP_CONTEXT_UNLESS:
                yp_unless_node_end_keyword_loc_set((yp_unless_node_t *) parent, &parser->previous);
                break;
            default:
                assert(false && "unreachable");
                break;
        }
    }

    return parent;
}
// This macro allows you to define a case statement for all of the keywords.
// It's meant to be used in a switch statement (the expansion supplies its own
// leading `case` labels except the first, so write `case YP_CASE_KEYWORD:`).
#define YP_CASE_KEYWORD YP_TOKEN_KEYWORD___ENCODING__: case YP_TOKEN_KEYWORD___FILE__: case YP_TOKEN_KEYWORD___LINE__: \
    case YP_TOKEN_KEYWORD_ALIAS: case YP_TOKEN_KEYWORD_AND: case YP_TOKEN_KEYWORD_BEGIN: case YP_TOKEN_KEYWORD_BEGIN_UPCASE: \
    case YP_TOKEN_KEYWORD_BREAK: case YP_TOKEN_KEYWORD_CASE: case YP_TOKEN_KEYWORD_CLASS: case YP_TOKEN_KEYWORD_DEF: \
    case YP_TOKEN_KEYWORD_DEFINED: case YP_TOKEN_KEYWORD_DO: case YP_TOKEN_KEYWORD_DO_LOOP: case YP_TOKEN_KEYWORD_ELSE: \
    case YP_TOKEN_KEYWORD_ELSIF: case YP_TOKEN_KEYWORD_END: case YP_TOKEN_KEYWORD_END_UPCASE: case YP_TOKEN_KEYWORD_ENSURE: \
    case YP_TOKEN_KEYWORD_FALSE: case YP_TOKEN_KEYWORD_FOR: case YP_TOKEN_KEYWORD_IF: case YP_TOKEN_KEYWORD_IN: \
    case YP_TOKEN_KEYWORD_MODULE: case YP_TOKEN_KEYWORD_NEXT: case YP_TOKEN_KEYWORD_NIL: case YP_TOKEN_KEYWORD_NOT: \
    case YP_TOKEN_KEYWORD_OR: case YP_TOKEN_KEYWORD_REDO: case YP_TOKEN_KEYWORD_RESCUE: case YP_TOKEN_KEYWORD_RETRY: \
    case YP_TOKEN_KEYWORD_RETURN: case YP_TOKEN_KEYWORD_SELF: case YP_TOKEN_KEYWORD_SUPER: case YP_TOKEN_KEYWORD_THEN: \
    case YP_TOKEN_KEYWORD_TRUE: case YP_TOKEN_KEYWORD_UNDEF: case YP_TOKEN_KEYWORD_UNLESS: case YP_TOKEN_KEYWORD_UNTIL: \
    case YP_TOKEN_KEYWORD_WHEN: case YP_TOKEN_KEYWORD_WHILE: case YP_TOKEN_KEYWORD_YIELD

// This macro allows you to define a case statement for all of the operators.
// It's meant to be used in a switch statement.
#define YP_CASE_OPERATOR YP_TOKEN_AMPERSAND: case YP_TOKEN_BACKTICK: case YP_TOKEN_BANG_EQUAL: \
    case YP_TOKEN_BANG_TILDE: case YP_TOKEN_BANG: case YP_TOKEN_BRACKET_LEFT_RIGHT_EQUAL: \
    case YP_TOKEN_BRACKET_LEFT_RIGHT: case YP_TOKEN_CARET: case YP_TOKEN_EQUAL_EQUAL_EQUAL: case YP_TOKEN_EQUAL_EQUAL: \
    case YP_TOKEN_EQUAL_TILDE: case YP_TOKEN_GREATER_EQUAL: case YP_TOKEN_GREATER_GREATER: case YP_TOKEN_GREATER: \
    case YP_TOKEN_LESS_EQUAL_GREATER: case YP_TOKEN_LESS_EQUAL: case YP_TOKEN_LESS_LESS: case YP_TOKEN_LESS: \
    case YP_TOKEN_MINUS: case YP_TOKEN_PERCENT: case YP_TOKEN_PIPE: case YP_TOKEN_PLUS: case YP_TOKEN_SLASH: \
    case YP_TOKEN_STAR_STAR: case YP_TOKEN_STAR: case YP_TOKEN_TILDE: case YP_TOKEN_UMINUS: case YP_TOKEN_UMINUS_NUM: \
    case YP_TOKEN_UPLUS: case YP_TOKEN_USTAR: case YP_TOKEN_USTAR_STAR

// This macro allows you to define a case statement for all of the token types
// that represent the beginning of nodes that are "primitives" in a pattern
// matching expression.
#define YP_CASE_PRIMITIVE YP_TOKEN_INTEGER: case YP_TOKEN_FLOAT: case YP_TOKEN_RATIONAL_NUMBER: \
    case YP_TOKEN_IMAGINARY_NUMBER: case YP_TOKEN_SYMBOL_BEGIN: case YP_TOKEN_REGEXP_BEGIN: case YP_TOKEN_BACKTICK: \
    case YP_TOKEN_PERCENT_LOWER_X: case YP_TOKEN_PERCENT_LOWER_I: case YP_TOKEN_PERCENT_LOWER_W: \
    case YP_TOKEN_PERCENT_UPPER_I: case YP_TOKEN_PERCENT_UPPER_W: case YP_TOKEN_STRING_BEGIN: case YP_TOKEN_KEYWORD_NIL: \
    case YP_TOKEN_KEYWORD_SELF: case YP_TOKEN_KEYWORD_TRUE: case YP_TOKEN_KEYWORD_FALSE: case YP_TOKEN_KEYWORD___FILE__: \
    case YP_TOKEN_KEYWORD___LINE__: case YP_TOKEN_KEYWORD___ENCODING__: case YP_TOKEN_MINUS_GREATER: \
    case YP_TOKEN_HEREDOC_START: case YP_TOKEN_UMINUS_NUM: case YP_TOKEN_CHARACTER_LITERAL
// This macro allows you to define a case statement for all of the token types
// that could begin a parameter.
#define YP_CASE_PARAMETER YP_TOKEN_AMPERSAND: case YP_TOKEN_UDOT_DOT_DOT: case YP_TOKEN_IDENTIFIER: \
case YP_TOKEN_LABEL: case YP_TOKEN_USTAR: case YP_TOKEN_STAR: case YP_TOKEN_STAR_STAR: case YP_TOKEN_USTAR_STAR: case YP_TOKEN_CONSTANT: \
case YP_TOKEN_INSTANCE_VARIABLE: case YP_TOKEN_GLOBAL_VARIABLE: case YP_TOKEN_CLASS_VARIABLE
// This macro allows you to define a case statement for all of the nodes that
// can be transformed into write targets.
#define YP_CASE_WRITABLE YP_NODE_CLASS_VARIABLE_READ_NODE: case YP_NODE_CONSTANT_PATH_NODE: \
case YP_NODE_CONSTANT_READ_NODE: case YP_NODE_GLOBAL_VARIABLE_READ_NODE: case YP_NODE_LOCAL_VARIABLE_READ_NODE: \
case YP_NODE_INSTANCE_VARIABLE_READ_NODE: case YP_NODE_MULTI_WRITE_NODE: case YP_NODE_BACK_REFERENCE_READ_NODE: \
case YP_NODE_NUMBERED_REFERENCE_READ_NODE
// Parse a node that is part of a string. If the subsequent tokens cannot be
// parsed as a string part, then NULL is returned (after appending an error to
// the parser's error list and consuming the offending token).
static yp_node_t *
parse_string_part(yp_parser_t *parser) {
    switch (parser->current.type) {
        // Here the lexer has returned to us plain string content. In this case
        // we'll create a string node that has no opening or closing and return that
        // as the part. These kinds of parts look like:
        //
        //     "aaa #{bbb} #@ccc ddd"
        //      ^^^^      ^     ^^^^
        case YP_TOKEN_STRING_CONTENT: {
            yp_unescape_type_t unescape_type = YP_UNESCAPE_ALL;

            // In a single-quoted heredoc (<<'END') only the minimal set of
            // escape sequences is processed, so downgrade the unescape type.
            if (parser->lex_modes.current->mode == YP_LEX_HEREDOC) {
                if (parser->lex_modes.current->as.heredoc.quote == YP_HEREDOC_QUOTE_SINGLE) {
                    unescape_type = YP_UNESCAPE_MINIMAL;
                }
            }

            parser_lex(parser);

            yp_token_t opening = not_provided(parser);
            yp_token_t closing = not_provided(parser);

            return (yp_node_t *) yp_string_node_create_and_unescape(parser, &opening, &parser->previous, &closing, unescape_type);
        }
        // Here the lexer has returned the beginning of an embedded expression. In
        // that case we'll parse the inner statements and return that as the part.
        // These kinds of parts look like:
        //
        //     "aaa #{bbb} #@ccc ddd"
        //          ^^^^^^
        case YP_TOKEN_EMBEXPR_BEGIN: {
            // Save the lexer state and brace nesting so they can be restored
            // once the embedded expression has been fully parsed; the
            // expression inside #{} is lexed as a fresh statement context.
            yp_lex_state_t state = parser->lex_state;
            int brace_nesting = parser->brace_nesting;

            parser->brace_nesting = 0;
            lex_state_set(parser, YP_LEX_STATE_BEG);
            parser_lex(parser);

            yp_token_t opening = parser->previous;
            yp_statements_node_t *statements = NULL;

            // #{} with nothing inside is valid; only parse statements if the
            // next token isn't already the closing delimiter.
            if (!match_type_p(parser, YP_TOKEN_EMBEXPR_END)) {
                yp_accepts_block_stack_push(parser, true);
                statements = parse_statements(parser, YP_CONTEXT_EMBEXPR);
                yp_accepts_block_stack_pop(parser);
            }

            parser->brace_nesting = brace_nesting;
            lex_state_set(parser, state);

            expect(parser, YP_TOKEN_EMBEXPR_END, "Expected a closing delimiter for an embedded expression.");
            yp_token_t closing = parser->previous;

            return (yp_node_t *) yp_embedded_statements_node_create(parser, &opening, statements, &closing);
        }
        // Here the lexer has returned the beginning of an embedded variable.
        // In that case we'll parse the variable and create an appropriate node
        // for it and then return that node. These kinds of parts look like:
        //
        //     "aaa #{bbb} #@ccc ddd"
        //                 ^^^^^
        case YP_TOKEN_EMBVAR: {
            lex_state_set(parser, YP_LEX_STATE_BEG);
            parser_lex(parser);

            yp_token_t operator = parser->previous;
            yp_node_t *variable;

            switch (parser->current.type) {
                // In this case a back reference is being interpolated. We'll
                // create a global variable read node.
                case YP_TOKEN_BACK_REFERENCE:
                    parser_lex(parser);
                    variable = (yp_node_t *) yp_back_reference_read_node_create(parser, &parser->previous);
                    break;
                // In this case an nth reference is being interpolated. We'll
                // create a global variable read node.
                case YP_TOKEN_NUMBERED_REFERENCE:
                    parser_lex(parser);
                    variable = (yp_node_t *) yp_numbered_reference_read_node_create(parser, &parser->previous);
                    break;
                // In this case a global variable is being interpolated. We'll
                // create a global variable read node.
                case YP_TOKEN_GLOBAL_VARIABLE:
                    parser_lex(parser);
                    variable = (yp_node_t *) yp_global_variable_read_node_create(parser, &parser->previous);
                    break;
                // In this case an instance variable is being interpolated.
                // We'll create an instance variable read node.
                case YP_TOKEN_INSTANCE_VARIABLE:
                    parser_lex(parser);
                    variable = (yp_node_t *) yp_instance_variable_read_node_create(parser, &parser->previous);
                    break;
                // In this case a class variable is being interpolated. We'll
                // create a class variable read node.
                case YP_TOKEN_CLASS_VARIABLE:
                    parser_lex(parser);
                    variable = (yp_node_t *) yp_class_variable_read_node_create(parser, &parser->previous);
                    break;
                // We can hit here if we got an invalid token. In that case
                // we'll not attempt to lex this token and instead just return a
                // missing node.
                default:
                    expect(parser, YP_TOKEN_IDENTIFIER, "Expected a valid embedded variable.");
                    variable = (yp_node_t *) yp_missing_node_create(parser, parser->current.start, parser->current.end);
                    break;
            }

            return (yp_node_t *) yp_embedded_variable_node_create(parser, &operator, variable);
        }
        default:
            parser_lex(parser);
            yp_diagnostic_list_append(&parser->error_list, parser->previous.start, parser->previous.end, "Could not understand string part");
            return NULL;
    }
}
// Parse a symbol. `lex_mode` is a copy of the lex mode that was active when
// the symbol's opening token was encountered: a YP_LEX_STRING mode indicates a
// quoted symbol (:"foo" or %s[foo]) while anything else indicates a plain
// symbol (:foo, :+, :@ivar, ...). If `next_state` is not YP_LEX_STATE_NONE
// then the lexer is transitioned into it after the symbol's contents have been
// consumed (used e.g. by alias/undef arguments).
static yp_node_t *
parse_symbol(yp_parser_t *parser, yp_lex_mode_t *lex_mode, yp_lex_state_t next_state) {
    bool lex_string = lex_mode->mode == YP_LEX_STRING;
    bool lex_interpolation = lex_string && lex_mode->as.string.interpolation;

    yp_token_t opening = parser->previous;

    if (!lex_string) {
        // Plain symbol: the next token is the symbol's name (an identifier,
        // variable, keyword, or operator).
        if (next_state != YP_LEX_STATE_NONE) {
            lex_state_set(parser, next_state);
        }

        yp_token_t symbol;

        switch (parser->current.type) {
            case YP_TOKEN_IDENTIFIER:
            case YP_TOKEN_CONSTANT:
            case YP_TOKEN_INSTANCE_VARIABLE:
            case YP_TOKEN_CLASS_VARIABLE:
            case YP_TOKEN_GLOBAL_VARIABLE:
            case YP_TOKEN_NUMBERED_REFERENCE:
            case YP_TOKEN_BACK_REFERENCE:
            case YP_CASE_KEYWORD:
                parser_lex(parser);
                symbol = parser->previous;
                break;
            case YP_CASE_OPERATOR:
                // Operator symbols (:+, :[], ...) put the lexer in ENDFN state
                // unless the caller requested a specific next state.
                lex_state_set(parser, next_state == YP_LEX_STATE_NONE ? YP_LEX_STATE_ENDFN : next_state);
                parser_lex(parser);
                symbol = parser->previous;
                break;
            default:
                expect(parser, YP_TOKEN_IDENTIFIER, "Expected symbol.");
                symbol = parser->previous;
                break;
        }

        yp_token_t closing = not_provided(parser);
        return (yp_node_t *) yp_symbol_node_create_and_unescape(parser, &opening, &symbol, &closing, YP_UNESCAPE_ALL);
    }

    // If we weren't in a string in the previous check then we have to be now.
    assert(lex_string);

    if (lex_interpolation) {
        // Interpolatable quoted symbol (:"..."): gather string parts until the
        // closing delimiter (or EOF, which will be reported by expect below).
        yp_interpolated_symbol_node_t *interpolated = yp_interpolated_symbol_node_create(parser, &opening, NULL, &opening);

        while (!match_any_type_p(parser, 2, YP_TOKEN_STRING_END, YP_TOKEN_EOF)) {
            yp_node_t *part = parse_string_part(parser);
            if (part != NULL) {
                yp_interpolated_symbol_node_append(interpolated, part);
            }
        }

        if (next_state != YP_LEX_STATE_NONE) {
            lex_state_set(parser, next_state);
        }

        expect(parser, YP_TOKEN_STRING_END, "Expected a closing delimiter for an interpolated symbol.");
        yp_interpolated_symbol_node_closing_set(interpolated, &parser->previous);

        return (yp_node_t *) interpolated;
    }

    yp_token_t content;

    // Non-interpolatable quoted symbol (:'...'): there is at most one content
    // token. If there is none (an empty symbol), synthesize a zero-width
    // content token just after the opening delimiter.
    if (accept(parser, YP_TOKEN_STRING_CONTENT)) {
        content = parser->previous;
    } else {
        content = (yp_token_t) { .type = YP_TOKEN_STRING_CONTENT, .start = parser->previous.end, .end = parser->previous.end };
    }

    if (next_state != YP_LEX_STATE_NONE) {
        lex_state_set(parser, next_state);
    }

    expect(parser, YP_TOKEN_STRING_END, "Expected a closing delimiter for a dynamic symbol.");
    return (yp_node_t *) yp_symbol_node_create_and_unescape(parser, &opening, &content, &parser->previous, YP_UNESCAPE_ALL);
}
// Parse an argument to undef which can either be a bare word, a
// symbol, a constant, or an interpolated symbol. Returns a missing node (and
// appends an error) if the current token cannot start an undef argument.
static inline yp_node_t *
parse_undef_argument(yp_parser_t *parser) {
    switch (parser->current.type) {
        // Bare words — keywords, operator names, constants, and identifiers —
        // are wrapped in a symbol node with no opening/closing delimiters.
        case YP_CASE_KEYWORD:
        case YP_CASE_OPERATOR:
        case YP_TOKEN_CONSTANT:
        case YP_TOKEN_IDENTIFIER: {
            parser_lex(parser);

            yp_token_t opening = not_provided(parser);
            yp_token_t closing = not_provided(parser);

            return (yp_node_t *) yp_symbol_node_create_and_unescape(parser, &opening, &parser->previous, &closing, YP_UNESCAPE_ALL);
        }
        // An explicit symbol, possibly quoted/interpolated. The lex mode is
        // copied before lexing past the symbol beginning because parse_symbol
        // needs it to tell plain symbols apart from string symbols.
        case YP_TOKEN_SYMBOL_BEGIN: {
            yp_lex_mode_t lex_mode = *parser->lex_modes.current;
            parser_lex(parser);

            return parse_symbol(parser, &lex_mode, YP_LEX_STATE_NONE);
        }
        default:
            yp_diagnostic_list_append(&parser->error_list, parser->current.start, parser->current.end, "Expected a bare word or symbol argument.");
            return (yp_node_t *) yp_missing_node_create(parser, parser->current.start, parser->current.end);
    }
}
// Parse an argument to alias which can either be a bare word, a symbol, an
// interpolated symbol or a global variable. If this is the first argument, then
// we need to set the lex state to YP_LEX_STATE_FNAME | YP_LEX_STATE_FITEM
// between the first and second arguments (so that the second argument is lexed
// as a method name as well).
static inline yp_node_t *
parse_alias_argument(yp_parser_t *parser, bool first) {
    switch (parser->current.type) {
        // Bare words — operator names, keywords, constants, and identifiers —
        // are wrapped in a symbol node with no opening/closing delimiters.
        case YP_CASE_OPERATOR:
        case YP_CASE_KEYWORD:
        case YP_TOKEN_CONSTANT:
        case YP_TOKEN_IDENTIFIER: {
            if (first) {
                lex_state_set(parser, YP_LEX_STATE_FNAME | YP_LEX_STATE_FITEM);
            }

            parser_lex(parser);

            yp_token_t opening = not_provided(parser);
            yp_token_t closing = not_provided(parser);

            return (yp_node_t *) yp_symbol_node_create_and_unescape(parser, &opening, &parser->previous, &closing, YP_UNESCAPE_ALL);
        }
        // An explicit symbol, possibly quoted/interpolated. The lex mode is
        // copied before lexing past the symbol beginning because parse_symbol
        // needs it to tell plain symbols apart from string symbols.
        case YP_TOKEN_SYMBOL_BEGIN: {
            yp_lex_mode_t lex_mode = *parser->lex_modes.current;
            parser_lex(parser);

            return parse_symbol(parser, &lex_mode, first ? YP_LEX_STATE_FNAME | YP_LEX_STATE_FITEM : YP_LEX_STATE_NONE);
        }
        // Global variables (and the special back/numbered references) may be
        // aliased directly: `alias $new $old`.
        case YP_TOKEN_BACK_REFERENCE:
            parser_lex(parser);
            return (yp_node_t *) yp_back_reference_read_node_create(parser, &parser->previous);
        case YP_TOKEN_NUMBERED_REFERENCE:
            parser_lex(parser);
            return (yp_node_t *) yp_numbered_reference_read_node_create(parser, &parser->previous);
        case YP_TOKEN_GLOBAL_VARIABLE:
            parser_lex(parser);
            return (yp_node_t *) yp_global_variable_read_node_create(parser, &parser->previous);
        default:
            yp_diagnostic_list_append(&parser->error_list, parser->current.start, parser->current.end, "Expected a bare word, symbol or global variable argument.");
            return (yp_node_t *) yp_missing_node_create(parser, parser->current.start, parser->current.end);
    }
}
// Parse an identifier (held in parser->previous) into either a local variable
// read or a vcall (a method call with no receiver, arguments, or parentheses).
// The identifier resolves to a local only when it is not followed by an
// opening parenthesis, does not end in `!` or `?`, and a local of that name
// exists in some enclosing scope.
static yp_node_t *
parse_vcall(yp_parser_t *parser) {
    bool callable_shape =
        (parser->current.type != YP_TOKEN_PARENTHESIS_LEFT) &&
        (parser->previous.end[-1] != '!') &&
        (parser->previous.end[-1] != '?');

    if (callable_shape) {
        int depth = yp_parser_local_depth(parser, &parser->previous);
        if (depth != -1) {
            return (yp_node_t *) yp_local_variable_read_node_create(parser, &parser->previous, (uint32_t) depth);
        }
    }

    return (yp_node_t *) yp_call_node_vcall_create(parser, &parser->previous);
}
// Parse the name of a method being defined (the token following `def`).
// Returns a not-provided token if the current token cannot serve as a method
// name; the caller is responsible for reporting that case.
static inline yp_token_t
parse_method_definition_name(yp_parser_t *parser) {
    switch (parser->current.type) {
        // Keywords, constants, and identifiers can all be used directly as
        // method names.
        case YP_CASE_KEYWORD:
        case YP_TOKEN_CONSTANT:
        case YP_TOKEN_IDENTIFIER:
            parser_lex(parser);
            return parser->previous;
        // Operator methods (def +, def [], ...) additionally move the lexer
        // into ENDFN state so the token after the name is lexed correctly.
        case YP_CASE_OPERATOR:
            lex_state_set(parser, YP_LEX_STATE_ENDFN);
            parser_lex(parser);
            return parser->previous;
        default:
            return not_provided(parser);
    }
}
// Calculate the common leading whitespace for each line in a heredoc. This is
// the amount that a squiggly heredoc (<<~) strips from the front of every
// line. Tabs advance to the next multiple of YP_TAB_WHITESPACE_SIZE, matching
// CRuby's TAB_WIDTH behavior. Returns -1 if no line contributed to the
// calculation (so the caller should not dedent at all).
static int
parse_heredoc_common_whitespace(yp_parser_t *parser, yp_node_list_t *nodes) {
    int common_whitespace = -1;

    for (size_t index = 0; index < nodes->size; index++) {
        yp_node_t *node = nodes->nodes[index];
        if (node->type != YP_NODE_STRING_NODE) continue;

        yp_location_t *content_loc = &((yp_string_node_t *) node)->content_loc;

        // If the previous node wasn't a string node, we don't want to trim
        // whitespace. This could happen after an interpolated expression or
        // variable: this content continues that line rather than starting a
        // new one.
        if (index == 0 || nodes->nodes[index - 1]->type == YP_NODE_STRING_NODE) {
            int cur_whitespace;
            const char *cur_char = content_loc->start;

            while (cur_char && cur_char < content_loc->end) {
                // Any empty newlines aren't included in the minimum whitespace
                // calculation.
                while (cur_char < content_loc->end && *cur_char == '\n') cur_char++;
                while (cur_char + 1 < content_loc->end && *cur_char == '\r' && cur_char[1] == '\n') cur_char += 2;

                if (cur_char == content_loc->end) break;

                cur_whitespace = 0;

                // Measure this line's leading whitespace. The bounds check must
                // come before the dereference so that we never read past the
                // content (previously the operands were reversed, which could
                // read one byte past content_loc->end).
                while (cur_char < content_loc->end && yp_char_is_inline_whitespace(*cur_char)) {
                    if (cur_char[0] == '\t') {
                        cur_whitespace = (cur_whitespace / YP_TAB_WHITESPACE_SIZE + 1) * YP_TAB_WHITESPACE_SIZE;
                    } else {
                        cur_whitespace++;
                    }
                    cur_char++;
                }

                // If we hit a newline, then we have encountered a line that contains
                // only whitespace, and it shouldn't be considered in the calculation of
                // common leading whitespace. Reaching the very end of the source is
                // treated the same way so we never dereference parser->end.
                if (cur_char == parser->end || *cur_char == '\n') {
                    cur_char++;
                    continue;
                }

                if (cur_whitespace < common_whitespace || common_whitespace == -1) {
                    common_whitespace = cur_whitespace;
                }

                // Skip ahead to the line after the next newline in the source
                // (or terminate if there isn't one).
                cur_char = next_newline(cur_char + 1, parser->end - (cur_char + 1));
                if (cur_char) cur_char++;
            }
        }
    }

    return common_whitespace;
}
// Take a heredoc node that is indented by a ~ and trim the leading whitespace.
// The trimming is done in place on each string part's owned, unescaped buffer
// by shifting the retained bytes backward and shrinking the recorded length.
static void
parse_heredoc_dedent(yp_parser_t *parser, yp_node_t *node, yp_heredoc_quote_t quote) {
    yp_node_list_t *nodes;

    // Backtick heredocs produce interpolated x-string nodes; all other quotes
    // produce interpolated string nodes. Both keep their parts in a node list.
    if (quote == YP_HEREDOC_QUOTE_BACKTICK) {
        nodes = &((yp_interpolated_x_string_node_t *) node)->parts;
    } else {
        nodes = &((yp_interpolated_string_node_t *) node)->parts;
    }

    // First, calculate how much common whitespace we need to trim. If there is
    // none or it's 0, then we can return early.
    int common_whitespace;
    if ((common_whitespace = parse_heredoc_common_whitespace(parser, nodes)) <= 0) return;

    // Iterate over all nodes, and trim whitespace accordingly.
    for (size_t index = 0; index < nodes->size; index++) {
        yp_node_t *node = nodes->nodes[index];
        if (node->type != YP_NODE_STRING_NODE) continue;

        // Get a reference to the string struct that is being held by the string
        // node. This is the value we're going to actual manipulate.
        yp_string_t *string = &((yp_string_node_t *) node)->unescaped;
        yp_string_ensure_owned(string);

        // Now get the bounds of the existing string. We'll use this as a
        // destination to move bytes into. We'll also use it for bounds checking
        // since we don't require that these strings be null terminated.
        size_t dest_length = string->as.owned.length;
        char *source_start = string->as.owned.source;

        const char *source_cursor = source_start;
        const char *source_end = source_cursor + dest_length;

        // We're going to move bytes backward in the string when we get leading
        // whitespace, so we'll maintain a pointer to the current position in the
        // string that we're writing to.
        char *dest_cursor = source_start;

        // This part starts a fresh line (and must therefore be dedented) only
        // when it is the first part or follows another plain string part — a
        // part following an interpolation continues that line instead.
        bool dedent_next = (index == 0) || (nodes->nodes[index - 1]->type == YP_NODE_STRING_NODE);

        while (source_cursor < source_end) {
            // If we need to dedent the next element within the heredoc or the next
            // line within the string node, then we'll do it here.
            if (dedent_next) {
                int trimmed_whitespace = 0;

                // While we haven't reached the amount of common whitespace that we need
                // to trim and we haven't reached the end of the string, we'll keep
                // trimming whitespace. Trimming in this context means skipping over
                // these bytes such that they aren't copied into the new string.
                while ((source_cursor < source_end) && yp_char_is_inline_whitespace(*source_cursor) && trimmed_whitespace < common_whitespace) {
                    if (*source_cursor == '\t') {
                        trimmed_whitespace = (trimmed_whitespace / YP_TAB_WHITESPACE_SIZE + 1) * YP_TAB_WHITESPACE_SIZE;
                        // A tab that would overshoot the common prefix is kept
                        // rather than partially trimmed.
                        if (trimmed_whitespace > common_whitespace) break;
                    } else {
                        trimmed_whitespace++;
                    }

                    source_cursor++;
                    dest_length--;
                }
            }

            // At this point we have dedented all that we need to, so we need to find
            // the next newline.
            const char *breakpoint = next_newline(source_cursor, source_end - source_cursor);

            if (breakpoint == NULL) {
                // If there isn't another newline, then we can just move the rest of the
                // string and break from the loop.
                memmove(dest_cursor, source_cursor, (size_t) (source_end - source_cursor));
                break;
            }

            // Otherwise, we need to move everything including the newline, and
            // then set the dedent_next flag to true.
            if (breakpoint < source_end) breakpoint++;
            memmove(dest_cursor, source_cursor, (size_t) (breakpoint - source_cursor));
            dest_cursor += (breakpoint - source_cursor);
            source_cursor = breakpoint;
            dedent_next = true;
        }

        // Record the possibly shorter length now that bytes have been trimmed.
        string->as.owned.length = dest_length;
    }
}
// Forward declaration: parse_pattern is defined later but needed by the
// pattern helpers below.
static yp_node_t *
parse_pattern(yp_parser_t *parser, bool top_pattern, const char *message);

// Accept any number of constants joined by :: delimiters, then — if a [ or (
// follows — parse the enclosed pattern and attach the constant path to it.
// `node` is the constant (or constant path) already parsed by the caller.
static yp_node_t *
parse_pattern_constant_path(yp_parser_t *parser, yp_node_t *node) {
    // Now, if there are any :: operators that follow, parse them as constant
    // path nodes.
    while (accept(parser, YP_TOKEN_COLON_COLON)) {
        yp_token_t delimiter = parser->previous;

        expect(parser, YP_TOKEN_CONSTANT, "Expected a constant after the :: operator.");
        yp_node_t *child = (yp_node_t *) yp_constant_read_node_create(parser, &parser->previous);
        node = (yp_node_t *)yp_constant_path_node_create(parser, node, &delimiter, child);
    }

    // If there is a [ or ( that follows, then this is part of a larger pattern
    // expression. We'll parse the inner pattern here, then modify the returned
    // inner pattern with our constant path attached.
    if (!match_any_type_p(parser, 2, YP_TOKEN_BRACKET_LEFT, YP_TOKEN_PARENTHESIS_LEFT)) {
        return node;
    }

    yp_token_t opening;
    yp_token_t closing;
    yp_node_t *inner = NULL;

    if (accept(parser, YP_TOKEN_BRACKET_LEFT)) {
        // Bracketed form: Foo[pattern].
        opening = parser->previous;
        accept(parser, YP_TOKEN_NEWLINE);

        if (!accept(parser, YP_TOKEN_BRACKET_RIGHT)) {
            inner = parse_pattern(parser, true, "Expected a pattern expression after the [ operator.");
            accept(parser, YP_TOKEN_NEWLINE);
            expect(parser, YP_TOKEN_BRACKET_RIGHT, "Expected a ] to close the pattern expression.");
        }

        closing = parser->previous;
    } else {
        // Parenthesized form: Foo(pattern).
        parser_lex(parser);
        opening = parser->previous;

        if (!accept(parser, YP_TOKEN_PARENTHESIS_RIGHT)) {
            inner = parse_pattern(parser, true, "Expected a pattern expression after the ( operator.");
            expect(parser, YP_TOKEN_PARENTHESIS_RIGHT, "Expected a ) to close the pattern expression.");
        }

        closing = parser->previous;
    }

    if (!inner) {
        // If there was no inner pattern, then we have something like Foo() or
        // Foo[]. In that case we'll create an array pattern with no requireds.
        return (yp_node_t *) yp_array_pattern_node_constant_create(parser, node, &opening, &closing);
    }

    // Now that we have the inner pattern, check to see if it's an array, find,
    // or hash pattern. If it is, then we'll attach our constant path to it if
    // it doesn't already have a constant. If it's not one of those node types
    // or it does have a constant, then we'll create an array pattern.
    switch (inner->type) {
        case YP_NODE_ARRAY_PATTERN_NODE: {
            yp_array_pattern_node_t *pattern_node = (yp_array_pattern_node_t *) inner;

            if (pattern_node->constant == NULL) {
                pattern_node->base.location.start = node->location.start;
                pattern_node->base.location.end = closing.end;

                pattern_node->constant = node;
                pattern_node->opening_loc = (yp_location_t) { .start = opening.start, .end = opening.end };
                pattern_node->closing_loc = (yp_location_t) { .start = closing.start, .end = closing.end };

                return (yp_node_t *) pattern_node;
            }

            break;
        }
        case YP_NODE_FIND_PATTERN_NODE: {
            yp_find_pattern_node_t *pattern_node = (yp_find_pattern_node_t *) inner;

            if (pattern_node->constant == NULL) {
                pattern_node->base.location.start = node->location.start;
                pattern_node->base.location.end = closing.end;

                pattern_node->constant = node;
                pattern_node->opening_loc = (yp_location_t) { .start = opening.start, .end = opening.end };
                pattern_node->closing_loc = (yp_location_t) { .start = closing.start, .end = closing.end };

                return (yp_node_t *) pattern_node;
            }

            break;
        }
        case YP_NODE_HASH_PATTERN_NODE: {
            yp_hash_pattern_node_t *pattern_node = (yp_hash_pattern_node_t *) inner;

            if (pattern_node->constant == NULL) {
                pattern_node->base.location.start = node->location.start;
                pattern_node->base.location.end = closing.end;

                pattern_node->constant = node;
                pattern_node->opening_loc = (yp_location_t) { .start = opening.start, .end = opening.end };
                pattern_node->closing_loc = (yp_location_t) { .start = closing.start, .end = closing.end };

                return (yp_node_t *) pattern_node;
            }

            break;
        }
        default:
            break;
    }

    // If we got here, then we didn't return one of the inner patterns by
    // attaching its constant. In this case we'll create an array pattern and
    // attach our constant to it.
    yp_array_pattern_node_t *pattern_node = yp_array_pattern_node_constant_create(parser, node, &opening, &closing);
    yp_array_pattern_node_requireds_append(pattern_node, inner);
    return (yp_node_t *) pattern_node;
}
// Parse a rest pattern (`*` or `*name`), assuming the star token has already
// been consumed by the caller.
static yp_splat_node_t *
parse_pattern_rest(yp_parser_t *parser) {
    assert(parser->previous.type == YP_TOKEN_USTAR);
    yp_token_t star_operator = parser->previous;
    yp_node_t *name_target = NULL;

    // A rest pattern may optionally capture into a named local variable
    // (`*rest`). When a name is present it is registered in the local table,
    // since matching this pattern introduces that local.
    if (accept(parser, YP_TOKEN_IDENTIFIER)) {
        yp_token_t name_token = parser->previous;

        yp_parser_local_add_token(parser, &name_token);
        name_target = (yp_node_t *) yp_local_variable_target_node_create(parser, &name_token);
    }

    // Finally we can return the created node.
    return yp_splat_node_create(parser, &star_operator, name_target);
}
// Parse a keyword rest node within a hash pattern: `**nil`, `**name`, or a
// bare `**`. The ** token itself has not yet been consumed.
static yp_node_t *
parse_pattern_keyword_rest(yp_parser_t *parser) {
    assert(parser->current.type == YP_TOKEN_USTAR_STAR);
    parser_lex(parser);

    yp_token_t double_star = parser->previous;

    // `**nil` explicitly forbids any further keywords in the pattern.
    if (accept(parser, YP_TOKEN_KEYWORD_NIL)) {
        return (yp_node_t *) yp_no_keywords_parameter_node_create(parser, &double_star, &parser->previous);
    }

    // `**name` captures the remaining keywords into a named local; a bare `**`
    // captures them anonymously (no target node).
    yp_node_t *target = NULL;
    if (accept(parser, YP_TOKEN_IDENTIFIER)) {
        yp_parser_local_add_token(parser, &parser->previous);
        target = (yp_node_t *) yp_local_variable_target_node_create(parser, &parser->previous);
    }

    return (yp_node_t *) yp_assoc_splat_node_create(parser, target, &double_star);
}
// Parse a hash pattern. `first_assoc` has already been parsed by the caller:
// either an assoc node holding a label key with no value yet, or a keyword
// rest node (`**`, `**name`, `**nil`).
static yp_hash_pattern_node_t *
parse_pattern_hash(yp_parser_t *parser, yp_node_t *first_assoc) {
    if (first_assoc->type == YP_NODE_ASSOC_NODE) {
        if (!match_any_type_p(parser, 7, YP_TOKEN_COMMA, YP_TOKEN_KEYWORD_THEN, YP_TOKEN_BRACE_RIGHT, YP_TOKEN_BRACKET_RIGHT, YP_TOKEN_PARENTHESIS_RIGHT, YP_TOKEN_NEWLINE, YP_TOKEN_SEMICOLON)) {
            // Here we have a value for the first assoc in the list, so we will parse it
            // now and update the first assoc.
            yp_node_t *value = parse_pattern(parser, false, "Expected a pattern expression after the key.");

            yp_assoc_node_t *assoc = (yp_assoc_node_t *) first_assoc;
            assoc->base.location.end = value->location.end;
            assoc->value = value;
        } else {
            // A valueless label (`{ a: }`) binds a local variable with the
            // label's name, so register it in the local table.
            yp_node_t *key = ((yp_assoc_node_t *) first_assoc)->key;

            if (key->type == YP_NODE_SYMBOL_NODE) {
                yp_location_t *value_loc = &((yp_symbol_node_t *) key)->value_loc;
                yp_parser_local_add_location(parser, value_loc->start, value_loc->end);
            }
        }
    }

    yp_node_list_t assocs = YP_EMPTY_NODE_LIST;
    yp_node_list_append(&assocs, first_assoc);

    // If there are any other assocs, then we'll parse them now.
    while (accept(parser, YP_TOKEN_COMMA)) {
        // Here we need to break to support trailing commas.
        if (match_any_type_p(parser, 6, YP_TOKEN_KEYWORD_THEN, YP_TOKEN_BRACE_RIGHT, YP_TOKEN_BRACKET_RIGHT, YP_TOKEN_PARENTHESIS_RIGHT, YP_TOKEN_NEWLINE, YP_TOKEN_SEMICOLON)) {
            break;
        }

        yp_node_t *assoc;

        if (match_type_p(parser, YP_TOKEN_USTAR_STAR)) {
            assoc = parse_pattern_keyword_rest(parser);
        } else {
            expect(parser, YP_TOKEN_LABEL, "Expected a label after the `,'.");
            yp_node_t *key = (yp_node_t *) yp_symbol_node_label_create(parser, &parser->previous);
            yp_node_t *value = NULL;

            if (!match_any_type_p(parser, 7, YP_TOKEN_COMMA, YP_TOKEN_KEYWORD_THEN, YP_TOKEN_BRACE_RIGHT, YP_TOKEN_BRACKET_RIGHT, YP_TOKEN_PARENTHESIS_RIGHT, YP_TOKEN_NEWLINE, YP_TOKEN_SEMICOLON)) {
                value = parse_pattern(parser, false, "Expected a pattern expression after the key.");
            } else {
                // As above, a valueless label binds a local of the same name.
                yp_location_t *value_loc = &((yp_symbol_node_t *) key)->value_loc;
                yp_parser_local_add_location(parser, value_loc->start, value_loc->end);
            }

            yp_token_t operator = not_provided(parser);
            assoc = (yp_node_t *) yp_assoc_node_create(parser, key, &operator, value);
        }

        yp_node_list_append(&assocs, assoc);
    }

    // Only the temporary backing array is freed here; the node pointers are
    // presumably copied into the new hash pattern node by the create call
    // (NOTE(review): confirm yp_hash_pattern_node_node_list_create copies).
    yp_hash_pattern_node_t *node = yp_hash_pattern_node_node_list_create(parser, &assocs);
    free(assocs.nodes);
    return node;
}
// Parse a pattern expression primitive.
static yp_node_t *
parse_pattern_primitive(yp_parser_t *parser, const char *message) {
switch (parser->current.type) {
case YP_TOKEN_IDENTIFIER: {
parser_lex(parser);
yp_parser_local_add_token(parser, &parser->previous);
return (yp_node_t *) yp_local_variable_target_node_create(parser, &parser->previous);
}
case YP_TOKEN_BRACKET_LEFT_ARRAY: {
yp_token_t opening = parser->current;
parser_lex(parser);
if (accept(parser, YP_TOKEN_BRACKET_RIGHT)) {
// If we have an empty array pattern, then we'll just return a new
// array pattern node.
return (yp_node_t *)yp_array_pattern_node_empty_create(parser, &opening, &parser->previous);
}
// Otherwise, we'll parse the inner pattern, then deal with it depending
// on the type it returns.
yp_node_t *inner = parse_pattern(parser, true, "Expected a pattern expression after the [ operator.");
accept(parser, YP_TOKEN_NEWLINE);
expect(parser, YP_TOKEN_BRACKET_RIGHT, "Expected a ] to close the pattern expression.");
yp_token_t closing = parser->previous;
switch (inner->type) {
case YP_NODE_ARRAY_PATTERN_NODE: {
yp_array_pattern_node_t *pattern_node = (yp_array_pattern_node_t *) inner;
if (pattern_node->opening_loc.start == NULL) {
pattern_node->base.location.start = opening.start;
pattern_node->base.location.end = closing.end;
pattern_node->opening_loc = (yp_location_t) { .start = opening.start, .end = opening.end };
pattern_node->closing_loc = (yp_location_t) { .start = closing.start, .end = closing.end };
return (yp_node_t *) pattern_node;
}
break;
}
case YP_NODE_FIND_PATTERN_NODE: {
yp_find_pattern_node_t *pattern_node = (yp_find_pattern_node_t *) inner;
if (pattern_node->opening_loc.start == NULL) {
pattern_node->base.location.start = opening.start;
pattern_node->base.location.end = closing.end;
pattern_node->opening_loc = (yp_location_t) { .start = opening.start, .end = opening.end };
pattern_node->closing_loc = (yp_location_t) { .start = closing.start, .end = closing.end };
return (yp_node_t *) pattern_node;
}
break;
}
default:
break;
}
yp_array_pattern_node_t *node = yp_array_pattern_node_empty_create(parser, &opening, &closing);
yp_array_pattern_node_requireds_append(node, inner);
return (yp_node_t *) node;
}
case YP_TOKEN_BRACE_LEFT: {
bool previous_pattern_matching_newlines = parser->pattern_matching_newlines;
parser->pattern_matching_newlines = false;
yp_hash_pattern_node_t *node;
yp_token_t opening = parser->current;
parser_lex(parser);
if (accept(parser, YP_TOKEN_BRACE_RIGHT)) {
// If we have an empty hash pattern, then we'll just return a new hash
// pattern node.
node = yp_hash_pattern_node_empty_create(parser, &opening, &parser->previous);
} else {
yp_node_t *key;
switch (parser->current.type) {
case YP_TOKEN_LABEL:
parser_lex(parser);
key = (yp_node_t *) yp_symbol_node_label_create(parser, &parser->previous);
break;
case YP_TOKEN_USTAR_STAR:
key = parse_pattern_keyword_rest(parser);
break;
case YP_TOKEN_STRING_BEGIN:
key = parse_expression(parser, YP_BINDING_POWER_MAX, "Expected a key in the hash pattern.");
if (!yp_symbol_node_label_p(key)) {
yp_diagnostic_list_append(&parser->error_list, key->location.start, key->location.end, "Expected a label as the key in the hash pattern.");
}
break;
default:
parser_lex(parser);
yp_diagnostic_list_append(&parser->error_list, parser->previous.start, parser->previous.end, "Expected a key in the hash pattern.");
key = (yp_node_t *) yp_missing_node_create(parser, parser->previous.start, parser->previous.end);
break;
}
yp_token_t operator = not_provided(parser);
node = parse_pattern_hash(parser, (yp_node_t *) yp_assoc_node_create(parser, key, &operator, NULL));
accept(parser, YP_TOKEN_NEWLINE);
expect(parser, YP_TOKEN_BRACE_RIGHT, "Expected a } to close the pattern expression.");
yp_token_t closing = parser->previous;
node->base.location.start = opening.start;
node->base.location.end = closing.end;
node->opening_loc = (yp_location_t) { .start = opening.start, .end = opening.end };
node->closing_loc = (yp_location_t) { .start = closing.start, .end = closing.end };
}
parser->pattern_matching_newlines = previous_pattern_matching_newlines;
return (yp_node_t *) node;
}
case YP_TOKEN_UDOT_DOT:
case YP_TOKEN_UDOT_DOT_DOT: {
yp_token_t operator = parser->current;
parser_lex(parser);
// Since we have a unary range operator, we need to parse the subsequent
// expression as the right side of the range.
switch (parser->current.type) {
case YP_CASE_PRIMITIVE: {
yp_node_t *right = parse_expression(parser, YP_BINDING_POWER_MAX, "Expected an expression after the range operator.");
return (yp_node_t *) yp_range_node_create(parser, NULL, &operator, right);
}
default: {
yp_diagnostic_list_append(&parser->error_list, operator.start, operator.end, "Expected an expression after the range operator.");
yp_node_t *right = (yp_node_t *) yp_missing_node_create(parser, operator.start, operator.end);
return (yp_node_t *) yp_range_node_create(parser, NULL, &operator, right);
}
}
}
case YP_CASE_PRIMITIVE: {
yp_node_t *node = parse_expression(parser, YP_BINDING_POWER_MAX, message);
// Now that we have a primitive, we need to check if it's part of a range.
if (accept_any(parser, 2, YP_TOKEN_DOT_DOT, YP_TOKEN_DOT_DOT_DOT)) {
yp_token_t operator = parser->previous;
// Now that we have the operator, we need to check if this is followed
// by another expression. If it is, then we will create a full range
// node. Otherwise, we'll create an endless range.
switch (parser->current.type) {
case YP_CASE_PRIMITIVE: {
yp_node_t *right = parse_expression(parser, YP_BINDING_POWER_MAX, "Expected an expression after the range operator.");
return (yp_node_t *) yp_range_node_create(parser, node, &operator, right);
}
default:
return (yp_node_t *) yp_range_node_create(parser, node, &operator, NULL);
}
}
return node;
}
case YP_TOKEN_CARET: {
parser_lex(parser);
yp_token_t operator = parser->previous;
// At this point we have a pin operator. We need to check the subsequent
// expression to determine if it's a variable or an expression.
switch (parser->current.type) {
case YP_TOKEN_IDENTIFIER: {
parser_lex(parser);
yp_node_t *variable = (yp_node_t *) yp_local_variable_read_node_create(parser, &parser->previous, 0);
return (yp_node_t *) yp_pinned_variable_node_create(parser, &operator, variable);
}
case YP_TOKEN_INSTANCE_VARIABLE: {
parser_lex(parser);
yp_node_t *variable = (yp_node_t *) yp_instance_variable_read_node_create(parser, &parser->previous);
return (yp_node_t *) yp_pinned_variable_node_create(parser, &operator, variable);
}
case YP_TOKEN_CLASS_VARIABLE: {
parser_lex(parser);
yp_node_t *variable = (yp_node_t *) yp_class_variable_read_node_create(parser, &parser->previous);
return (yp_node_t *) yp_pinned_variable_node_create(parser, &operator, variable);
}
case YP_TOKEN_GLOBAL_VARIABLE: {
parser_lex(parser);
yp_node_t *variable = (yp_node_t *) yp_global_variable_read_node_create(parser, &parser->previous);
return (yp_node_t *) yp_pinned_variable_node_create(parser, &operator, variable);
}
case YP_TOKEN_NUMBERED_REFERENCE: {
parser_lex(parser);
yp_node_t *variable = (yp_node_t *) yp_numbered_reference_read_node_create(parser, &parser->previous);
return (yp_node_t *) yp_pinned_variable_node_create(parser, &operator, variable);
}
case YP_TOKEN_BACK_REFERENCE: {
parser_lex(parser);
yp_node_t *variable = (yp_node_t *) yp_back_reference_read_node_create(parser, &parser->previous);
return (yp_node_t *) yp_pinned_variable_node_create(parser, &operator, variable);
}
case YP_TOKEN_PARENTHESIS_LEFT: {
bool previous_pattern_matching_newlines = parser->pattern_matching_newlines;
parser->pattern_matching_newlines = false;
yp_token_t lparen = parser->current;
parser_lex(parser);
yp_node_t *expression = parse_expression(parser, YP_BINDING_POWER_STATEMENT, "Expected an expression after the pin operator.");
parser->pattern_matching_newlines = previous_pattern_matching_newlines;
accept(parser, YP_TOKEN_NEWLINE);
expect(parser, YP_TOKEN_PARENTHESIS_RIGHT, "Expected a closing parenthesis after the expression.");
return (yp_node_t *) yp_pinned_expression_node_create(parser, expression, &operator, &lparen, &parser->previous);
}
default: {
// If we get here, then we have a pin operator followed by something
// not understood. We'll create a missing node and return that.
yp_diagnostic_list_append(&parser->error_list, operator.start, operator.end, "Expected a variable after the pin operator.");
yp_node_t *variable = (yp_node_t *) yp_missing_node_create(parser, operator.start, operator.end);
return (yp_node_t *) yp_pinned_variable_node_create(parser, &operator, variable);
}
}
}
case YP_TOKEN_UCOLON_COLON: {
yp_token_t delimiter = parser->current;
parser_lex(parser);
expect(parser, YP_TOKEN_CONSTANT, "Expected a constant after the :: operator.");
yp_node_t *child = (yp_node_t *) yp_constant_read_node_create(parser, &parser->previous);
yp_constant_path_node_t *node = yp_constant_path_node_create(parser, NULL, &delimiter, child);
return parse_pattern_constant_path(parser, (yp_node_t *)node);
}
case YP_TOKEN_CONSTANT: {
yp_token_t constant = parser->current;
parser_lex(parser);
yp_node_t *node = (yp_node_t *) yp_constant_read_node_create(parser, &constant);
return parse_pattern_constant_path(parser, node);
}
default:
yp_diagnostic_list_append(&parser->error_list, parser->current.start, parser->current.end, message);
return (yp_node_t *) yp_missing_node_create(parser, parser->current.start, parser->current.end);
}
}
// Parse any number of primitives joined by alternation and ended optionally by
// assignment.
static yp_node_t *
parse_pattern_primitives(yp_parser_t *parser, const char *message) {
    yp_node_t *node = NULL;

    do {
        // When we are folding in an alternation branch, parser->previous is
        // the `|` token that separated it from the pattern parsed so far.
        yp_token_t operator = parser->previous;

        switch (parser->current.type) {
            case YP_TOKEN_IDENTIFIER:
            case YP_TOKEN_BRACKET_LEFT_ARRAY:
            case YP_TOKEN_BRACE_LEFT:
            case YP_TOKEN_CARET:
            case YP_TOKEN_CONSTANT:
            case YP_TOKEN_UCOLON_COLON:
            case YP_TOKEN_UDOT_DOT:
            case YP_TOKEN_UDOT_DOT_DOT:
            case YP_CASE_PRIMITIVE: {
                if (node == NULL) {
                    // First branch: parse the primitive directly.
                    node = parse_pattern_primitive(parser, message);
                } else {
                    // Subsequent branch: parse it and fold it into an
                    // alternation pattern with everything seen so far.
                    yp_node_t *branch = parse_pattern_primitive(parser, "Expected to be able to parse a pattern after `|'.");
                    node = (yp_node_t *) yp_alternation_pattern_node_create(parser, node, branch, &operator);
                }

                break;
            }
            case YP_TOKEN_PARENTHESIS_LEFT: {
                parser_lex(parser);
                if (node != NULL) {
                    yp_node_destroy(parser, node);
                }

                node = parse_pattern(parser, false, "Expected a pattern after the opening parenthesis.");
                expect(parser, YP_TOKEN_PARENTHESIS_RIGHT, "Expected a closing parenthesis after the pattern.");
                break;
            }
            default: {
                // Nothing recognizable follows: report the error and stand in
                // a missing node so that parsing can continue.
                yp_diagnostic_list_append(&parser->error_list, parser->current.start, parser->current.end, message);
                yp_node_t *branch = (yp_node_t *) yp_missing_node_create(parser, parser->current.start, parser->current.end);

                if (node == NULL) {
                    node = branch;
                } else {
                    node = (yp_node_t *) yp_alternation_pattern_node_create(parser, node, branch, &operator);
                }

                break;
            }
        }
    } while (accept(parser, YP_TOKEN_PIPE));

    // A trailing `=>` captures the whole pattern into a local variable, so
    // wrap everything parsed so far in a capture pattern node. This can
    // appear any number of times.
    while (accept(parser, YP_TOKEN_EQUAL_GREATER)) {
        yp_token_t operator = parser->previous;
        expect(parser, YP_TOKEN_IDENTIFIER, "Expected an identifier after the `=>' operator.");

        yp_token_t name = parser->previous;
        yp_parser_local_add_token(parser, &name);

        yp_node_t *target = (yp_node_t *) yp_local_variable_target_node_create(parser, &name);
        node = (yp_node_t *) yp_capture_pattern_node_create(parser, node, target, &operator);
    }

    return node;
}
// Parse a pattern matching expression.
static yp_node_t *
parse_pattern(yp_parser_t *parser, bool top_pattern, const char *message) {
    yp_node_t *pattern = NULL;
    bool leading_splat = false;
    bool trailing_splat = false;

    switch (parser->current.type) {
        case YP_TOKEN_LABEL: {
            // A label at the start means this is a hash pattern whose first
            // key carries no explicit value.
            parser_lex(parser);
            yp_node_t *key = (yp_node_t *) yp_symbol_node_label_create(parser, &parser->previous);
            yp_token_t operator = not_provided(parser);
            return (yp_node_t *) parse_pattern_hash(parser, (yp_node_t *) yp_assoc_node_create(parser, key, &operator, NULL));
        }
        case YP_TOKEN_USTAR_STAR: {
            // A `**` at the start means this is a hash pattern beginning with
            // a keyword rest.
            pattern = parse_pattern_keyword_rest(parser);
            return (yp_node_t *) parse_pattern_hash(parser, pattern);
        }
        case YP_TOKEN_USTAR: {
            // A leading `*` is only a rest pattern at the top level of the
            // pattern; otherwise it is parsed like any other primitive.
            if (top_pattern) {
                parser_lex(parser);
                pattern = (yp_node_t *) parse_pattern_rest(parser);
                leading_splat = true;
                break;
            }
        }
        /* fallthrough */
        default:
            pattern = parse_pattern_primitives(parser, message);
            break;
    }

    // If we got a dynamic label symbol, then we need to treat it like the
    // beginning of a hash pattern.
    if (yp_symbol_node_label_p(pattern)) {
        yp_token_t operator = not_provided(parser);
        return (yp_node_t *) parse_pattern_hash(parser, (yp_node_t *) yp_assoc_node_create(parser, pattern, &operator, NULL));
    }

    if (top_pattern && match_type_p(parser, YP_TOKEN_COMMA)) {
        // A comma at the top level means this is either an array pattern or a
        // find pattern. Collect every comma-separated pattern into one list
        // first, then decide below which kind of node to build.
        yp_node_list_t patterns = YP_EMPTY_NODE_LIST;
        yp_node_list_append(&patterns, pattern);

        while (accept(parser, YP_TOKEN_COMMA)) {
            // A terminator directly after the comma means it was a trailing
            // comma, so stop collecting.
            if (match_any_type_p(parser, 5, YP_TOKEN_KEYWORD_THEN, YP_TOKEN_BRACE_RIGHT, YP_TOKEN_BRACKET_RIGHT, YP_TOKEN_NEWLINE, YP_TOKEN_SEMICOLON)) {
                break;
            }

            if (accept(parser, YP_TOKEN_USTAR)) {
                pattern = (yp_node_t *) parse_pattern_rest(parser);

                // Only one non-leading rest pattern is allowed; flag any
                // additional ones as errors but keep parsing.
                if (trailing_splat) {
                    yp_diagnostic_list_append(&parser->error_list, parser->previous.start, parser->previous.end, "Unexpected rest pattern.");
                }

                trailing_splat = true;
            } else {
                pattern = parse_pattern_primitives(parser, "Expected a pattern after the comma.");
            }

            yp_node_list_append(&patterns, pattern);
        }

        // When both the first and the last collected patterns are rest
        // patterns this is a find pattern — regardless of how many rest
        // patterns appear in between, since those were already flagged as
        // errors above. Otherwise it is an array pattern.
        if (patterns.nodes[0]->type == YP_NODE_SPLAT_NODE && patterns.nodes[patterns.size - 1]->type == YP_NODE_SPLAT_NODE) {
            pattern = (yp_node_t *) yp_find_pattern_node_create(parser, &patterns);
        } else {
            pattern = (yp_node_t *) yp_array_pattern_node_node_list_create(parser, &patterns);
        }

        free(patterns.nodes);
    } else if (leading_splat) {
        // A lone leading rest pattern with no following comma is still an
        // array pattern, so wrap it accordingly.
        pattern = (yp_node_t *) yp_array_pattern_node_rest_create(parser, pattern);
    }

    return pattern;
}
// Parse an expression that begins with the previous node that we just lexed.
static inline yp_node_t *
parse_expression_prefix(yp_parser_t *parser, yp_binding_power_t binding_power) {
switch (parser->current.type) {
case YP_TOKEN_BRACKET_LEFT_ARRAY: {
parser_lex(parser);
yp_array_node_t *array = yp_array_node_create(parser, &parser->previous);
yp_accepts_block_stack_push(parser, true);
bool parsed_bare_hash = false;
while (!match_any_type_p(parser, 2, YP_TOKEN_BRACKET_RIGHT, YP_TOKEN_EOF)) {
// Handle the case where we don't have a comma and we have a newline followed by a right bracket.
if (accept(parser, YP_TOKEN_NEWLINE) && match_type_p(parser, YP_TOKEN_BRACKET_RIGHT)) {
break;
}
if (yp_array_node_size(array) != 0) {
expect(parser, YP_TOKEN_COMMA, "Expected a separator for the elements in an array.");
}
// If we have a right bracket immediately following a comma, this is
// allowed since it's a trailing comma. In this case we can break out of
// the loop.
if (match_type_p(parser, YP_TOKEN_BRACKET_RIGHT)) break;
yp_node_t *element;
if (accept(parser, YP_TOKEN_USTAR)) {
yp_token_t operator = parser->previous;
yp_node_t *expression = parse_expression(parser, YP_BINDING_POWER_DEFINED, "Expected an expression after '*' in the array.");
element = (yp_node_t *) yp_splat_node_create(parser, &operator, expression);
} else if (match_any_type_p(parser, 2, YP_TOKEN_LABEL, YP_TOKEN_USTAR_STAR)) {
if (parsed_bare_hash) {
yp_diagnostic_list_append(&parser->error_list, parser->current.start, parser->current.end, "Unexpected bare hash.");
}
yp_keyword_hash_node_t *hash = yp_keyword_hash_node_create(parser);
element = (yp_node_t *)hash;
if (!match_any_type_p(parser, 8, YP_TOKEN_EOF, YP_TOKEN_NEWLINE, YP_TOKEN_SEMICOLON, YP_TOKEN_EOF, YP_TOKEN_BRACE_RIGHT, YP_TOKEN_BRACKET_RIGHT, YP_TOKEN_KEYWORD_DO, YP_TOKEN_PARENTHESIS_RIGHT)) {
parse_assocs(parser, (yp_node_t *) hash);
}
parsed_bare_hash = true;
} else {
element = parse_expression(parser, YP_BINDING_POWER_DEFINED, "Expected an element for the array.");
if (yp_symbol_node_label_p(element) || accept(parser, YP_TOKEN_EQUAL_GREATER)) {
if (parsed_bare_hash) {
yp_diagnostic_list_append(&parser->error_list, parser->previous.start, parser->previous.end, "Unexpected bare hash.");
}
yp_keyword_hash_node_t *hash = yp_keyword_hash_node_create(parser);
yp_token_t operator;
if (parser->previous.type == YP_TOKEN_EQUAL_GREATER) {
operator = parser->previous;
} else {
operator = not_provided(parser);
}
yp_node_t *value = parse_expression(parser, YP_BINDING_POWER_DEFINED, "Expected a value in the hash literal.");
yp_node_t *assoc = (yp_node_t *) yp_assoc_node_create(parser, element, &operator, value);
yp_keyword_hash_node_elements_append(hash, assoc);
element = (yp_node_t *)hash;
if (accept(parser, YP_TOKEN_COMMA) && !match_type_p(parser, YP_TOKEN_BRACKET_RIGHT)) {
parse_assocs(parser, (yp_node_t *) hash);
}
parsed_bare_hash = true;
}
}
yp_array_node_elements_append(array, element);
if (element->type == YP_NODE_MISSING_NODE) break;
}
accept(parser, YP_TOKEN_NEWLINE);
expect(parser, YP_TOKEN_BRACKET_RIGHT, "Expected a closing bracket for the array.");
yp_array_node_close_set(array, &parser->previous);
yp_accepts_block_stack_pop(parser);
return (yp_node_t *) array;
}
case YP_TOKEN_PARENTHESIS_LEFT:
case YP_TOKEN_PARENTHESIS_LEFT_PARENTHESES: {
parser_lex(parser);
yp_token_t opening = parser->previous;
while (accept_any(parser, 2, YP_TOKEN_SEMICOLON, YP_TOKEN_NEWLINE));
// If this is the end of the file or we match a right parenthesis, then
// we have an empty parentheses node, and we can immediately return.
if (match_any_type_p(parser, 2, YP_TOKEN_PARENTHESIS_RIGHT, YP_TOKEN_EOF)) {
expect(parser, YP_TOKEN_PARENTHESIS_RIGHT, "Expected a closing parenthesis.");
return (yp_node_t *) yp_parentheses_node_create(parser, &opening, NULL, &parser->previous);
}
// Otherwise, we're going to parse the first statement in the list of
// statements within the parentheses.
yp_accepts_block_stack_push(parser, true);
yp_node_t *statement = parse_expression(parser, YP_BINDING_POWER_STATEMENT, "Expected to be able to parse an expression.");
while (accept_any(parser, 2, YP_TOKEN_NEWLINE, YP_TOKEN_SEMICOLON));
// If we hit a right parenthesis, then we're done parsing the parentheses
// node, and we can check which kind of node we should return.
if (accept(parser, YP_TOKEN_PARENTHESIS_RIGHT)) {
yp_accepts_block_stack_pop(parser);
// If we have a single statement and are ending on a right parenthesis,
// then we need to check if this is possibly a multiple assignment node.
if (binding_power == YP_BINDING_POWER_STATEMENT && statement->type == YP_NODE_MULTI_WRITE_NODE) {
yp_multi_write_node_t *multi_statement = (yp_multi_write_node_t *) statement;
if (multi_statement->value == NULL) {
yp_location_t lparen_loc = { .start = opening.start, .end = opening.end };
yp_location_t rparen_loc = { .start = parser->previous.start, .end = parser->previous.end };
yp_multi_write_node_t *multi_write;
if (multi_statement->lparen_loc.start == NULL) {
multi_write = (yp_multi_write_node_t *) statement;
multi_write->lparen_loc = lparen_loc;
multi_write->rparen_loc = rparen_loc;
} else {
yp_token_t operator = not_provided(parser);
multi_write = yp_multi_write_node_create(parser, &operator, NULL, &lparen_loc, &rparen_loc);
yp_multi_write_node_targets_append(multi_write, statement);
}
return parse_targets(parser, (yp_node_t *) multi_write, YP_BINDING_POWER_INDEX);
}
}
// If we have a single statement and are ending on a right parenthesis
// and we didn't return a multiple assignment node, then we can return a
// regular parentheses node now.
yp_statements_node_t *statements = yp_statements_node_create(parser);
yp_statements_node_body_append(statements, statement);
return (yp_node_t *) yp_parentheses_node_create(parser, &opening, (yp_node_t *) statements, &parser->previous);
}
// If we have more than one statement in the set of parentheses, then we
// are going to parse all of them as a list of statements. We'll do that
// here.
context_push(parser, YP_CONTEXT_PARENS);
yp_statements_node_t *statements = yp_statements_node_create(parser);
yp_statements_node_body_append(statements, statement);
while (!match_type_p(parser, YP_TOKEN_PARENTHESIS_RIGHT)) {
// Ignore semicolon without statements before them
if (accept_any(parser, 2, YP_TOKEN_SEMICOLON, YP_TOKEN_NEWLINE)) continue;
yp_node_t *node = parse_expression(parser, YP_BINDING_POWER_STATEMENT, "Expected to be able to parse an expression.");
yp_statements_node_body_append(statements, node);
// If we're recovering from a syntax error, then we need to stop parsing the
// statements now.
if (parser->recovering) {
// If this is the level of context where the recovery has happened, then
// we can mark the parser as done recovering.
if (match_type_p(parser, YP_TOKEN_PARENTHESIS_RIGHT)) parser->recovering = false;
break;
}
if (!accept_any(parser, 2, YP_TOKEN_NEWLINE, YP_TOKEN_SEMICOLON)) break;
}
context_pop(parser);
yp_accepts_block_stack_pop(parser);
expect(parser, YP_TOKEN_PARENTHESIS_RIGHT, "Expected a closing parenthesis.");
return (yp_node_t *) yp_parentheses_node_create(parser, &opening, (yp_node_t *) statements, &parser->previous);
}
case YP_TOKEN_BRACE_LEFT: {
yp_accepts_block_stack_push(parser, true);
parser_lex(parser);
yp_hash_node_t *node = yp_hash_node_create(parser, &parser->previous);
if (!match_any_type_p(parser, 2, YP_TOKEN_BRACE_RIGHT, YP_TOKEN_EOF)) {
parse_assocs(parser, (yp_node_t *) node);
accept(parser, YP_TOKEN_NEWLINE);
}
yp_accepts_block_stack_pop(parser);
expect(parser, YP_TOKEN_BRACE_RIGHT, "Expected a closing delimiter for a hash literal.");
yp_hash_node_closing_loc_set(node, &parser->previous);
return (yp_node_t *) node;
}
case YP_TOKEN_CHARACTER_LITERAL: {
parser_lex(parser);
yp_token_t opening = parser->previous;
opening.type = YP_TOKEN_STRING_BEGIN;
opening.end = opening.start + 1;
yp_token_t content = parser->previous;
content.type = YP_TOKEN_STRING_CONTENT;
content.start = content.start + 1;
yp_token_t closing = not_provided(parser);
return (yp_node_t *) yp_string_node_create_and_unescape(parser, &opening, &content, &closing, YP_UNESCAPE_ALL);
}
case YP_TOKEN_CLASS_VARIABLE: {
parser_lex(parser);
yp_node_t *node = (yp_node_t *) yp_class_variable_read_node_create(parser, &parser->previous);
if (binding_power == YP_BINDING_POWER_STATEMENT && match_type_p(parser, YP_TOKEN_COMMA)) {
node = parse_targets(parser, node, YP_BINDING_POWER_INDEX);
}
return node;
}
case YP_TOKEN_CONSTANT: {
parser_lex(parser);
yp_token_t constant = parser->previous;
// If a constant is immediately followed by parentheses, then this is in
// fact a method call, not a constant read.
if (
match_type_p(parser, YP_TOKEN_PARENTHESIS_LEFT) ||
(binding_power <= YP_BINDING_POWER_ASSIGNMENT && (token_begins_expression_p(parser->current.type) || match_any_type_p(parser, 2, YP_TOKEN_USTAR, YP_TOKEN_USTAR_STAR))) ||
(yp_accepts_block_stack_p(parser) && match_any_type_p(parser, 2, YP_TOKEN_KEYWORD_DO, YP_TOKEN_BRACE_LEFT))
) {
yp_arguments_t arguments = YP_EMPTY_ARGUMENTS;
parse_arguments_list(parser, &arguments, true);
return (yp_node_t *) yp_call_node_fcall_create(parser, &constant, &arguments);
}
yp_node_t *node = (yp_node_t *) yp_constant_read_node_create(parser, &parser->previous);
if ((binding_power == YP_BINDING_POWER_STATEMENT) && match_type_p(parser, YP_TOKEN_COMMA)) {
// If we get here, then we have a comma immediately following a
// constant, so we're going to parse this as a multiple assignment.
node = parse_targets(parser, node, YP_BINDING_POWER_INDEX);
}
return node;
}
case YP_TOKEN_UCOLON_COLON: {
parser_lex(parser);
yp_token_t delimiter = parser->previous;
expect(parser, YP_TOKEN_CONSTANT, "Expected a constant after ::.");
yp_node_t *constant = (yp_node_t *) yp_constant_read_node_create(parser, &parser->previous);
yp_node_t *node = (yp_node_t *)yp_constant_path_node_create(parser, NULL, &delimiter, constant);
if ((binding_power == YP_BINDING_POWER_STATEMENT) && match_type_p(parser, YP_TOKEN_COMMA)) {
node = parse_targets(parser, node, YP_BINDING_POWER_INDEX);
}
return node;
}
case YP_TOKEN_UDOT_DOT:
case YP_TOKEN_UDOT_DOT_DOT: {
yp_token_t operator = parser->current;
parser_lex(parser);
yp_node_t *right = parse_expression(parser, binding_power, "Expected a value after the operator.");
return (yp_node_t *) yp_range_node_create(parser, NULL, &operator, right);
}
case YP_TOKEN_FLOAT:
parser_lex(parser);
return (yp_node_t *)yp_float_node_create(parser, &parser->previous);
case YP_TOKEN_NUMBERED_REFERENCE: {
parser_lex(parser);
yp_node_t *node = (yp_node_t *) yp_numbered_reference_read_node_create(parser, &parser->previous);
if (binding_power == YP_BINDING_POWER_STATEMENT && match_type_p(parser, YP_TOKEN_COMMA)) {
node = parse_targets(parser, node, YP_BINDING_POWER_INDEX);
}
return node;
}
case YP_TOKEN_GLOBAL_VARIABLE: {
parser_lex(parser);
yp_node_t *node = (yp_node_t *) yp_global_variable_read_node_create(parser, &parser->previous);
if (binding_power == YP_BINDING_POWER_STATEMENT && match_type_p(parser, YP_TOKEN_COMMA)) {
node = parse_targets(parser, node, YP_BINDING_POWER_INDEX);
}
return node;
}
case YP_TOKEN_BACK_REFERENCE: {
parser_lex(parser);
yp_node_t *node = (yp_node_t *) yp_back_reference_read_node_create(parser, &parser->previous);
if (binding_power == YP_BINDING_POWER_STATEMENT && match_type_p(parser, YP_TOKEN_COMMA)) {
node = parse_targets(parser, node, YP_BINDING_POWER_INDEX);
}
return node;
}
case YP_TOKEN_IDENTIFIER: {
parser_lex(parser);
yp_token_t identifier = parser->previous;
yp_node_t *node = parse_vcall(parser);
if (node->type == YP_NODE_CALL_NODE) {
// If parse_vcall returned with a call node, then we know the identifier
// is not in the local table. In that case we need to check if there are
// arguments following the identifier.
yp_call_node_t *call = (yp_call_node_t *) node;
yp_arguments_t arguments = YP_EMPTY_ARGUMENTS;
parse_arguments_list(parser, &arguments, true);
call->opening_loc = arguments.opening_loc;
call->arguments = arguments.arguments;
call->closing_loc = arguments.closing_loc;
call->block = arguments.block;
if (arguments.block != NULL) {
call->base.location.end = arguments.block->base.location.end;
} else if (arguments.closing_loc.start == NULL) {
if (arguments.arguments != NULL) {
call->base.location.end = arguments.arguments->base.location.end;
} else {
call->base.location.end = call->message_loc.end;
}
} else {
call->base.location.end = arguments.closing_loc.end;
}
} else {
// Otherwise, we know the identifier is in the local table. This can
// still be a method call if it is followed by arguments or a block, so
// we need to check for that here.
if (
(binding_power <= YP_BINDING_POWER_ASSIGNMENT && (token_begins_expression_p(parser->current.type) || match_any_type_p(parser, 2, YP_TOKEN_USTAR, YP_TOKEN_USTAR_STAR))) ||
(yp_accepts_block_stack_p(parser) && match_any_type_p(parser, 2, YP_TOKEN_KEYWORD_DO, YP_TOKEN_BRACE_LEFT))
) {
yp_arguments_t arguments = YP_EMPTY_ARGUMENTS;
parse_arguments_list(parser, &arguments, true);
yp_call_node_t *fcall = yp_call_node_fcall_create(parser, &identifier, &arguments);
yp_node_destroy(parser, node);
return (yp_node_t *) fcall;
}
}
if ((binding_power == YP_BINDING_POWER_STATEMENT) && match_type_p(parser, YP_TOKEN_COMMA)) {
node = parse_targets(parser, node, YP_BINDING_POWER_INDEX);
}
return node;
}
case YP_TOKEN_HEREDOC_START: {
assert(parser->lex_modes.current->mode == YP_LEX_HEREDOC);
yp_heredoc_quote_t quote = parser->lex_modes.current->as.heredoc.quote;
yp_heredoc_indent_t indent = parser->lex_modes.current->as.heredoc.indent;
yp_node_t *node;
if (quote == YP_HEREDOC_QUOTE_BACKTICK) {
node = (yp_node_t *) yp_interpolated_xstring_node_create(parser, &parser->current, &parser->current);
} else {
node = (yp_node_t *) yp_interpolated_string_node_create(parser, &parser->current, NULL, &parser->current);
}
parser_lex(parser);
yp_node_t *part;
while (!match_any_type_p(parser, 2, YP_TOKEN_HEREDOC_END, YP_TOKEN_EOF)) {
if ((part = parse_string_part(parser)) == NULL) continue;
if (quote == YP_HEREDOC_QUOTE_BACKTICK) {
yp_interpolated_xstring_node_append((yp_interpolated_x_string_node_t *) node, part);
} else {
yp_interpolated_string_node_append((yp_interpolated_string_node_t *) node, part);
}
}
lex_state_set(parser, YP_LEX_STATE_END);
expect(parser, YP_TOKEN_HEREDOC_END, "Expected a closing delimiter for heredoc.");
if (quote == YP_HEREDOC_QUOTE_BACKTICK) {
assert(node->type == YP_NODE_INTERPOLATED_X_STRING_NODE);
yp_interpolated_xstring_node_closing_set(((yp_interpolated_x_string_node_t *) node), &parser->previous);
} else {
assert(node->type == YP_NODE_INTERPOLATED_STRING_NODE);
yp_interpolated_string_node_closing_set((yp_interpolated_string_node_t *) node, &parser->previous);
}
// If this is a heredoc that is indented with a ~, then we need to dedent
// each line by the common leading whitespace.
if (indent == YP_HEREDOC_INDENT_TILDE) {
parse_heredoc_dedent(parser, node, quote);
}
// If there's a string immediately following this heredoc, then it's a
        // concatenation. In this case we'll parse the next string and create a
// node in the tree that concatenates the two strings.
if (parser->current.type == YP_TOKEN_STRING_BEGIN) {
return (yp_node_t *) yp_string_concat_node_create(
parser,
node,
parse_expression(parser, YP_BINDING_POWER_CALL, "Expected string on the right side of concatenation.")
);
} else {
return node;
}
}
case YP_TOKEN_IMAGINARY_NUMBER:
parser_lex(parser);
return (yp_node_t *) yp_imaginary_node_create(parser, &parser->previous);
case YP_TOKEN_INSTANCE_VARIABLE: {
parser_lex(parser);
yp_node_t *node = (yp_node_t *) yp_instance_variable_read_node_create(parser, &parser->previous);
if (binding_power == YP_BINDING_POWER_STATEMENT && match_type_p(parser, YP_TOKEN_COMMA)) {
node = parse_targets(parser, node, YP_BINDING_POWER_INDEX);
}
return node;
}
case YP_TOKEN_INTEGER:
parser_lex(parser);
return (yp_node_t *) yp_integer_node_create(parser, &parser->previous);
case YP_TOKEN_KEYWORD___ENCODING__:
parser_lex(parser);
return (yp_node_t *) yp_source_encoding_node_create(parser, &parser->previous);
case YP_TOKEN_KEYWORD___FILE__:
parser_lex(parser);
return (yp_node_t *) yp_source_file_node_create(parser, &parser->previous);
case YP_TOKEN_KEYWORD___LINE__:
parser_lex(parser);
return (yp_node_t *) yp_source_line_node_create(parser, &parser->previous);
case YP_TOKEN_KEYWORD_ALIAS: {
parser_lex(parser);
yp_token_t keyword = parser->previous;
yp_node_t *new_name = parse_alias_argument(parser, true);
yp_node_t *old_name = parse_alias_argument(parser, false);
switch (new_name->type) {
case YP_NODE_SYMBOL_NODE:
case YP_NODE_INTERPOLATED_SYMBOL_NODE: {
if (old_name->type != YP_NODE_SYMBOL_NODE && old_name->type != YP_NODE_INTERPOLATED_SYMBOL_NODE) {
yp_diagnostic_list_append(&parser->error_list, old_name->location.start, old_name->location.end, "Expected a bare word or symbol argument.");
}
break;
}
case YP_NODE_BACK_REFERENCE_READ_NODE:
case YP_NODE_NUMBERED_REFERENCE_READ_NODE:
case YP_NODE_GLOBAL_VARIABLE_READ_NODE: {
if (old_name->type == YP_NODE_BACK_REFERENCE_READ_NODE || old_name->type == YP_NODE_NUMBERED_REFERENCE_READ_NODE || old_name->type == YP_NODE_GLOBAL_VARIABLE_READ_NODE) {
if (old_name->type == YP_NODE_NUMBERED_REFERENCE_READ_NODE) {
yp_diagnostic_list_append(&parser->error_list, old_name->location.start, old_name->location.end, "Can't make alias for number variables.");
}
} else {
yp_diagnostic_list_append(&parser->error_list, old_name->location.start, old_name->location.end, "Expected a global variable.");
}
break;
}
default:
break;
}
return (yp_node_t *) yp_alias_node_create(parser, &keyword, new_name, old_name);
}
case YP_TOKEN_KEYWORD_CASE: {
parser_lex(parser);
yp_token_t case_keyword = parser->previous;
yp_node_t *predicate = NULL;
if (
accept_any(parser, 2, YP_TOKEN_NEWLINE, YP_TOKEN_SEMICOLON) ||
match_any_type_p(parser, 3, YP_TOKEN_KEYWORD_WHEN, YP_TOKEN_KEYWORD_IN, YP_TOKEN_KEYWORD_END) ||
!token_begins_expression_p(parser->current.type)
) {
predicate = NULL;
} else {
predicate = parse_expression(parser, YP_BINDING_POWER_COMPOSITION, "Expected a value after case keyword.");
while (accept_any(parser, 2, YP_TOKEN_NEWLINE, YP_TOKEN_SEMICOLON));
}
if (accept(parser, YP_TOKEN_KEYWORD_END)) {
return (yp_node_t *) yp_case_node_create(parser, &case_keyword, predicate, NULL, &parser->previous);
}
// At this point we can create a case node, though we don't yet know if it
// is a case-in or case-when node.
yp_token_t end_keyword = not_provided(parser);
yp_case_node_t *case_node = yp_case_node_create(parser, &case_keyword, predicate, NULL, &end_keyword);
if (match_type_p(parser, YP_TOKEN_KEYWORD_WHEN)) {
// At this point we've seen a when keyword, so we know this is a
// case-when node. We will continue to parse the when nodes until we hit
// the end of the list.
while (accept(parser, YP_TOKEN_KEYWORD_WHEN)) {
yp_token_t when_keyword = parser->previous;
yp_when_node_t *when_node = yp_when_node_create(parser, &when_keyword);
do {
if (accept(parser, YP_TOKEN_USTAR)) {
yp_token_t operator = parser->previous;
yp_node_t *expression = parse_expression(parser, YP_BINDING_POWER_DEFINED, "Expected a value after `*' operator.");
yp_splat_node_t *splat_node = yp_splat_node_create(parser, &operator, expression);
yp_when_node_conditions_append(when_node, (yp_node_t *) splat_node);
if (expression->type == YP_NODE_MISSING_NODE) break;
} else {
yp_node_t *condition = parse_expression(parser, YP_BINDING_POWER_DEFINED, "Expected a value after when keyword.");
yp_when_node_conditions_append(when_node, condition);
if (condition->type == YP_NODE_MISSING_NODE) break;
}
} while (accept(parser, YP_TOKEN_COMMA));
if (accept_any(parser, 2, YP_TOKEN_NEWLINE, YP_TOKEN_SEMICOLON)) {
accept(parser, YP_TOKEN_KEYWORD_THEN);
} else {
expect(parser, YP_TOKEN_KEYWORD_THEN, "Expected a delimiter after the predicates of a `when' clause.");
}
if (!match_any_type_p(parser, 3, YP_TOKEN_KEYWORD_WHEN, YP_TOKEN_KEYWORD_ELSE, YP_TOKEN_KEYWORD_END)) {
yp_statements_node_t *statements = parse_statements(parser, YP_CONTEXT_CASE_WHEN);
if (statements != NULL) {
yp_when_node_statements_set(when_node, statements);
}
}
yp_case_node_condition_append(case_node, (yp_node_t *) when_node);
}
} else {
// At this point we expect that we're parsing a case-in node. We will
// continue to parse the in nodes until we hit the end of the list.
while (match_type_p(parser, YP_TOKEN_KEYWORD_IN)) {
bool previous_pattern_matching_newlines = parser->pattern_matching_newlines;
parser->pattern_matching_newlines = true;
lex_state_set(parser, YP_LEX_STATE_BEG | YP_LEX_STATE_LABEL);
parser->command_start = false;
parser_lex(parser);
yp_token_t in_keyword = parser->previous;
yp_node_t *pattern = parse_pattern(parser, true, "Expected a pattern after `in' keyword.");
parser->pattern_matching_newlines = previous_pattern_matching_newlines;
// Since we're in the top-level of the case-in node we need to check
// for guard clauses in the form of `if` or `unless` statements.
if (accept(parser, YP_TOKEN_KEYWORD_IF_MODIFIER)) {
yp_token_t keyword = parser->previous;
yp_node_t *predicate = parse_expression(parser, YP_BINDING_POWER_DEFINED, "Expected a value after guard keyword.");
pattern = (yp_node_t *) yp_if_node_modifier_create(parser, pattern, &keyword, predicate);
} else if (accept(parser, YP_TOKEN_KEYWORD_UNLESS_MODIFIER)) {
yp_token_t keyword = parser->previous;
yp_node_t *predicate = parse_expression(parser, YP_BINDING_POWER_DEFINED, "Expected a value after guard keyword.");
pattern = (yp_node_t *) yp_unless_node_modifier_create(parser, pattern, &keyword, predicate);
}
// Now we need to check for the terminator of the in node's pattern.
// It can be a newline or semicolon optionally followed by a `then`
// keyword.
yp_token_t then_keyword;
if (accept_any(parser, 2, YP_TOKEN_NEWLINE, YP_TOKEN_SEMICOLON)) {
if (accept(parser, YP_TOKEN_KEYWORD_THEN)) {
then_keyword = parser->previous;
} else {
then_keyword = not_provided(parser);
}
} else {
expect(parser, YP_TOKEN_KEYWORD_THEN, "Expected a delimiter after the predicates of an `in' clause.");
then_keyword = parser->previous;
}
// Now we can actually parse the statements associated with the in
// node.
yp_statements_node_t *statements;
if (match_any_type_p(parser, 3, YP_TOKEN_KEYWORD_IN, YP_TOKEN_KEYWORD_ELSE, YP_TOKEN_KEYWORD_END)) {
statements = NULL;
} else {
statements = parse_statements(parser, YP_CONTEXT_CASE_IN);
}
// Now that we have the full pattern and statements, we can create the
// node and attach it to the case node.
yp_node_t *condition = (yp_node_t *) yp_in_node_create(parser, pattern, statements, &in_keyword, &then_keyword);
yp_case_node_condition_append(case_node, condition);
}
}
// Consume any trailing terminator before checking for an `else` clause or
// the closing `end` of the case statement.
accept_any(parser, 2, YP_TOKEN_NEWLINE, YP_TOKEN_SEMICOLON);

if (accept(parser, YP_TOKEN_KEYWORD_ELSE)) {
    // An `else` clause is only meaningful if at least one `when`/`in`
    // condition preceded it. (Fixed diagnostic: the previous message read
    // "without no when clauses", a double negative.)
    if (case_node->conditions.size < 1) {
        yp_diagnostic_list_append(&parser->error_list, parser->previous.start, parser->previous.end, "Unexpected else without when clauses in case statement.");
    }

    yp_token_t else_keyword = parser->previous;
    yp_else_node_t *else_node;

    // Parse the else body unless `end` immediately follows, in which case
    // the else clause has no statements.
    if (!match_type_p(parser, YP_TOKEN_KEYWORD_END)) {
        else_node = yp_else_node_create(parser, &else_keyword, parse_statements(parser, YP_CONTEXT_ELSE), &parser->current);
    } else {
        else_node = yp_else_node_create(parser, &else_keyword, NULL, &parser->current);
    }

    yp_case_node_consequent_set(case_node, else_node);
}

// Finally require the `end` keyword and record its location on the node.
expect(parser, YP_TOKEN_KEYWORD_END, "Expected case statement to end with an end keyword.");
yp_case_node_end_keyword_loc_set(case_node, &parser->previous);
return (yp_node_t *) case_node;
}
// Parse a `begin ... [rescue/else/ensure] ... end` expression.
case YP_TOKEN_KEYWORD_BEGIN: {
    parser_lex(parser);

    yp_token_t begin_keyword = parser->previous;
    accept_any(parser, 2, YP_TOKEN_NEWLINE, YP_TOKEN_SEMICOLON);
    yp_statements_node_t *begin_statements = NULL;

    // Only parse a statements body if the next token doesn't already start
    // a rescue/ensure/end clause; otherwise the body is empty.
    if (!match_any_type_p(parser, 3, YP_TOKEN_KEYWORD_RESCUE, YP_TOKEN_KEYWORD_ENSURE, YP_TOKEN_KEYWORD_END)) {
        yp_accepts_block_stack_push(parser, true);
        begin_statements = parse_statements(parser, YP_CONTEXT_BEGIN);
        yp_accepts_block_stack_pop(parser);
        accept_any(parser, 2, YP_TOKEN_NEWLINE, YP_TOKEN_SEMICOLON);
    }

    yp_begin_node_t *begin_node = yp_begin_node_create(parser, &begin_keyword, begin_statements);
    // parse_rescues attaches any rescue/else/ensure clauses to the node.
    parse_rescues(parser, begin_node);

    expect(parser, YP_TOKEN_KEYWORD_END, "Expected `end` to close `begin` statement.");
    begin_node->base.location.end = parser->previous.end;
    yp_begin_node_end_keyword_set(begin_node, &parser->previous);

    // An `else` clause without a preceding `rescue` clause is legal but
    // useless, so emit a diagnostic pointing at the else clause.
    if ((begin_node->else_clause != NULL) && (begin_node->rescue_clause == NULL)) {
        yp_diagnostic_list_append(
            &parser->error_list,
            begin_node->else_clause->base.location.start,
            begin_node->else_clause->base.location.end,
            "else without rescue is useless"
        );
    }

    return (yp_node_t *) begin_node;
}
// Parse a `BEGIN { ... }` pre-execution block.
case YP_TOKEN_KEYWORD_BEGIN_UPCASE: {
    parser_lex(parser);
    yp_token_t keyword = parser->previous;
    expect(parser, YP_TOKEN_BRACE_LEFT, "Expected '{' after 'BEGIN'.");
    yp_token_t opening = parser->previous;

    yp_statements_node_t *statements = parse_statements(parser, YP_CONTEXT_PREEXE);
    expect(parser, YP_TOKEN_BRACE_RIGHT, "Expected '}' after 'BEGIN' statements.");
    return (yp_node_t *) yp_pre_execution_node_create(parser, &keyword, &opening, statements, &parser->previous);
}
// break/next/return share parsing: an optional argument list follows the
// keyword, then the specific node type is created based on the keyword.
case YP_TOKEN_KEYWORD_BREAK:
case YP_TOKEN_KEYWORD_NEXT:
case YP_TOKEN_KEYWORD_RETURN: {
    parser_lex(parser);

    yp_token_t keyword = parser->previous;
    yp_arguments_node_t *arguments = NULL;

    // Only parse arguments if the next token can begin an expression (or
    // is a splat), and its binding power allows it to stand alone here.
    if (
        token_begins_expression_p(parser->current.type) ||
        match_any_type_p(parser, 2, YP_TOKEN_USTAR, YP_TOKEN_USTAR_STAR)
    ) {
        yp_binding_power_t binding_power = yp_binding_powers[parser->current.type].left;

        if (binding_power == YP_BINDING_POWER_UNSET || binding_power >= YP_BINDING_POWER_RANGE) {
            arguments = yp_arguments_node_create(parser);
            parse_arguments(parser, arguments, false, YP_TOKEN_EOF);
        }
    }

    switch (keyword.type) {
        case YP_TOKEN_KEYWORD_BREAK:
            return (yp_node_t *) yp_break_node_create(parser, &keyword, arguments);
        case YP_TOKEN_KEYWORD_NEXT:
            return (yp_node_t *) yp_next_node_create(parser, &keyword, arguments);
        case YP_TOKEN_KEYWORD_RETURN: {
            // `return` is not allowed directly inside a class or module
            // body, so flag it (the node is still created for recovery).
            if (
                (parser->current_context->context == YP_CONTEXT_CLASS) ||
                (parser->current_context->context == YP_CONTEXT_MODULE)
            ) {
                yp_diagnostic_list_append(&parser->error_list, parser->current.start, parser->current.end, "Invalid return in class/module body");
            }
            return (yp_node_t *) yp_return_node_create(parser, &keyword, arguments);
        }
        default:
            assert(false && "unreachable");
            return (yp_node_t *) yp_missing_node_create(parser, parser->previous.start, parser->previous.end);
    }
}
// Parse `super` with an optional argument list. Bare `super` (no parens,
// no arguments) forwards the enclosing method's arguments implicitly.
case YP_TOKEN_KEYWORD_SUPER: {
    parser_lex(parser);

    yp_token_t keyword = parser->previous;
    yp_arguments_t arguments = YP_EMPTY_ARGUMENTS;
    parse_arguments_list(parser, &arguments, true);

    // No opening paren and no arguments means a ZSuper-style forwarding call.
    if (arguments.opening_loc.start == NULL && arguments.arguments == NULL) {
        return (yp_node_t *) yp_forwarding_super_node_create(parser, &keyword, &arguments);
    }

    return (yp_node_t *) yp_super_node_create(parser, &keyword, &arguments);
}
// Parse `yield` with an optional argument list.
case YP_TOKEN_KEYWORD_YIELD: {
    parser_lex(parser);

    yp_token_t keyword = parser->previous;
    yp_arguments_t arguments = YP_EMPTY_ARGUMENTS;
    parse_arguments_list(parser, &arguments, false);

    return (yp_node_t *) yp_yield_node_create(parser, &keyword, &arguments.opening_loc, arguments.arguments, &arguments.closing_loc);
}
// Parse a class definition: either `class << expr ... end` (singleton
// class) or `class Name [< Superclass] ... end`.
case YP_TOKEN_KEYWORD_CLASS: {
    parser_lex(parser);
    yp_token_t class_keyword = parser->previous;
    yp_do_loop_stack_push(parser, false);

    // `class << expr` opens the singleton class of expr.
    if (accept(parser, YP_TOKEN_LESS_LESS)) {
        yp_token_t operator = parser->previous;
        yp_node_t *expression = parse_expression(parser, YP_BINDING_POWER_NOT, "Expected to find an expression after `<<`.");

        // Singleton class bodies get their own local variable scope.
        yp_parser_scope_push(parser, true);
        accept_any(parser, 2, YP_TOKEN_NEWLINE, YP_TOKEN_SEMICOLON);

        yp_node_t *statements = NULL;
        if (!match_any_type_p(parser, 3, YP_TOKEN_KEYWORD_RESCUE, YP_TOKEN_KEYWORD_ENSURE, YP_TOKEN_KEYWORD_END)) {
            statements = (yp_node_t *) parse_statements(parser, YP_CONTEXT_SCLASS);
        }

        // A trailing rescue/ensure wraps the body in an implicit begin.
        if (match_any_type_p(parser, 2, YP_TOKEN_KEYWORD_RESCUE, YP_TOKEN_KEYWORD_ENSURE)) {
            assert(statements == NULL || statements->type == YP_NODE_STATEMENTS_NODE);
            statements = (yp_node_t *) parse_rescues_as_begin(parser, (yp_statements_node_t *) statements);
        }

        expect(parser, YP_TOKEN_KEYWORD_END, "Expected `end` to close `class` statement.");

        yp_constant_id_list_t locals = parser->current_scope->locals;
        yp_parser_scope_pop(parser);
        yp_do_loop_stack_pop(parser);
        return (yp_node_t *) yp_singleton_class_node_create(parser, &locals, &class_keyword, &operator, expression, statements, &parser->previous);
    }

    yp_node_t *name = parse_expression(parser, YP_BINDING_POWER_INDEX, "Expected to find a class name after `class`.");
    yp_token_t inheritance_operator;
    yp_node_t *superclass;

    if (match_type_p(parser, YP_TOKEN_LESS)) {
        inheritance_operator = parser->current;
        // Reset the lexer state before lexing past `<` so the superclass
        // expression is lexed as the beginning of an expression.
        lex_state_set(parser, YP_LEX_STATE_BEG);
        parser->command_start = true;

        parser_lex(parser);
        superclass = parse_expression(parser, YP_BINDING_POWER_COMPOSITION, "Expected to find a superclass after `<`.");
    } else {
        inheritance_operator = not_provided(parser);
        superclass = NULL;
    }

    // Class bodies get their own local variable scope.
    yp_parser_scope_push(parser, true);
    accept_any(parser, 2, YP_TOKEN_NEWLINE, YP_TOKEN_SEMICOLON);
    yp_node_t *statements = NULL;

    if (!match_any_type_p(parser, 3, YP_TOKEN_KEYWORD_RESCUE, YP_TOKEN_KEYWORD_ENSURE, YP_TOKEN_KEYWORD_END)) {
        yp_accepts_block_stack_push(parser, true);
        statements = (yp_node_t *) parse_statements(parser, YP_CONTEXT_CLASS);
        yp_accepts_block_stack_pop(parser);
    }

    // A trailing rescue/ensure wraps the body in an implicit begin.
    if (match_any_type_p(parser, 2, YP_TOKEN_KEYWORD_RESCUE, YP_TOKEN_KEYWORD_ENSURE)) {
        assert(statements == NULL || statements->type == YP_NODE_STATEMENTS_NODE);
        statements = (yp_node_t *) parse_rescues_as_begin(parser, (yp_statements_node_t *) statements);
    }

    expect(parser, YP_TOKEN_KEYWORD_END, "Expected `end` to close `class` statement.");

    // Defining a class inside a method body is a syntax error.
    if (context_def_p(parser)) {
        yp_diagnostic_list_append(&parser->error_list, class_keyword.start, class_keyword.end, "Class definition in method body");
    }

    yp_constant_id_list_t locals = parser->current_scope->locals;
    yp_parser_scope_pop(parser);
    yp_do_loop_stack_pop(parser);
    return (yp_node_t *) yp_class_node_create(parser, &locals, &class_keyword, name, &inheritance_operator, superclass, statements, &parser->previous);
}
// Parse a method definition. Handles every receiver form (`def m`,
// `def self.m`, `def Const.m`, `def (expr).m`, operator names, etc.),
// the parameter list (parenthesized or bare), and both the classic
// `def ... end` body and the endless `def m = expr` form.
case YP_TOKEN_KEYWORD_DEF: {
    // Capture the keyword from parser->current before lexing past it.
    yp_token_t def_keyword = parser->current;

    yp_node_t *receiver = NULL;
    yp_token_t operator = not_provided(parser);
    yp_token_t name = not_provided(parser);

    context_push(parser, YP_CONTEXT_DEF_PARAMS);
    parser_lex(parser);

    // Determine the receiver (if any) and the method name based on what
    // follows the `def` keyword.
    switch (parser->current.type) {
        case YP_CASE_OPERATOR:
            // Operator method, e.g. `def +(other)`.
            yp_parser_scope_push(parser, true);
            lex_state_set(parser, YP_LEX_STATE_ENDFN);
            parser_lex(parser);
            name = parser->previous;
            break;
        case YP_TOKEN_IDENTIFIER: {
            yp_parser_scope_push(parser, true);
            parser_lex(parser);

            // `def foo.bar` / `def foo::bar`: the identifier is a receiver.
            if (match_any_type_p(parser, 2, YP_TOKEN_DOT, YP_TOKEN_COLON_COLON)) {
                receiver = parse_vcall(parser);

                lex_state_set(parser, YP_LEX_STATE_FNAME);
                parser_lex(parser);

                operator = parser->previous;
                name = parse_method_definition_name(parser);

                if (name.type == YP_TOKEN_MISSING) {
                    yp_diagnostic_list_append(&parser->error_list, parser->previous.start, parser->previous.end, "Expected a method name after receiver.");
                }
            } else {
                // Plain `def foo`: the identifier is the method name.
                name = parser->previous;
            }

            break;
        }
        // These token types can all serve as an explicit receiver when
        // followed by `.` or `::` (e.g. `def self.foo`, `def Const.foo`).
        case YP_TOKEN_CONSTANT:
        case YP_TOKEN_INSTANCE_VARIABLE:
        case YP_TOKEN_CLASS_VARIABLE:
        case YP_TOKEN_GLOBAL_VARIABLE:
        case YP_TOKEN_KEYWORD_NIL:
        case YP_TOKEN_KEYWORD_SELF:
        case YP_TOKEN_KEYWORD_TRUE:
        case YP_TOKEN_KEYWORD_FALSE:
        case YP_TOKEN_KEYWORD___FILE__:
        case YP_TOKEN_KEYWORD___LINE__:
        case YP_TOKEN_KEYWORD___ENCODING__: {
            yp_parser_scope_push(parser, true);
            parser_lex(parser);
            yp_token_t identifier = parser->previous;

            if (match_any_type_p(parser, 2, YP_TOKEN_DOT, YP_TOKEN_COLON_COLON)) {
                lex_state_set(parser, YP_LEX_STATE_FNAME);
                parser_lex(parser);

                operator = parser->previous;

                // Build the receiver node matching the identifier type.
                switch (identifier.type) {
                    case YP_TOKEN_CONSTANT:
                        receiver = (yp_node_t *) yp_constant_read_node_create(parser, &identifier);
                        break;
                    case YP_TOKEN_INSTANCE_VARIABLE:
                        receiver = (yp_node_t *) yp_instance_variable_read_node_create(parser, &identifier);
                        break;
                    case YP_TOKEN_CLASS_VARIABLE:
                        receiver = (yp_node_t *) yp_class_variable_read_node_create(parser, &identifier);
                        break;
                    case YP_TOKEN_GLOBAL_VARIABLE:
                        receiver = (yp_node_t *) yp_global_variable_read_node_create(parser, &identifier);
                        break;
                    case YP_TOKEN_KEYWORD_NIL:
                        receiver = (yp_node_t *) yp_nil_node_create(parser, &identifier);
                        break;
                    case YP_TOKEN_KEYWORD_SELF:
                        receiver = (yp_node_t *) yp_self_node_create(parser, &identifier);
                        break;
                    case YP_TOKEN_KEYWORD_TRUE:
                        receiver = (yp_node_t *) yp_true_node_create(parser, &identifier);
                        break;
                    case YP_TOKEN_KEYWORD_FALSE:
                        receiver = (yp_node_t *)yp_false_node_create(parser, &identifier);
                        break;
                    case YP_TOKEN_KEYWORD___FILE__:
                        receiver = (yp_node_t *) yp_source_file_node_create(parser, &identifier);
                        break;
                    case YP_TOKEN_KEYWORD___LINE__:
                        receiver = (yp_node_t *) yp_source_line_node_create(parser, &identifier);
                        break;
                    case YP_TOKEN_KEYWORD___ENCODING__:
                        receiver = (yp_node_t *) yp_source_encoding_node_create(parser, &identifier);
                        break;
                    default:
                        break;
                }

                name = parse_method_definition_name(parser);
                if (name.type == YP_TOKEN_MISSING) {
                    yp_diagnostic_list_append(&parser->error_list, parser->previous.start, parser->previous.end, "Expected a method name after receiver.");
                }
            } else {
                // No dot follows, so the identifier itself is the name
                // (e.g. `def true` defining a keyword-named method).
                name = identifier;
            }
            break;
        }
        case YP_TOKEN_PARENTHESIS_LEFT: {
            // Parenthesized receiver: `def (expr).name`.
            parser_lex(parser);
            yp_token_t lparen = parser->previous;
            yp_node_t *expression = parse_expression(parser, YP_BINDING_POWER_STATEMENT, "Expected to be able to parse receiver.");

            expect(parser, YP_TOKEN_PARENTHESIS_RIGHT, "Expected closing ')' for receiver.");
            yp_token_t rparen = parser->previous;

            lex_state_set(parser, YP_LEX_STATE_FNAME);
            expect_any(parser, "Expected '.' or '::' after receiver", 2, YP_TOKEN_DOT, YP_TOKEN_COLON_COLON);

            operator = parser->previous;
            receiver = (yp_node_t *) yp_parentheses_node_create(parser, &lparen, expression, &rparen);

            yp_parser_scope_push(parser, true);
            name = parse_method_definition_name(parser);
            break;
        }
        default:
            yp_parser_scope_push(parser, true);
            name = parse_method_definition_name(parser);

            if (name.type == YP_TOKEN_MISSING) {
                yp_diagnostic_list_append(&parser->error_list, parser->previous.start, parser->previous.end, "Expected a method name after receiver.");
            }
            break;
    }

    // Parse the parameter list, which may be parenthesized, bare, or
    // entirely absent.
    yp_token_t lparen;
    yp_token_t rparen;
    yp_parameters_node_t *params;

    switch (parser->current.type) {
        case YP_TOKEN_PARENTHESIS_LEFT: {
            parser_lex(parser);
            lparen = parser->previous;

            if (match_type_p(parser, YP_TOKEN_PARENTHESIS_RIGHT)) {
                params = NULL;
            } else {
                params = parse_parameters(parser, YP_BINDING_POWER_DEFINED, true, false, true);
            }

            lex_state_set(parser, YP_LEX_STATE_BEG);
            parser->command_start = true;

            expect(parser, YP_TOKEN_PARENTHESIS_RIGHT, "Expected ')' after left parenthesis.");
            rparen = parser->previous;
            break;
        }
        case YP_CASE_PARAMETER: {
            // Bare (unparenthesized) parameter list.
            lparen = not_provided(parser);
            rparen = not_provided(parser);
            params = parse_parameters(parser, YP_BINDING_POWER_DEFINED, false, false, true);
            break;
        }
        default: {
            lparen = not_provided(parser);
            rparen = not_provided(parser);
            params = NULL;
            break;
        }
    }

    context_pop(parser);
    yp_node_t *statements = NULL;
    yp_token_t equal;
    yp_token_t end_keyword;

    if (accept(parser, YP_TOKEN_EQUAL)) {
        // Endless method definition: `def name(...) = expr`.
        if (token_is_setter_name(&name)) {
            yp_diagnostic_list_append(&parser->error_list, name.start, name.end, "Setter method cannot be defined in an endless method definition");
        }
        equal = parser->previous;

        context_push(parser, YP_CONTEXT_DEF);
        statements = (yp_node_t *) yp_statements_node_create(parser);

        yp_node_t *statement = parse_expression(parser, YP_BINDING_POWER_DEFINED + 1, "Expected to be able to parse body of endless method definition.");

        // An endless body may carry a trailing `rescue` modifier.
        if (accept(parser, YP_TOKEN_KEYWORD_RESCUE_MODIFIER)) {
            yp_token_t rescue_keyword = parser->previous;
            yp_node_t *value = parse_expression(parser, binding_power, "Expected a value after the rescue keyword.");
            yp_rescue_modifier_node_t *rescue_node = yp_rescue_modifier_node_create(parser, statement, &rescue_keyword, value);
            statement = (yp_node_t *)rescue_node;
        }

        yp_statements_node_body_append((yp_statements_node_t *) statements, statement);
        context_pop(parser);
        end_keyword = not_provided(parser);
    } else {
        equal = not_provided(parser);

        if (lparen.type == YP_TOKEN_NOT_PROVIDED) {
            // Without parens around the parameters, a terminator is
            // required before the body.
            lex_state_set(parser, YP_LEX_STATE_BEG);
            parser->command_start = true;
            expect_any(parser, "Expected a terminator after the parameters", 2, YP_TOKEN_NEWLINE, YP_TOKEN_SEMICOLON);
        } else {
            accept_any(parser, 2, YP_TOKEN_NEWLINE, YP_TOKEN_SEMICOLON);
        }

        yp_accepts_block_stack_push(parser, true);
        yp_do_loop_stack_push(parser, false);

        if (!match_any_type_p(parser, 3, YP_TOKEN_KEYWORD_RESCUE, YP_TOKEN_KEYWORD_ENSURE, YP_TOKEN_KEYWORD_END)) {
            statements = (yp_node_t *) parse_statements(parser, YP_CONTEXT_DEF);
        }

        // A trailing rescue/ensure wraps the body in an implicit begin.
        if (match_any_type_p(parser, 2, YP_TOKEN_KEYWORD_RESCUE, YP_TOKEN_KEYWORD_ENSURE)) {
            assert(statements == NULL || statements->type == YP_NODE_STATEMENTS_NODE);
            statements = (yp_node_t *) parse_rescues_as_begin(parser, (yp_statements_node_t *) statements);
        }

        yp_accepts_block_stack_pop(parser);
        yp_do_loop_stack_pop(parser);
        expect(parser, YP_TOKEN_KEYWORD_END, "Expected `end` to close `def` statement.");
        end_keyword = parser->previous;
    }

    // Collect the locals declared in the method's scope before popping it.
    yp_constant_id_list_t locals = parser->current_scope->locals;
    yp_parser_scope_pop(parser);

    return (yp_node_t *) yp_def_node_create(
        parser,
        &name,
        receiver,
        params,
        statements,
        &locals,
        &def_keyword,
        &operator,
        &lparen,
        &rparen,
        &equal,
        &end_keyword
    );
}
// Parse `defined?(expr)` or `defined? expr`.
case YP_TOKEN_KEYWORD_DEFINED: {
    parser_lex(parser);
    yp_token_t keyword = parser->previous;

    yp_token_t lparen;
    yp_token_t rparen;
    yp_node_t *expression;

    if (accept(parser, YP_TOKEN_PARENTHESIS_LEFT)) {
        lparen = parser->previous;
        expression = parse_expression(parser, YP_BINDING_POWER_COMPOSITION, "Expected expression after `defined?`.");

        // If we're recovering from a parse error, don't demand the
        // closing paren; just mark it as not provided.
        if (parser->recovering) {
            rparen = not_provided(parser);
        } else {
            expect(parser, YP_TOKEN_PARENTHESIS_RIGHT, "Expected ')' after 'defined?' expression.");
            rparen = parser->previous;
        }
    } else {
        lparen = not_provided(parser);
        rparen = not_provided(parser);
        expression = parse_expression(parser, YP_BINDING_POWER_DEFINED, "Expected expression after `defined?`.");
    }

    return (yp_node_t *) yp_defined_node_create(
        parser,
        &lparen,
        expression,
        &rparen,
        &(yp_location_t) { .start = keyword.start, .end = keyword.end }
    );
}
// Parse an `END { ... }` post-execution block.
case YP_TOKEN_KEYWORD_END_UPCASE: {
    parser_lex(parser);
    yp_token_t keyword = parser->previous;
    expect(parser, YP_TOKEN_BRACE_LEFT, "Expected '{' after 'END'.");
    yp_token_t opening = parser->previous;

    yp_statements_node_t *statements = parse_statements(parser, YP_CONTEXT_POSTEXE);
    expect(parser, YP_TOKEN_BRACE_RIGHT, "Expected '}' after 'END' statements.");
    return (yp_node_t *) yp_post_execution_node_create(parser, &keyword, &opening, statements, &parser->previous);
}
case YP_TOKEN_KEYWORD_FALSE:
    parser_lex(parser);
    return (yp_node_t *)yp_false_node_create(parser, &parser->previous);
// Parse a `for index in collection [do] ... end` loop.
case YP_TOKEN_KEYWORD_FOR: {
    parser_lex(parser);
    yp_token_t for_keyword = parser->previous;
    yp_node_t *index = parse_targets(parser, NULL, YP_BINDING_POWER_INDEX);

    // Push the do-loop stack so `do` after the collection is recognized
    // as the loop keyword instead of a block opener.
    yp_do_loop_stack_push(parser, true);

    expect(parser, YP_TOKEN_KEYWORD_IN, "Expected keyword in.");
    yp_token_t in_keyword = parser->previous;

    yp_node_t *collection = parse_expression(parser, YP_BINDING_POWER_COMPOSITION, "Expected collection.");
    yp_do_loop_stack_pop(parser);

    yp_token_t do_keyword;
    if (accept(parser, YP_TOKEN_KEYWORD_DO_LOOP)) {
        do_keyword = parser->previous;
    } else {
        do_keyword = not_provided(parser);
    }

    accept_any(parser, 2, YP_TOKEN_SEMICOLON, YP_TOKEN_NEWLINE);
    yp_statements_node_t *statements = NULL;

    if (!accept(parser, YP_TOKEN_KEYWORD_END)) {
        statements = parse_statements(parser, YP_CONTEXT_FOR);
        expect(parser, YP_TOKEN_KEYWORD_END, "Expected `end` to close for loop.");
    }

    return (yp_node_t *) yp_for_node_create(parser, index, collection, statements, &for_keyword, &in_keyword, &do_keyword, &parser->previous);
}
case YP_TOKEN_KEYWORD_IF:
    parser_lex(parser);
    return parse_conditional(parser, YP_CONTEXT_IF);
// Parse `undef name[, name...]`; each argument may be a method name or
// symbol, handled by parse_undef_argument.
case YP_TOKEN_KEYWORD_UNDEF: {
    parser_lex(parser);
    yp_undef_node_t *undef = yp_undef_node_create(parser, &parser->previous);

    yp_node_t *name = parse_undef_argument(parser);
    if (name->type != YP_NODE_MISSING_NODE) {
        yp_undef_node_append(undef, name);

        while (match_type_p(parser, YP_TOKEN_COMMA)) {
            // Each subsequent name is lexed in FNAME|FITEM state so
            // operator names and symbols lex correctly.
            lex_state_set(parser, YP_LEX_STATE_FNAME | YP_LEX_STATE_FITEM);
            parser_lex(parser);
            name = parse_undef_argument(parser);

            // Stop (and free the placeholder) on the first missing name.
            if (name->type == YP_NODE_MISSING_NODE) {
                yp_node_destroy(parser, name);
                break;
            }

            yp_undef_node_append(undef, name);
        }
    } else {
        yp_node_destroy(parser, name);
    }

    return (yp_node_t *) undef;
}
// Parse the `not` keyword, which is represented as a `!` method call.
// Supports both `not expr` and `not(expr)` (including `not()`).
case YP_TOKEN_KEYWORD_NOT: {
    parser_lex(parser);

    yp_token_t message = parser->previous;
    yp_arguments_t arguments = YP_EMPTY_ARGUMENTS;
    yp_node_t *receiver = NULL;

    accept(parser, YP_TOKEN_NEWLINE);

    if (accept(parser, YP_TOKEN_PARENTHESIS_LEFT)) {
        arguments.opening_loc = ((yp_location_t) { .start = parser->previous.start, .end = parser->previous.end });

        if (accept(parser, YP_TOKEN_PARENTHESIS_RIGHT)) {
            // `not()` — empty parens, no receiver.
            arguments.closing_loc = ((yp_location_t) { .start = parser->previous.start, .end = parser->previous.end });
        } else {
            receiver = parse_expression(parser, YP_BINDING_POWER_COMPOSITION, "Expected expression after `not`.");

            if (!parser->recovering) {
                accept(parser, YP_TOKEN_NEWLINE);
                expect(parser, YP_TOKEN_PARENTHESIS_RIGHT, "Expected ')' after 'not' expression.");
                arguments.closing_loc = ((yp_location_t) { .start = parser->previous.start, .end = parser->previous.end });
            }
        }
    } else {
        receiver = parse_expression(parser, YP_BINDING_POWER_DEFINED, "Expected expression after `not`.");
    }

    return (yp_node_t *) yp_call_node_not_create(parser, receiver, &message, &arguments);
}
case YP_TOKEN_KEYWORD_MODULE: {
parser_lex(parser);
yp_token_t module_keyword = parser->previous;
yp_node_t *name = parse_expression(parser, YP_BINDING_POWER_INDEX, "Expected to find a module name after `module`.");
// If we can recover from a syntax error that occurred while parsing the
// name of the module, then we'll handle that here.
if (name->type == YP_NODE_MISSING_NODE) {
yp_token_t end_keyword = (yp_token_t) { .type = YP_TOKEN_MISSING, .start = parser->previous.end, .end = parser->previous.end };
return (yp_node_t *) yp_module_node_create(parser, NULL, &module_keyword, name, NULL, &end_keyword);
}
while (accept(parser, YP_TOKEN_COLON_COLON)) {
yp_token_t double_colon = parser->previous;
expect(parser, YP_TOKEN_CONSTANT, "Expected to find a module name after `::`.");
yp_node_t *constant = (yp_node_t *) yp_constant_read_node_create(parser, &parser->previous);
name = (yp_node_t *)yp_constant_path_node_create(parser, name, &double_colon, constant);
}
yp_parser_scope_push(parser, true);
accept_any(parser, 2, YP_TOKEN_SEMICOLON, YP_TOKEN_NEWLINE);
yp_node_t *statements = NULL;
if (!match_any_type_p(parser, 3, YP_TOKEN_KEYWORD_RESCUE, YP_TOKEN_KEYWORD_ENSURE, YP_TOKEN_KEYWORD_END)) {
yp_accepts_block_stack_push(parser, true);
statements = (yp_node_t *) parse_statements(parser, YP_CONTEXT_MODULE);
yp_accepts_block_stack_pop(parser);
}
if (match_any_type_p(parser, 2, YP_TOKEN_KEYWORD_RESCUE, YP_TOKEN_KEYWORD_ENSURE)) {
assert(statements == NULL || statements->type == YP_NODE_STATEMENTS_NODE);
statements = (yp_node_t *) parse_rescues_as_begin(parser, (yp_statements_node_t *) statements);
}
yp_constant_id_list_t locals = parser->current_scope->locals;
yp_parser_scope_pop(parser);
expect(parser, YP_TOKEN_KEYWORD_END, "Expected `end` to close `module` statement.");
if (context_def_p(parser)) {
yp_diagnostic_list_append(&parser->error_list, module_keyword.start, module_keyword.end, "Module definition in method body");
}
return (yp_node_t *) yp_module_node_create(parser, &locals, &module_keyword, name, statements, &parser->previous);
}
case YP_TOKEN_KEYWORD_NIL:
parser_lex(parser);
return (yp_node_t *) yp_nil_node_create(parser, &parser->previous);
case YP_TOKEN_KEYWORD_REDO:
parser_lex(parser);
return (yp_node_t *) yp_redo_node_create(parser, &parser->previous);
case YP_TOKEN_KEYWORD_RETRY:
parser_lex(parser);
return (yp_node_t *) yp_retry_node_create(parser, &parser->previous);
case YP_TOKEN_KEYWORD_SELF:
parser_lex(parser);
return (yp_node_t *) yp_self_node_create(parser, &parser->previous);
case YP_TOKEN_KEYWORD_TRUE:
parser_lex(parser);
return (yp_node_t *) yp_true_node_create(parser, &parser->previous);
// Parse an `until predicate [do] ... end` loop.
case YP_TOKEN_KEYWORD_UNTIL: {
    // Push before lexing the keyword so `do` after the predicate lexes
    // as DO_LOOP rather than a block opener.
    yp_do_loop_stack_push(parser, true);
    parser_lex(parser);
    yp_token_t keyword = parser->previous;

    yp_node_t *predicate = parse_expression(parser, YP_BINDING_POWER_COMPOSITION, "Expected predicate expression after `until`.");
    yp_do_loop_stack_pop(parser);

    accept_any(parser, 3, YP_TOKEN_KEYWORD_DO_LOOP, YP_TOKEN_NEWLINE, YP_TOKEN_SEMICOLON);
    yp_statements_node_t *statements = NULL;

    if (!accept(parser, YP_TOKEN_KEYWORD_END)) {
        yp_accepts_block_stack_push(parser, true);
        statements = parse_statements(parser, YP_CONTEXT_UNTIL);
        yp_accepts_block_stack_pop(parser);
        accept_any(parser, 2, YP_TOKEN_NEWLINE, YP_TOKEN_SEMICOLON);
        expect(parser, YP_TOKEN_KEYWORD_END, "Expected `end` to close `until` statement.");
    }

    yp_until_node_t *until_node = yp_until_node_create(parser, &keyword, predicate, statements);
    // Extend the node's location to include the closing `end` keyword.
    if (parser->previous.type == YP_TOKEN_KEYWORD_END) {
        until_node->base.location.end = parser->previous.end;
    }

    return (yp_node_t *) until_node;
}
// Parse a `while predicate [do] ... end` loop (mirrors `until` above).
case YP_TOKEN_KEYWORD_WHILE: {
    yp_do_loop_stack_push(parser, true);
    parser_lex(parser);
    yp_token_t keyword = parser->previous;

    yp_node_t *predicate = parse_expression(parser, YP_BINDING_POWER_COMPOSITION, "Expected predicate expression after `while`.");
    yp_do_loop_stack_pop(parser);

    accept_any(parser, 3, YP_TOKEN_KEYWORD_DO_LOOP, YP_TOKEN_NEWLINE, YP_TOKEN_SEMICOLON);
    yp_statements_node_t *statements = NULL;

    if (!accept(parser, YP_TOKEN_KEYWORD_END)) {
        yp_accepts_block_stack_push(parser, true);
        statements = parse_statements(parser, YP_CONTEXT_WHILE);
        yp_accepts_block_stack_pop(parser);
        accept_any(parser, 2, YP_TOKEN_NEWLINE, YP_TOKEN_SEMICOLON);
        expect(parser, YP_TOKEN_KEYWORD_END, "Expected `end` to close `while` statement.");
    }

    yp_while_node_t *while_node = yp_while_node_create(parser, &keyword, predicate, statements);
    // Extend the node's location to include the closing `end` keyword.
    if (parser->previous.type == YP_TOKEN_KEYWORD_END) {
        while_node->base.location.end = parser->previous.end;
    }

    return (yp_node_t *) while_node;
}
// Parse a `%i(...)` list: an array of non-interpolated symbols separated
// by whitespace.
case YP_TOKEN_PERCENT_LOWER_I: {
    parser_lex(parser);
    yp_array_node_t *array = yp_array_node_create(parser, &parser->previous);

    while (!match_any_type_p(parser, 2, YP_TOKEN_STRING_END, YP_TOKEN_EOF)) {
        // The first element may be preceded by optional whitespace; every
        // subsequent element requires a separator.
        if (yp_array_node_size(array) == 0) {
            accept(parser, YP_TOKEN_WORDS_SEP);
        } else {
            expect(parser, YP_TOKEN_WORDS_SEP, "Expected a separator for the symbols in a `%i` list.");
            if (match_type_p(parser, YP_TOKEN_STRING_END)) break;
        }
        // Trailing whitespace before the closing delimiter ends the list.
        if (match_type_p(parser, YP_TOKEN_STRING_END)) break;

        expect(parser, YP_TOKEN_STRING_CONTENT, "Expected a symbol in a `%i` list.");

        // The symbol has no explicit `:`/quote delimiters of its own.
        yp_token_t opening = not_provided(parser);
        yp_token_t closing = not_provided(parser);

        yp_node_t *symbol = (yp_node_t *) yp_symbol_node_create_and_unescape(parser, &opening, &parser->previous, &closing, YP_UNESCAPE_MINIMAL);
        yp_array_node_elements_append(array, symbol);
    }

    expect(parser, YP_TOKEN_STRING_END, "Expected a closing delimiter for a `%i` list.");
    yp_array_node_close_set(array, &parser->previous);

    return (yp_node_t *) array;
}
// Parse a `%I(...)` list: an array of symbols which may contain
// interpolation (`#{}`) and embedded variables (`#@foo` etc.). Elements
// are accumulated in `current`, which is either a plain symbol node or an
// interpolated symbol node, and flushed to the array on each separator.
case YP_TOKEN_PERCENT_UPPER_I: {
    parser_lex(parser);
    yp_array_node_t *array = yp_array_node_create(parser, &parser->previous);

    // This is the current node that we are parsing that will be added to the
    // list of elements.
    yp_node_t *current = NULL;

    while (!match_any_type_p(parser, 2, YP_TOKEN_STRING_END, YP_TOKEN_EOF)) {
        switch (parser->current.type) {
            case YP_TOKEN_WORDS_SEP: {
                if (current == NULL) {
                    // If we hit a separator before we have any content, then we don't
                    // need to do anything.
                } else {
                    // If we hit a separator after we've hit content, then we need to
                    // append that content to the list and reset the current node.
                    yp_array_node_elements_append(array, current);
                    current = NULL;
                }

                parser_lex(parser);
                break;
            }
            case YP_TOKEN_STRING_CONTENT: {
                yp_token_t opening = not_provided(parser);
                yp_token_t closing = not_provided(parser);

                if (current == NULL) {
                    // If we hit content and the current node is NULL, then this is
                    // the first string content we've seen. In that case we're going
                    // to create a new string node and set that to the current.
                    parser_lex(parser);
                    current = (yp_node_t *) yp_symbol_node_create_and_unescape(parser, &opening, &parser->previous, &closing, YP_UNESCAPE_ALL);
                } else if (current->type == YP_NODE_INTERPOLATED_SYMBOL_NODE) {
                    // If we hit string content and the current node is an
                    // interpolated string, then we need to append the string content
                    // to the list of child nodes.
                    yp_node_t *part = parse_string_part(parser);
                    yp_interpolated_symbol_node_append((yp_interpolated_symbol_node_t *) current, part);
                } else {
                    assert(false && "unreachable");
                }

                break;
            }
            case YP_TOKEN_EMBVAR: {
                bool start_location_set = false;
                if (current == NULL) {
                    // If we hit an embedded variable and the current node is NULL,
                    // then this is the start of a new string. We'll set the current
                    // node to a new interpolated string.
                    yp_token_t opening = not_provided(parser);
                    yp_token_t closing = not_provided(parser);
                    current = (yp_node_t *) yp_interpolated_symbol_node_create(parser, &opening, NULL, &closing);
                } else if (current->type == YP_NODE_SYMBOL_NODE) {
                    // If we hit an embedded variable and the current node is a string
                    // node, then we'll convert the current into an interpolated
                    // string and add the string node to the list of parts.
                    yp_token_t opening = not_provided(parser);
                    yp_token_t closing = not_provided(parser);
                    yp_interpolated_symbol_node_t *interpolated = yp_interpolated_symbol_node_create(parser, &opening, NULL, &closing);

                    current = (yp_node_t *) yp_symbol_node_to_string_node(parser, (yp_symbol_node_t *) current);
                    yp_interpolated_symbol_node_append(interpolated, current);
                    interpolated->base.location.start = current->location.start;
                    start_location_set = true;
                    current = (yp_node_t *) interpolated;
                } else {
                    // If we hit an embedded variable and the current node is an
                    // interpolated string, then we'll just add the embedded variable.
                }

                yp_node_t *part = parse_string_part(parser);
                yp_interpolated_symbol_node_append((yp_interpolated_symbol_node_t *) current, part);
                // Only backfill the start location if it wasn't already
                // anchored to the converted symbol's start above.
                if (!start_location_set) {
                    current->location.start = part->location.start;
                }
                break;
            }
            case YP_TOKEN_EMBEXPR_BEGIN: {
                bool start_location_set = false;
                if (current == NULL) {
                    // If we hit an embedded expression and the current node is NULL,
                    // then this is the start of a new string. We'll set the current
                    // node to a new interpolated string.
                    yp_token_t opening = not_provided(parser);
                    yp_token_t closing = not_provided(parser);
                    current = (yp_node_t *) yp_interpolated_symbol_node_create(parser, &opening, NULL, &closing);
                } else if (current->type == YP_NODE_SYMBOL_NODE) {
                    // If we hit an embedded expression and the current node is a
                    // string node, then we'll convert the current into an
                    // interpolated string and add the string node to the list of
                    // parts.
                    yp_token_t opening = not_provided(parser);
                    yp_token_t closing = not_provided(parser);
                    yp_interpolated_symbol_node_t *interpolated = yp_interpolated_symbol_node_create(parser, &opening, NULL, &closing);

                    current = (yp_node_t *) yp_symbol_node_to_string_node(parser, (yp_symbol_node_t *) current);
                    yp_interpolated_symbol_node_append(interpolated, current);
                    interpolated->base.location.start = current->location.start;
                    start_location_set = true;
                    current = (yp_node_t *) interpolated;
                } else if (current->type == YP_NODE_INTERPOLATED_SYMBOL_NODE) {
                    // If we hit an embedded expression and the current node is an
                    // interpolated string, then we'll just continue on.
                } else {
                    assert(false && "unreachable");
                }

                yp_node_t *part = parse_string_part(parser);
                yp_interpolated_symbol_node_append((yp_interpolated_symbol_node_t *) current, part);
                // Only backfill the start location if it wasn't already
                // anchored to the converted symbol's start above.
                if (!start_location_set) {
                    current->location.start = part->location.start;
                }
                break;
            }
            default:
                expect(parser, YP_TOKEN_STRING_CONTENT, "Expected a symbol in a `%I` list.");
                parser_lex(parser);
                break;
        }
    }

    // If we have a current node, then we need to append it to the list.
    if (current) {
        yp_array_node_elements_append(array, current);
    }

    expect(parser, YP_TOKEN_STRING_END, "Expected a closing delimiter for a `%I` list.");
    yp_array_node_close_set(array, &parser->previous);

    return (yp_node_t *) array;
}
// Parse a `%w(...)` list: an array of non-interpolated strings separated
// by whitespace.
case YP_TOKEN_PERCENT_LOWER_W: {
    parser_lex(parser);
    yp_array_node_t *array = yp_array_node_create(parser, &parser->previous);

    // skip all leading whitespaces
    accept(parser, YP_TOKEN_WORDS_SEP);

    while (!match_any_type_p(parser, 2, YP_TOKEN_STRING_END, YP_TOKEN_EOF)) {
        // The first element may be preceded by optional whitespace; every
        // subsequent element requires a separator.
        if (yp_array_node_size(array) == 0) {
            accept(parser, YP_TOKEN_WORDS_SEP);
        } else {
            expect(parser, YP_TOKEN_WORDS_SEP, "Expected a separator for the strings in a `%w` list.");
            // Trailing whitespace before the closing delimiter ends the list.
            if (match_type_p(parser, YP_TOKEN_STRING_END)) break;
        }

        expect(parser, YP_TOKEN_STRING_CONTENT, "Expected a string in a `%w` list.");

        // The string has no explicit quote delimiters of its own.
        yp_token_t opening = not_provided(parser);
        yp_token_t closing = not_provided(parser);

        yp_node_t *string = (yp_node_t *) yp_string_node_create_and_unescape(parser, &opening, &parser->previous, &closing, YP_UNESCAPE_MINIMAL);
        yp_array_node_elements_append(array, string);
    }

    expect(parser, YP_TOKEN_STRING_END, "Expected a closing delimiter for a `%w` list.");
    yp_array_node_close_set(array, &parser->previous);

    return (yp_node_t *) array;
}
case YP_TOKEN_PERCENT_UPPER_W: {
parser_lex(parser);
yp_array_node_t *array = yp_array_node_create(parser, &parser->previous);
// This is the current node that we are parsing that will be added to the
// list of elements.
yp_node_t *current = NULL;
while (!match_any_type_p(parser, 2, YP_TOKEN_STRING_END, YP_TOKEN_EOF)) {
switch (parser->current.type) {
case YP_TOKEN_WORDS_SEP: {
if (current == NULL) {
// If we hit a separator before we have any content, then we don't
// need to do anything.
} else {
// If we hit a separator after we've hit content, then we need to
// append that content to the list and reset the current node.
yp_array_node_elements_append(array, current);
current = NULL;
}
parser_lex(parser);
break;
}
case YP_TOKEN_STRING_CONTENT: {
if (current == NULL) {
// If we hit content and the current node is NULL, then this is
// the first string content we've seen. In that case we're going
// to create a new string node and set that to the current.
current = parse_string_part(parser);
} else if (current->type == YP_NODE_INTERPOLATED_STRING_NODE) {
// If we hit string content and the current node is an
// interpolated string, then we need to append the string content
// to the list of child nodes.
yp_node_t *part = parse_string_part(parser);
yp_interpolated_string_node_append((yp_interpolated_string_node_t *) current, part);
} else {
assert(false && "unreachable");
}
break;
}
case YP_TOKEN_EMBVAR: {
if (current == NULL) {
// If we hit an embedded variable and the current node is NULL,
// then this is the start of a new string. We'll set the current
// node to a new interpolated string.
yp_token_t opening = not_provided(parser);
yp_token_t closing = not_provided(parser);
current = (yp_node_t *) yp_interpolated_string_node_create(parser, &opening, NULL, &closing);
} else if (current->type == YP_NODE_STRING_NODE) {
// If we hit an embedded variable and the current node is a string
// node, then we'll convert the current into an interpolated
// string and add the string node to the list of parts.
yp_token_t opening = not_provided(parser);
yp_token_t closing = not_provided(parser);
yp_interpolated_string_node_t *interpolated = yp_interpolated_string_node_create(parser, &opening, NULL, &closing);
yp_interpolated_string_node_append(interpolated, current);
current = (yp_node_t *) interpolated;
} else {
// If we hit an embedded variable and the current node is an
// interpolated string, then we'll just add the embedded variable.
}
yp_node_t *part = parse_string_part(parser);
yp_interpolated_string_node_append((yp_interpolated_string_node_t *) current, part);
break;
}
case YP_TOKEN_EMBEXPR_BEGIN: {
if (current == NULL) {
// If we hit an embedded expression and the current node is NULL,
// then this is the start of a new string. We'll set the current
// node to a new interpolated string.
yp_token_t opening = not_provided(parser);
yp_token_t closing = not_provided(parser);
current = (yp_node_t *) yp_interpolated_string_node_create(parser, &opening, NULL, &closing);
} else if (current->type == YP_NODE_STRING_NODE) {
// If we hit an embedded expression and the current node is a
// string node, then we'll convert the current into an
// interpolated string and add the string node to the list of
// parts.
yp_token_t opening = not_provided(parser);
yp_token_t closing = not_provided(parser);
yp_interpolated_string_node_t *interpolated = yp_interpolated_string_node_create(parser, &opening, NULL, &closing);
yp_interpolated_string_node_append(interpolated, current);
current = (yp_node_t *) interpolated;
} else if (current->type == YP_NODE_INTERPOLATED_STRING_NODE) {
// If we hit an embedded expression and the current node is an
// interpolated string, then we'll just continue on.
} else {
assert(false && "unreachable");
}
yp_node_t *part = parse_string_part(parser);
yp_interpolated_string_node_append((yp_interpolated_string_node_t *) current, part);
break;
}
default:
expect(parser, YP_TOKEN_STRING_CONTENT, "Expected a string in a `%W` list.");
parser_lex(parser);
break;
}
}
// If we have a current node, then we need to append it to the list.
if (current) {
yp_array_node_elements_append(array, current);
}
expect(parser, YP_TOKEN_STRING_END, "Expected a closing delimiter for a `%W` list.");
yp_array_node_close_set(array, &parser->previous);
return (yp_node_t *) array;
}
case YP_TOKEN_RATIONAL_NUMBER:
parser_lex(parser);
return (yp_node_t *) yp_rational_node_create(parser, &parser->previous);
case YP_TOKEN_REGEXP_BEGIN: {
yp_token_t opening = parser->current;
parser_lex(parser);
if (match_type_p(parser, YP_TOKEN_REGEXP_END)) {
// If we get here, then we have an end immediately after a start. In
// that case we'll create an empty content token and return an
// uninterpolated regular expression.
yp_token_t content = (yp_token_t) {
.type = YP_TOKEN_STRING_CONTENT,
.start = parser->previous.end,
.end = parser->previous.end
};
parser_lex(parser);
return (yp_node_t *) yp_regular_expression_node_create_and_unescape(parser, &opening, &content, &parser->previous, YP_UNESCAPE_ALL);
}
yp_interpolated_regular_expression_node_t *node;
if (match_type_p(parser, YP_TOKEN_STRING_CONTENT)) {
// In this case we've hit string content so we know the regular
// expression at least has something in it. We'll need to check if the
// following token is the end (in which case we can return a plain
// regular expression) or if it's not then it has interpolation.
yp_token_t content = parser->current;
parser_lex(parser);
// If we hit an end, then we can create a regular expression node
// without interpolation, which can be represented more succinctly and
// more easily compiled.
if (accept(parser, YP_TOKEN_REGEXP_END)) {
return (yp_node_t *) yp_regular_expression_node_create_and_unescape(parser, &opening, &content, &parser->previous, YP_UNESCAPE_ALL);
}
// If we get here, then we have interpolation so we'll need to create
// a regular expression node with interpolation.
node = yp_interpolated_regular_expression_node_create(parser, &opening);
yp_token_t opening = not_provided(parser);
yp_token_t closing = not_provided(parser);
yp_node_t *part = (yp_node_t *) yp_string_node_create_and_unescape(parser, &opening, &parser->previous, &closing, YP_UNESCAPE_ALL);
yp_interpolated_regular_expression_node_append(node, part);
} else {
// If the first part of the body of the regular expression is not a
// string content, then we have interpolation and we need to create an
// interpolated regular expression node.
node = yp_interpolated_regular_expression_node_create(parser, &opening);
}
// Now that we're here and we have interpolation, we'll parse all of the
// parts into the list.
while (!match_any_type_p(parser, 2, YP_TOKEN_REGEXP_END, YP_TOKEN_EOF)) {
yp_node_t *part = parse_string_part(parser);
if (part != NULL) {
yp_interpolated_regular_expression_node_append(node, part);
}
}
expect(parser, YP_TOKEN_REGEXP_END, "Expected a closing delimiter for a regular expression.");
yp_interpolated_regular_expression_node_closing_set(node, &parser->previous);
return (yp_node_t *) node;
}
case YP_TOKEN_BACKTICK:
case YP_TOKEN_PERCENT_LOWER_X: {
parser_lex(parser);
yp_token_t opening = parser->previous;
// When we get here, we don't know if this string is going to have
// interpolation or not, even though it is allowed. Still, we want to be
// able to return a string node without interpolation if we can since
// it'll be faster.
if (match_type_p(parser, YP_TOKEN_STRING_END)) {
// If we get here, then we have an end immediately after a start. In
// that case we'll create an empty content token and return an
// uninterpolated string.
yp_token_t content = (yp_token_t) {
.type = YP_TOKEN_STRING_CONTENT,
.start = parser->previous.end,
.end = parser->previous.end
};
parser_lex(parser);
return (yp_node_t *) yp_xstring_node_create(parser, &opening, &content, &parser->previous);
}
yp_interpolated_x_string_node_t *node;
if (match_type_p(parser, YP_TOKEN_STRING_CONTENT)) {
// In this case we've hit string content so we know the string at least
// has something in it. We'll need to check if the following token is
// the end (in which case we can return a plain string) or if it's not
// then it has interpolation.
yp_token_t content = parser->current;
parser_lex(parser);
if (accept(parser, YP_TOKEN_STRING_END)) {
return (yp_node_t *) yp_xstring_node_create_and_unescape(parser, &opening, &content, &parser->previous);
}
// If we get here, then we have interpolation so we'll need to create
// a string node with interpolation.
node = yp_interpolated_xstring_node_create(parser, &opening, &opening);
yp_token_t opening = not_provided(parser);
yp_token_t closing = not_provided(parser);
yp_node_t *part = (yp_node_t *) yp_string_node_create_and_unescape(parser, &opening, &parser->previous, &closing, YP_UNESCAPE_ALL);
yp_interpolated_xstring_node_append(node, part);
} else {
// If the first part of the body of the string is not a string content,
// then we have interpolation and we need to create an interpolated
// string node.
node = yp_interpolated_xstring_node_create(parser, &opening, &opening);
}
while (!match_any_type_p(parser, 2, YP_TOKEN_STRING_END, YP_TOKEN_EOF)) {
yp_node_t *part = parse_string_part(parser);
if (part != NULL) {
yp_interpolated_xstring_node_append(node, part);
}
}
expect(parser, YP_TOKEN_STRING_END, "Expected a closing delimiter for an xstring.");
yp_interpolated_xstring_node_closing_set(node, &parser->previous);
return (yp_node_t *) node;
}
case YP_TOKEN_USTAR: {
parser_lex(parser);
// * operators at the beginning of expressions are only valid in the
// context of a multiple assignment. We enforce that here. We'll still lex
// past it though and create a missing node place.
if (binding_power != YP_BINDING_POWER_STATEMENT) {
return (yp_node_t *) yp_missing_node_create(parser, parser->previous.start, parser->previous.end);
}
yp_token_t operator = parser->previous;
yp_node_t *name = NULL;
if (token_begins_expression_p(parser->current.type)) {
name = parse_expression(parser, YP_BINDING_POWER_INDEX, "Expected an expression after '*'.");
}
yp_node_t *splat = (yp_node_t *) yp_splat_node_create(parser, &operator, name);
return parse_targets(parser, splat, YP_BINDING_POWER_INDEX);
}
case YP_TOKEN_BANG: {
parser_lex(parser);
yp_token_t operator = parser->previous;
yp_node_t *receiver = parse_expression(parser, yp_binding_powers[parser->previous.type].right, "Expected a receiver after unary !.");
yp_call_node_t *node = yp_call_node_unary_create(parser, &operator, receiver, "!");
return (yp_node_t *) node;
}
case YP_TOKEN_TILDE: {
parser_lex(parser);
yp_token_t operator = parser->previous;
yp_node_t *receiver = parse_expression(parser, yp_binding_powers[parser->previous.type].right, "Expected a receiver after unary ~.");
yp_call_node_t *node = yp_call_node_unary_create(parser, &operator, receiver, "~");
return (yp_node_t *) node;
}
case YP_TOKEN_UMINUS:
case YP_TOKEN_UMINUS_NUM: {
parser_lex(parser);
yp_token_t operator = parser->previous;
yp_node_t *receiver = parse_expression(parser, yp_binding_powers[parser->previous.type].right, "Expected a receiver after unary -.");
yp_call_node_t *node = yp_call_node_unary_create(parser, &operator, receiver, "-@");
return (yp_node_t *) node;
}
case YP_TOKEN_MINUS_GREATER: {
int previous_lambda_enclosure_nesting = parser->lambda_enclosure_nesting;
parser->lambda_enclosure_nesting = parser->enclosure_nesting;
yp_accepts_block_stack_push(parser, true);
parser_lex(parser);
yp_token_t opening = parser->previous;
yp_parser_scope_push(parser, false);
yp_block_parameters_node_t *params;
switch (parser->current.type) {
case YP_TOKEN_PARENTHESIS_LEFT: {
yp_token_t block_parameters_opening = parser->current;
parser_lex(parser);
if (match_type_p(parser, YP_TOKEN_PARENTHESIS_RIGHT)) {
params = yp_block_parameters_node_create(parser, NULL, &block_parameters_opening);
} else {
params = parse_block_parameters(parser, false, &block_parameters_opening, true);
}
accept(parser, YP_TOKEN_NEWLINE);
expect(parser, YP_TOKEN_PARENTHESIS_RIGHT, "Expected ')' after left parenthesis.");
yp_block_parameters_node_closing_set(params, &parser->previous);
break;
}
case YP_CASE_PARAMETER: {
yp_token_t opening = not_provided(parser);
params = parse_block_parameters(parser, false, &opening, true);
break;
}
default: {
params = NULL;
break;
}
}
yp_node_t *body = NULL;
parser->lambda_enclosure_nesting = previous_lambda_enclosure_nesting;
if (accept(parser, YP_TOKEN_LAMBDA_BEGIN)) {
if (!accept(parser, YP_TOKEN_BRACE_RIGHT)) {
body = (yp_node_t *) parse_statements(parser, YP_CONTEXT_LAMBDA_BRACES);
expect(parser, YP_TOKEN_BRACE_RIGHT, "Expecting '}' to close lambda block.");
}
} else {
expect(parser, YP_TOKEN_KEYWORD_DO, "Expected a 'do' keyword or a '{' to open lambda block.");
if (!match_any_type_p(parser, 3, YP_TOKEN_KEYWORD_END, YP_TOKEN_KEYWORD_RESCUE, YP_TOKEN_KEYWORD_ENSURE)) {
body = (yp_node_t *) parse_statements(parser, YP_CONTEXT_LAMBDA_DO_END);
}
if (match_any_type_p(parser, 2, YP_TOKEN_KEYWORD_RESCUE, YP_TOKEN_KEYWORD_ENSURE)) {
assert(body == NULL || body->type == YP_NODE_STATEMENTS_NODE);
body = (yp_node_t *) parse_rescues_as_begin(parser, (yp_statements_node_t *) body);
}
expect(parser, YP_TOKEN_KEYWORD_END, "Expecting 'end' keyword to close lambda block.");
}
yp_constant_id_list_t locals = parser->current_scope->locals;
yp_parser_scope_pop(parser);
yp_accepts_block_stack_pop(parser);
return (yp_node_t *) yp_lambda_node_create(parser, &locals, &opening, params, body, &parser->previous);
}
case YP_TOKEN_UPLUS: {
parser_lex(parser);
yp_token_t operator = parser->previous;
yp_node_t *receiver = parse_expression(parser, yp_binding_powers[parser->previous.type].right, "Expected a receiver after unary +.");
yp_call_node_t *node = yp_call_node_unary_create(parser, &operator, receiver, "+@");
return (yp_node_t *) node;
}
case YP_TOKEN_STRING_BEGIN: {
assert(parser->lex_modes.current->mode == YP_LEX_STRING);
bool lex_interpolation = parser->lex_modes.current->as.string.interpolation;
yp_token_t opening = parser->current;
parser_lex(parser);
yp_node_t *node;
if (accept(parser, YP_TOKEN_STRING_END)) {
// If we get here, then we have an end immediately after a start. In
// that case we'll create an empty content token and return an
// uninterpolated string.
yp_token_t content = (yp_token_t) {
.type = YP_TOKEN_STRING_CONTENT,
.start = parser->previous.start,
.end = parser->previous.start
};
node = (yp_node_t *) yp_string_node_create_and_unescape(parser, &opening, &content, &parser->previous, YP_UNESCAPE_NONE);
} else if (accept(parser, YP_TOKEN_LABEL_END)) {
// If we get here, then we have an end of a label immediately after a
// start. In that case we'll create an empty symbol node.
yp_token_t opening = not_provided(parser);
yp_token_t content = (yp_token_t) {
.type = YP_TOKEN_STRING_CONTENT,
.start = parser->previous.start,
.end = parser->previous.start
};
return (yp_node_t *) yp_symbol_node_create(parser, &opening, &content, &parser->previous);
} else if (!lex_interpolation) {
// If we don't accept interpolation then we expect the string to start
// with a single string content node.
expect(parser, YP_TOKEN_STRING_CONTENT, "Expected string content after opening delimiter.");
yp_token_t content = parser->previous;
// It is unfortunately possible to have multiple string content nodes in
// a row in the case that there's heredoc content in the middle of the
// string, like this cursed example:
//
// <<-END+'b
// a
// END
// c'+'d'
//
// In that case we need to switch to an interpolated string to be able
// to contain all of the parts.
if (match_type_p(parser, YP_TOKEN_STRING_CONTENT)) {
yp_node_list_t parts = YP_EMPTY_NODE_LIST;
yp_token_t delimiters = not_provided(parser);
yp_node_t *part = (yp_node_t *) yp_string_node_create_and_unescape(parser, &delimiters, &content, &delimiters, YP_UNESCAPE_MINIMAL);
yp_node_list_append(&parts, part);
while (accept(parser, YP_TOKEN_STRING_CONTENT)) {
part = (yp_node_t *) yp_string_node_create_and_unescape(parser, &delimiters, &parser->previous, &delimiters, YP_UNESCAPE_MINIMAL);
yp_node_list_append(&parts, part);
}
expect(parser, YP_TOKEN_STRING_END, "Expected a closing delimiter for a string literal.");
return (yp_node_t *) yp_interpolated_string_node_create(parser, &opening, &parts, &parser->previous);
}
if (accept(parser, YP_TOKEN_LABEL_END)) {
return (yp_node_t *) yp_symbol_node_create_and_unescape(parser, &opening, &content, &parser->previous, YP_UNESCAPE_ALL);
}
expect(parser, YP_TOKEN_STRING_END, "Expected a closing delimiter for a string literal.");
node = (yp_node_t *) yp_string_node_create_and_unescape(parser, &opening, &content, &parser->previous, YP_UNESCAPE_MINIMAL);
} else if (match_type_p(parser, YP_TOKEN_STRING_CONTENT)) {
// In this case we've hit string content so we know the string at
// least has something in it. We'll need to check if the following
// token is the end (in which case we can return a plain string) or if
// it's not then it has interpolation.
yp_token_t content = parser->current;
parser_lex(parser);
if (accept(parser, YP_TOKEN_STRING_END)) {
node = (yp_node_t *) yp_string_node_create_and_unescape(parser, &opening, &content, &parser->previous, YP_UNESCAPE_ALL);
} else if (accept(parser, YP_TOKEN_LABEL_END)) {
return (yp_node_t *) yp_symbol_node_create_and_unescape(parser, &opening, &content, &parser->previous, YP_UNESCAPE_ALL);
} else {
// If we get here, then we have interpolation so we'll need to create
// a string or symbol node with interpolation.
yp_node_list_t parts = YP_EMPTY_NODE_LIST;
yp_token_t string_opening = not_provided(parser);
yp_token_t string_closing = not_provided(parser);
yp_node_t *part = (yp_node_t *) yp_string_node_create_and_unescape(parser, &string_opening, &parser->previous, &string_closing, YP_UNESCAPE_ALL);
yp_node_list_append(&parts, part);
while (!match_any_type_p(parser, 3, YP_TOKEN_STRING_END, YP_TOKEN_LABEL_END, YP_TOKEN_EOF)) {
yp_node_t *part = parse_string_part(parser);
if (part != NULL) yp_node_list_append(&parts, part);
}
if (accept(parser, YP_TOKEN_LABEL_END)) {
return (yp_node_t *) yp_interpolated_symbol_node_create(parser, &opening, &parts, &parser->previous);
}
expect(parser, YP_TOKEN_STRING_END, "Expected a closing delimiter for an interpolated string.");
node = (yp_node_t *) yp_interpolated_string_node_create(parser, &opening, &parts, &parser->previous);
}
} else {
// If we get here, then the first part of the string is not plain string
// content, in which case we need to parse the string as an interpolated
// string.
yp_node_list_t parts = YP_EMPTY_NODE_LIST;
while (!match_any_type_p(parser, 3, YP_TOKEN_STRING_END, YP_TOKEN_LABEL_END, YP_TOKEN_EOF)) {
yp_node_t *part = parse_string_part(parser);
if (part != NULL) yp_node_list_append(&parts, part);
}
if (accept(parser, YP_TOKEN_LABEL_END)) {
return (yp_node_t *) yp_interpolated_symbol_node_create(parser, &opening, &parts, &parser->previous);
}
expect(parser, YP_TOKEN_STRING_END, "Expected a closing delimiter for an interpolated string.");
node = (yp_node_t *) yp_interpolated_string_node_create(parser, &opening, &parts, &parser->previous);
}
// If there's a string immediately following this string, then it's a
// concatenatation. In this case we'll parse the next string and create a
// node in the tree that concatenates the two strings.
if (parser->current.type == YP_TOKEN_STRING_BEGIN) {
return (yp_node_t *) yp_string_concat_node_create(
parser,
node,
parse_expression(parser, YP_BINDING_POWER_CALL, "Expected string on the right side of concatenation.")
);
} else {
return node;
}
}
case YP_TOKEN_SYMBOL_BEGIN: {
yp_lex_mode_t lex_mode = *parser->lex_modes.current;
parser_lex(parser);
return parse_symbol(parser, &lex_mode, YP_LEX_STATE_END);
}
default:
if (context_recoverable(parser, &parser->current)) {
parser->recovering = true;
}
return (yp_node_t *) yp_missing_node_create(parser, parser->previous.start, parser->previous.end);
}
}
// Parse the right-hand side of an assignment. When the assignment occurs at
// statement level, a comma after the first expression makes the value an
// implicit array (e.g. `a = 1, 2, 3`), so we keep consuming comma-separated
// starred expressions until either no comma follows or a missing element
// (parse error recovery) terminates the list.
static inline yp_node_t *
parse_assignment_value(yp_parser_t *parser, yp_binding_power_t previous_binding_power, yp_binding_power_t binding_power, const char *message) {
    yp_node_t *result = parse_starred_expression(parser, binding_power, message);

    // Only a statement-level assignment may collect multiple values; anywhere
    // else the first expression is the whole value.
    if (previous_binding_power != YP_BINDING_POWER_STATEMENT || !accept(parser, YP_TOKEN_COMMA)) {
        return result;
    }

    // We saw a trailing comma, so wrap everything parsed so far into an
    // implicit (delimiter-less) array node.
    yp_token_t anonymous = not_provided(parser);
    yp_array_node_t *elements = yp_array_node_create(parser, &anonymous);
    yp_array_node_elements_append(elements, result);

    while (true) {
        yp_node_t *element = parse_starred_expression(parser, binding_power, "Expected an element for the array.");
        yp_array_node_elements_append(elements, element);

        // A missing node means recovery already fired; stop rather than loop
        // on bad input. Otherwise continue only while commas keep coming.
        if (element->type == YP_NODE_MISSING_NODE) break;
        if (!accept(parser, YP_TOKEN_COMMA)) break;
    }

    return (yp_node_t *) elements;
}
static inline yp_node_t *
parse_expression_infix(yp_parser_t *parser, yp_node_t *node, yp_binding_power_t previous_binding_power, yp_binding_power_t binding_power) {
yp_token_t token = parser->current;
switch (token.type) {
case YP_TOKEN_EQUAL: {
switch (node->type) {
case YP_NODE_CALL_NODE: {
// If we have no arguments to the call node and we need this to be a
// target then this is either a method call or a local variable write.
// This _must_ happen before the value is parsed because it could be
// referenced in the value.
yp_call_node_t *call_node = (yp_call_node_t *) node;
if (yp_call_node_vcall_p(call_node)) {
yp_parser_local_add_location(parser, call_node->message_loc.start, call_node->message_loc.end);
}
}
/* fallthrough */
case YP_CASE_WRITABLE: {
parser_lex(parser);
yp_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, "Expected a value after =.");
return parse_target(parser, node, &token, value);
}
case YP_NODE_SPLAT_NODE: {
yp_splat_node_t *splat_node = (yp_splat_node_t *) node;
switch (splat_node->expression->type) {
case YP_CASE_WRITABLE: {
parser_lex(parser);
yp_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, "Expected a value after =.");
return parse_target(parser, (yp_node_t *) splat_node, &token, value);
}
default: {}
}
}
/* fallthrough */
default:
parser_lex(parser);
// In this case we have an = sign, but we don't know what it's for. We
// need to treat it as an error. For now, we'll mark it as an error
// and just skip right past it.
yp_diagnostic_list_append(&parser->error_list, token.start, token.end, "Unexpected `='.");
return node;
}
}
case YP_TOKEN_AMPERSAND_AMPERSAND_EQUAL: {
switch (node->type) {
case YP_NODE_BACK_REFERENCE_READ_NODE:
case YP_NODE_NUMBERED_REFERENCE_READ_NODE:
yp_diagnostic_list_append(&parser->error_list, node->location.start, node->location.end, "Can't set variable");
/* fallthrough */
case YP_NODE_GLOBAL_VARIABLE_READ_NODE: {
parser_lex(parser);
yp_node_t *value = parse_expression(parser, binding_power, "Expected a value after &&=");
yp_node_t *result = (yp_node_t *) yp_global_variable_operator_and_write_node_create(parser, node, &token, value);
yp_node_destroy(parser, node);
return result;
}
case YP_NODE_CALL_NODE: {
yp_call_node_t *call_node = (yp_call_node_t *) node;
// If we have a vcall (a method with no arguments and no
// receiver that could have been a local variable) then we
// will transform it into a local variable write.
if (yp_call_node_vcall_p(call_node)) {
yp_location_t message_loc = call_node->message_loc;
yp_parser_local_add_location(parser, message_loc.start, message_loc.end);
if (token_is_numbered_parameter(message_loc.start, message_loc.end)) {
yp_diagnostic_list_append(&parser->error_list, message_loc.start, message_loc.end, "reserved for numbered parameter");
}
parser_lex(parser);
yp_node_t *value = parse_expression(parser, binding_power, "Expected a value after &&=");
yp_constant_id_t constant_id = yp_parser_constant_id_location(parser, message_loc.start, message_loc.end);
yp_node_t *result = (yp_node_t *) yp_local_variable_operator_and_write_node_create(parser, node, &token, value, constant_id);
yp_node_destroy(parser, node);
return result;
}
parser_lex(parser);
yp_token_t operator = not_provided(parser);
node = parse_target(parser, node, &operator, NULL);
yp_node_t *value = parse_expression(parser, binding_power, "Expected a value after &&=");
return (yp_node_t *) yp_call_operator_and_write_node_create(parser, call_node, &token, value);
}
case YP_NODE_CLASS_VARIABLE_READ_NODE: {
parser_lex(parser);
yp_node_t *value = parse_expression(parser, binding_power, "Expected a value after &&=");
yp_node_t *result = (yp_node_t *) yp_class_variable_operator_and_write_node_create(parser, node, &token, value);
yp_node_destroy(parser, node);
return result;
}
case YP_NODE_CONSTANT_PATH_NODE: {
parser_lex(parser);
yp_node_t *value = parse_expression(parser, binding_power, "Expected a value after &&=");
return (yp_node_t *) yp_constant_path_operator_and_write_node_create(parser, (yp_constant_path_node_t *) node, &token, value);
}
case YP_NODE_CONSTANT_READ_NODE: {
parser_lex(parser);
yp_node_t *value = parse_expression(parser, binding_power, "Expected a value after &&=");
yp_node_t *result = (yp_node_t *) yp_constant_operator_and_write_node_create(parser, node, &token, value);
yp_node_destroy(parser, node);
return result;
}
case YP_NODE_INSTANCE_VARIABLE_READ_NODE: {
parser_lex(parser);
yp_node_t *value = parse_expression(parser, binding_power, "Expected a value after &&=");
yp_node_t *result = (yp_node_t *) yp_instance_variable_operator_and_write_node_create(parser, node, &token, value);
yp_node_destroy(parser, node);
return result;
}
case YP_NODE_LOCAL_VARIABLE_READ_NODE: {
parser_lex(parser);
yp_node_t *value = parse_expression(parser, binding_power, "Expected a value after &&=");
yp_constant_id_t constant_id = ((yp_local_variable_read_node_t *) node)->constant_id;
yp_node_t *result = (yp_node_t *) yp_local_variable_operator_and_write_node_create(parser, node, &token, value, constant_id);
yp_node_destroy(parser, node);
return result;
}
case YP_NODE_MULTI_WRITE_NODE: {
parser_lex(parser);
yp_diagnostic_list_append(&parser->error_list, token.start, token.end, "Cannot use `&&=' on a multi-write.");
return node;
}
default:
parser_lex(parser);
// In this case we have an &&= sign, but we don't know what it's for.
// We need to treat it as an error. For now, we'll mark it as an error
// and just skip right past it.
yp_diagnostic_list_append(&parser->error_list, token.start, token.end, "Unexpected `&&='.");
return node;
}
}
case YP_TOKEN_PIPE_PIPE_EQUAL: {
switch (node->type) {
case YP_NODE_BACK_REFERENCE_READ_NODE:
case YP_NODE_NUMBERED_REFERENCE_READ_NODE:
yp_diagnostic_list_append(&parser->error_list, node->location.start, node->location.end, "Can't set variable");
/* fallthrough */
case YP_NODE_GLOBAL_VARIABLE_READ_NODE: {
parser_lex(parser);
yp_node_t *value = parse_expression(parser, binding_power, "Expected a value after ||=");
yp_node_t *result = (yp_node_t *) yp_global_variable_operator_or_write_node_create(parser, node, &token, value);
yp_node_destroy(parser, node);
return result;
}
case YP_NODE_CALL_NODE: {
yp_call_node_t *call_node = (yp_call_node_t *) node;
// If we have a vcall (a method with no arguments and no
// receiver that could have been a local variable) then we
// will transform it into a local variable write.
if (yp_call_node_vcall_p(call_node)) {
yp_location_t message_loc = call_node->message_loc;
yp_parser_local_add_location(parser, message_loc.start, message_loc.end);
if (token_is_numbered_parameter(message_loc.start, message_loc.end)) {
yp_diagnostic_list_append(&parser->error_list, message_loc.start, message_loc.end, "reserved for numbered parameter");
}
parser_lex(parser);
yp_node_t *value = parse_expression(parser, binding_power, "Expected a value after ||=");
yp_constant_id_t constant_id = yp_parser_constant_id_location(parser, message_loc.start, message_loc.end);
yp_node_t *result = (yp_node_t *) yp_local_variable_operator_or_write_node_create(parser, node, &token, value, constant_id);
yp_node_destroy(parser, node);
return result;
}
parser_lex(parser);
yp_token_t operator = not_provided(parser);
node = parse_target(parser, node, &operator, NULL);
yp_node_t *value = parse_expression(parser, binding_power, "Expected a value after ||=");
return (yp_node_t *) yp_call_operator_or_write_node_create(parser, call_node, &token, value);
}
case YP_NODE_CLASS_VARIABLE_READ_NODE: {
parser_lex(parser);
yp_node_t *value = parse_expression(parser, binding_power, "Expected a value after ||=");
yp_node_t *result = (yp_node_t *) yp_class_variable_operator_or_write_node_create(parser, node, &token, value);
yp_node_destroy(parser, node);
return result;
}
case YP_NODE_CONSTANT_PATH_NODE: {
parser_lex(parser);
yp_node_t *value = parse_expression(parser, binding_power, "Expected a value after ||=");
return (yp_node_t *) yp_constant_path_operator_or_write_node_create(parser, (yp_constant_path_node_t *) node, &token, value);
}
case YP_NODE_CONSTANT_READ_NODE: {
parser_lex(parser);
yp_node_t *value = parse_expression(parser, binding_power, "Expected a value after ||=");
yp_node_t *result = (yp_node_t *) yp_constant_operator_or_write_node_create(parser, node, &token, value);
yp_node_destroy(parser, node);
return result;
}
case YP_NODE_INSTANCE_VARIABLE_READ_NODE: {
parser_lex(parser);
yp_node_t *value = parse_expression(parser, binding_power, "Expected a value after ||=");
yp_node_t *result = (yp_node_t *) yp_instance_variable_operator_or_write_node_create(parser, node, &token, value);
yp_node_destroy(parser, node);
return result;
}
case YP_NODE_LOCAL_VARIABLE_READ_NODE: {
parser_lex(parser);
yp_node_t *value = parse_expression(parser, binding_power, "Expected a value after ||=");
yp_constant_id_t constant_id = ((yp_local_variable_read_node_t *) node)->constant_id;
yp_node_t *result = (yp_node_t *) yp_local_variable_operator_or_write_node_create(parser, node, &token, value, constant_id);
yp_node_destroy(parser, node);
return result;
}
case YP_NODE_MULTI_WRITE_NODE: {
parser_lex(parser);
yp_diagnostic_list_append(&parser->error_list, token.start, token.end, "Cannot use `||=' on a multi-write.");
return node;
}
default:
parser_lex(parser);
// In this case we have an ||= sign, but we don't know what it's for.
// We need to treat it as an error. For now, we'll mark it as an error
// and just skip right past it.
yp_diagnostic_list_append(&parser->error_list, token.start, token.end, "Unexpected `||='.");
return node;
}
}
case YP_TOKEN_AMPERSAND_EQUAL:
case YP_TOKEN_CARET_EQUAL:
case YP_TOKEN_GREATER_GREATER_EQUAL:
case YP_TOKEN_LESS_LESS_EQUAL:
case YP_TOKEN_MINUS_EQUAL:
case YP_TOKEN_PERCENT_EQUAL:
case YP_TOKEN_PIPE_EQUAL:
case YP_TOKEN_PLUS_EQUAL:
case YP_TOKEN_SLASH_EQUAL:
case YP_TOKEN_STAR_EQUAL:
case YP_TOKEN_STAR_STAR_EQUAL: {
switch (node->type) {
case YP_NODE_BACK_REFERENCE_READ_NODE:
case YP_NODE_NUMBERED_REFERENCE_READ_NODE:
yp_diagnostic_list_append(&parser->error_list, node->location.start, node->location.end, "Can't set variable");
/* fallthrough */
case YP_NODE_GLOBAL_VARIABLE_READ_NODE: {
parser_lex(parser);
yp_node_t *value = parse_expression(parser, binding_power, "Expected a value after the operator");
yp_node_t *result = (yp_node_t *) yp_global_variable_operator_write_node_create(parser, node, &token, value);
yp_node_destroy(parser, node);
return result;
}
case YP_NODE_CALL_NODE: {
yp_call_node_t *call_node = (yp_call_node_t *) node;
// If we have a vcall (a method with no arguments and no
// receiver that could have been a local variable) then we
// will transform it into a local variable write.
if (yp_call_node_vcall_p(call_node)) {
yp_location_t message_loc = call_node->message_loc;
yp_parser_local_add_location(parser, message_loc.start, message_loc.end);
if (token_is_numbered_parameter(message_loc.start, message_loc.end)) {
yp_diagnostic_list_append(&parser->error_list, message_loc.start, message_loc.end, "reserved for numbered parameter");
}
parser_lex(parser);
yp_node_t *value = parse_expression(parser, binding_power, "Expected a value after &&=");
yp_constant_id_t constant_id = yp_parser_constant_id_location(parser, message_loc.start, message_loc.end);
yp_node_t *result = (yp_node_t *) yp_local_variable_operator_write_node_create(parser, node, &token, value, constant_id);
yp_node_destroy(parser, node);
return result;
}
yp_token_t operator = not_provided(parser);
node = parse_target(parser, node, &operator, NULL);
parser_lex(parser);
yp_node_t *value = parse_expression(parser, binding_power, "Expected a value after the operator.");
return (yp_node_t *) yp_call_operator_write_node_create(parser, call_node, &token, value);
}
case YP_NODE_CLASS_VARIABLE_READ_NODE: {
parser_lex(parser);
yp_node_t *value = parse_expression(parser, binding_power, "Expected a value after the operator.");
yp_node_t *result = (yp_node_t *) yp_class_variable_operator_write_node_create(parser, node, &token, value);
yp_node_destroy(parser, node);
return result;
}
case YP_NODE_CONSTANT_PATH_NODE: {
parser_lex(parser);
yp_node_t *value = parse_expression(parser, binding_power, "Expected a value after the operator.");
return (yp_node_t *) yp_constant_path_operator_write_node_create(parser, (yp_constant_path_node_t *) node, &token, value);
}
case YP_NODE_CONSTANT_READ_NODE: {
parser_lex(parser);
yp_node_t *value = parse_expression(parser, binding_power, "Expected a value after the operator.");
yp_node_t *result = (yp_node_t *) yp_constant_operator_write_node_create(parser, node, &token, value);
yp_node_destroy(parser, node);
return result;
}
case YP_NODE_INSTANCE_VARIABLE_READ_NODE: {
parser_lex(parser);
yp_node_t *value = parse_expression(parser, binding_power, "Expected a value after the operator.");
yp_node_t *result = (yp_node_t *) yp_instance_variable_operator_write_node_create(parser, node, &token, value);
yp_node_destroy(parser, node);
return result;
}
case YP_NODE_LOCAL_VARIABLE_READ_NODE: {
parser_lex(parser);
yp_node_t *value = parse_expression(parser, binding_power, "Expected a value after the operator.");
yp_constant_id_t constant_id = ((yp_local_variable_read_node_t *) node)->constant_id;
yp_node_t *result = (yp_node_t *) yp_local_variable_operator_write_node_create(parser, node, &token, value, constant_id);
yp_node_destroy(parser, node);
return result;
}
case YP_NODE_MULTI_WRITE_NODE: {
parser_lex(parser);
yp_diagnostic_list_append(&parser->error_list, token.start, token.end, "Unexpected operator.");
return node;
}
default:
parser_lex(parser);
// In this case we have an operator but we don't know what it's for.
// We need to treat it as an error. For now, we'll mark it as an error
// and just skip right past it.
yp_diagnostic_list_append(&parser->error_list, parser->previous.start, parser->previous.end, "Unexpected operator.");
return node;
}
}
case YP_TOKEN_AMPERSAND_AMPERSAND:
case YP_TOKEN_KEYWORD_AND: {
parser_lex(parser);
yp_node_t *right = parse_expression(parser, binding_power, "Expected a value after the operator.");
return (yp_node_t *) yp_and_node_create(parser, node, &token, right);
}
case YP_TOKEN_KEYWORD_OR:
case YP_TOKEN_PIPE_PIPE: {
parser_lex(parser);
yp_node_t *right = parse_expression(parser, binding_power, "Expected a value after the operator.");
return (yp_node_t *) yp_or_node_create(parser, node, &token, right);
}
case YP_TOKEN_EQUAL_TILDE: {
// Note that we _must_ parse the value before adding the local variables
// in order to properly mirror the behavior of Ruby. For example,
//
// /(?<foo>bar)/ =~ foo
//
// In this case, `foo` should be a method call and not a local yet.
parser_lex(parser);
yp_node_t *argument = parse_expression(parser, binding_power, "Expected a value after the operator.");
// If the receiver of this =~ is a regular expression node, then we need
// to introduce local variables for it based on its named capture groups.
if (node->type == YP_NODE_REGULAR_EXPRESSION_NODE) {
yp_string_list_t named_captures;
yp_string_list_init(&named_captures);
yp_location_t *content_loc = &((yp_regular_expression_node_t *) node)->content_loc;
YP_ATTRIBUTE_UNUSED bool captured_group_names =
yp_regexp_named_capture_group_names(content_loc->start, (size_t) (content_loc->end - content_loc->start), &named_captures);
// We assert that the regex was successfully parsed
assert(captured_group_names);
for (size_t index = 0; index < named_captures.length; index++) {
yp_string_t *name = &named_captures.strings[index];
assert(name->type == YP_STRING_SHARED);
yp_parser_local_add_location(parser, name->as.shared.start, name->as.shared.end);
}
yp_string_list_free(&named_captures);
}
return (yp_node_t *) yp_call_node_binary_create(parser, node, &token, argument);
}
case YP_TOKEN_BANG_EQUAL:
case YP_TOKEN_BANG_TILDE:
case YP_TOKEN_EQUAL_EQUAL:
case YP_TOKEN_EQUAL_EQUAL_EQUAL:
case YP_TOKEN_LESS_EQUAL_GREATER:
case YP_TOKEN_GREATER:
case YP_TOKEN_GREATER_EQUAL:
case YP_TOKEN_LESS:
case YP_TOKEN_LESS_EQUAL:
case YP_TOKEN_CARET:
case YP_TOKEN_PIPE:
case YP_TOKEN_AMPERSAND:
case YP_TOKEN_GREATER_GREATER:
case YP_TOKEN_LESS_LESS:
case YP_TOKEN_MINUS:
case YP_TOKEN_PLUS:
case YP_TOKEN_PERCENT:
case YP_TOKEN_SLASH:
case YP_TOKEN_STAR:
case YP_TOKEN_STAR_STAR: {
parser_lex(parser);
yp_node_t *argument = parse_expression(parser, binding_power, "Expected a value after the operator.");
return (yp_node_t *) yp_call_node_binary_create(parser, node, &token, argument);
}
case YP_TOKEN_AMPERSAND_DOT:
case YP_TOKEN_DOT: {
parser_lex(parser);
yp_token_t operator = parser->previous;
yp_arguments_t arguments = YP_EMPTY_ARGUMENTS;
// This if statement handles the foo.() syntax.
if (match_type_p(parser, YP_TOKEN_PARENTHESIS_LEFT)) {
parse_arguments_list(parser, &arguments, true);
return (yp_node_t *) yp_call_node_shorthand_create(parser, node, &operator, &arguments);
}
yp_token_t message;
switch (parser->current.type) {
case YP_CASE_OPERATOR:
case YP_CASE_KEYWORD:
case YP_TOKEN_CONSTANT:
case YP_TOKEN_IDENTIFIER: {
parser_lex(parser);
message = parser->previous;
break;
}
default: {
yp_diagnostic_list_append(&parser->error_list, parser->current.start, parser->current.end, "Expected a valid method name");
message = (yp_token_t) { .type = YP_TOKEN_MISSING, .start = parser->previous.end, .end = parser->previous.end };
}
}
parse_arguments_list(parser, &arguments, true);
yp_call_node_t *call = yp_call_node_call_create(parser, node, &operator, &message, &arguments);
if (
(previous_binding_power == YP_BINDING_POWER_STATEMENT) &&
arguments.arguments == NULL &&
arguments.opening_loc.start == NULL &&
match_type_p(parser, YP_TOKEN_COMMA)
) {
return parse_targets(parser, (yp_node_t *) call, YP_BINDING_POWER_INDEX);
} else {
return (yp_node_t *) call;
}
}
case YP_TOKEN_DOT_DOT:
case YP_TOKEN_DOT_DOT_DOT: {
parser_lex(parser);
yp_node_t *right = NULL;
if (token_begins_expression_p(parser->current.type)) {
right = parse_expression(parser, binding_power, "Expected a value after the operator.");
}
return (yp_node_t *) yp_range_node_create(parser, node, &token, right);
}
case YP_TOKEN_KEYWORD_IF_MODIFIER: {
yp_token_t keyword = parser->current;
parser_lex(parser);
yp_node_t *predicate = parse_expression(parser, binding_power, "Expected a predicate after `if'.");
return (yp_node_t *) yp_if_node_modifier_create(parser, node, &keyword, predicate);
}
case YP_TOKEN_KEYWORD_UNLESS_MODIFIER: {
yp_token_t keyword = parser->current;
parser_lex(parser);
yp_node_t *predicate = parse_expression(parser, binding_power, "Expected a predicate after `unless'.");
return (yp_node_t *) yp_unless_node_modifier_create(parser, node, &keyword, predicate);
}
case YP_TOKEN_KEYWORD_UNTIL_MODIFIER: {
parser_lex(parser);
yp_statements_node_t *statements = yp_statements_node_create(parser);
yp_statements_node_body_append(statements, node);
yp_node_t *predicate = parse_expression(parser, binding_power, "Expected a predicate after 'until'");
return (yp_node_t *) yp_until_node_create(parser, &token, predicate, statements);
}
case YP_TOKEN_KEYWORD_WHILE_MODIFIER: {
parser_lex(parser);
yp_statements_node_t *statements = yp_statements_node_create(parser);
yp_statements_node_body_append(statements, node);
yp_node_t *predicate = parse_expression(parser, binding_power, "Expected a predicate after 'while'");
return (yp_node_t *) yp_while_node_create(parser, &token, predicate, statements);
}
case YP_TOKEN_QUESTION_MARK: {
parser_lex(parser);
yp_node_t *true_expression = parse_expression(parser, YP_BINDING_POWER_DEFINED, "Expected a value after '?'");
if (parser->recovering) {
// If parsing the true expression of this ternary resulted in a syntax
// error that we can recover from, then we're going to put missing nodes
// and tokens into the remaining places. We want to be sure to do this
// before the `expect` function call to make sure it doesn't
// accidentally move past a ':' token that occurs after the syntax
// error.
yp_token_t colon = (yp_token_t) { .type = YP_TOKEN_MISSING, .start = parser->previous.end, .end = parser->previous.end };
yp_node_t *false_expression = (yp_node_t *) yp_missing_node_create(parser, colon.start, colon.end);
return (yp_node_t *) yp_if_node_ternary_create(parser, node, true_expression, &colon, false_expression);
}
accept(parser, YP_TOKEN_NEWLINE);
expect(parser, YP_TOKEN_COLON, "Expected ':' after true expression in ternary operator.");
yp_token_t colon = parser->previous;
yp_node_t *false_expression = parse_expression(parser, YP_BINDING_POWER_DEFINED, "Expected a value after ':'");
return (yp_node_t *) yp_if_node_ternary_create(parser, node, true_expression, &colon, false_expression);
}
case YP_TOKEN_COLON_COLON: {
parser_lex(parser);
yp_token_t delimiter = parser->previous;
switch (parser->current.type) {
case YP_TOKEN_CONSTANT: {
parser_lex(parser);
yp_node_t *path;
if (
(parser->current.type == YP_TOKEN_PARENTHESIS_LEFT) ||
(token_begins_expression_p(parser->current.type) || match_any_type_p(parser, 2, YP_TOKEN_USTAR, YP_TOKEN_USTAR_STAR))
) {
// If we have a constant immediately following a '::' operator, then
// this can either be a constant path or a method call, depending on
// what follows the constant.
//
// If we have parentheses, then this is a method call. That would
// look like Foo::Bar().
yp_token_t message = parser->previous;
yp_arguments_t arguments = YP_EMPTY_ARGUMENTS;
parse_arguments_list(parser, &arguments, true);
path = (yp_node_t *) yp_call_node_call_create(parser, node, &delimiter, &message, &arguments);
} else {
// Otherwise, this is a constant path. That would look like Foo::Bar.
yp_node_t *child = (yp_node_t *) yp_constant_read_node_create(parser, &parser->previous);
path = (yp_node_t *)yp_constant_path_node_create(parser, node, &delimiter, child);
}
// If this is followed by a comma then it is a multiple assignment.
if (previous_binding_power == YP_BINDING_POWER_STATEMENT && match_type_p(parser, YP_TOKEN_COMMA)) {
return parse_targets(parser, path, YP_BINDING_POWER_INDEX);
}
return path;
}
case YP_CASE_OPERATOR:
case YP_CASE_KEYWORD:
case YP_TOKEN_IDENTIFIER: {
parser_lex(parser);
// If we have an identifier following a '::' operator, then it is for
// sure a method call.
yp_arguments_t arguments = YP_EMPTY_ARGUMENTS;
parse_arguments_list(parser, &arguments, true);
yp_call_node_t *call = yp_call_node_call_create(parser, node, &delimiter, &parser->previous, &arguments);
// If this is followed by a comma then it is a multiple assignment.
if (previous_binding_power == YP_BINDING_POWER_STATEMENT && match_type_p(parser, YP_TOKEN_COMMA)) {
return parse_targets(parser, (yp_node_t *) call, YP_BINDING_POWER_INDEX);
}
return (yp_node_t *) call;
}
case YP_TOKEN_PARENTHESIS_LEFT: {
// If we have a parenthesis following a '::' operator, then it is the
// method call shorthand. That would look like Foo::(bar).
yp_arguments_t arguments = YP_EMPTY_ARGUMENTS;
parse_arguments_list(parser, &arguments, true);
return (yp_node_t *) yp_call_node_shorthand_create(parser, node, &delimiter, &arguments);
}
default: {
yp_diagnostic_list_append(&parser->error_list, delimiter.start, delimiter.end, "Expected identifier or constant after '::'");
yp_node_t *child = (yp_node_t *) yp_missing_node_create(parser, delimiter.start, delimiter.end);
return (yp_node_t *)yp_constant_path_node_create(parser, node, &delimiter, child);
}
}
}
case YP_TOKEN_KEYWORD_RESCUE_MODIFIER: {
parser_lex(parser);
accept(parser, YP_TOKEN_NEWLINE);
yp_node_t *value = parse_expression(parser, binding_power, "Expected a value after the rescue keyword.");
return (yp_node_t *) yp_rescue_modifier_node_create(parser, node, &token, value);
}
case YP_TOKEN_BRACKET_LEFT: {
parser_lex(parser);
yp_arguments_t arguments = YP_EMPTY_ARGUMENTS;
arguments.opening_loc = ((yp_location_t) { .start = parser->previous.start, .end = parser->previous.end });
if (!accept(parser, YP_TOKEN_BRACKET_RIGHT)) {
yp_accepts_block_stack_push(parser, true);
arguments.arguments = yp_arguments_node_create(parser);
parse_arguments(parser, arguments.arguments, false, YP_TOKEN_BRACKET_RIGHT);
yp_accepts_block_stack_pop(parser);
expect(parser, YP_TOKEN_BRACKET_RIGHT, "Expected ']' to close the bracket expression.");
}
arguments.closing_loc = ((yp_location_t) { .start = parser->previous.start, .end = parser->previous.end });
// If we have a comma after the closing bracket then this is a multiple
// assignment and we should parse the targets.
if (previous_binding_power == YP_BINDING_POWER_STATEMENT && match_type_p(parser, YP_TOKEN_COMMA)) {
yp_call_node_t *aref = yp_call_node_aref_create(parser, node, &arguments);
return parse_targets(parser, (yp_node_t *) aref, YP_BINDING_POWER_INDEX);
}
// If we're at the end of the arguments, we can now check if there is a
// block node that starts with a {. If there is, then we can parse it and
// add it to the arguments.
if (accept(parser, YP_TOKEN_BRACE_LEFT)) {
arguments.block = parse_block(parser);
} else if (yp_accepts_block_stack_p(parser) && accept(parser, YP_TOKEN_KEYWORD_DO)) {
arguments.block = parse_block(parser);
}
return (yp_node_t *) yp_call_node_aref_create(parser, node, &arguments);
}
case YP_TOKEN_KEYWORD_IN: {
bool previous_pattern_matching_newlines = parser->pattern_matching_newlines;
parser->pattern_matching_newlines = true;
yp_token_t operator = parser->current;
parser->command_start = false;
lex_state_set(parser, YP_LEX_STATE_BEG | YP_LEX_STATE_LABEL);
parser_lex(parser);
yp_node_t *pattern = parse_pattern(parser, true, "Expected a pattern after `in'.");
parser->pattern_matching_newlines = previous_pattern_matching_newlines;
return (yp_node_t *) yp_match_predicate_node_create(parser, node, pattern, &operator);
}
case YP_TOKEN_EQUAL_GREATER: {
bool previous_pattern_matching_newlines = parser->pattern_matching_newlines;
parser->pattern_matching_newlines = true;
yp_token_t operator = parser->current;
parser->command_start = false;
lex_state_set(parser, YP_LEX_STATE_BEG | YP_LEX_STATE_LABEL);
parser_lex(parser);
yp_node_t *pattern = parse_pattern(parser, true, "Expected a pattern after `=>'.");
parser->pattern_matching_newlines = previous_pattern_matching_newlines;
return (yp_node_t *) yp_match_required_node_create(parser, node, pattern, &operator);
}
default:
assert(false && "unreachable");
return NULL;
}
}
// Parse an expression at the given point of the parser using the given binding
// power to parse subsequent chains. If this function finds a syntax error, it
// will append the error message to the parser's error list.
//
// Consumers of this function should always check parser->recovering to
// determine if they need to perform additional cleanup.
static yp_node_t *
parse_expression(yp_parser_t *parser, yp_binding_power_t binding_power, const char *message) {
    yp_token_t recovery = parser->previous;
    yp_node_t *node = parse_expression_prefix(parser, binding_power);

    // A missing node coming back from the prefix parse means we hit a syntax
    // error. Attach the caller-supplied message at the recovery point and bail
    // out without attempting any infix parsing.
    if (node->type == YP_NODE_MISSING_NODE) {
        yp_diagnostic_list_append(&parser->error_list, recovery.end, recovery.end, message);
        return node;
    }

    // Keep folding infix operators into the node for as long as the upcoming
    // token is a binary operator that binds at least as tightly as the
    // requested binding power.
    for (;;) {
        yp_binding_powers_t token_binding_powers = yp_binding_powers[parser->current.type];
        if (!token_binding_powers.binary || (binding_power > token_binding_powers.left)) {
            break;
        }
        node = parse_expression_infix(parser, node, binding_power, token_binding_powers.right);
    }

    return node;
}
// Parse the top level of a program: push the root scope, lex the first token,
// and collect every statement into a program node.
static yp_node_t *
parse_program(yp_parser_t *parser) {
    yp_parser_scope_push(parser, true);
    parser_lex(parser);

    yp_statements_node_t *body = parse_statements(parser, YP_CONTEXT_MAIN);
    if (body == NULL) {
        body = yp_statements_node_create(parser);
    }

    // Capture the locals discovered in the root scope before popping it so
    // they can be attached to the program node.
    yp_constant_id_list_t locals = parser->current_scope->locals;
    yp_parser_scope_pop(parser);

    // An empty file is still parsed in full in order to gather up all of the
    // comments and such. In that case, anchor the statements node's location
    // at the start of the source so it is well-defined.
    if (yp_statements_node_body_length(body) == 0) {
        yp_statements_node_location_set(body, parser->start, parser->start);
    }

    return (yp_node_t *) yp_program_node_create(parser, &locals, body);
}
/******************************************************************************/
/* External functions */
/******************************************************************************/
// Initialize a parser over the given source string of `size` bytes. The
// optional filepath may be NULL, in which case an empty string is used.
YP_EXPORTED_FUNCTION void
yp_parser_init(yp_parser_t *parser, const char *source, size_t size, const char *filepath) {
    // Set filepath to the file that was passed
    if (!filepath) filepath = "";
    yp_string_t filepath_string;
    yp_string_constant_init(&filepath_string, filepath, strlen(filepath));

    *parser = (yp_parser_t) {
        .lex_state = YP_LEX_STATE_BEG,
        .command_start = true,
        .enclosure_nesting = 0,
        // -1 marks "not currently inside a lambda enclosure".
        .lambda_enclosure_nesting = -1,
        .brace_nesting = 0,
        .lex_modes = {
            .index = 0,
            .stack = {{ .mode = YP_LEX_DEFAULT }},
            // The current lex mode points at the bottom of the inline stack.
            .current = &parser->lex_modes.stack[0],
        },
        .start = source,
        .end = source + size,
        // Both token cursors start out as zero-width EOF tokens anchored at
        // the beginning of the source; the first parser_lex advances them.
        .previous = { .type = YP_TOKEN_EOF, .start = source, .end = source },
        .current = { .type = YP_TOKEN_EOF, .start = source, .end = source },
        .next_start = NULL,
        .heredoc_end = NULL,
        .current_scope = NULL,
        .current_context = NULL,
        .recovering = false,
        .encoding = yp_encoding_utf_8,
        .encoding_changed = false,
        .encoding_changed_callback = NULL,
        .encoding_decode_callback = NULL,
        // Where scanning for a magic encoding comment begins; moved past the
        // shebang line below if one is present.
        .encoding_comment_start = source,
        .lex_callback = NULL,
        .pattern_matching_newlines = false,
        .in_keyword_arg = false,
        .filepath_string = filepath_string,
    };

    yp_state_stack_init(&parser->do_loop_stack);
    yp_state_stack_init(&parser->accepts_block_stack);
    yp_accepts_block_stack_push(parser, true);

    yp_list_init(&parser->warning_list);
    yp_list_init(&parser->error_list);
    yp_list_init(&parser->comment_list);

    // Initialize the constant pool. We're going to completely guess as to the
    // number of constants that we'll need based on the size of the input. The
    // ratio we chose here is actually less arbitrary than you might think.
    //
    // We took ~50K Ruby files and measured the size of the file versus the
    // number of constants that were found in those files. Then we found the
    // average and standard deviation of the ratios of constants/bytesize. Then
    // we added 1.34 standard deviations to the average to get a ratio that
    // would fit 75% of the files (for a two-tailed distribution). This works
    // because there was about a 0.77 correlation and the distribution was
    // roughly normal.
    //
    // This ratio will need to change if we add more constants to the constant
    // pool for another node type.
    size_t constant_size = size / 95;
    yp_constant_pool_init(&parser->constant_pool, constant_size < 4 ? 4 : constant_size);

    // Initialize the newline list. Similar to the constant pool, we're going to
    // guess at the number of newlines that we'll need based on the size of the
    // input.
    size_t newline_size = size / 22;
    yp_newline_list_init(&parser->newline_list, source, newline_size < 4 ? 4 : newline_size);

    assert(source != NULL);
    if (size >= 3 && (unsigned char) source[0] == 0xef && (unsigned char) source[1] == 0xbb && (unsigned char) source[2] == 0xbf) {
        // If the first three bytes of the source are the UTF-8 BOM, then we'll skip
        // over them.
        parser->current.end += 3;
    } else if (size >= 2 && source[0] == '#' && source[1] == '!') {
        // If the first two bytes of the source are a shebang, then we'll indicate
        // that the encoding comment is at the end of the shebang.
        const char *encoding_comment_start = next_newline(source, (ptrdiff_t) size);
        if (encoding_comment_start) {
            parser->encoding_comment_start = encoding_comment_start + 1;
        }
    }
}
// Register a callback that will be called whenever YARP changes the encoding it
// is using to parse based on the magic comment.
YP_EXPORTED_FUNCTION void
yp_parser_register_encoding_changed_callback(yp_parser_t *parser, yp_encoding_changed_callback_t callback) {
    // Overwrites any previously registered callback; passing NULL clears it.
    parser->encoding_changed_callback = callback;
}
// Register a callback that will be called when YARP encounters a magic comment
// with an encoding referenced that it doesn't understand. The callback should
// return NULL if it also doesn't understand the encoding or it should return a
// pointer to a yp_encoding_t struct that contains the functions necessary to
// parse identifiers.
YP_EXPORTED_FUNCTION void
yp_parser_register_encoding_decode_callback(yp_parser_t *parser, yp_encoding_decode_callback_t callback) {
    // Overwrites any previously registered callback; passing NULL clears it.
    parser->encoding_decode_callback = callback;
}
// Free all of the memory associated with the comment list.
static inline void
yp_comment_list_free(yp_list_t *list) {
    // Walk the intrusive list, saving each node's successor before the node
    // itself is released.
    yp_list_node_t *current = list->head;

    while (current != NULL) {
        yp_list_node_t *consumed = current;
        current = current->next;
        free((yp_comment_t *) consumed);
    }
}
// Free any memory associated with the given parser.
YP_EXPORTED_FUNCTION void
yp_parser_free(yp_parser_t *parser) {
    yp_string_free(&parser->filepath_string);
    yp_diagnostic_list_free(&parser->error_list);
    yp_diagnostic_list_free(&parser->warning_list);
    yp_comment_list_free(&parser->comment_list);
    yp_constant_pool_free(&parser->constant_pool);
    yp_newline_list_free(&parser->newline_list);

    // Pop every lex mode pushed beyond the inline stack.
    // NOTE(review): presumably entries at index >= YP_LEX_STACK_SIZE were
    // heap-allocated when pushed and lex_mode_pop releases them, while the
    // inline entries below that index need no freeing — confirm against
    // lex_mode_push/lex_mode_pop.
    while (parser->lex_modes.index >= YP_LEX_STACK_SIZE) {
        lex_mode_pop(parser);
    }
}
// Parse the Ruby source associated with the given parser and return the tree.
YP_EXPORTED_FUNCTION yp_node_t *
yp_parse(yp_parser_t *parser) {
    // The returned tree is owned by the caller and should be released with
    // yp_node_destroy (as yp_parse_serialize below does).
    return parse_program(parser);
}
// Serialize the given tree into the buffer: a 4-byte "YARP" magic, the three
// serialization version bytes (major, minor, patch), the serialized content,
// and a trailing NUL byte.
YP_EXPORTED_FUNCTION void
yp_serialize(yp_parser_t *parser, yp_node_t *node, yp_buffer_t *buffer) {
    yp_buffer_append_str(buffer, "YARP", 4);

    const uint8_t version[] = { YP_VERSION_MAJOR, YP_VERSION_MINOR, YP_VERSION_PATCH };
    for (size_t index = 0; index < sizeof(version); index++) {
        yp_buffer_append_u8(buffer, version[index]);
    }

    yp_serialize_content(parser, node, buffer);
    yp_buffer_append_str(buffer, "\0", 1);
}
// Parse and serialize the AST represented by the given source to the given
// buffer.
YP_EXPORTED_FUNCTION void
yp_parse_serialize(const char *source, size_t size, yp_buffer_t *buffer) {
    yp_parser_t parser;
    yp_parser_init(&parser, source, size, NULL);

    // Parse, write the result into the buffer, then tear everything down.
    yp_node_t *root = yp_parse(&parser);
    yp_serialize(&parser, root, buffer);

    yp_node_destroy(&parser, root);
    yp_parser_free(&parser);
}
#undef YP_CASE_KEYWORD
#undef YP_CASE_OPERATOR
#undef YP_CASE_WRITABLE