[ruby/yarp] Introduce MatchWriteNode

This rarely used node holds information about the local variables
that need to get written in the case a regular expression is used on
the left-hand side of a =~ operator and it has named capture groups.

Note that we already "handled" these nodes by adding locals to the
AST, but we didn't actually expose this information, making it
difficult to compile.

The general idea behind this node is that it wraps the call node
instead of flattening the call's fields into itself, so consumers
can still easily find all of the call nodes in the tree. The extra
nesting should be okay: because the locals list is on the top level,
you hopefully don't need any information from the call node to
determine what to compile.

e136e7f9a8
This commit is contained in:
Kevin Newton 2023-09-14 09:01:20 -04:00 committed by git
parent 9d2549ac31
commit 0a8f3670d1
5 changed files with 109 additions and 59 deletions

View file

@ -545,6 +545,10 @@ module YARP
assert_location(MatchRequiredNode, "foo => bar")
end
# Location check for MatchWriteNode, using a regular expression with a named
# capture group on the left-hand side of =~.
def test_MatchWriteNode
  source = "/(?<foo>)/ =~ foo"
  assert_location(MatchWriteNode, source)
end
# Location check for ModuleNode, using a minimal one-line module definition.
def test_ModuleNode
  source = "module Foo end"
  assert_location(ModuleNode, source)
end

View file

@ -84,34 +84,37 @@
│ └── flags: ∅
├── @ ArrayNode (location: (59...86))
│ ├── elements: (length: 2)
│ │ ├── @ CallNode (location: (60...80))
│ │ │ ├── receiver:
│ │ │ │ @ RegularExpressionNode (location: (60...73))
│ │ │ │ ├── opening_loc: (60...61) = "/"
│ │ │ │ ├── content_loc: (61...72) = "(?<foo>bar)"
│ │ │ │ ├── closing_loc: (72...73) = "/"
│ │ │ │ ├── unescaped: "(?<foo>bar)"
│ │ │ │ └── flags: ∅
│ │ │ ├── call_operator_loc: ∅
│ │ │ ├── message_loc: (74...76) = "=~"
│ │ │ ├── opening_loc: ∅
│ │ │ ├── arguments:
│ │ │ │ @ ArgumentsNode (location: (77...80))
│ │ │ │ └── arguments: (length: 1)
│ │ │ │ └── @ CallNode (location: (77...80))
│ │ │ │ ├── receiver: ∅
│ │ │ │ ├── call_operator_loc: ∅
│ │ │ │ ├── message_loc: (77...80) = "baz"
│ │ │ │ ├── opening_loc: ∅
│ │ │ │ ├── arguments: ∅
│ │ │ │ ├── closing_loc: ∅
│ │ │ │ ├── block: ∅
│ │ │ │ ├── flags: variable_call
│ │ │ │ └── name: "baz"
│ │ │ ├── closing_loc: ∅
│ │ │ ├── block: ∅
│ │ │ ├── flags: ∅
│ │ │ └── name: "=~"
│ │ ├── @ MatchWriteNode (location: (60...80))
│ │ │ ├── call:
│ │ │ │ @ CallNode (location: (60...80))
│ │ │ │ ├── receiver:
│ │ │ │ │ @ RegularExpressionNode (location: (60...73))
│ │ │ │ │ ├── opening_loc: (60...61) = "/"
│ │ │ │ │ ├── content_loc: (61...72) = "(?<foo>bar)"
│ │ │ │ │ ├── closing_loc: (72...73) = "/"
│ │ │ │ │ ├── unescaped: "(?<foo>bar)"
│ │ │ │ │ └── flags: ∅
│ │ │ │ ├── call_operator_loc: ∅
│ │ │ │ ├── message_loc: (74...76) = "=~"
│ │ │ │ ├── opening_loc: ∅
│ │ │ │ ├── arguments:
│ │ │ │ │ @ ArgumentsNode (location: (77...80))
│ │ │ │ │ └── arguments: (length: 1)
│ │ │ │ │ └── @ CallNode (location: (77...80))
│ │ │ │ │ ├── receiver: ∅
│ │ │ │ │ ├── call_operator_loc: ∅
│ │ │ │ │ ├── message_loc: (77...80) = "baz"
│ │ │ │ │ ├── opening_loc: ∅
│ │ │ │ │ ├── arguments: ∅
│ │ │ │ │ ├── closing_loc: ∅
│ │ │ │ │ ├── block: ∅
│ │ │ │ │ ├── flags: variable_call
│ │ │ │ │ └── name: "baz"
│ │ │ │ ├── closing_loc: ∅
│ │ │ │ ├── block: ∅
│ │ │ │ ├── flags: ∅
│ │ │ │ └── name: "=~"
│ │ │ └── locals: [:foo]
│ │ └── @ LocalVariableReadNode (location: (82...85))
│ │ ├── name: :foo
│ │ └── depth: 0

View file

@ -3,30 +3,33 @@
└── statements:
@ StatementsNode (location: (0...31))
└── body: (length: 2)
├── @ CallNode (location: (0...24))
│ ├── receiver:
│ │ @ RegularExpressionNode (location: (0...15))
│ │ ├── opening_loc: (0...1) = "/"
│ │ ├── content_loc: (1...14) = "(?<match>bar)"
│ │ ├── closing_loc: (14...15) = "/"
│ │ ├── unescaped: "(?<match>bar)"
│ │ └── flags: ∅
│ ├── call_operator_loc: ∅
│ ├── message_loc: (16...18) = "=~"
│ ├── opening_loc: ∅
│ ├── arguments:
│ │ @ ArgumentsNode (location: (19...24))
│ │ └── arguments: (length: 1)
│ │ └── @ StringNode (location: (19...24))
│ │ ├── flags: ∅
│ │ ├── opening_loc: (19...20) = "'"
│ │ ├── content_loc: (20...23) = "bar"
│ │ ├── closing_loc: (23...24) = "'"
│ │ └── unescaped: "bar"
│ ├── closing_loc: ∅
│ ├── block: ∅
│ ├── flags: ∅
│ └── name: "=~"
├── @ MatchWriteNode (location: (0...24))
│ ├── call:
│ │ @ CallNode (location: (0...24))
│ │ ├── receiver:
│ │ │ @ RegularExpressionNode (location: (0...15))
│ │ │ ├── opening_loc: (0...1) = "/"
│ │ │ ├── content_loc: (1...14) = "(?<match>bar)"
│ │ │ ├── closing_loc: (14...15) = "/"
│ │ │ ├── unescaped: "(?<match>bar)"
│ │ │ └── flags: ∅
│ │ ├── call_operator_loc: ∅
│ │ ├── message_loc: (16...18) = "=~"
│ │ ├── opening_loc: ∅
│ │ ├── arguments:
│ │ │ @ ArgumentsNode (location: (19...24))
│ │ │ └── arguments: (length: 1)
│ │ │ └── @ StringNode (location: (19...24))
│ │ │ ├── flags: ∅
│ │ │ ├── opening_loc: (19...20) = "'"
│ │ │ ├── content_loc: (20...23) = "bar"
│ │ │ ├── closing_loc: (23...24) = "'"
│ │ │ └── unescaped: "bar"
│ │ ├── closing_loc: ∅
│ │ ├── block: ∅
│ │ ├── flags: ∅
│ │ └── name: "=~"
│ └── locals: [:match]
└── @ LocalVariableReadNode (location: (26...31))
├── name: :match
└── depth: 0

View file

@ -1798,6 +1798,19 @@ nodes:
foo => bar
^^^^^^^^^^
- name: MatchWriteNode
fields:
# The =~ call node that this node wraps.
- name: call
type: node
kind: CallNode
# The local variables written by the match, one per named capture group.
- name: locals
type: constant[]
comment: |
Represents writing local variables using a regular expression match with
named capture groups.
/(?<foo>bar)/ =~ baz
^^^^^^^^^^^^^^^^^^^^
- name: MissingNode
comment: |
Represents a node that is missing from the source and results in a syntax

View file

@ -3371,6 +3371,23 @@ yp_match_required_node_create(yp_parser_t *parser, yp_node_t *value, yp_node_t *
return node;
}
// Allocate a new MatchWriteNode that wraps the given call node. The new node
// reuses the call node's location, and its locals list is set up through
// yp_constant_id_list_init before the node is returned.
static yp_match_write_node_t *
yp_match_write_node_create(yp_parser_t *parser, yp_call_node_t *call) {
    yp_match_write_node_t *match = YP_ALLOC_NODE(parser, yp_match_write_node_t);

    // Members that are not named in this initializer are zero-initialized.
    const yp_match_write_node_t initial = {
        { .type = YP_MATCH_WRITE_NODE, .location = call->base.location },
        .call = call
    };
    *match = initial;

    yp_constant_id_list_init(&match->locals);
    return match;
}
// Allocate a new ModuleNode node.
static yp_module_node_t *
yp_module_node_create(yp_parser_t *parser, yp_constant_id_list_t *locals, const yp_token_t *module_keyword, yp_node_t *constant_path, const yp_token_t *name, yp_node_t *body, const yp_token_t *end_keyword) {
@ -13612,8 +13629,9 @@ parse_expression_infix(yp_parser_t *parser, yp_node_t *node, yp_binding_power_t
return (yp_node_t *) yp_or_node_create(parser, node, &token, right);
}
case YP_TOKEN_EQUAL_TILDE: {
// Note that we _must_ parse the value before adding the local variables
// in order to properly mirror the behavior of Ruby. For example,
// Note that we _must_ parse the value before adding the local
// variables in order to properly mirror the behavior of Ruby. For
// example,
//
// /(?<foo>bar)/ =~ foo
//
@ -13621,27 +13639,36 @@ parse_expression_infix(yp_parser_t *parser, yp_node_t *node, yp_binding_power_t
parser_lex(parser);
yp_node_t *argument = parse_expression(parser, binding_power, YP_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR);
// If the receiver of this =~ is a regular expression node, then we need
// to introduce local variables for it based on its named capture groups.
// By default, we're going to create a call node and then return it.
yp_call_node_t *call = yp_call_node_binary_create(parser, node, &token, argument);
yp_node_t *result = (yp_node_t *) call;
// If the receiver of this =~ is a regular expression node, then we
// need to introduce local variables for it based on its named
// capture groups.
if (YP_NODE_TYPE_P(node, YP_REGULAR_EXPRESSION_NODE)) {
yp_string_list_t named_captures;
yp_string_list_init(&named_captures);
const yp_location_t *content_loc = &((yp_regular_expression_node_t *) node)->content_loc;
if (yp_regexp_named_capture_group_names(content_loc->start, (size_t) (content_loc->end - content_loc->start), &named_captures, parser->encoding_changed, &parser->encoding) && (named_captures.length > 0)) {
yp_match_write_node_t *match = yp_match_write_node_create(parser, call);
if (yp_regexp_named_capture_group_names(content_loc->start, (size_t) (content_loc->end - content_loc->start), &named_captures, parser->encoding_changed, &parser->encoding)) {
for (size_t index = 0; index < named_captures.length; index++) {
yp_string_t *name = &named_captures.strings[index];
assert(name->type == YP_STRING_SHARED);
yp_parser_local_add_location(parser, name->source, name->source + name->length);
yp_constant_id_t local = yp_parser_local_add_location(parser, name->source, name->source + name->length);
yp_constant_id_list_append(&match->locals, local);
}
result = (yp_node_t *) match;
}
yp_string_list_free(&named_captures);
}
return (yp_node_t *) yp_call_node_binary_create(parser, node, &token, argument);
return result;
}
case YP_TOKEN_UAMPERSAND:
case YP_TOKEN_USTAR: