[ruby/prism] Use MatchWriteNode on split InterpolatedREN

ee54244800
This commit is contained in:
Kevin Newton 2023-10-23 14:31:30 -04:00 committed by git
parent 9c5b084c0a
commit a8af5d3808
4 changed files with 176 additions and 72 deletions

View file

@ -14570,6 +14570,50 @@ parse_call_operator_write(pm_parser_t *parser, pm_call_node_t *call_node, const
} }
} }
// Potentially change a =~ with a regular expression with named captures into a
// match write node.
//
// content holds the regular expression source to scan for named capture groups
// and call is the =~ call node that has already been built. When the scan
// succeeds and finds at least one named capture, a match write node is created
// from the call, one local is added to it per capture name, and that node is
// returned. Otherwise the call node itself is returned.
static pm_node_t *
parse_regular_expression_named_captures(pm_parser_t *parser, const pm_string_t *content, pm_call_node_t *call) {
    pm_string_list_t named_captures;
    pm_string_list_init(&named_captures);

    pm_node_t *result;

    if (pm_regexp_named_capture_group_names(pm_string_source(content), pm_string_length(content), &named_captures, parser->encoding_changed, &parser->encoding) && (named_captures.length > 0)) {
        pm_match_write_node_t *match = pm_match_write_node_create(parser, call);

        for (size_t index = 0; index < named_captures.length; index++) {
            pm_string_t *name = &named_captures.strings[index];
            pm_constant_id_t local;

            if (content->type == PM_STRING_SHARED) {
                // If the unescaped string is a slice of the source,
                // then we can copy the names directly. The pointers
                // will line up.
                local = pm_parser_local_add_location(parser, name->source, name->source + name->length);
            } else {
                // Otherwise, the name is a slice of the malloc-ed
                // owned string, in which case we need to copy it
                // out into a new string.
                size_t length = pm_string_length(name);

                // Request at least one byte so that a NULL return always
                // means the allocation failed, even for an empty name
                // (malloc(0) is allowed to return NULL).
                void *memory = malloc(length > 0 ? length : 1);

                // Abort on allocation failure rather than handing a NULL
                // destination to memcpy.
                if (memory == NULL) abort();

                memcpy(memory, pm_string_source(name), length);
                local = pm_parser_local_add_owned(parser, (const uint8_t *) memory, length);
            }

            pm_constant_id_list_append(&match->locals, local);
        }

        result = (pm_node_t *) match;
    } else {
        result = (pm_node_t *) call;
    }

    // The list of names is only needed while the locals are being added.
    pm_string_list_free(&named_captures);
    return result;
}
static inline pm_node_t * static inline pm_node_t *
parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t previous_binding_power, pm_binding_power_t binding_power) { parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t previous_binding_power, pm_binding_power_t binding_power) {
pm_token_t token = parser->current; pm_token_t token = parser->current;
@ -14995,42 +15039,51 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
// If the receiver of this =~ is a regular expression node, then we // If the receiver of this =~ is a regular expression node, then we
// need to introduce local variables for it based on its named // need to introduce local variables for it based on its named
// capture groups. // capture groups.
if (PM_NODE_TYPE_P(node, PM_REGULAR_EXPRESSION_NODE)) { if (PM_NODE_TYPE_P(node, PM_INTERPOLATED_REGULAR_EXPRESSION_NODE)) {
pm_string_list_t named_captures; // It's possible to have an interpolated regular expression node
pm_string_list_init(&named_captures); // that only contains strings. This is because it can be split
// up by a heredoc. In this case we need to concat the unescaped
// strings together and then parse them as a regular expression.
pm_node_list_t *parts = &((pm_interpolated_regular_expression_node_t *) node)->parts;
const pm_string_t *unescaped = &((pm_regular_expression_node_t *) node)->unescaped; bool interpolated = false;
if (pm_regexp_named_capture_group_names(pm_string_source(unescaped), pm_string_length(unescaped), &named_captures, parser->encoding_changed, &parser->encoding) && (named_captures.length > 0)) { size_t total_length = 0;
pm_match_write_node_t *match = pm_match_write_node_create(parser, call);
for (size_t index = 0; index < named_captures.length; index++) { for (size_t index = 0; index < parts->size; index++) {
pm_string_t *name = &named_captures.strings[index]; pm_node_t *part = parts->nodes[index];
pm_constant_id_t local;
if (unescaped->type == PM_STRING_SHARED) { if (PM_NODE_TYPE_P(part, PM_STRING_NODE)) {
// If the unescaped string is a slice of the source, total_length += pm_string_length(&((pm_string_node_t *) part)->unescaped);
// then we can copy the names directly. The pointers } else {
// will line up. interpolated = true;
local = pm_parser_local_add_location(parser, name->source, name->source + name->length); break;
} else {
// Otherwise, the name is a slice of the malloc-ed
// owned string, in which case we need to copy it
// out into a new string.
size_t length = pm_string_length(name);
void *memory = malloc(length);
memcpy(memory, pm_string_source(name), length);
local = pm_parser_local_add_owned(parser, (const uint8_t *) memory, length);
}
pm_constant_id_list_append(&match->locals, local);
} }
result = (pm_node_t *) match;
} }
pm_string_list_free(&named_captures); if (!interpolated) {
void *memory = malloc(total_length);
if (!memory) abort();
uint8_t *cursor = memory;
for (size_t index = 0; index < parts->size; index++) {
pm_string_t *unescaped = &((pm_string_node_t *) parts->nodes[index])->unescaped;
size_t length = pm_string_length(unescaped);
memcpy(cursor, pm_string_source(unescaped), length);
cursor += length;
}
pm_string_t owned;
pm_string_owned_init(&owned, (uint8_t *) memory, total_length);
result = parse_regular_expression_named_captures(parser, &owned, call);
pm_string_free(&owned);
}
} else if (PM_NODE_TYPE_P(node, PM_REGULAR_EXPRESSION_NODE)) {
// If we have a regular expression node, then we can just parse
// the named captures directly off the unescaped string.
const pm_string_t *content = &((pm_regular_expression_node_t *) node)->unescaped;
result = parse_regular_expression_named_captures(parser, content, call);
} }
return result; return result;

View file

@ -188,6 +188,8 @@ pm_regexp_parse_range_quantifier(pm_regexp_parser_t *parser) {
// ; // ;
static bool static bool
pm_regexp_parse_quantifier(pm_regexp_parser_t *parser) { pm_regexp_parse_quantifier(pm_regexp_parser_t *parser) {
if (pm_regexp_char_is_eof(parser)) return true;
switch (*parser->cursor) { switch (*parser->cursor) {
case '*': case '*':
case '+': case '+':

View file

@ -49,3 +49,7 @@ pp <<-A, %I[p\
o o
A A
p] p]
<<A; /\
A
(?<a>)/ =~ ''

View file

@ -1,8 +1,8 @@
@ ProgramNode (location: (4,0)-(51,2)) @ ProgramNode (location: (4,0)-(55,13))
├── locals: [] ├── locals: [:a]
└── statements: └── statements:
@ StatementsNode (location: (4,0)-(51,2)) @ StatementsNode (location: (4,0)-(55,13))
└── body: (length: 8) └── body: (length: 10)
├── @ CallNode (location: (4,0)-(7,7)) ├── @ CallNode (location: (4,0)-(7,7))
│ ├── receiver: ∅ │ ├── receiver: ∅
│ ├── call_operator_loc: ∅ │ ├── call_operator_loc: ∅
@ -270,41 +270,86 @@
│ ├── block: ∅ │ ├── block: ∅
│ ├── flags: ∅ │ ├── flags: ∅
│ └── name: :pp │ └── name: :pp
└── @ CallNode (location: (48,0)-(51,2)) ├── @ CallNode (location: (48,0)-(51,2))
├── receiver: ∅ │ ├── receiver: ∅
├── call_operator_loc: ∅ │ ├── call_operator_loc: ∅
├── message_loc: (48,0)-(48,2) = "pp" │ ├── message_loc: (48,0)-(48,2) = "pp"
├── opening_loc: ∅ │ ├── opening_loc: ∅
├── arguments: │ ├── arguments:
│ @ ArgumentsNode (location: (48,3)-(51,2)) │ │ @ ArgumentsNode (location: (48,3)-(51,2))
│ ├── arguments: (length: 2) │ │ ├── arguments: (length: 2)
│ │ ├── @ StringNode (location: (48,3)-(48,7)) │ │ │ ├── @ StringNode (location: (48,3)-(48,7))
│ │ │ ├── flags: ∅ │ │ │ │ ├── flags: ∅
│ │ │ ├── opening_loc: (48,3)-(48,7) = "<<-A" │ │ │ │ ├── opening_loc: (48,3)-(48,7) = "<<-A"
│ │ │ ├── content_loc: (49,0)-(50,0) = "o\n" │ │ │ │ ├── content_loc: (49,0)-(50,0) = "o\n"
│ │ │ ├── closing_loc: (50,0)-(51,0) = "A\n" │ │ │ │ ├── closing_loc: (50,0)-(51,0) = "A\n"
│ │ │ └── unescaped: "o\n" │ │ │ │ └── unescaped: "o\n"
│ │ └── @ ArrayNode (location: (48,9)-(51,2)) │ │ │ └── @ ArrayNode (location: (48,9)-(51,2))
│ │ ├── elements: (length: 1) │ │ │ ├── elements: (length: 1)
│ │ │ └── @ InterpolatedSymbolNode (location: (48,12)-(48,14)) │ │ │ │ └── @ InterpolatedSymbolNode (location: (48,12)-(48,14))
│ │ │ ├── opening_loc: ∅ │ │ │ │ ├── opening_loc: ∅
│ │ │ ├── parts: (length: 2) │ │ │ │ ├── parts: (length: 2)
│ │ │ │ ├── @ SymbolNode (location: (48,12)-(48,14)) │ │ │ │ │ ├── @ SymbolNode (location: (48,12)-(48,14))
│ │ │ │ │ ├── opening_loc: ∅ │ │ │ │ │ │ ├── opening_loc: ∅
│ │ │ │ │ ├── value_loc: (48,12)-(48,14) = "p\\" │ │ │ │ │ │ ├── value_loc: (48,12)-(48,14) = "p\\"
│ │ │ │ │ ├── closing_loc: ∅ │ │ │ │ │ │ ├── closing_loc: ∅
│ │ │ │ │ └── unescaped: "p\n" │ │ │ │ │ │ └── unescaped: "p\n"
│ │ │ │ └── @ StringNode (location: (48,12)-(48,14)) │ │ │ │ │ └── @ StringNode (location: (48,12)-(48,14))
│ │ │ │ ├── flags: ∅ │ │ │ │ │ ├── flags: ∅
│ │ │ │ ├── opening_loc: ∅ │ │ │ │ │ ├── opening_loc: ∅
│ │ │ │ ├── content_loc: (48,12)-(48,14) = "p\\" │ │ │ │ │ ├── content_loc: (48,12)-(48,14) = "p\\"
│ │ │ │ ├── closing_loc: ∅ │ │ │ │ │ ├── closing_loc: ∅
│ │ │ │ └── unescaped: "p" │ │ │ │ │ └── unescaped: "p"
│ │ │ └── closing_loc: ∅ │ │ │ │ └── closing_loc: ∅
│ │ ├── opening_loc: (48,9)-(48,12) = "%I[" │ │ │ ├── opening_loc: (48,9)-(48,12) = "%I["
│ │ └── closing_loc: (51,1)-(51,2) = "]" │ │ │ └── closing_loc: (51,1)-(51,2) = "]"
│ └── flags: ∅ │ │ └── flags: ∅
├── closing_loc: ∅ │ ├── closing_loc: ∅
├── block: ∅ │ ├── block: ∅
├── flags: ∅ │ ├── flags: ∅
└── name: :pp │ └── name: :pp
├── @ StringNode (location: (53,0)-(53,3))
│ ├── flags: ∅
│ ├── opening_loc: (53,0)-(53,3) = "<<A"
│ ├── content_loc: (54,0)-(54,0) = ""
│ ├── closing_loc: (54,0)-(55,0) = "A\n"
│ └── unescaped: ""
└── @ MatchWriteNode (location: (53,5)-(55,13))
├── call:
│ @ CallNode (location: (53,5)-(55,13))
│ ├── receiver:
│ │ @ InterpolatedRegularExpressionNode (location: (53,5)-(55,7))
│ │ ├── opening_loc: (53,5)-(53,6) = "/"
│ │ ├── parts: (length: 2)
│ │ │ ├── @ StringNode (location: (53,6)-(53,7))
│ │ │ │ ├── flags: ∅
│ │ │ │ ├── opening_loc: ∅
│ │ │ │ ├── content_loc: (53,6)-(53,7) = "\\"
│ │ │ │ ├── closing_loc: ∅
│ │ │ │ └── unescaped: ""
│ │ │ └── @ StringNode (location: (55,0)-(55,6))
│ │ │ ├── flags: ∅
│ │ │ ├── opening_loc: ∅
│ │ │ ├── content_loc: (55,0)-(55,6) = "(?<a>)"
│ │ │ ├── closing_loc: ∅
│ │ │ └── unescaped: "(?<a>)"
│ │ ├── closing_loc: (55,6)-(55,7) = "/"
│ │ └── flags: ∅
│ ├── call_operator_loc: ∅
│ ├── message_loc: (55,8)-(55,10) = "=~"
│ ├── opening_loc: ∅
│ ├── arguments:
│ │ @ ArgumentsNode (location: (55,11)-(55,13))
│ │ ├── arguments: (length: 1)
│ │ │ └── @ StringNode (location: (55,11)-(55,13))
│ │ │ ├── flags: ∅
│ │ │ ├── opening_loc: (55,11)-(55,12) = "'"
│ │ │ ├── content_loc: (55,12)-(55,12) = ""
│ │ │ ├── closing_loc: (55,12)-(55,13) = "'"
│ │ │ └── unescaped: ""
│ │ └── flags: ∅
│ ├── closing_loc: ∅
│ ├── block: ∅
│ ├── flags: ∅
│ └── name: :=~
└── locals: [:a]