mirror of
https://github.com/ruby/ruby.git
synced 2025-08-27 15:06:10 +02:00
parent
9c5b084c0a
commit
a8af5d3808
4 changed files with 176 additions and 72 deletions
113
prism/prism.c
113
prism/prism.c
|
@ -14570,6 +14570,50 @@ parse_call_operator_write(pm_parser_t *parser, pm_call_node_t *call_node, const
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Potentially change a =~ with a regular expression with named captures into a
|
||||||
|
// match write node.
|
||||||
|
static pm_node_t *
|
||||||
|
parse_regular_expression_named_captures(pm_parser_t *parser, const pm_string_t *content, pm_call_node_t *call) {
|
||||||
|
pm_string_list_t named_captures;
|
||||||
|
pm_string_list_init(&named_captures);
|
||||||
|
|
||||||
|
pm_node_t *result;
|
||||||
|
if (pm_regexp_named_capture_group_names(pm_string_source(content), pm_string_length(content), &named_captures, parser->encoding_changed, &parser->encoding) && (named_captures.length > 0)) {
|
||||||
|
pm_match_write_node_t *match = pm_match_write_node_create(parser, call);
|
||||||
|
|
||||||
|
for (size_t index = 0; index < named_captures.length; index++) {
|
||||||
|
pm_string_t *name = &named_captures.strings[index];
|
||||||
|
pm_constant_id_t local;
|
||||||
|
|
||||||
|
if (content->type == PM_STRING_SHARED) {
|
||||||
|
// If the unescaped string is a slice of the source,
|
||||||
|
// then we can copy the names directly. The pointers
|
||||||
|
// will line up.
|
||||||
|
local = pm_parser_local_add_location(parser, name->source, name->source + name->length);
|
||||||
|
} else {
|
||||||
|
// Otherwise, the name is a slice of the malloc-ed
|
||||||
|
// owned string, in which case we need to copy it
|
||||||
|
// out into a new string.
|
||||||
|
size_t length = pm_string_length(name);
|
||||||
|
|
||||||
|
void *memory = malloc(length);
|
||||||
|
memcpy(memory, pm_string_source(name), length);
|
||||||
|
|
||||||
|
local = pm_parser_local_add_owned(parser, (const uint8_t *) memory, length);
|
||||||
|
}
|
||||||
|
|
||||||
|
pm_constant_id_list_append(&match->locals, local);
|
||||||
|
}
|
||||||
|
|
||||||
|
result = (pm_node_t *) match;
|
||||||
|
} else {
|
||||||
|
result = (pm_node_t *) call;
|
||||||
|
}
|
||||||
|
|
||||||
|
pm_string_list_free(&named_captures);
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
static inline pm_node_t *
|
static inline pm_node_t *
|
||||||
parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t previous_binding_power, pm_binding_power_t binding_power) {
|
parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t previous_binding_power, pm_binding_power_t binding_power) {
|
||||||
pm_token_t token = parser->current;
|
pm_token_t token = parser->current;
|
||||||
|
@ -14995,42 +15039,51 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
|
||||||
// If the receiver of this =~ is a regular expression node, then we
|
// If the receiver of this =~ is a regular expression node, then we
|
||||||
// need to introduce local variables for it based on its named
|
// need to introduce local variables for it based on its named
|
||||||
// capture groups.
|
// capture groups.
|
||||||
if (PM_NODE_TYPE_P(node, PM_REGULAR_EXPRESSION_NODE)) {
|
if (PM_NODE_TYPE_P(node, PM_INTERPOLATED_REGULAR_EXPRESSION_NODE)) {
|
||||||
pm_string_list_t named_captures;
|
// It's possible to have an interpolated regular expression node
|
||||||
pm_string_list_init(&named_captures);
|
// that only contains strings. This is because it can be split
|
||||||
|
// up by a heredoc. In this case we need to concat the unescaped
|
||||||
|
// strings together and then parse them as a regular expression.
|
||||||
|
pm_node_list_t *parts = &((pm_interpolated_regular_expression_node_t *) node)->parts;
|
||||||
|
|
||||||
const pm_string_t *unescaped = &((pm_regular_expression_node_t *) node)->unescaped;
|
bool interpolated = false;
|
||||||
if (pm_regexp_named_capture_group_names(pm_string_source(unescaped), pm_string_length(unescaped), &named_captures, parser->encoding_changed, &parser->encoding) && (named_captures.length > 0)) {
|
size_t total_length = 0;
|
||||||
pm_match_write_node_t *match = pm_match_write_node_create(parser, call);
|
|
||||||
|
|
||||||
for (size_t index = 0; index < named_captures.length; index++) {
|
for (size_t index = 0; index < parts->size; index++) {
|
||||||
pm_string_t *name = &named_captures.strings[index];
|
pm_node_t *part = parts->nodes[index];
|
||||||
pm_constant_id_t local;
|
|
||||||
|
|
||||||
if (unescaped->type == PM_STRING_SHARED) {
|
if (PM_NODE_TYPE_P(part, PM_STRING_NODE)) {
|
||||||
// If the unescaped string is a slice of the source,
|
total_length += pm_string_length(&((pm_string_node_t *) part)->unescaped);
|
||||||
// then we can copy the names directly. The pointers
|
} else {
|
||||||
// will line up.
|
interpolated = true;
|
||||||
local = pm_parser_local_add_location(parser, name->source, name->source + name->length);
|
break;
|
||||||
} else {
|
|
||||||
// Otherwise, the name is a slice of the malloc-ed
|
|
||||||
// owned string, in which case we need to copy it
|
|
||||||
// out into a new string.
|
|
||||||
size_t length = pm_string_length(name);
|
|
||||||
|
|
||||||
void *memory = malloc(length);
|
|
||||||
memcpy(memory, pm_string_source(name), length);
|
|
||||||
|
|
||||||
local = pm_parser_local_add_owned(parser, (const uint8_t *) memory, length);
|
|
||||||
}
|
|
||||||
|
|
||||||
pm_constant_id_list_append(&match->locals, local);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
result = (pm_node_t *) match;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
pm_string_list_free(&named_captures);
|
if (!interpolated) {
|
||||||
|
void *memory = malloc(total_length);
|
||||||
|
if (!memory) abort();
|
||||||
|
|
||||||
|
uint8_t *cursor = memory;
|
||||||
|
for (size_t index = 0; index < parts->size; index++) {
|
||||||
|
pm_string_t *unescaped = &((pm_string_node_t *) parts->nodes[index])->unescaped;
|
||||||
|
size_t length = pm_string_length(unescaped);
|
||||||
|
|
||||||
|
memcpy(cursor, pm_string_source(unescaped), length);
|
||||||
|
cursor += length;
|
||||||
|
}
|
||||||
|
|
||||||
|
pm_string_t owned;
|
||||||
|
pm_string_owned_init(&owned, (uint8_t *) memory, total_length);
|
||||||
|
|
||||||
|
result = parse_regular_expression_named_captures(parser, &owned, call);
|
||||||
|
pm_string_free(&owned);
|
||||||
|
}
|
||||||
|
} else if (PM_NODE_TYPE_P(node, PM_REGULAR_EXPRESSION_NODE)) {
|
||||||
|
// If we have a regular expression node, then we can just parse
|
||||||
|
// the named captures directly off the unescaped string.
|
||||||
|
const pm_string_t *content = &((pm_regular_expression_node_t *) node)->unescaped;
|
||||||
|
result = parse_regular_expression_named_captures(parser, content, call);
|
||||||
}
|
}
|
||||||
|
|
||||||
return result;
|
return result;
|
||||||
|
|
|
@ -188,6 +188,8 @@ pm_regexp_parse_range_quantifier(pm_regexp_parser_t *parser) {
|
||||||
// ;
|
// ;
|
||||||
static bool
|
static bool
|
||||||
pm_regexp_parse_quantifier(pm_regexp_parser_t *parser) {
|
pm_regexp_parse_quantifier(pm_regexp_parser_t *parser) {
|
||||||
|
if (pm_regexp_char_is_eof(parser)) return true;
|
||||||
|
|
||||||
switch (*parser->cursor) {
|
switch (*parser->cursor) {
|
||||||
case '*':
|
case '*':
|
||||||
case '+':
|
case '+':
|
||||||
|
|
|
@ -49,3 +49,7 @@ pp <<-A, %I[p\
|
||||||
o
|
o
|
||||||
A
|
A
|
||||||
p]
|
p]
|
||||||
|
|
||||||
|
<<A; /\
|
||||||
|
A
|
||||||
|
(?<a>)/ =~ ''
|
||||||
|
|
|
@ -1,8 +1,8 @@
|
||||||
@ ProgramNode (location: (4,0)-(51,2))
|
@ ProgramNode (location: (4,0)-(55,13))
|
||||||
├── locals: []
|
├── locals: [:a]
|
||||||
└── statements:
|
└── statements:
|
||||||
@ StatementsNode (location: (4,0)-(51,2))
|
@ StatementsNode (location: (4,0)-(55,13))
|
||||||
└── body: (length: 8)
|
└── body: (length: 10)
|
||||||
├── @ CallNode (location: (4,0)-(7,7))
|
├── @ CallNode (location: (4,0)-(7,7))
|
||||||
│ ├── receiver: ∅
|
│ ├── receiver: ∅
|
||||||
│ ├── call_operator_loc: ∅
|
│ ├── call_operator_loc: ∅
|
||||||
|
@ -270,41 +270,86 @@
|
||||||
│ ├── block: ∅
|
│ ├── block: ∅
|
||||||
│ ├── flags: ∅
|
│ ├── flags: ∅
|
||||||
│ └── name: :pp
|
│ └── name: :pp
|
||||||
└── @ CallNode (location: (48,0)-(51,2))
|
├── @ CallNode (location: (48,0)-(51,2))
|
||||||
├── receiver: ∅
|
│ ├── receiver: ∅
|
||||||
├── call_operator_loc: ∅
|
│ ├── call_operator_loc: ∅
|
||||||
├── message_loc: (48,0)-(48,2) = "pp"
|
│ ├── message_loc: (48,0)-(48,2) = "pp"
|
||||||
├── opening_loc: ∅
|
│ ├── opening_loc: ∅
|
||||||
├── arguments:
|
│ ├── arguments:
|
||||||
│ @ ArgumentsNode (location: (48,3)-(51,2))
|
│ │ @ ArgumentsNode (location: (48,3)-(51,2))
|
||||||
│ ├── arguments: (length: 2)
|
│ │ ├── arguments: (length: 2)
|
||||||
│ │ ├── @ StringNode (location: (48,3)-(48,7))
|
│ │ │ ├── @ StringNode (location: (48,3)-(48,7))
|
||||||
│ │ │ ├── flags: ∅
|
│ │ │ │ ├── flags: ∅
|
||||||
│ │ │ ├── opening_loc: (48,3)-(48,7) = "<<-A"
|
│ │ │ │ ├── opening_loc: (48,3)-(48,7) = "<<-A"
|
||||||
│ │ │ ├── content_loc: (49,0)-(50,0) = "o\n"
|
│ │ │ │ ├── content_loc: (49,0)-(50,0) = "o\n"
|
||||||
│ │ │ ├── closing_loc: (50,0)-(51,0) = "A\n"
|
│ │ │ │ ├── closing_loc: (50,0)-(51,0) = "A\n"
|
||||||
│ │ │ └── unescaped: "o\n"
|
│ │ │ │ └── unescaped: "o\n"
|
||||||
│ │ └── @ ArrayNode (location: (48,9)-(51,2))
|
│ │ │ └── @ ArrayNode (location: (48,9)-(51,2))
|
||||||
│ │ ├── elements: (length: 1)
|
│ │ │ ├── elements: (length: 1)
|
||||||
│ │ │ └── @ InterpolatedSymbolNode (location: (48,12)-(48,14))
|
│ │ │ │ └── @ InterpolatedSymbolNode (location: (48,12)-(48,14))
|
||||||
│ │ │ ├── opening_loc: ∅
|
│ │ │ │ ├── opening_loc: ∅
|
||||||
│ │ │ ├── parts: (length: 2)
|
│ │ │ │ ├── parts: (length: 2)
|
||||||
│ │ │ │ ├── @ SymbolNode (location: (48,12)-(48,14))
|
│ │ │ │ │ ├── @ SymbolNode (location: (48,12)-(48,14))
|
||||||
│ │ │ │ │ ├── opening_loc: ∅
|
│ │ │ │ │ │ ├── opening_loc: ∅
|
||||||
│ │ │ │ │ ├── value_loc: (48,12)-(48,14) = "p\\"
|
│ │ │ │ │ │ ├── value_loc: (48,12)-(48,14) = "p\\"
|
||||||
│ │ │ │ │ ├── closing_loc: ∅
|
│ │ │ │ │ │ ├── closing_loc: ∅
|
||||||
│ │ │ │ │ └── unescaped: "p\n"
|
│ │ │ │ │ │ └── unescaped: "p\n"
|
||||||
│ │ │ │ └── @ StringNode (location: (48,12)-(48,14))
|
│ │ │ │ │ └── @ StringNode (location: (48,12)-(48,14))
|
||||||
│ │ │ │ ├── flags: ∅
|
│ │ │ │ │ ├── flags: ∅
|
||||||
│ │ │ │ ├── opening_loc: ∅
|
│ │ │ │ │ ├── opening_loc: ∅
|
||||||
│ │ │ │ ├── content_loc: (48,12)-(48,14) = "p\\"
|
│ │ │ │ │ ├── content_loc: (48,12)-(48,14) = "p\\"
|
||||||
│ │ │ │ ├── closing_loc: ∅
|
│ │ │ │ │ ├── closing_loc: ∅
|
||||||
│ │ │ │ └── unescaped: "p"
|
│ │ │ │ │ └── unescaped: "p"
|
||||||
│ │ │ └── closing_loc: ∅
|
│ │ │ │ └── closing_loc: ∅
|
||||||
│ │ ├── opening_loc: (48,9)-(48,12) = "%I["
|
│ │ │ ├── opening_loc: (48,9)-(48,12) = "%I["
|
||||||
│ │ └── closing_loc: (51,1)-(51,2) = "]"
|
│ │ │ └── closing_loc: (51,1)-(51,2) = "]"
|
||||||
│ └── flags: ∅
|
│ │ └── flags: ∅
|
||||||
├── closing_loc: ∅
|
│ ├── closing_loc: ∅
|
||||||
├── block: ∅
|
│ ├── block: ∅
|
||||||
├── flags: ∅
|
│ ├── flags: ∅
|
||||||
└── name: :pp
|
│ └── name: :pp
|
||||||
|
├── @ StringNode (location: (53,0)-(53,3))
|
||||||
|
│ ├── flags: ∅
|
||||||
|
│ ├── opening_loc: (53,0)-(53,3) = "<<A"
|
||||||
|
│ ├── content_loc: (54,0)-(54,0) = ""
|
||||||
|
│ ├── closing_loc: (54,0)-(55,0) = "A\n"
|
||||||
|
│ └── unescaped: ""
|
||||||
|
└── @ MatchWriteNode (location: (53,5)-(55,13))
|
||||||
|
├── call:
|
||||||
|
│ @ CallNode (location: (53,5)-(55,13))
|
||||||
|
│ ├── receiver:
|
||||||
|
│ │ @ InterpolatedRegularExpressionNode (location: (53,5)-(55,7))
|
||||||
|
│ │ ├── opening_loc: (53,5)-(53,6) = "/"
|
||||||
|
│ │ ├── parts: (length: 2)
|
||||||
|
│ │ │ ├── @ StringNode (location: (53,6)-(53,7))
|
||||||
|
│ │ │ │ ├── flags: ∅
|
||||||
|
│ │ │ │ ├── opening_loc: ∅
|
||||||
|
│ │ │ │ ├── content_loc: (53,6)-(53,7) = "\\"
|
||||||
|
│ │ │ │ ├── closing_loc: ∅
|
||||||
|
│ │ │ │ └── unescaped: ""
|
||||||
|
│ │ │ └── @ StringNode (location: (55,0)-(55,6))
|
||||||
|
│ │ │ ├── flags: ∅
|
||||||
|
│ │ │ ├── opening_loc: ∅
|
||||||
|
│ │ │ ├── content_loc: (55,0)-(55,6) = "(?<a>)"
|
||||||
|
│ │ │ ├── closing_loc: ∅
|
||||||
|
│ │ │ └── unescaped: "(?<a>)"
|
||||||
|
│ │ ├── closing_loc: (55,6)-(55,7) = "/"
|
||||||
|
│ │ └── flags: ∅
|
||||||
|
│ ├── call_operator_loc: ∅
|
||||||
|
│ ├── message_loc: (55,8)-(55,10) = "=~"
|
||||||
|
│ ├── opening_loc: ∅
|
||||||
|
│ ├── arguments:
|
||||||
|
│ │ @ ArgumentsNode (location: (55,11)-(55,13))
|
||||||
|
│ │ ├── arguments: (length: 1)
|
||||||
|
│ │ │ └── @ StringNode (location: (55,11)-(55,13))
|
||||||
|
│ │ │ ├── flags: ∅
|
||||||
|
│ │ │ ├── opening_loc: (55,11)-(55,12) = "'"
|
||||||
|
│ │ │ ├── content_loc: (55,12)-(55,12) = ""
|
||||||
|
│ │ │ ├── closing_loc: (55,12)-(55,13) = "'"
|
||||||
|
│ │ │ └── unescaped: ""
|
||||||
|
│ │ └── flags: ∅
|
||||||
|
│ ├── closing_loc: ∅
|
||||||
|
│ ├── block: ∅
|
||||||
|
│ ├── flags: ∅
|
||||||
|
│ └── name: :=~
|
||||||
|
└── locals: [:a]
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue