[ruby/prism] Account for encoding in regexp named captures

17dc6b6281
This commit is contained in:
Kevin Newton 2024-02-18 16:36:16 -05:00 committed by git
parent ea529dd409
commit ec1eda7b62
5 changed files with 72 additions and 2 deletions

View file

@ -565,23 +565,38 @@ pm_regexp_parse_group(pm_regexp_parser_t *parser) {
*/
static bool
pm_regexp_parse_item(pm_regexp_parser_t *parser) {
    // Dispatch on the byte under the cursor WITHOUT consuming it in the switch
    // expression. Every branch advances the cursor itself, because the default
    // branch may have to step over more than one byte.
    switch (*parser->cursor) {
        case '^':
        case '$':
            // Anchors are one byte wide; no quantifier is parsed after them.
            parser->cursor++;
            return true;
        case '\\':
            // Skip the backslash and, unless the input ends right after it,
            // the single byte that follows.
            parser->cursor++;
            if (!pm_regexp_char_is_eof(parser)) {
                parser->cursor++;
            }
            return pm_regexp_parse_quantifier(parser);
        case '(':
            parser->cursor++;
            return pm_regexp_parse_group(parser) && pm_regexp_parse_quantifier(parser);
        case '[':
            parser->cursor++;
            return pm_regexp_parse_lbracket(parser) && pm_regexp_parse_quantifier(parser);
        default: {
            // Any other character: advance by the width reported for the
            // active encoding rather than by one byte, so that a trailing byte
            // of a multibyte character is never mistaken for a metacharacter
            // such as '(' on the next call.
            const ptrdiff_t remaining = (ptrdiff_t) (parser->end - parser->cursor);
            size_t width;

            if (!parser->encoding_changed) {
                // NOTE(review): presumably UTF-8 is the encoding in effect
                // when it has not been changed -- confirm against the parser
                // setup.
                width = pm_encoding_utf_8_char_width(parser->cursor, remaining);
            } else {
                width = parser->encoding->char_width(parser->cursor, remaining);
            }

            // A zero width (presumably an invalid or truncated byte sequence)
            // makes the item fail to parse.
            if (width == 0) return false; // TODO: add appropriate error
            parser->cursor += width;

            return pm_regexp_parse_quantifier(parser);
        }
    }
}
/**
* expression : item+

View file

@ -0,0 +1,3 @@
# encoding: sjis
/Ⅷ(?<a>.)Ⅹ(?<b>.)/ =~ 'ⅧaⅩb'
[a, b]

View file

@ -55,6 +55,7 @@ module Prism
dos_endings.txt
heredocs_with_ignored_newlines.txt
regex.txt
regex_char_width.txt
spanning_heredoc.txt
spanning_heredoc_newlines.txt
tilde_heredocs.txt

View file

@ -30,6 +30,7 @@ module Prism
todos = %w[
heredocs_nested.txt
newline_terminated.txt
regex_char_width.txt
seattlerb/bug169.txt
seattlerb/dstr_evstr.txt
seattlerb/heredoc_squiggly_interp.txt

View file

@ -0,0 +1,50 @@
@ ProgramNode (location: (2,0)-(3,6))
├── locals: [:a, :b]
└── statements:
@ StatementsNode (location: (2,0)-(3,6))
└── body: (length: 2)
├── @ MatchWriteNode (location: (2,0)-(2,36))
│ ├── call:
│ │ @ CallNode (location: (2,0)-(2,36))
│ │ ├── flags: ∅
│ │ ├── receiver:
│ │ │ @ RegularExpressionNode (location: (2,0)-(2,22))
│ │ │ ├── flags: ∅
│ │ │ ├── opening_loc: (2,0)-(2,1) = "/"
│ │ │ ├── content_loc: (2,1)-(2,21) = "\x{E285}\xA7(?<a>.)\x{E285}\xA9(?<b>.)"
│ │ │ ├── closing_loc: (2,21)-(2,22) = "/"
│ │ │ └── unescaped: "\x{E285}\xA7(?<a>.)\x{E285}\xA9(?<b>.)"
│ │ ├── call_operator_loc: ∅
│ │ ├── name: :=~
│ │ ├── message_loc: (2,23)-(2,25) = "=~"
│ │ ├── opening_loc: ∅
│ │ ├── arguments:
│ │ │ @ ArgumentsNode (location: (2,26)-(2,36))
│ │ │ ├── flags: ∅
│ │ │ └── arguments: (length: 1)
│ │ │ └── @ StringNode (location: (2,26)-(2,36))
│ │ │ ├── flags: ∅
│ │ │ ├── opening_loc: (2,26)-(2,27) = "'"
│ │ │ ├── content_loc: (2,27)-(2,35) = "\x{E285}\xA7a\x{E285}\xA9b"
│ │ │ ├── closing_loc: (2,35)-(2,36) = "'"
│ │ │ └── unescaped: "\x{E285}\xA7a\x{E285}\xA9b"
│ │ ├── closing_loc: ∅
│ │ └── block: ∅
│ └── targets: (length: 2)
│ ├── @ LocalVariableTargetNode (location: (2,7)-(2,8))
│ │ ├── name: :a
│ │ └── depth: 0
│ └── @ LocalVariableTargetNode (location: (2,17)-(2,18))
│ ├── name: :b
│ └── depth: 0
└── @ ArrayNode (location: (3,0)-(3,6))
├── flags: ∅
├── elements: (length: 2)
│ ├── @ LocalVariableReadNode (location: (3,1)-(3,2))
│ │ ├── name: :a
│ │ └── depth: 0
│ └── @ LocalVariableReadNode (location: (3,4)-(3,5))
│ ├── name: :b
│ └── depth: 0
├── opening_loc: (3,0)-(3,1) = "["
└── closing_loc: (3,5)-(3,6) = "]"