[Bug #21186] multibyte char literal should be a single letter word

[Backport #21186]
This commit is contained in:
Nobuyoshi Nakada 2025-03-17 21:37:00 +09:00 committed by Takashi Kokubun
parent 6ca43e9b16
commit 8b491169c3
2 changed files with 8 additions and 7 deletions

13
parse.y
View file

@@ -10047,6 +10047,7 @@ parse_qmark(struct parser_params *p, int space_seen)
 rb_encoding *enc;
 register int c;
 rb_parser_string_t *lit;
+const char *start = p->lex.pcur;
 if (IS_END()) {
 SET_LEX_STATE(EXPR_VALUE);
@@ -10071,13 +10072,11 @@ parse_qmark(struct parser_params *p, int space_seen)
 }
 newtok(p);
 enc = p->enc;
-if (!parser_isascii(p)) {
-if (tokadd_mbchar(p, c) == -1) return 0;
-}
-else if ((rb_enc_isalnum(c, p->enc) || c == '_') &&
-!lex_eol_p(p) && is_identchar(p, p->lex.pcur, p->lex.pend, p->enc)) {
+int w = parser_precise_mbclen(p, start);
+if (is_identchar(p, start, p->lex.pend, p->enc) &&
+!(lex_eol_ptr_n_p(p, start, w) || !is_identchar(p, start + w, p->lex.pend, p->enc))) {
 if (space_seen) {
-const char *start = p->lex.pcur - 1, *ptr = start;
+const char *ptr = start;
 do {
 int n = parser_precise_mbclen(p, ptr);
 if (n < 0) return -1;
@@ -10105,7 +10104,7 @@ parse_qmark(struct parser_params *p, int space_seen)
 }
 }
 else {
-tokadd(p, c);
+if (tokadd_mbchar(p, c) == -1) return 0;
 }
 tokfix(p);
 lit = STR_NEW3(tok(p), toklen(p), enc, 0);

View file

@@ -657,6 +657,8 @@ class TestParse < Test::Unit::TestCase
 assert_equal("\u{1234}", eval('?\u{1234}'))
 assert_equal("\u{1234}", eval('?\u1234'))
 assert_syntax_error('?\u{41 42}', 'Multiple codepoints at single character literal')
+assert_syntax_error("?and", /unexpected '\?'/)
+assert_syntax_error("?\u1234and", /unexpected '\?'/)
 e = assert_syntax_error('"#{?\u123}"', 'invalid Unicode escape')
 assert_not_match(/end-of-input/, e.message)