mirror of
https://github.com/ruby/ruby.git
synced 2025-09-18 10:03:59 +02:00
merge revision(s) 1bc8838d60: [Backport #19750]
Handle unterminated unicode escapes in regexps
This fixes an infinite loop possible after ec3542229b.
For \u{} escapes in regexps, skip validation in the parser, and rely on the regexp
code to handle validation. This is necessary so that invalid unicode escapes in
comments in extended regexps are allowed.
Fixes [Bug #19750]
Co-authored-by: Nobuyoshi Nakada <nobu@ruby-lang.org>
---
parse.y | 97 ++++++++++++++++++++++++++++++++-----------------
test/ruby/test_parse.rb | 16 ++++++++
2 files changed, 79 insertions(+), 34 deletions(-)
This commit is contained in:
parent aef5316224
commit 3f6187a947
3 changed files with 82 additions and 37 deletions
29
parse.y
29
parse.y
|
@ -7260,6 +7260,8 @@ tokadd_codepoint(struct parser_params *p, rb_encoding **encp,
|
||||||
return TRUE;
|
return TRUE;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static int tokadd_mbchar(struct parser_params *p, int c);
|
||||||
|
|
||||||
/* return value is for ?\u3042 */
|
/* return value is for ?\u3042 */
|
||||||
static void
|
static void
|
||||||
tokadd_utf8(struct parser_params *p, rb_encoding **encp,
|
tokadd_utf8(struct parser_params *p, rb_encoding **encp,
|
||||||
|
@ -7277,6 +7279,32 @@ tokadd_utf8(struct parser_params *p, rb_encoding **encp,
|
||||||
if (regexp_literal) { tokadd(p, '\\'); tokadd(p, 'u'); }
|
if (regexp_literal) { tokadd(p, '\\'); tokadd(p, 'u'); }
|
||||||
|
|
||||||
if (peek(p, open_brace)) { /* handle \u{...} form */
|
if (peek(p, open_brace)) { /* handle \u{...} form */
|
||||||
|
if (regexp_literal && p->lex.strterm->u.literal.u1.func == str_regexp) {
|
||||||
|
/*
|
||||||
|
* Skip parsing validation code and copy bytes as-is until term or
|
||||||
|
* closing brace, in order to correctly handle extended regexps where
|
||||||
|
* invalid unicode escapes are allowed in comments. The regexp parser
|
||||||
|
* does its own validation and will catch any issues.
|
||||||
|
*/
|
||||||
|
int c = *p->lex.pcur;
|
||||||
|
tokadd(p, c);
|
||||||
|
for (c = *++p->lex.pcur; p->lex.pcur < p->lex.pend; c = *++p->lex.pcur) {
|
||||||
|
if (c == close_brace) {
|
||||||
|
tokadd(p, c);
|
||||||
|
++p->lex.pcur;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
else if (c == term) {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
if (c == '\\' && p->lex.pcur + 1 < p->lex.pend) {
|
||||||
|
tokadd(p, c);
|
||||||
|
c = *++p->lex.pcur;
|
||||||
|
}
|
||||||
|
tokadd_mbchar(p, c);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else {
|
||||||
const char *second = NULL;
|
const char *second = NULL;
|
||||||
int c, last = nextc(p);
|
int c, last = nextc(p);
|
||||||
if (p->lex.pcur >= p->lex.pend) goto unterminated;
|
if (p->lex.pcur >= p->lex.pend) goto unterminated;
|
||||||
|
@ -7316,6 +7344,7 @@ tokadd_utf8(struct parser_params *p, rb_encoding **encp,
|
||||||
if (regexp_literal) tokadd(p, close_brace);
|
if (regexp_literal) tokadd(p, close_brace);
|
||||||
nextc(p);
|
nextc(p);
|
||||||
}
|
}
|
||||||
|
}
|
||||||
else { /* handle \uxxxx form */
|
else { /* handle \uxxxx form */
|
||||||
if (!tokadd_codepoint(p, encp, regexp_literal, FALSE)) {
|
if (!tokadd_codepoint(p, encp, regexp_literal, FALSE)) {
|
||||||
token_flush(p);
|
token_flush(p);
|
||||||
|
|
|
@ -1041,6 +1041,22 @@ x = __ENCODING__
|
||||||
assert_syntax_error(" 0b\n", /\^/)
|
assert_syntax_error(" 0b\n", /\^/)
|
||||||
end
|
end
|
||||||
|
|
||||||
|
def test_unclosed_unicode_escape_at_eol_bug_19750
|
||||||
|
assert_separately([], "#{<<-"begin;"}\n#{<<~'end;'}")
|
||||||
|
begin;
|
||||||
|
assert_syntax_error("/\\u", /too short escape sequence/)
|
||||||
|
assert_syntax_error("/\\u{", /unterminated regexp meets end of file/)
|
||||||
|
assert_syntax_error("/\\u{\\n", /invalid Unicode list/)
|
||||||
|
assert_syntax_error("/a#\\u{\\n/", /invalid Unicode list/)
|
||||||
|
re = eval("/a#\\u{\n$/x")
|
||||||
|
assert_match(re, 'a')
|
||||||
|
assert_not_match(re, 'a#')
|
||||||
|
re = eval("/a#\\u\n$/x")
|
||||||
|
assert_match(re, 'a')
|
||||||
|
assert_not_match(re, 'a#')
|
||||||
|
end;
|
||||||
|
end
|
||||||
|
|
||||||
def test_error_def_in_argument
|
def test_error_def_in_argument
|
||||||
assert_separately([], "#{<<-"begin;"}\n#{<<~"end;"}")
|
assert_separately([], "#{<<-"begin;"}\n#{<<~"end;"}")
|
||||||
begin;
|
begin;
|
||||||
|
|
|
@ -11,7 +11,7 @@
|
||||||
# define RUBY_VERSION_MINOR RUBY_API_VERSION_MINOR
|
# define RUBY_VERSION_MINOR RUBY_API_VERSION_MINOR
|
||||||
#define RUBY_VERSION_TEENY 2
|
#define RUBY_VERSION_TEENY 2
|
||||||
#define RUBY_RELEASE_DATE RUBY_RELEASE_YEAR_STR"-"RUBY_RELEASE_MONTH_STR"-"RUBY_RELEASE_DAY_STR
|
#define RUBY_RELEASE_DATE RUBY_RELEASE_YEAR_STR"-"RUBY_RELEASE_MONTH_STR"-"RUBY_RELEASE_DAY_STR
|
||||||
#define RUBY_PATCHLEVEL 92
|
#define RUBY_PATCHLEVEL 93
|
||||||
|
|
||||||
#include "ruby/version.h"
|
#include "ruby/version.h"
|
||||||
#include "ruby/internal/abi.h"
|
#include "ruby/internal/abi.h"
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue