mirror of
https://github.com/ruby/ruby.git
synced 2025-08-28 15:36:16 +02:00
Mixed encoding error can continue to parse
This commit is contained in:
parent
45ad375acc
commit
2893550452
3 changed files with 22 additions and 15 deletions
17
parse.y
17
parse.y
|
@ -6180,7 +6180,7 @@ tokadd_codepoint(struct parser_params *p, rb_encoding **encp,
|
||||||
}
|
}
|
||||||
|
|
||||||
/* return value is for ?\u3042 */
|
/* return value is for ?\u3042 */
|
||||||
static int
|
static void
|
||||||
parser_tokadd_utf8(struct parser_params *p, rb_encoding **encp,
|
parser_tokadd_utf8(struct parser_params *p, rb_encoding **encp,
|
||||||
int string_literal, int symbol_literal, int regexp_literal)
|
int string_literal, int symbol_literal, int regexp_literal)
|
||||||
{
|
{
|
||||||
|
@ -6214,7 +6214,7 @@ parser_tokadd_utf8(struct parser_params *p, rb_encoding **encp,
|
||||||
unterminated:
|
unterminated:
|
||||||
literal_flush(p, p->lex.pcur);
|
literal_flush(p, p->lex.pcur);
|
||||||
yyerror0("unterminated Unicode escape");
|
yyerror0("unterminated Unicode escape");
|
||||||
return 0;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (regexp_literal) tokadd(p, close_brace);
|
if (regexp_literal) tokadd(p, close_brace);
|
||||||
|
@ -6222,11 +6222,11 @@ parser_tokadd_utf8(struct parser_params *p, rb_encoding **encp,
|
||||||
}
|
}
|
||||||
else { /* handle \uxxxx form */
|
else { /* handle \uxxxx form */
|
||||||
if (!tokadd_codepoint(p, encp, regexp_literal, FALSE)) {
|
if (!tokadd_codepoint(p, encp, regexp_literal, FALSE)) {
|
||||||
return 0;
|
return;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return TRUE;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
#define ESCAPE_CONTROL 1
|
#define ESCAPE_CONTROL 1
|
||||||
|
@ -6568,11 +6568,9 @@ tokadd_string(struct parser_params *p,
|
||||||
tokadd(p, '\\');
|
tokadd(p, '\\');
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
if (!parser_tokadd_utf8(p, enc, term,
|
parser_tokadd_utf8(p, enc, term,
|
||||||
func & STR_FUNC_SYMBOL,
|
func & STR_FUNC_SYMBOL,
|
||||||
func & STR_FUNC_REGEXP)) {
|
func & STR_FUNC_REGEXP);
|
||||||
continue;
|
|
||||||
}
|
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
default:
|
default:
|
||||||
|
@ -8070,8 +8068,7 @@ parse_qmark(struct parser_params *p, int space_seen)
|
||||||
if (peek(p, 'u')) {
|
if (peek(p, 'u')) {
|
||||||
nextc(p);
|
nextc(p);
|
||||||
enc = rb_utf8_encoding();
|
enc = rb_utf8_encoding();
|
||||||
if (!parser_tokadd_utf8(p, &enc, -1, 0, 0))
|
parser_tokadd_utf8(p, &enc, -1, 0, 0);
|
||||||
return 0;
|
|
||||||
}
|
}
|
||||||
else if (!lex_eol_p(p) && !(c = *p->lex.pcur, ISASCII(c))) {
|
else if (!lex_eol_p(p) && !(c = *p->lex.pcur, ISASCII(c))) {
|
||||||
nextc(p);
|
nextc(p);
|
||||||
|
|
|
@ -562,6 +562,9 @@ class TestParse < Test::Unit::TestCase
|
||||||
assert_raise(SyntaxError) { eval(" ?a\x8a".force_encoding("utf-8")) }
|
assert_raise(SyntaxError) { eval(" ?a\x8a".force_encoding("utf-8")) }
|
||||||
assert_equal("\u{1234}", eval("?\u{1234}"))
|
assert_equal("\u{1234}", eval("?\u{1234}"))
|
||||||
assert_equal("\u{1234}", eval('?\u{1234}'))
|
assert_equal("\u{1234}", eval('?\u{1234}'))
|
||||||
|
assert_equal("\u{1234}", eval('?\u1234'))
|
||||||
|
e = assert_syntax_error('"#{?\u123}"', 'invalid Unicode escape')
|
||||||
|
assert_not_match(/end-of-input/, e.message)
|
||||||
end
|
end
|
||||||
|
|
||||||
def test_percent
|
def test_percent
|
||||||
|
|
|
@ -775,32 +775,39 @@ eom
|
||||||
end
|
end
|
||||||
|
|
||||||
def test_heredoc_mixed_encoding
|
def test_heredoc_mixed_encoding
|
||||||
assert_syntax_error(<<-'HEREDOC', 'UTF-8 mixed within Windows-31J source')
|
e = assert_syntax_error(<<-'HEREDOC', 'UTF-8 mixed within Windows-31J source')
|
||||||
#encoding: cp932
|
#encoding: cp932
|
||||||
<<-TEXT
|
<<-TEXT
|
||||||
\xe9\x9d\u1234
|
\xe9\x9d\u1234
|
||||||
TEXT
|
TEXT
|
||||||
HEREDOC
|
HEREDOC
|
||||||
assert_syntax_error(<<-'HEREDOC', 'UTF-8 mixed within Windows-31J source')
|
assert_not_match(/end-of-input/, e.message)
|
||||||
|
|
||||||
|
e = assert_syntax_error(<<-'HEREDOC', 'UTF-8 mixed within Windows-31J source')
|
||||||
#encoding: cp932
|
#encoding: cp932
|
||||||
<<-TEXT
|
<<-TEXT
|
||||||
\xe9\x9d
|
\xe9\x9d
|
||||||
\u1234
|
\u1234
|
||||||
TEXT
|
TEXT
|
||||||
HEREDOC
|
HEREDOC
|
||||||
assert_syntax_error(<<-'HEREDOC', 'UTF-8 mixed within Windows-31J source')
|
assert_not_match(/end-of-input/, e.message)
|
||||||
|
|
||||||
|
e = assert_syntax_error(<<-'HEREDOC', 'UTF-8 mixed within Windows-31J source')
|
||||||
#encoding: cp932
|
#encoding: cp932
|
||||||
<<-TEXT
|
<<-TEXT
|
||||||
\u1234\xe9\x9d
|
\u1234\xe9\x9d
|
||||||
TEXT
|
TEXT
|
||||||
HEREDOC
|
HEREDOC
|
||||||
assert_syntax_error(<<-'HEREDOC', 'UTF-8 mixed within Windows-31J source')
|
assert_not_match(/end-of-input/, e.message)
|
||||||
|
|
||||||
|
e = assert_syntax_error(<<-'HEREDOC', 'UTF-8 mixed within Windows-31J source')
|
||||||
#encoding: cp932
|
#encoding: cp932
|
||||||
<<-TEXT
|
<<-TEXT
|
||||||
\u1234
|
\u1234
|
||||||
\xe9\x9d
|
\xe9\x9d
|
||||||
TEXT
|
TEXT
|
||||||
HEREDOC
|
HEREDOC
|
||||||
|
assert_not_match(/end-of-input/, e.message)
|
||||||
end
|
end
|
||||||
|
|
||||||
def test_lineno_operation_brace_block
|
def test_lineno_operation_brace_block
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue