Allow parsing to continue after a mixed encoding error

This commit is contained in:
Nobuyoshi Nakada 2019-05-24 16:10:59 +09:00
parent 45ad375acc
commit 2893550452
No known key found for this signature in database
GPG key ID: 4BC7D6DF58D8DF60
3 changed files with 22 additions and 15 deletions

17
parse.y
View file

@ -6180,7 +6180,7 @@ tokadd_codepoint(struct parser_params *p, rb_encoding **encp,
} }
/* return value is for ?\u3042 */ /* return value is for ?\u3042 */
static int static void
parser_tokadd_utf8(struct parser_params *p, rb_encoding **encp, parser_tokadd_utf8(struct parser_params *p, rb_encoding **encp,
int string_literal, int symbol_literal, int regexp_literal) int string_literal, int symbol_literal, int regexp_literal)
{ {
@ -6214,7 +6214,7 @@ parser_tokadd_utf8(struct parser_params *p, rb_encoding **encp,
unterminated: unterminated:
literal_flush(p, p->lex.pcur); literal_flush(p, p->lex.pcur);
yyerror0("unterminated Unicode escape"); yyerror0("unterminated Unicode escape");
return 0; return;
} }
if (regexp_literal) tokadd(p, close_brace); if (regexp_literal) tokadd(p, close_brace);
@ -6222,11 +6222,11 @@ parser_tokadd_utf8(struct parser_params *p, rb_encoding **encp,
} }
else { /* handle \uxxxx form */ else { /* handle \uxxxx form */
if (!tokadd_codepoint(p, encp, regexp_literal, FALSE)) { if (!tokadd_codepoint(p, encp, regexp_literal, FALSE)) {
return 0; return;
} }
} }
return TRUE; return;
} }
#define ESCAPE_CONTROL 1 #define ESCAPE_CONTROL 1
@ -6568,11 +6568,9 @@ tokadd_string(struct parser_params *p,
tokadd(p, '\\'); tokadd(p, '\\');
break; break;
} }
if (!parser_tokadd_utf8(p, enc, term, parser_tokadd_utf8(p, enc, term,
func & STR_FUNC_SYMBOL, func & STR_FUNC_SYMBOL,
func & STR_FUNC_REGEXP)) { func & STR_FUNC_REGEXP);
continue;
}
continue; continue;
default: default:
@ -8070,8 +8068,7 @@ parse_qmark(struct parser_params *p, int space_seen)
if (peek(p, 'u')) { if (peek(p, 'u')) {
nextc(p); nextc(p);
enc = rb_utf8_encoding(); enc = rb_utf8_encoding();
if (!parser_tokadd_utf8(p, &enc, -1, 0, 0)) parser_tokadd_utf8(p, &enc, -1, 0, 0);
return 0;
} }
else if (!lex_eol_p(p) && !(c = *p->lex.pcur, ISASCII(c))) { else if (!lex_eol_p(p) && !(c = *p->lex.pcur, ISASCII(c))) {
nextc(p); nextc(p);

View file

@ -562,6 +562,9 @@ class TestParse < Test::Unit::TestCase
assert_raise(SyntaxError) { eval(" ?a\x8a".force_encoding("utf-8")) } assert_raise(SyntaxError) { eval(" ?a\x8a".force_encoding("utf-8")) }
assert_equal("\u{1234}", eval("?\u{1234}")) assert_equal("\u{1234}", eval("?\u{1234}"))
assert_equal("\u{1234}", eval('?\u{1234}')) assert_equal("\u{1234}", eval('?\u{1234}'))
assert_equal("\u{1234}", eval('?\u1234'))
e = assert_syntax_error('"#{?\u123}"', 'invalid Unicode escape')
assert_not_match(/end-of-input/, e.message)
end end
def test_percent def test_percent

View file

@ -775,32 +775,39 @@ eom
end end
def test_heredoc_mixed_encoding def test_heredoc_mixed_encoding
assert_syntax_error(<<-'HEREDOC', 'UTF-8 mixed within Windows-31J source') e = assert_syntax_error(<<-'HEREDOC', 'UTF-8 mixed within Windows-31J source')
#encoding: cp932 #encoding: cp932
<<-TEXT <<-TEXT
\xe9\x9d\u1234 \xe9\x9d\u1234
TEXT TEXT
HEREDOC HEREDOC
assert_syntax_error(<<-'HEREDOC', 'UTF-8 mixed within Windows-31J source') assert_not_match(/end-of-input/, e.message)
e = assert_syntax_error(<<-'HEREDOC', 'UTF-8 mixed within Windows-31J source')
#encoding: cp932 #encoding: cp932
<<-TEXT <<-TEXT
\xe9\x9d \xe9\x9d
\u1234 \u1234
TEXT TEXT
HEREDOC HEREDOC
assert_syntax_error(<<-'HEREDOC', 'UTF-8 mixed within Windows-31J source') assert_not_match(/end-of-input/, e.message)
e = assert_syntax_error(<<-'HEREDOC', 'UTF-8 mixed within Windows-31J source')
#encoding: cp932 #encoding: cp932
<<-TEXT <<-TEXT
\u1234\xe9\x9d \u1234\xe9\x9d
TEXT TEXT
HEREDOC HEREDOC
assert_syntax_error(<<-'HEREDOC', 'UTF-8 mixed within Windows-31J source') assert_not_match(/end-of-input/, e.message)
e = assert_syntax_error(<<-'HEREDOC', 'UTF-8 mixed within Windows-31J source')
#encoding: cp932 #encoding: cp932
<<-TEXT <<-TEXT
\u1234 \u1234
\xe9\x9d \xe9\x9d
TEXT TEXT
HEREDOC HEREDOC
assert_not_match(/end-of-input/, e.message)
end end
def test_lineno_operation_brace_block def test_lineno_operation_brace_block