From 5861d4f18b615d5eb9335de77b25337f8828da6c Mon Sep 17 00:00:00 2001 From: nobu Date: Wed, 4 Jun 2014 13:09:57 +0000 Subject: [PATCH] re.c: consider name encoding * re.c (match_aref, rb_reg_regsub): consider encoding of captured names, encoding-incompatible should not match. [ruby-dev:48278] [Bug #9903] git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@46346 b2dd03c8-39d4-4d8f-98ff-823fe69b080e --- ChangeLog | 6 ++++++ re.c | 13 +++++++------ test/ruby/test_regexp.rb | 9 +++++++++ 3 files changed, 22 insertions(+), 6 deletions(-) diff --git a/ChangeLog b/ChangeLog index 670ed74161..6f2a5e1b9b 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,9 @@ +Wed Jun 4 22:09:53 2014 Nobuyoshi Nakada + + * re.c (match_aref, rb_reg_regsub): consider encoding of captured + names, encoding-incompatible should not match. + [ruby-dev:48278] [Bug #9903] + Wed Jun 4 21:23:52 2014 Nobuyoshi Nakada * re.c (match_aref): should not ignore name after NUL byte. diff --git a/re.c b/re.c index 4b28553aab..299d3198ff 100644 --- a/re.c +++ b/re.c @@ -1796,9 +1796,9 @@ match_aref(int argc, VALUE *argv, VALUE match) /* fall through */ case T_STRING: p = StringValuePtr(idx); - num = name_to_backref_number(RMATCH_REGS(match), - RMATCH(match)->regexp, p, p + RSTRING_LEN(idx)); - if (num < 1) { + if (!rb_enc_compatible(RREGEXP(RMATCH(match)->regexp)->src, idx) || + (num = name_to_backref_number(RMATCH_REGS(match), RMATCH(match)->regexp, + p, p + RSTRING_LEN(idx))) < 1) { name_to_backref_error(idx); } return rb_reg_nth_match(num, match); @@ -3417,9 +3417,10 @@ rb_reg_regsub(VALUE str, VALUE src, struct re_registers *regs, VALUE regexp) name_end += c == -1 ? 
mbclen(name_end, e, str_enc) : clen; } if (name_end < e) { - no = name_to_backref_number(regs, regexp, name, name_end); - if (no < 1) { - VALUE n = rb_str_subseq(str, (long)(name - RSTRING_PTR(str)), (long)(name_end - name)); + VALUE n = rb_str_subseq(str, (long)(name - RSTRING_PTR(str)), + (long)(name_end - name)); + if (!rb_enc_compatible(RREGEXP(regexp)->src, n) || + (no = name_to_backref_number(regs, regexp, name, name_end)) < 1) { name_to_backref_error(n); } p = s = name_end + clen; diff --git a/test/ruby/test_regexp.rb b/test/ruby/test_regexp.rb index 5a10ccfe1a..031d98c439 100644 --- a/test/ruby/test_regexp.rb +++ b/test/ruby/test_regexp.rb @@ -158,6 +158,15 @@ class TestRegexp < Test::Unit::TestCase } end + def test_named_capture_nonascii + bug9903 = '[ruby-dev:48278] [Bug #9903]' + + key = "\xb1\xb2".force_encoding(Encoding::EUC_JP) + m = /(?<#{key}>.*)/.match("xxx") + assert_equal("xxx", m[key]) + assert_raise(IndexError, bug9903) {m[key.dup.force_encoding(Encoding::Shift_JIS)]} + end + def test_assign_named_capture assert_equal("a", eval('/(?<foo>.)/ =~ "a"; foo')) assert_equal("a", eval('foo = 1; /(?<foo>.)/ =~ "a"; foo'))