mirror of
https://github.com/ruby/ruby.git
synced 2025-09-15 16:44:01 +02:00
[ruby/strscan] Accept String as a pattern at non head
(https://github.com/ruby/strscan/pull/106)

It supports non-head match cases such as StringScanner#scan_until.

If we use a String as a pattern, we can improve match performance.
Here are the results of the included benchmark.

## CRuby

It shows that String as a pattern is 1.18x faster than Regexp as a pattern.

```
$ benchmark-driver benchmark/check_until.yaml
Warming up --------------------------------------
regexp 9.403M i/s - 9.548M times in 1.015459s (106.35ns/i)
regexp_var 9.162M i/s - 9.248M times in 1.009479s (109.15ns/i)
string 8.966M i/s - 9.274M times in 1.034343s (111.54ns/i)
string_var 11.051M i/s - 11.190M times in 1.012538s (90.49ns/i)
Calculating -------------------------------------
regexp 10.319M i/s - 28.209M times in 2.733707s (96.91ns/i)
regexp_var 10.032M i/s - 27.485M times in 2.739807s (99.68ns/i)
string 9.681M i/s - 26.897M times in 2.778397s (103.30ns/i)
string_var 12.162M i/s - 33.154M times in 2.726046s (82.22ns/i)

Comparison:
string_var: 12161920.6 i/s
regexp: 10318949.7 i/s - 1.18x slower
regexp_var: 10031617.6 i/s - 1.21x slower
string: 9680843.7 i/s - 1.26x slower
```

## JRuby

It shows that String as a pattern is 2.11x faster than Regexp as a pattern.
```
$ benchmark-driver benchmark/check_until.yaml
Warming up --------------------------------------
regexp 7.591M i/s - 7.544M times in 0.993780s (131.74ns/i)
regexp_var 6.143M i/s - 6.125M times in 0.997038s (162.77ns/i)
string 14.135M i/s - 14.079M times in 0.996067s (70.75ns/i)
string_var 14.079M i/s - 14.057M times in 0.998420s (71.03ns/i)
Calculating -------------------------------------
regexp 9.409M i/s - 22.773M times in 2.420268s (106.28ns/i)
regexp_var 10.116M i/s - 18.430M times in 1.821820s (98.85ns/i)
string 21.389M i/s - 42.404M times in 1.982519s (46.75ns/i)
string_var 20.897M i/s - 42.237M times in 2.021187s (47.85ns/i)

Comparison:
string: 21389191.1 i/s
string_var: 20897327.5 i/s - 1.02x slower
regexp_var: 10116464.7 i/s - 2.11x slower
regexp: 9409222.3 i/s - 2.27x slower
```

See: be7815ec02/core/src/main/java/org/jruby/util/StringSupport.java (L1706-L1736)
---------f9d96c446a
Co-authored-by: Sutou Kouhei <kou@clear-code.com>
This commit is contained in:
parent
7d80c139f7
commit
d81b0588bb
2 changed files with 89 additions and 16 deletions
|
@ -686,14 +686,6 @@ strscan_do_scan(VALUE self, VALUE pattern, int succptr, int getstr, int headonly
|
|||
{
|
||||
struct strscanner *p;
|
||||
|
||||
if (headonly) {
|
||||
if (!RB_TYPE_P(pattern, T_REGEXP)) {
|
||||
StringValue(pattern);
|
||||
}
|
||||
}
|
||||
else {
|
||||
Check_Type(pattern, T_REGEXP);
|
||||
}
|
||||
GET_SCANNER(self, p);
|
||||
|
||||
CLEAR_MATCH_STATUS(p);
|
||||
|
@ -714,14 +706,25 @@ strscan_do_scan(VALUE self, VALUE pattern, int succptr, int getstr, int headonly
|
|||
}
|
||||
}
|
||||
else {
|
||||
StringValue(pattern);
|
||||
rb_enc_check(p->str, pattern);
|
||||
if (S_RESTLEN(p) < RSTRING_LEN(pattern)) {
|
||||
return Qnil;
|
||||
}
|
||||
if (memcmp(CURPTR(p), RSTRING_PTR(pattern), RSTRING_LEN(pattern)) != 0) {
|
||||
return Qnil;
|
||||
|
||||
if (headonly) {
|
||||
if (memcmp(CURPTR(p), RSTRING_PTR(pattern), RSTRING_LEN(pattern)) != 0) {
|
||||
return Qnil;
|
||||
}
|
||||
set_registers(p, RSTRING_LEN(pattern));
|
||||
} else {
|
||||
long pos = rb_memsearch(RSTRING_PTR(pattern), RSTRING_LEN(pattern),
|
||||
CURPTR(p), S_RESTLEN(p), rb_enc_get(pattern));
|
||||
if (pos == -1) {
|
||||
return Qnil;
|
||||
}
|
||||
set_registers(p, RSTRING_LEN(pattern) + pos);
|
||||
}
|
||||
set_registers(p, RSTRING_LEN(pattern));
|
||||
}
|
||||
|
||||
MATCHED(p);
|
||||
|
|
|
@ -262,7 +262,7 @@ module StringScannerTests
|
|||
end
|
||||
|
||||
def test_scan
|
||||
s = create_string_scanner('stra strb strc', true)
|
||||
s = create_string_scanner("stra strb\0strc", true)
|
||||
tmp = s.scan(/\w+/)
|
||||
assert_equal 'stra', tmp
|
||||
|
||||
|
@ -270,7 +270,7 @@ module StringScannerTests
|
|||
assert_equal ' ', tmp
|
||||
|
||||
assert_equal 'strb', s.scan(/\w+/)
|
||||
assert_equal ' ', s.scan(/\s+/)
|
||||
assert_equal "\u0000", s.scan(/\0/)
|
||||
|
||||
tmp = s.scan(/\w+/)
|
||||
assert_equal 'strc', tmp
|
||||
|
@ -312,11 +312,14 @@ module StringScannerTests
|
|||
end
|
||||
|
||||
def test_scan_string
|
||||
s = create_string_scanner('stra strb strc')
|
||||
s = create_string_scanner("stra strb\0strc")
|
||||
assert_equal 'str', s.scan('str')
|
||||
assert_equal 'str', s[0]
|
||||
assert_equal 3, s.pos
|
||||
assert_equal 'a ', s.scan('a ')
|
||||
assert_equal 'strb', s.scan('strb')
|
||||
assert_equal "\u0000", s.scan("\0")
|
||||
assert_equal 'strc', s.scan('strc')
|
||||
|
||||
str = 'stra strb strc'.dup
|
||||
s = create_string_scanner(str, false)
|
||||
|
@ -668,13 +671,47 @@ module StringScannerTests
|
|||
assert_equal(nil, s.exist?(/e/))
|
||||
end
|
||||
|
||||
def test_exist_p_string
|
||||
def test_exist_p_invalid_argument
|
||||
s = create_string_scanner("test string")
|
||||
assert_raise(TypeError) do
|
||||
s.exist?(" ")
|
||||
s.exist?(1)
|
||||
end
|
||||
end
|
||||
|
||||
def test_exist_p_string
|
||||
omit("not implemented on TruffleRuby") if RUBY_ENGINE == "truffleruby"
|
||||
s = create_string_scanner("test string")
|
||||
assert_equal(3, s.exist?("s"))
|
||||
assert_equal(0, s.pos)
|
||||
s.scan("test")
|
||||
assert_equal(2, s.exist?("s"))
|
||||
assert_equal(4, s.pos)
|
||||
assert_equal(nil, s.exist?("e"))
|
||||
end
|
||||
|
||||
def test_scan_until
|
||||
s = create_string_scanner("Foo Bar\0Baz")
|
||||
assert_equal("Foo", s.scan_until(/Foo/))
|
||||
assert_equal(3, s.pos)
|
||||
assert_equal(" Bar", s.scan_until(/Bar/))
|
||||
assert_equal(7, s.pos)
|
||||
assert_equal(nil, s.skip_until(/Qux/))
|
||||
assert_equal("\u0000Baz", s.scan_until(/Baz/))
|
||||
assert_equal(11, s.pos)
|
||||
end
|
||||
|
||||
def test_scan_until_string
|
||||
omit("not implemented on TruffleRuby") if RUBY_ENGINE == "truffleruby"
|
||||
s = create_string_scanner("Foo Bar\0Baz")
|
||||
assert_equal("Foo", s.scan_until("Foo"))
|
||||
assert_equal(3, s.pos)
|
||||
assert_equal(" Bar", s.scan_until("Bar"))
|
||||
assert_equal(7, s.pos)
|
||||
assert_equal(nil, s.skip_until("Qux"))
|
||||
assert_equal("\u0000Baz", s.scan_until("Baz"))
|
||||
assert_equal(11, s.pos)
|
||||
end
|
||||
|
||||
def test_skip_until
|
||||
s = create_string_scanner("Foo Bar Baz")
|
||||
assert_equal(3, s.skip_until(/Foo/))
|
||||
|
@ -684,6 +721,16 @@ module StringScannerTests
|
|||
assert_equal(nil, s.skip_until(/Qux/))
|
||||
end
|
||||
|
||||
def test_skip_until_string
|
||||
omit("not implemented on TruffleRuby") if RUBY_ENGINE == "truffleruby"
|
||||
s = create_string_scanner("Foo Bar Baz")
|
||||
assert_equal(3, s.skip_until("Foo"))
|
||||
assert_equal(3, s.pos)
|
||||
assert_equal(4, s.skip_until("Bar"))
|
||||
assert_equal(7, s.pos)
|
||||
assert_equal(nil, s.skip_until("Qux"))
|
||||
end
|
||||
|
||||
def test_check_until
|
||||
s = create_string_scanner("Foo Bar Baz")
|
||||
assert_equal("Foo", s.check_until(/Foo/))
|
||||
|
@ -693,6 +740,16 @@ module StringScannerTests
|
|||
assert_equal(nil, s.check_until(/Qux/))
|
||||
end
|
||||
|
||||
def test_check_until_string
|
||||
omit("not implemented on TruffleRuby") if RUBY_ENGINE == "truffleruby"
|
||||
s = create_string_scanner("Foo Bar Baz")
|
||||
assert_equal("Foo", s.check_until("Foo"))
|
||||
assert_equal(0, s.pos)
|
||||
assert_equal("Foo Bar", s.check_until("Bar"))
|
||||
assert_equal(0, s.pos)
|
||||
assert_equal(nil, s.check_until("Qux"))
|
||||
end
|
||||
|
||||
def test_search_full
|
||||
s = create_string_scanner("Foo Bar Baz")
|
||||
assert_equal(8, s.search_full(/Bar /, false, false))
|
||||
|
@ -705,6 +762,19 @@ module StringScannerTests
|
|||
assert_equal(11, s.pos)
|
||||
end
|
||||
|
||||
def test_search_full_string
|
||||
omit("not implemented on TruffleRuby") if RUBY_ENGINE == "truffleruby"
|
||||
s = create_string_scanner("Foo Bar Baz")
|
||||
assert_equal(8, s.search_full("Bar ", false, false))
|
||||
assert_equal(0, s.pos)
|
||||
assert_equal("Foo Bar ", s.search_full("Bar ", false, true))
|
||||
assert_equal(0, s.pos)
|
||||
assert_equal(8, s.search_full("Bar ", true, false))
|
||||
assert_equal(8, s.pos)
|
||||
assert_equal("Baz", s.search_full("az", true, true))
|
||||
assert_equal(11, s.pos)
|
||||
end
|
||||
|
||||
def test_peek
|
||||
s = create_string_scanner("test string")
|
||||
assert_equal("test st", s.peek(7))
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue