
Profiling revealed that we were spending lots of time growing the buffer. Buffer operations are definitely something we want to optimize, but for this specific benchmark what we're interested in is UTF-8 scanning performance. Each iteration of the two scanning benchmarks was producing 20MB of JSON; now they only produce 5MB each:

```
== Encoding mostly utf8 (5001001 bytes)
ruby 3.4.0dev (2024-10-18T19:01:45Z master 7be9a333ca) +YJIT +PRISM [arm64-darwin23]
Warming up --------------------------------------
                json    35.000 i/100ms
                  oj    36.000 i/100ms
           rapidjson    10.000 i/100ms
Calculating -------------------------------------
                json    359.161 (± 1.4%) i/s   (2.78 ms/i) -      1.820k in   5.068542s
                  oj    359.699 (± 0.6%) i/s   (2.78 ms/i) -      1.800k in   5.004291s
           rapidjson     99.687 (± 2.0%) i/s  (10.03 ms/i) -    500.000 in   5.017321s

Comparison:
                json:      359.2 i/s
                  oj:      359.7 i/s - same-ish: difference falls within error
           rapidjson:       99.7 i/s - 3.60x slower
```
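As a sanity check, the 5MB figure follows directly from how the "mostly utf8" payload is built in the benchmark file below ("€" encodes to 3 bytes in UTF-8). A quick sketch:

```ruby
require "json"

# 500 copies of a 3333-character string of "€" (3 bytes each in UTF-8):
# 3333 * 3 = 9999 bytes per string, plus 2 quote bytes once serialized.
# 500 strings of 10_001 bytes, 499 commas, and 2 brackets:
# 500 * 10_001 + 499 + 2 = 5_001_001 bytes, matching the header above.
payload = [("€" * 3333)] * 500
puts JSON.dump(payload).bytesize # => 5001001
```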
78 lines · 3.4 KiB · Ruby

require "benchmark/ips"
|
|
require "json"
|
|
require "oj"
|
|
require "rapidjson"
|
|
|
|
if ENV["ONLY"]
|
|
RUN = ENV["ONLY"].split(/[,: ]/).map{|x| [x.to_sym, true] }.to_h
|
|
RUN.default = false
|
|
elsif ENV["EXCEPT"]
|
|
RUN = ENV["EXCEPT"].split(/[,: ]/).map{|x| [x.to_sym, false] }.to_h
|
|
RUN.default = true
|
|
else
|
|
RUN = Hash.new(true)
|
|
end
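
# Usage sketch (assuming the script is saved as encoder.rb; the names given to
# ONLY/EXCEPT must match the implementation keys defined below):
#
#   ONLY=json,oj ruby encoder.rb      # run only the json and oj encoders
#   EXCEPT=rapidjson ruby encoder.rb  # run everything except rapidjson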

def implementations(ruby_obj)
  # A single State instance, reused across iterations: the json_state variant
  # measures generation time without the per-call State allocation.
  state = JSON::State.new(JSON.dump_default_options)

  {
    json_state: ["json (reuse)", proc { state.generate(ruby_obj) }],
    json: ["json", proc { JSON.dump(ruby_obj) }],
    oj: ["oj", proc { Oj.dump(ruby_obj) }],
    rapidjson: ["rapidjson", proc { RapidJSON.dump(ruby_obj) }],
  }
end

def benchmark_encoding(benchmark_name, ruby_obj, check_expected: true, except: [])
  json_output = JSON.dump(ruby_obj)
  puts "== Encoding #{benchmark_name} (#{json_output.bytesize} bytes)"

  impls = implementations(ruby_obj).select { |name| RUN[name] }
  except.each { |i| impls.delete(i) }

  Benchmark.ips do |x|
    expected = ::JSON.dump(ruby_obj) if check_expected
    impls.values.each do |name, block|
      begin
        # Run each implementation once up front: skip it if it raises, or if
        # its output doesn't match the reference JSON.dump output.
        result = block.call
        if check_expected && expected != result
          puts "#{name} does not match expected output. Skipping"
          next
        end
      rescue => error
        puts "#{name} unsupported (#{error})"
        next
      end
      x.report(name, &block)
    end
    x.compare!(order: :baseline)
  end
  puts
end

# On the first two micro benchmarks, the limiting factor is that we have to create a Generator::State object for every
# call to `JSON.dump`, so we cause 2 allocations per call where alternatives only do one allocation.
# The performance difference is mostly more time spent in GC because of this extra pressure.
# If we re-use the same `JSON::State` instance, we're faster than Oj on the array benchmark, and much closer
# on the Hash one.
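#
# The extra allocation can be sanity-checked with GC.stat (a rough sketch, not
# part of the benchmark run itself):
#
#   obj = [[1, 2, 3, 4, 5]] * 10
#   state = JSON::State.new(JSON.dump_default_options)
#   base = GC.stat(:total_allocated_objects)
#   JSON.dump(obj)      # allocates the result String plus a fresh State
#   state.generate(obj) # allocates only the result String
#   GC.stat(:total_allocated_objects) - base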
benchmark_encoding "small nested array", [[1,2,3,4,5]]*10
benchmark_encoding "small hash", { "username" => "jhawthorn", "id" => 123, "event" => "wrote json serializer" }

# On these benchmarks we perform well: either on par with the alternatives, or
# only marginally faster or slower.
benchmark_encoding "mixed utf8", ([("a" * 5000) + "€" + ("a" * 5000)] * 500), except: %i(json_state)
benchmark_encoding "mostly utf8", ([("€" * 3333)] * 500), except: %i(json_state)
benchmark_encoding "twitter.json", JSON.load_file("#{__dir__}/data/twitter.json"), except: %i(json_state)
benchmark_encoding "citm_catalog.json", JSON.load_file("#{__dir__}/data/citm_catalog.json"), except: %i(json_state)

# This benchmark spends the overwhelming majority of its time in `ruby_dtoa`. We rely on Ruby's implementation,
# which uses a relatively old version of dtoa.c from David M. Gay.
# Oj is noticeably faster here because it limits the precision of floats, breaking round-tripping. That's not
# something we should emulate.
# In recent years much faster float-to-string implementations have appeared, such as Ryu and Dragonbox,
# but they are all implemented in C++11 or newer, making them hard if not impossible to include.
# Short of a pure C99 implementation of these newer algorithms, there isn't much that can be done to match
# Oj's speed without losing precision.
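#
# What round-tripping means here, as a quick illustration: Ruby emits the
# shortest decimal form that parses back to the exact same Float, whereas a
# fixed-precision format can lose the value:
#
#   f = 1.0 / 3.0
#   f.to_s                    # => "0.3333333333333333"
#   Float(f.to_s) == f        # => true
#   Float("%.15g" % f) == f   # => false: the truncated form parses to a different Float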
benchmark_encoding "canada.json", JSON.load_file("#{__dir__}/data/canada.json"), check_expected: false, except: %i(json_state)

benchmark_encoding "many #to_json calls", [{Object.new => Object.new, 12 => 54.3, Integer => Float, Time.now => Date.today}] * 20, except: %i(json_state)