ZJIT: Create delta debugging script to narrow JIT failures (#14041)

Add support for `--zjit-allowed-iseqs=SomeFile` and
`--zjit-log-compiled-iseqs=SomeFile` so we can restrict and inspect
which ISEQs get compiled.

Then add `tool/zjit_bisect.rb`, which we can run to try to narrow down a
failing script. For example:

    plum% ../tool/zjit_bisect.rb ../build-dev/miniruby "test.rb"
    I, [2025-07-29T12:41:18.657177 #96899]  INFO -- : Starting with JIT list of 4 items.
    I, [2025-07-29T12:41:18.657229 #96899]  INFO -- : Verifying items
    I, [2025-07-29T12:41:18.726213 #96899]  INFO -- : step fixed[0] and items[4]
    I, [2025-07-29T12:41:18.726246 #96899]  INFO -- : 4 candidates
    I, [2025-07-29T12:41:18.797212 #96899]  INFO -- : 2 candidates
    Reduced JIT list:
    bar@test.rb:8
    plum%

We start with 4 compiled functions and shrink to just one.
This commit is contained in:
Max Bernstein 2025-07-29 12:43:14 -07:00 committed by GitHub
parent b07e214bf1
commit 039f4139f8
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
5 changed files with 182 additions and 2 deletions

97
tool/zjit_bisect.rb Executable file
View file

@ -0,0 +1,97 @@
#!/usr/bin/env ruby
require 'logger'
require 'open3'
require 'tempfile'
require 'timeout'
# Both positional arguments are mandatory: the Ruby binary to run and the
# option string appended to every invocation of it.
usage = "Usage: ruby zjit_bisect.rb <path_to_ruby> <options>"
RUBY = ARGV[0] || raise(usage)
OPTIONS = ARGV[1] || raise(usage)
# Time limit, in seconds, for each run performed while bisecting.
TIMEOUT_SEC = 5
# Progress messages are written to standard output.
LOGGER = Logger.new($stdout)
# From https://github.com/tekknolagi/omegastar
# MIT License
# Copyright (c) 2024 Maxwell Bernstein and Meta Platforms
# Shrink `items` to a smaller list that still reproduces the failure and
# return it. Every probe runs `command` on `fixed` plus a candidate subset of
# `items`; `fixed` itself is never reduced.
#
# `command` must return true when the run succeeded (the failure did NOT
# reproduce) and false when the run failed (the failure reproduced).
# `indent` only prefixes log lines so nested bisections are distinguishable.
def bisect_impl(command, fixed, items, indent="")
  LOGGER.info("#{indent}step fixed[#{fixed.length}] and items[#{items.length}]")
  until items.length <= 1
    LOGGER.info("#{indent}#{fixed.length + items.length} candidates")
    # Split in two; with an odd count the second half gets the extra item.
    midpoint = items.length / 2
    first_half = items.take(midpoint)
    second_half = items.drop(midpoint)
    if !command.call(fixed + first_half)
      # The first half alone still fails: keep only it.
      items = first_half
    elsif !command.call(fixed + second_half)
      # The second half alone still fails: keep only it.
      items = second_half
    else
      # The failure needs items from both halves. Pin one half and reduce the
      # other, then pin the reduced result and reduce the first half.
      kept_second = bisect_impl(command, fixed + first_half, second_half, indent + "< ")
      kept_first = bisect_impl(command, fixed + kept_second, first_half, indent + "> ")
      return kept_first + kept_second
    end
  end
  items
end
# From https://github.com/tekknolagi/omegastar
# MIT License
# Copyright (c) 2024 Maxwell Bernstein and Meta Platforms
# Entry point for the reduction. First checks that the problem is bisectable:
# `command` must report failure (false) for the complete `items` list and
# success (true) for an empty list. Raises StandardError when either check
# does not hold; otherwise returns the reduced list from `bisect_impl`.
def run_bisect(command, items)
  LOGGER.info("Verifying items")
  raise StandardError.new("Command succeeded with full items") if command.call(items)
  raise StandardError.new("Command failed with empty items") unless command.call([])
  bisect_impl(command, [], items)
end
# Run `ruby` with `--zjit-allowed-iseqs` pointing at a temporary file that
# holds `jit_list`, one entry per line, followed by `options`.
# Returns what Open3.capture3 returns: [stdout, stderr, process status].
# The temporary file is removed once the command has finished.
def run_with_jit_list(ruby, options, jit_list)
  Tempfile.create("jit_list") do |list_file|
    # Closing flushes the contents to disk before the child process reads them.
    list_file.write(jit_list.join("\n"))
    list_file.close
    command_line = [ruby, "--zjit-allowed-iseqs=#{list_file.path}", options].join(" ")
    Open3.capture3(command_line)
  end
end
# Try running with no JIT list to get a stable baseline: if the program fails
# even when nothing may be compiled, there is nothing to bisect.
_, stderr, status = run_with_jit_list(RUBY, OPTIONS, [])
if !status.success?
  raise "Command failed with empty JIT list: #{stderr}"
end

# Collect the JIT list from the failing Ruby process. Only the log file is of
# interest here, so the process output and exit status are ignored.
jit_list = nil
Tempfile.create "jit_list" do |temp_file|
  Open3.capture3("#{RUBY} --zjit-log-compiled-iseqs=#{temp_file.path} #{OPTIONS}")
  # The allow-list is treated as a set, so repeated entries add nothing and
  # would only lengthen the bisection.
  jit_list = File.readlines(temp_file.path).map(&:strip).reject(&:empty?).uniq
end
LOGGER.info("Starting with JIT list of #{jit_list.length} items.")

# Now narrow it down. The lambda returns true when the run succeeded and false
# when the failure reproduced.
command = lambda do |items|
  begin
    _, _, run_status = Timeout.timeout(TIMEOUT_SEC) do
      run_with_jit_list(RUBY, OPTIONS, items)
    end
    run_status.success?
  rescue Timeout::Error
    # A run that exceeds the time limit is counted as a reproduced failure
    # instead of aborting the whole bisection with an unhandled exception.
    LOGGER.warn("Timed out after #{TIMEOUT_SEC}s; treating the run as a failure")
    false
  end
end

result = run_bisect(command, jit_list)
File.open("jitlist.txt", "w") do |file|
  file.puts(result)
end
puts "Reduced JIT list (available in jitlist.txt):"
puts result

View file

@ -284,6 +284,10 @@ fn gen_function(cb: &mut CodeBlock, iseq: IseqPtr, function: &Function) -> Optio
let iseq_name = iseq_get_location(iseq, 0); let iseq_name = iseq_get_location(iseq, 0);
register_with_perf(iseq_name, start_usize, code_size); register_with_perf(iseq_name, start_usize, code_size);
} }
if ZJITState::should_log_compiled_iseqs() {
let iseq_name = iseq_get_location(iseq, 0);
ZJITState::log_compile(iseq_name);
}
} }
result result
} }

View file

@ -2480,6 +2480,7 @@ pub enum ParseError {
UnknownParameterType(ParameterType), UnknownParameterType(ParameterType),
MalformedIseq(u32), // insn_idx into iseq_encoded MalformedIseq(u32), // insn_idx into iseq_encoded
Validation(ValidationError), Validation(ValidationError),
NotAllowed,
} }
/// Return the number of locals in the current ISEQ (includes parameters) /// Return the number of locals in the current ISEQ (includes parameters)
@ -2545,6 +2546,9 @@ fn filter_unknown_parameter_type(iseq: *const rb_iseq_t) -> Result<(), ParseErro
/// Compile ISEQ into High-level IR /// Compile ISEQ into High-level IR
pub fn iseq_to_hir(iseq: *const rb_iseq_t) -> Result<Function, ParseError> { pub fn iseq_to_hir(iseq: *const rb_iseq_t) -> Result<Function, ParseError> {
if !ZJITState::can_compile_iseq(iseq) {
return Err(ParseError::NotAllowed);
}
filter_unknown_parameter_type(iseq)?; filter_unknown_parameter_type(iseq)?;
let payload = get_or_create_iseq_payload(iseq); let payload = get_or_create_iseq_payload(iseq);
let mut profiles = ProfileOracle::new(payload); let mut profiles = ProfileOracle::new(payload);

View file

@ -1,6 +1,7 @@
use std::{ffi::{CStr, CString}, ptr::null}; use std::{ffi::{CStr, CString}, ptr::null};
use std::os::raw::{c_char, c_int, c_uint}; use std::os::raw::{c_char, c_int, c_uint};
use crate::cruby::*; use crate::cruby::*;
use std::collections::HashSet;
/// Number of calls to start profiling YARV instructions. /// Number of calls to start profiling YARV instructions.
/// They are profiled `rb_zjit_call_threshold - rb_zjit_profile_threshold` times, /// They are profiled `rb_zjit_call_threshold - rb_zjit_profile_threshold` times,
@ -19,7 +20,7 @@ pub static mut rb_zjit_call_threshold: u64 = 2;
#[allow(non_upper_case_globals)] #[allow(non_upper_case_globals)]
static mut zjit_stats_enabled_p: bool = false; static mut zjit_stats_enabled_p: bool = false;
#[derive(Clone, Copy, Debug)] #[derive(Clone, Debug)]
pub struct Options { pub struct Options {
/// Number of times YARV instructions should be profiled. /// Number of times YARV instructions should be profiled.
pub num_profiles: u8, pub num_profiles: u8,
@ -44,6 +45,12 @@ pub struct Options {
/// Dump code map to /tmp for performance profilers. /// Dump code map to /tmp for performance profilers.
pub perf: bool, pub perf: bool,
/// List of ISEQs that can be compiled, identified by their iseq_get_location()
pub allowed_iseqs: Option<HashSet<String>>,
/// Path to a file where compiled ISEQs will be saved.
pub log_compiled_iseqs: Option<String>,
} }
/// Return an Options with default values /// Return an Options with default values
@ -57,6 +64,8 @@ pub fn init_options() -> Options {
dump_lir: false, dump_lir: false,
dump_disasm: false, dump_disasm: false,
perf: false, perf: false,
allowed_iseqs: None,
log_compiled_iseqs: None,
} }
} }
@ -67,6 +76,8 @@ pub const ZJIT_OPTIONS: &'static [(&str, &str)] = &[
("--zjit-num-profiles=num", "Number of profiled calls before JIT (default: 1, max: 255)."), ("--zjit-num-profiles=num", "Number of profiled calls before JIT (default: 1, max: 255)."),
("--zjit-stats", "Enable collecting ZJIT statistics."), ("--zjit-stats", "Enable collecting ZJIT statistics."),
("--zjit-perf", "Dump ISEQ symbols into /tmp/perf-{}.map for Linux perf."), ("--zjit-perf", "Dump ISEQ symbols into /tmp/perf-{}.map for Linux perf."),
("--zjit-log-compiled-iseqs=path",
"Log compiled ISEQs to the file. The file will be truncated."),
]; ];
#[derive(Clone, Copy, Debug)] #[derive(Clone, Copy, Debug)]
@ -108,6 +119,26 @@ pub extern "C" fn rb_zjit_parse_option(options: *const u8, str_ptr: *const c_cha
parse_option(options, str_ptr).is_some() parse_option(options, str_ptr).is_some()
} }
/// Read the file at `path_like` and return its lines as a set of ISEQ names.
///
/// Every line is trimmed and blank lines are skipped. If the file cannot be
/// read, a message is printed to stderr and the returned set is empty.
/// The resulting entries are always echoed to stderr.
fn parse_jit_list(path_like: &str) -> HashSet<String> {
    let result: HashSet<String> = match std::fs::read_to_string(path_like) {
        Ok(contents) => contents
            .lines()
            .map(str::trim)
            .filter(|entry| !entry.is_empty())
            .map(str::to_string)
            .collect(),
        Err(_) => {
            eprintln!("Failed to read JIT list from '{}'", path_like);
            HashSet::new()
        }
    };
    eprintln!("JIT list:");
    result.iter().for_each(|item| eprintln!(" {}", item));
    result
}
/// Expected to receive what comes after the third dash in "--zjit-*". /// Expected to receive what comes after the third dash in "--zjit-*".
/// Empty string means user passed only "--zjit". C code rejects when /// Empty string means user passed only "--zjit". C code rejects when
/// they pass exact "--zjit-". /// they pass exact "--zjit-".
@ -165,6 +196,19 @@ fn parse_option(options: &mut Options, str_ptr: *const std::os::raw::c_char) ->
("perf", "") => options.perf = true, ("perf", "") => options.perf = true,
("allowed-iseqs", _) if opt_val != "" => options.allowed_iseqs = Some(parse_jit_list(opt_val)),
("log-compiled-iseqs", _) if opt_val != "" => {
// Truncate the file if it exists
std::fs::OpenOptions::new()
.create(true)
.write(true)
.truncate(true)
.open(opt_val)
.map_err(|e| eprintln!("Failed to open file '{}': {}", opt_val, e))
.ok();
options.log_compiled_iseqs = Some(opt_val.into());
}
_ => return None, // Option name not recognized _ => return None, // Option name not recognized
} }

View file

@ -136,6 +136,38 @@ impl ZJITState {
pub fn get_counters() -> &'static mut Counters { pub fn get_counters() -> &'static mut Counters {
&mut ZJITState::get_instance().counters &mut ZJITState::get_instance().counters
} }
/// Was --zjit-log-compiled-iseqs specified?
/// True exactly when `options.log_compiled_iseqs` holds a path.
pub fn should_log_compiled_iseqs() -> bool {
    let options = &ZJITState::get_instance().options;
    options.log_compiled_iseqs.is_some()
}
/// Append `iseq_name` as one line to the file named by options.log_compiled_iseqs.
///
/// The file is opened in append mode (and created if missing) on every call.
/// I/O failures are reported on stderr and otherwise ignored.
/// Panics if options.log_compiled_iseqs is not set.
pub fn log_compile(iseq_name: String) {
    use std::io::Write;
    assert!(ZJITState::should_log_compiled_iseqs());
    let filename = ZJITState::get_instance().options.log_compiled_iseqs.as_ref().unwrap();
    let opened = std::fs::OpenOptions::new().create(true).append(true).open(filename);
    match opened {
        Err(open_err) => {
            eprintln!("ZJIT: Failed to create file '{}': {}", filename, open_err);
        }
        Ok(mut log_file) => {
            if let Err(write_err) = writeln!(log_file, "{}", iseq_name) {
                eprintln!("ZJIT: Failed to write to file '{}': {}", filename, write_err);
            }
        }
    }
}
/// Check if we are allowed to compile a given ISEQ based on --zjit-allowed-iseqs
pub fn can_compile_iseq(iseq: cruby::IseqPtr) -> bool {
if let Some(ref allowed_iseqs) = ZJITState::get_instance().options.allowed_iseqs {
let name = cruby::iseq_get_location(iseq, 0);
allowed_iseqs.contains(&name)
} else {
true // If no restrictions, allow all ISEQs
}
}
} }
/// Initialize ZJIT, given options allocated by rb_zjit_init_options() /// Initialize ZJIT, given options allocated by rb_zjit_init_options()
@ -148,7 +180,6 @@ pub extern "C" fn rb_zjit_init(options: *const u8) {
let options = unsafe { Box::from_raw(options as *mut Options) }; let options = unsafe { Box::from_raw(options as *mut Options) };
ZJITState::init(*options); ZJITState::init(*options);
std::mem::drop(options);
rb_bug_panic_hook(); rb_bug_panic_hook();