From 38fe710e08db3d93b6225f9c41c18c672e602d7f Mon Sep 17 00:00:00 2001 From: Alan Wu Date: Thu, 9 Nov 2023 12:57:45 -0500 Subject: [PATCH] YJIT: Invoke PosMarker callbacks only with solid positions Previously, PosMarker callbacks ran even when the assembler failed to assemble its contents due to insufficient space. This was problematic because when Assembler::compile() failed, the callbacks were given positions that have no valid code, contrary to general expectation. For example, we use a PosMarker callback to record VM instruction boundaries and patch in jumps to exits in case the guest program starts tracing, however, previously, we could record a location near the end of the code block, where there is no space to patch in jumps. I suspect this is the cause of the recent occurrences of rare random failures on GitHub Actions with the invariants.rs:529 "can rewrite existing code" message. `--yjit-perf` also uses PosMarker and had a similar issue. Buffer the list of callbacks to fire, and only fire them when all code in the assembler are written out successfully. It's more intuitive this way. --- yjit/src/backend/arm64/mod.rs | 23 ++++++++++++++++++++--- yjit/src/backend/tests.rs | 18 ++++++++++++++++++ yjit/src/backend/x86_64/mod.rs | 23 ++++++++++++++++++++--- 3 files changed, 58 insertions(+), 6 deletions(-) diff --git a/yjit/src/backend/arm64/mod.rs b/yjit/src/backend/arm64/mod.rs index 7b58e115c1..f09a07e571 100644 --- a/yjit/src/backend/arm64/mod.rs +++ b/yjit/src/backend/arm64/mod.rs @@ -819,6 +819,9 @@ impl Assembler // List of GC offsets let mut gc_offsets: Vec = Vec::new(); + // Buffered list of PosMarker callbacks to fire if codegen is successful + let mut pos_markers: Vec<(usize, CodePtr)> = vec![]; + // For each instruction let start_write_pos = cb.get_write_pos(); let mut insn_idx: usize = 0; @@ -838,8 +841,8 @@ impl Assembler cb.write_label(target.unwrap_label_idx()); }, // Report back the current position in the generated code - Insn::PosMarker(pos_marker) => { - pos_marker(cb.get_write_ptr()); + Insn::PosMarker(..) => { + pos_markers.push((insn_idx, cb.get_write_ptr())) } Insn::BakeString(text) => { for byte in text.as_bytes() { @@ -1205,7 +1208,21 @@ impl Assembler } } - Ok(gc_offsets) + // Error if we couldn't write out everything + if cb.has_dropped_bytes() { + return Err(EmitError::OutOfMemory) + } else { + // No bytes dropped, so the pos markers point to valid code + for (insn_idx, pos) in pos_markers { + if let Insn::PosMarker(callback) = self.insns.get(insn_idx).unwrap() { + callback(pos); + } else { + panic!("non-PosMarker in pos_markers insn_idx={insn_idx} {self:?}"); + } + } + + return Ok(gc_offsets) + } } /// Optimize and compile the stored instructions diff --git a/yjit/src/backend/tests.rs b/yjit/src/backend/tests.rs index ad46321ace..3278f33f63 100644 --- a/yjit/src/backend/tests.rs +++ b/yjit/src/backend/tests.rs @@ -310,3 +310,21 @@ fn test_cmp_8_bit() { asm.compile_with_num_regs(&mut cb, 1); } + +#[test] +fn test_no_pos_marker_callback_when_compile_fails() { + // When compilation fails (e.g. when out of memory), the code written out is malformed. + // We don't want to invoke the pos_marker callbacks with positions of malformed code. + let mut asm = Assembler::new(); + + // Markers around code to exhaust memory limit + let fail_if_called = |_code_ptr| panic!("pos_marker callback should not be called"); + asm.pos_marker(fail_if_called); + let zero = asm.load(0.into()); + let sum = asm.add(zero, 500.into()); + asm.store(Opnd::mem(64, SP, 8), sum); + asm.pos_marker(fail_if_called); + + let cb = &mut CodeBlock::new_dummy(8); + assert!(asm.compile(cb, None).is_none(), "should fail due to tiny size limit"); +} diff --git a/yjit/src/backend/x86_64/mod.rs b/yjit/src/backend/x86_64/mod.rs index 7dd54e6c21..8d0f70e133 100644 --- a/yjit/src/backend/x86_64/mod.rs +++ b/yjit/src/backend/x86_64/mod.rs @@ -457,6 +457,9 @@ impl Assembler // List of GC offsets let mut gc_offsets: Vec = Vec::new(); + // Buffered list of PosMarker callbacks to fire if codegen is successful + let mut pos_markers: Vec<(usize, CodePtr)> = vec![]; + // For each instruction let start_write_pos = cb.get_write_pos(); let mut insn_idx: usize = 0; @@ -479,8 +482,8 @@ impl Assembler }, // Report back the current position in the generated code - Insn::PosMarker(pos_marker) => { - pos_marker(cb.get_write_ptr()); + Insn::PosMarker(..) => { + pos_markers.push((insn_idx, cb.get_write_ptr())); }, Insn::BakeString(text) => { @@ -817,7 +820,21 @@ impl Assembler } } - Some(gc_offsets) + // Error if we couldn't write out everything + if cb.has_dropped_bytes() { + return None + } else { + // No bytes dropped, so the pos markers point to valid code + for (insn_idx, pos) in pos_markers { + if let Insn::PosMarker(callback) = self.insns.get(insn_idx).unwrap() { + callback(pos); + } else { + panic!("non-PosMarker in pos_markers insn_idx={insn_idx} {self:?}"); + } + } + + return Some(gc_offsets) + } } /// Optimize and compile the stored instructions