YJIT: Move RefCell one level down

This is the second part of making YJIT work with parallel GC.

During GC, `rb_yjit_iseq_mark` and `rb_yjit_iseq_update_references` need
to resolve the offsets in `Block::gc_obj_offsets` into absolute
addresses before reading or updating those fields.  Doing so requires
the base address stored in `VirtualMemory::region_start`, which was
previously behind a `RefCell`.  When multiple GC threads scan multiple
iseqs simultaneously (which is possible with some GC modules, such as
MMTk), the scan panics because the `RefCell` is already borrowed.
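As a single-threaded illustration of the failure mode (this sketch is
not part of the commit; the real panic is triggered by conflicting
borrows from concurrent GC threads):

    use std::cell::RefCell;

    fn main() {
        let cell = RefCell::new(0u8);
        let _writer = cell.borrow_mut();   // e.g. an ongoing write session
        // A second borrow while one is outstanding is rejected; calling
        // borrow() here would panic with "already mutably borrowed".
        assert!(cell.try_borrow().is_err());
    }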

Some fields of `VirtualMemory`, such as `region_start`, are never
modified once `VirtualMemory` is constructed.  We change the type of
the field `CodeBlock::mem_block` from `Rc<RefCell<T>>` to `Rc<T>` and
push the `RefCell` one level down, into `VirtualMemory`: the mutable
fields of `VirtualMemory` are extracted into a dedicated struct
`VirtualMemoryMut` and stored in the field `VirtualMemory::mutable`,
which is a `RefCell<VirtualMemoryMut>`.  After this change, methods
that only read immutable fields of `VirtualMemory`, in particular
`base_ptr()` which reads `region_start`, no longer need to borrow any
`RefCell`.  Methods that touch mutable state still borrow
`VirtualMemory::mutable`, but there are strictly fewer borrow
operations than before, because borrows previously done by callers
(such as `CodeBlock::write_mem`) now happen inside the corresponding
`VirtualMemory` methods (such as `VirtualMemory::write_byte`).
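A simplified sketch of the resulting layout (the allocator field and
the generic allocator parameter are omitted, and `bump_mapped` is an
illustrative stand-in for the real mutating methods in the diff below):

    use std::cell::RefCell;
    use std::ptr::NonNull;
    use std::rc::Rc;

    /// Mutable parts, behind the single RefCell.
    struct VirtualMemoryMut {
        mapped_region_bytes: usize,
        current_write_page: Option<usize>,
    }

    struct VirtualMemory {
        region_start: NonNull<u8>,          // immutable after construction
        region_size_bytes: usize,           // immutable after construction
        mutable: RefCell<VirtualMemoryMut>, // the only part that needs borrowing
    }

    struct CodeBlock {
        mem_block: Rc<VirtualMemory>,       // was Rc<RefCell<VirtualMem>>
    }

    impl VirtualMemory {
        /// Reading an immutable field needs no RefCell borrow;
        /// this is the path GC marking relies on.
        fn base_ptr(&self) -> NonNull<u8> {
            self.region_start
        }

        /// Illustrative mutator: the borrow of the inner RefCell happens
        /// inside the method, instead of in every caller.
        fn bump_mapped(&self, bytes: usize) {
            self.mutable.borrow_mut().mapped_region_bytes += bytes;
        }
    }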

Author:    Kunshan Wang (2025-07-10 16:55:53 +08:00)
Committer: Alan Wu
Commit:    3a47f4eacf
Parent:    51a3ea5ade
3 changed files with 68 additions and 54 deletions

yjit/src/asm/mod.rs

@@ -1,4 +1,3 @@
-use std::cell::RefCell;
 use std::fmt;
 use std::mem;
 use std::rc::Rc;
@@ -44,7 +43,7 @@ pub struct LabelRef {
 /// Block of memory into which instructions can be assembled
 pub struct CodeBlock {
 // Memory for storing the encoded instructions
-mem_block: Rc<RefCell<VirtualMem>>,
+mem_block: Rc<VirtualMem>,
 // Size of a code page in bytes. Each code page is split into an inlined and an outlined portion.
 // Code GC collects code memory at this granularity.
@@ -107,16 +106,16 @@ impl CodeBlock {
 const PREFERRED_CODE_PAGE_SIZE: usize = 16 * 1024;
 /// Make a new CodeBlock
-pub fn new(mem_block: Rc<RefCell<VirtualMem>>, outlined: bool, freed_pages: Rc<Option<Vec<usize>>>, keep_comments: bool) -> Self {
+pub fn new(mem_block: Rc<VirtualMem>, outlined: bool, freed_pages: Rc<Option<Vec<usize>>>, keep_comments: bool) -> Self {
 // Pick the code page size
-let system_page_size = mem_block.borrow().system_page_size();
+let system_page_size = mem_block.system_page_size();
 let page_size = if 0 == Self::PREFERRED_CODE_PAGE_SIZE % system_page_size {
 Self::PREFERRED_CODE_PAGE_SIZE
 } else {
 system_page_size
 };
-let mem_size = mem_block.borrow().virtual_region_size();
+let mem_size = mem_block.virtual_region_size();
 let mut cb = Self {
 mem_block,
 mem_size,
@@ -238,9 +237,9 @@ impl CodeBlock {
 }
 // Free the grouped pages at once
-let start_ptr = self.mem_block.borrow().start_ptr().add_bytes(page_idx * self.page_size);
+let start_ptr = self.mem_block.start_ptr().add_bytes(page_idx * self.page_size);
 let batch_size = self.page_size * batch_idxs.len();
-self.mem_block.borrow_mut().free_bytes(start_ptr, batch_size as u32);
+self.mem_block.free_bytes(start_ptr, batch_size as u32);
 }
 }
@@ -249,13 +248,13 @@ impl CodeBlock {
 }
 pub fn mapped_region_size(&self) -> usize {
-self.mem_block.borrow().mapped_region_size()
+self.mem_block.mapped_region_size()
 }
 /// Size of the region in bytes where writes could be attempted.
 #[cfg(target_arch = "aarch64")]
 pub fn virtual_region_size(&self) -> usize {
-self.mem_block.borrow().virtual_region_size()
+self.mem_block.virtual_region_size()
 }
 /// Return the number of code pages that have been mapped by the VirtualMemory.
@@ -267,7 +266,7 @@ impl CodeBlock {
 /// Return the number of code pages that have been reserved by the VirtualMemory.
 pub fn num_virtual_pages(&self) -> usize {
-let virtual_region_size = self.mem_block.borrow().virtual_region_size();
+let virtual_region_size = self.mem_block.virtual_region_size();
 // CodeBlock's page size != VirtualMem's page size on Linux,
 // so mapped_region_size % self.page_size may not be 0
 ((virtual_region_size - 1) / self.page_size) + 1
@@ -409,7 +408,7 @@ impl CodeBlock {
 }
 pub fn write_mem(&self, write_ptr: CodePtr, byte: u8) -> Result<(), WriteError> {
-self.mem_block.borrow_mut().write_byte(write_ptr, byte)
+self.mem_block.write_byte(write_ptr, byte)
 }
 // Set the current write position
@@ -423,19 +422,19 @@ impl CodeBlock {
 // Set the current write position from a pointer
 pub fn set_write_ptr(&mut self, code_ptr: CodePtr) {
-let pos = code_ptr.as_offset() - self.mem_block.borrow().start_ptr().as_offset();
+let pos = code_ptr.as_offset() - self.mem_block.start_ptr().as_offset();
 self.set_pos(pos.try_into().unwrap());
 }
 /// Get a (possibly dangling) direct pointer into the executable memory block
 pub fn get_ptr(&self, offset: usize) -> CodePtr {
-self.mem_block.borrow().start_ptr().add_bytes(offset)
+self.mem_block.start_ptr().add_bytes(offset)
 }
 /// Convert an address range to memory page indexes against a num_pages()-sized array.
 pub fn addrs_to_pages(&self, start_addr: CodePtr, end_addr: CodePtr) -> impl Iterator<Item = usize> {
-let mem_start = self.mem_block.borrow().start_ptr().raw_addr(self);
-let mem_end = self.mem_block.borrow().mapped_end_ptr().raw_addr(self);
+let mem_start = self.mem_block.start_ptr().raw_addr(self);
+let mem_end = self.mem_block.mapped_end_ptr().raw_addr(self);
 assert!(mem_start <= start_addr.raw_addr(self));
 assert!(start_addr.raw_addr(self) <= end_addr.raw_addr(self));
 assert!(end_addr.raw_addr(self) <= mem_end);
@@ -458,7 +457,7 @@ impl CodeBlock {
 /// Write a single byte at the current position.
 pub fn write_byte(&mut self, byte: u8) {
 let write_ptr = self.get_write_ptr();
-if self.has_capacity(1) && self.mem_block.borrow_mut().write_byte(write_ptr, byte).is_ok() {
+if self.has_capacity(1) && self.mem_block.write_byte(write_ptr, byte).is_ok() {
 self.write_pos += 1;
 } else {
 self.dropped_bytes = true;
@@ -591,11 +590,11 @@ impl CodeBlock {
 }
 pub fn mark_all_writeable(&mut self) {
-self.mem_block.borrow_mut().mark_all_writeable();
+self.mem_block.mark_all_writeable();
 }
 pub fn mark_all_executable(&mut self) {
-self.mem_block.borrow_mut().mark_all_executable();
+self.mem_block.mark_all_executable();
 }
 /// Code GC. Free code pages that are not on stack and reuse them.
@@ -693,7 +692,7 @@ impl CodeBlock {
 let mem_start: *const u8 = alloc.mem_start();
 let virt_mem = VirtualMem::new(alloc, 1, NonNull::new(mem_start as *mut u8).unwrap(), mem_size, 128 * 1024 * 1024);
-Self::new(Rc::new(RefCell::new(virt_mem)), false, Rc::new(None), true)
+Self::new(Rc::new(virt_mem), false, Rc::new(None), true)
 }
 /// Stubbed CodeBlock for testing conditions that can arise due to code GC. Can't execute generated code.
@@ -711,7 +710,7 @@ impl CodeBlock {
 let mem_start: *const u8 = alloc.mem_start();
 let virt_mem = VirtualMem::new(alloc, 1, NonNull::new(mem_start as *mut u8).unwrap(), mem_size, 128 * 1024 * 1024);
-Self::new(Rc::new(RefCell::new(virt_mem)), false, Rc::new(Some(freed_pages)), true)
+Self::new(Rc::new(virt_mem), false, Rc::new(Some(freed_pages)), true)
 }
 }
@@ -719,7 +718,7 @@ impl CodeBlock {
 impl fmt::LowerHex for CodeBlock {
 fn fmt(&self, fmtr: &mut fmt::Formatter) -> fmt::Result {
 for pos in 0..self.write_pos {
-let mem_block = &*self.mem_block.borrow();
+let mem_block = &*self.mem_block;
 let byte = unsafe { mem_block.start_ptr().raw_ptr(mem_block).add(pos).read() };
 fmtr.write_fmt(format_args!("{:02x}", byte))?;
 }
@@ -729,7 +728,7 @@ impl fmt::LowerHex for CodeBlock {
 impl crate::virtualmem::CodePtrBase for CodeBlock {
 fn base_ptr(&self) -> std::ptr::NonNull<u8> {
-self.mem_block.borrow().base_ptr()
+self.mem_block.base_ptr()
 }
 }

yjit/src/codegen.rs

@@ -11037,7 +11037,7 @@ impl CodegenGlobals {
 exec_mem_size,
 get_option!(mem_size),
 );
-let mem_block = Rc::new(RefCell::new(mem_block));
+let mem_block = Rc::new(mem_block);
 let freed_pages = Rc::new(None);

yjit/src/virtualmem.rs

@@ -3,7 +3,7 @@
 // usize->pointer casts is viable. It seems like a lot of work for us to participate for not much
 // benefit.
-use std::ptr::NonNull;
+use std::{cell::RefCell, ptr::NonNull};
 use crate::{backend::ir::Target, stats::yjit_alloc_size, utils::IntoUsize};
@@ -36,6 +36,12 @@ pub struct VirtualMemory<A: Allocator> {
 /// granularity.
 page_size_bytes: usize,
+/// Mutable parts.
+mutable: RefCell<VirtualMemoryMut<A>>,
+}
+/// Mutable parts of [`VirtualMemory`].
+pub struct VirtualMemoryMut<A: Allocator> {
 /// Number of bytes that have we have allocated physical memory for starting at
 /// [Self::region_start].
 mapped_region_bytes: usize,
@@ -124,9 +130,11 @@ impl<A: Allocator> VirtualMemory<A> {
 region_size_bytes,
 memory_limit_bytes,
 page_size_bytes,
-mapped_region_bytes: 0,
-current_write_page: None,
-allocator,
+mutable: RefCell::new(VirtualMemoryMut {
+mapped_region_bytes: 0,
+current_write_page: None,
+allocator,
+}),
 }
 }
@@ -137,7 +145,7 @@ impl<A: Allocator> VirtualMemory<A> {
 }
 pub fn mapped_end_ptr(&self) -> CodePtr {
-self.start_ptr().add_bytes(self.mapped_region_bytes)
+self.start_ptr().add_bytes(self.mutable.borrow().mapped_region_bytes)
 }
 pub fn virtual_end_ptr(&self) -> CodePtr {
@@ -146,7 +154,7 @@ impl<A: Allocator> VirtualMemory<A> {
 /// Size of the region in bytes that we have allocated physical memory for.
 pub fn mapped_region_size(&self) -> usize {
-self.mapped_region_bytes
+self.mutable.borrow().mapped_region_bytes
 }
 /// Size of the region in bytes where writes could be attempted.
@@ -161,19 +169,21 @@ impl<A: Allocator> VirtualMemory<A> {
 }
 /// Write a single byte. The first write to a page makes it readable.
-pub fn write_byte(&mut self, write_ptr: CodePtr, byte: u8) -> Result<(), WriteError> {
+pub fn write_byte(&self, write_ptr: CodePtr, byte: u8) -> Result<(), WriteError> {
+let mut mutable = self.mutable.borrow_mut();
 let page_size = self.page_size_bytes;
 let raw: *mut u8 = write_ptr.raw_ptr(self) as *mut u8;
 let page_addr = (raw as usize / page_size) * page_size;
-if self.current_write_page == Some(page_addr) {
+if mutable.current_write_page == Some(page_addr) {
 // Writing within the last written to page, nothing to do
 } else {
 // Switching to a different and potentially new page
 let start = self.region_start.as_ptr();
-let mapped_region_end = start.wrapping_add(self.mapped_region_bytes);
+let mapped_region_end = start.wrapping_add(mutable.mapped_region_bytes);
 let whole_region_end = start.wrapping_add(self.region_size_bytes);
-let alloc = &mut self.allocator;
+let alloc = &mut mutable.allocator;
 assert!((start..=whole_region_end).contains(&mapped_region_end));
@@ -185,7 +195,7 @@ impl<A: Allocator> VirtualMemory<A> {
 return Err(FailedPageMapping);
 }
-self.current_write_page = Some(page_addr);
+mutable.current_write_page = Some(page_addr);
 } else if (start..whole_region_end).contains(&raw) &&
 (page_addr + page_size - start as usize) + yjit_alloc_size() < self.memory_limit_bytes {
 // Writing to a brand new page
@@ -217,9 +227,9 @@ impl<A: Allocator> VirtualMemory<A> {
 unreachable!("unknown arch");
 }
 }
-self.mapped_region_bytes = self.mapped_region_bytes + alloc_size;
+mutable.mapped_region_bytes = mutable.mapped_region_bytes + alloc_size;
-self.current_write_page = Some(page_addr);
+mutable.current_write_page = Some(page_addr);
 } else {
 return Err(OutOfBounds);
 }
@@ -233,14 +243,16 @@ impl<A: Allocator> VirtualMemory<A> {
 /// Make all the code in the region writeable.
 /// Call this during GC before the phase of updating reference fields.
-pub fn mark_all_writeable(&mut self) {
-self.current_write_page = None;
+pub fn mark_all_writeable(&self) {
+let mut mutable = self.mutable.borrow_mut();
+mutable.current_write_page = None;
 let region_start = self.region_start;
-let mapped_region_bytes: u32 = self.mapped_region_bytes.try_into().unwrap();
+let mapped_region_bytes: u32 = mutable.mapped_region_bytes.try_into().unwrap();
 // Make mapped region executable
-if !self.allocator.mark_writable(region_start.as_ptr(), mapped_region_bytes) {
+if !mutable.allocator.mark_writable(region_start.as_ptr(), mapped_region_bytes) {
 panic!("Cannot make memory region writable: {:?}-{:?}",
 region_start.as_ptr(),
 unsafe { region_start.as_ptr().add(mapped_region_bytes as usize)}
@@ -250,18 +262,20 @@ impl<A: Allocator> VirtualMemory<A> {
 /// Make all the code in the region executable. Call this at the end of a write session.
 /// See [Self] for usual usage flow.
-pub fn mark_all_executable(&mut self) {
-self.current_write_page = None;
+pub fn mark_all_executable(&self) {
+let mut mutable = self.mutable.borrow_mut();
+mutable.current_write_page = None;
 let region_start = self.region_start;
-let mapped_region_bytes: u32 = self.mapped_region_bytes.try_into().unwrap();
+let mapped_region_bytes: u32 = mutable.mapped_region_bytes.try_into().unwrap();
 // Make mapped region executable
-self.allocator.mark_executable(region_start.as_ptr(), mapped_region_bytes);
+mutable.allocator.mark_executable(region_start.as_ptr(), mapped_region_bytes);
 }
 /// Free a range of bytes. start_ptr must be memory page-aligned.
-pub fn free_bytes(&mut self, start_ptr: CodePtr, size: u32) {
+pub fn free_bytes(&self, start_ptr: CodePtr, size: u32) {
 assert_eq!(start_ptr.raw_ptr(self) as usize % self.page_size_bytes, 0);
 // Bounds check the request. We should only free memory we manage.
@@ -274,7 +288,8 @@ impl<A: Allocator> VirtualMemory<A> {
 // code page, it's more appropriate to check the last byte against the virtual region.
 assert!(virtual_region.contains(&last_byte_to_free));
-self.allocator.mark_unused(start_ptr.raw_ptr(self), size);
+let mut mutable = self.mutable.borrow_mut();
+mutable.allocator.mark_unused(start_ptr.raw_ptr(self), size);
 }
 }
@@ -403,11 +418,11 @@ pub mod tests {
 #[test]
 #[cfg(target_arch = "x86_64")]
 fn new_memory_is_initialized() {
-let mut virt = new_dummy_virt_mem();
+let virt = new_dummy_virt_mem();
 virt.write_byte(virt.start_ptr(), 1).unwrap();
 assert!(
-virt.allocator.memory[..PAGE_SIZE].iter().all(|&byte| byte != 0),
+virt.mutable.borrow().allocator.memory[..PAGE_SIZE].iter().all(|&byte| byte != 0),
 "Entire page should be initialized",
 );
@@ -415,21 +430,21 @@ pub mod tests {
 let three_pages = 3 * PAGE_SIZE;
 virt.write_byte(virt.start_ptr().add_bytes(three_pages), 1).unwrap();
 assert!(
-virt.allocator.memory[..three_pages].iter().all(|&byte| byte != 0),
+virt.mutable.borrow().allocator.memory[..three_pages].iter().all(|&byte| byte != 0),
 "Gaps between write requests should be filled",
 );
 }
 #[test]
 fn no_redundant_syscalls_when_writing_to_the_same_page() {
-let mut virt = new_dummy_virt_mem();
+let virt = new_dummy_virt_mem();
 virt.write_byte(virt.start_ptr(), 1).unwrap();
 virt.write_byte(virt.start_ptr(), 0).unwrap();
 assert!(
 matches!(
-virt.allocator.requests[..],
+virt.mutable.borrow().allocator.requests[..],
 [MarkWritable { start_idx: 0, length: PAGE_SIZE }],
 )
 );
@@ -438,7 +453,7 @@ pub mod tests {
 #[test]
 fn bounds_checking() {
 use super::WriteError::*;
-let mut virt = new_dummy_virt_mem();
+let virt = new_dummy_virt_mem();
 let one_past_end = virt.start_ptr().add_bytes(virt.virtual_region_size());
 assert_eq!(Err(OutOfBounds), virt.write_byte(one_past_end, 0));
@@ -451,7 +466,7 @@ pub mod tests {
 fn only_written_to_regions_become_executable() {
 // ... so we catch attempts to read/write/execute never-written-to regions
 const THREE_PAGES: usize = PAGE_SIZE * 3;
-let mut virt = new_dummy_virt_mem();
+let virt = new_dummy_virt_mem();
 let page_two_start = virt.start_ptr().add_bytes(PAGE_SIZE * 2);
 virt.write_byte(page_two_start, 1).unwrap();
 virt.mark_all_executable();
@@ -459,7 +474,7 @@ pub mod tests {
 assert!(virt.virtual_region_size() > THREE_PAGES);
 assert!(
 matches!(
-virt.allocator.requests[..],
+virt.mutable.borrow().allocator.requests[..],
 [
 MarkWritable { start_idx: 0, length: THREE_PAGES },
 MarkExecutable { start_idx: 0, length: THREE_PAGES },