8199417: Modularize interpreter GC barriers

Reviewed-by: coleenp, rkennke
This commit is contained in:
Erik Österlund 2018-04-11 16:07:42 +02:00
parent 1094fd9927
commit 2a0986b882
77 changed files with 3087 additions and 2434 deletions

View file

@ -26,9 +26,9 @@
#include "jvm.h"
#include "asm/macroAssembler.inline.hpp"
#include "compiler/disassembler.hpp"
#include "gc/shared/cardTable.hpp"
#include "gc/shared/cardTableBarrierSet.hpp"
#include "gc/shared/collectedHeap.inline.hpp"
#include "gc/shared/barrierSet.hpp"
#include "gc/shared/barrierSetAssembler.hpp"
#include "interpreter/interpreter.hpp"
#include "memory/resourceArea.hpp"
#include "memory/universe.hpp"
@ -45,11 +45,6 @@
#include "runtime/stubRoutines.hpp"
#include "utilities/align.hpp"
#include "utilities/macros.hpp"
#if INCLUDE_ALL_GCS
#include "gc/g1/g1BarrierSet.hpp"
#include "gc/g1/g1CardTable.hpp"
#include "gc/g1/heapRegion.hpp"
#endif // INCLUDE_ALL_GCS
#ifdef COMPILER2
#include "opto/intrinsicnode.hpp"
#endif
@ -173,6 +168,24 @@ int MacroAssembler::branch_destination(int inst, int pos) {
return r;
}
void MacroAssembler::resolve_jobject(Register value, Register tmp) {
Label done, not_weak;
br_null(value, false, Assembler::pn, done); // Use NULL as-is.
delayed()->andcc(value, JNIHandles::weak_tag_mask, G0); // Test for jweak
brx(Assembler::zero, true, Assembler::pt, not_weak);
delayed()->nop();
access_load_at(T_OBJECT, IN_ROOT | ON_PHANTOM_OOP_REF,
Address(value, -JNIHandles::weak_tag_value), value, tmp);
verify_oop(value);
br (Assembler::always, true, Assembler::pt, done);
delayed()->nop();
bind(not_weak);
access_load_at(T_OBJECT, IN_ROOT | IN_CONCURRENT_ROOT,
Address(value, 0), value, tmp);
verify_oop(value);
bind(done);
}
void MacroAssembler::null_check(Register reg, int offset) {
if (needs_explicit_null_check((intptr_t)offset)) {
// provoke OS NULL exception if reg = NULL by
@ -658,14 +671,6 @@ void MacroAssembler::ic_call(address entry, bool emit_delay, jint method_index)
}
}
void MacroAssembler::card_table_write(jbyte* byte_map_base,
Register tmp, Register obj) {
srlx(obj, CardTable::card_shift, obj);
assert(tmp != obj, "need separate temp reg");
set((address) byte_map_base, tmp);
stb(G0, tmp, obj);
}
void MacroAssembler::internal_sethi(const AddressLiteral& addrlit, Register d, bool ForceRelocatable) {
address save_pc;
@ -3386,361 +3391,6 @@ void MacroAssembler::reserved_stack_check() {
bind(no_reserved_zone_enabling);
}
///////////////////////////////////////////////////////////////////////////////////
#if INCLUDE_ALL_GCS
static address satb_log_enqueue_with_frame = NULL;
static u_char* satb_log_enqueue_with_frame_end = NULL;
static address satb_log_enqueue_frameless = NULL;
static u_char* satb_log_enqueue_frameless_end = NULL;
static int EnqueueCodeSize = 128 DEBUG_ONLY( + 256); // Instructions?
static void generate_satb_log_enqueue(bool with_frame) {
BufferBlob* bb = BufferBlob::create("enqueue_with_frame", EnqueueCodeSize);
CodeBuffer buf(bb);
MacroAssembler masm(&buf);
#define __ masm.
address start = __ pc();
Register pre_val;
Label refill, restart;
if (with_frame) {
__ save_frame(0);
pre_val = I0; // Was O0 before the save.
} else {
pre_val = O0;
}
int satb_q_index_byte_offset =
in_bytes(JavaThread::satb_mark_queue_offset() +
SATBMarkQueue::byte_offset_of_index());
int satb_q_buf_byte_offset =
in_bytes(JavaThread::satb_mark_queue_offset() +
SATBMarkQueue::byte_offset_of_buf());
assert(in_bytes(SATBMarkQueue::byte_width_of_index()) == sizeof(intptr_t) &&
in_bytes(SATBMarkQueue::byte_width_of_buf()) == sizeof(intptr_t),
"check sizes in assembly below");
__ bind(restart);
// Load the index into the SATB buffer. SATBMarkQueue::_index is a size_t
// so ld_ptr is appropriate.
__ ld_ptr(G2_thread, satb_q_index_byte_offset, L0);
// index == 0?
__ cmp_and_brx_short(L0, G0, Assembler::equal, Assembler::pn, refill);
__ ld_ptr(G2_thread, satb_q_buf_byte_offset, L1);
__ sub(L0, oopSize, L0);
__ st_ptr(pre_val, L1, L0); // [_buf + index] := I0
if (!with_frame) {
// Use return-from-leaf
__ retl();
__ delayed()->st_ptr(L0, G2_thread, satb_q_index_byte_offset);
} else {
// Not delayed.
__ st_ptr(L0, G2_thread, satb_q_index_byte_offset);
}
if (with_frame) {
__ ret();
__ delayed()->restore();
}
__ bind(refill);
address handle_zero =
CAST_FROM_FN_PTR(address,
&SATBMarkQueueSet::handle_zero_index_for_thread);
// This should be rare enough that we can afford to save all the
// scratch registers that the calling context might be using.
__ mov(G1_scratch, L0);
__ mov(G3_scratch, L1);
__ mov(G4, L2);
// We need the value of O0 above (for the write into the buffer), so we
// save and restore it.
__ mov(O0, L3);
// Since the call will overwrite O7, we save and restore that, as well.
__ mov(O7, L4);
__ call_VM_leaf(L5, handle_zero, G2_thread);
__ mov(L0, G1_scratch);
__ mov(L1, G3_scratch);
__ mov(L2, G4);
__ mov(L3, O0);
__ br(Assembler::always, /*annul*/false, Assembler::pt, restart);
__ delayed()->mov(L4, O7);
if (with_frame) {
satb_log_enqueue_with_frame = start;
satb_log_enqueue_with_frame_end = __ pc();
} else {
satb_log_enqueue_frameless = start;
satb_log_enqueue_frameless_end = __ pc();
}
#undef __
}
void MacroAssembler::g1_write_barrier_pre(Register obj,
Register index,
int offset,
Register pre_val,
Register tmp,
bool preserve_o_regs) {
Label filtered;
if (obj == noreg) {
// We are not loading the previous value so make
// sure that we don't trash the value in pre_val
// with the code below.
assert_different_registers(pre_val, tmp);
} else {
// We will be loading the previous value
// in this code so...
assert(offset == 0 || index == noreg, "choose one");
assert(pre_val == noreg, "check this code");
}
// Is marking active?
if (in_bytes(SATBMarkQueue::byte_width_of_active()) == 4) {
ld(G2,
in_bytes(JavaThread::satb_mark_queue_offset() +
SATBMarkQueue::byte_offset_of_active()),
tmp);
} else {
guarantee(in_bytes(SATBMarkQueue::byte_width_of_active()) == 1,
"Assumption");
ldsb(G2,
in_bytes(JavaThread::satb_mark_queue_offset() +
SATBMarkQueue::byte_offset_of_active()),
tmp);
}
// Is marking active?
cmp_and_br_short(tmp, G0, Assembler::equal, Assembler::pt, filtered);
// Do we need to load the previous value?
if (obj != noreg) {
// Load the previous value...
if (index == noreg) {
if (Assembler::is_simm13(offset)) {
load_heap_oop(obj, offset, tmp);
} else {
set(offset, tmp);
load_heap_oop(obj, tmp, tmp);
}
} else {
load_heap_oop(obj, index, tmp);
}
// Previous value has been loaded into tmp
pre_val = tmp;
}
assert(pre_val != noreg, "must have a real register");
// Is the previous value null?
cmp_and_brx_short(pre_val, G0, Assembler::equal, Assembler::pt, filtered);
// OK, it's not filtered, so we'll need to call enqueue. In the normal
// case, pre_val will be a scratch G-reg, but there are some cases in
// which it's an O-reg. In the first case, do a normal call. In the
// latter, do a save here and call the frameless version.
guarantee(pre_val->is_global() || pre_val->is_out(),
"Or we need to think harder.");
if (pre_val->is_global() && !preserve_o_regs) {
call(satb_log_enqueue_with_frame);
delayed()->mov(pre_val, O0);
} else {
save_frame(0);
call(satb_log_enqueue_frameless);
delayed()->mov(pre_val->after_save(), O0);
restore();
}
bind(filtered);
}
static address dirty_card_log_enqueue = 0;
static u_char* dirty_card_log_enqueue_end = 0;
// This gets to assume that o0 contains the object address.
static void generate_dirty_card_log_enqueue(jbyte* byte_map_base) {
BufferBlob* bb = BufferBlob::create("dirty_card_enqueue", EnqueueCodeSize*2);
CodeBuffer buf(bb);
MacroAssembler masm(&buf);
#define __ masm.
address start = __ pc();
Label not_already_dirty, restart, refill, young_card;
__ srlx(O0, CardTable::card_shift, O0);
AddressLiteral addrlit(byte_map_base);
__ set(addrlit, O1); // O1 := <card table base>
__ ldub(O0, O1, O2); // O2 := [O0 + O1]
__ cmp_and_br_short(O2, G1CardTable::g1_young_card_val(), Assembler::equal, Assembler::pt, young_card);
__ membar(Assembler::Membar_mask_bits(Assembler::StoreLoad));
__ ldub(O0, O1, O2); // O2 := [O0 + O1]
assert(CardTable::dirty_card_val() == 0, "otherwise check this code");
__ cmp_and_br_short(O2, G0, Assembler::notEqual, Assembler::pt, not_already_dirty);
__ bind(young_card);
// We didn't take the branch, so we're already dirty: return.
// Use return-from-leaf
__ retl();
__ delayed()->nop();
// Not dirty.
__ bind(not_already_dirty);
// Get O0 + O1 into a reg by itself
__ add(O0, O1, O3);
// First, dirty it.
__ stb(G0, O3, G0); // [cardPtr] := 0 (i.e., dirty).
int dirty_card_q_index_byte_offset =
in_bytes(JavaThread::dirty_card_queue_offset() +
DirtyCardQueue::byte_offset_of_index());
int dirty_card_q_buf_byte_offset =
in_bytes(JavaThread::dirty_card_queue_offset() +
DirtyCardQueue::byte_offset_of_buf());
__ bind(restart);
// Load the index into the update buffer. DirtyCardQueue::_index is
// a size_t so ld_ptr is appropriate here.
__ ld_ptr(G2_thread, dirty_card_q_index_byte_offset, L0);
// index == 0?
__ cmp_and_brx_short(L0, G0, Assembler::equal, Assembler::pn, refill);
__ ld_ptr(G2_thread, dirty_card_q_buf_byte_offset, L1);
__ sub(L0, oopSize, L0);
__ st_ptr(O3, L1, L0); // [_buf + index] := I0
// Use return-from-leaf
__ retl();
__ delayed()->st_ptr(L0, G2_thread, dirty_card_q_index_byte_offset);
__ bind(refill);
address handle_zero =
CAST_FROM_FN_PTR(address,
&DirtyCardQueueSet::handle_zero_index_for_thread);
// This should be rare enough that we can afford to save all the
// scratch registers that the calling context might be using.
__ mov(G1_scratch, L3);
__ mov(G3_scratch, L5);
// We need the value of O3 above (for the write into the buffer), so we
// save and restore it.
__ mov(O3, L6);
// Since the call will overwrite O7, we save and restore that, as well.
__ mov(O7, L4);
__ call_VM_leaf(L7_thread_cache, handle_zero, G2_thread);
__ mov(L3, G1_scratch);
__ mov(L5, G3_scratch);
__ mov(L6, O3);
__ br(Assembler::always, /*annul*/false, Assembler::pt, restart);
__ delayed()->mov(L4, O7);
dirty_card_log_enqueue = start;
dirty_card_log_enqueue_end = __ pc();
// XXX Should have a guarantee here about not going off the end!
// Does it already do so? Do an experiment...
#undef __
}
void MacroAssembler::g1_write_barrier_post(Register store_addr, Register new_val, Register tmp) {
Label filtered;
MacroAssembler* post_filter_masm = this;
if (new_val == G0) return;
G1BarrierSet* bs =
barrier_set_cast<G1BarrierSet>(Universe::heap()->barrier_set());
CardTable* ct = bs->card_table();
if (G1RSBarrierRegionFilter) {
xor3(store_addr, new_val, tmp);
srlx(tmp, HeapRegion::LogOfHRGrainBytes, tmp);
// XXX Should I predict this taken or not? Does it matter?
cmp_and_brx_short(tmp, G0, Assembler::equal, Assembler::pt, filtered);
}
// If the "store_addr" register is an "in" or "local" register, move it to
// a scratch reg so we can pass it as an argument.
bool use_scr = !(store_addr->is_global() || store_addr->is_out());
// Pick a scratch register different from "tmp".
Register scr = (tmp == G1_scratch ? G3_scratch : G1_scratch);
// Make sure we use up the delay slot!
if (use_scr) {
post_filter_masm->mov(store_addr, scr);
} else {
post_filter_masm->nop();
}
save_frame(0);
call(dirty_card_log_enqueue);
if (use_scr) {
delayed()->mov(scr, O0);
} else {
delayed()->mov(store_addr->after_save(), O0);
}
restore();
bind(filtered);
}
// Called from init_globals() after universe_init() and before interpreter_init()
void g1_barrier_stubs_init() {
CollectedHeap* heap = Universe::heap();
if (heap->kind() == CollectedHeap::G1) {
// Only needed for G1
if (dirty_card_log_enqueue == 0) {
G1BarrierSet* bs =
barrier_set_cast<G1BarrierSet>(heap->barrier_set());
CardTable *ct = bs->card_table();
generate_dirty_card_log_enqueue(ct->byte_map_base());
assert(dirty_card_log_enqueue != 0, "postcondition.");
}
if (satb_log_enqueue_with_frame == 0) {
generate_satb_log_enqueue(true);
assert(satb_log_enqueue_with_frame != 0, "postcondition.");
}
if (satb_log_enqueue_frameless == 0) {
generate_satb_log_enqueue(false);
assert(satb_log_enqueue_frameless != 0, "postcondition.");
}
}
}
#endif // INCLUDE_ALL_GCS
///////////////////////////////////////////////////////////////////////////////////
void MacroAssembler::card_write_barrier_post(Register store_addr, Register new_val, Register tmp) {
// If we're writing constant NULL, we can skip the write barrier.
if (new_val == G0) return;
CardTableBarrierSet* bs =
barrier_set_cast<CardTableBarrierSet>(Universe::heap()->barrier_set());
CardTable* ct = bs->card_table();
assert(bs->kind() == BarrierSet::CardTableBarrierSet, "wrong barrier");
card_table_write(ct->byte_map_base(), tmp, store_addr);
}
// ((OopHandle)result).resolve();
void MacroAssembler::resolve_oop_handle(Register result) {
// OopHandle::resolve is an indirection.
@ -3785,65 +3435,63 @@ void MacroAssembler::store_klass_gap(Register s, Register d) {
}
}
void MacroAssembler::load_heap_oop(const Address& s, Register d) {
if (UseCompressedOops) {
lduw(s, d);
decode_heap_oop(d);
void MacroAssembler::access_store_at(BasicType type, DecoratorSet decorators,
Register src, Address dst, Register tmp) {
BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler();
bool as_raw = (decorators & AS_RAW) != 0;
if (as_raw) {
bs->BarrierSetAssembler::store_at(this, decorators, type, src, dst, tmp);
} else {
ld_ptr(s, d);
bs->store_at(this, decorators, type, src, dst, tmp);
}
}
void MacroAssembler::load_heap_oop(Register s1, Register s2, Register d) {
if (UseCompressedOops) {
lduw(s1, s2, d);
decode_heap_oop(d, d);
void MacroAssembler::access_load_at(BasicType type, DecoratorSet decorators,
Address src, Register dst, Register tmp) {
BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler();
bool as_raw = (decorators & AS_RAW) != 0;
if (as_raw) {
bs->BarrierSetAssembler::load_at(this, decorators, type, src, dst, tmp);
} else {
ld_ptr(s1, s2, d);
bs->load_at(this, decorators, type, src, dst, tmp);
}
}
void MacroAssembler::load_heap_oop(Register s1, int simm13a, Register d) {
if (UseCompressedOops) {
lduw(s1, simm13a, d);
decode_heap_oop(d, d);
void MacroAssembler::load_heap_oop(const Address& s, Register d, Register tmp, DecoratorSet decorators) {
access_load_at(T_OBJECT, IN_HEAP | decorators, s, d, tmp);
}
void MacroAssembler::load_heap_oop(Register s1, Register s2, Register d, Register tmp, DecoratorSet decorators) {
access_load_at(T_OBJECT, IN_HEAP | decorators, Address(s1, s2), d, tmp);
}
void MacroAssembler::load_heap_oop(Register s1, int simm13a, Register d, Register tmp, DecoratorSet decorators) {
access_load_at(T_OBJECT, IN_HEAP | decorators, Address(s1, simm13a), d, tmp);
}
void MacroAssembler::load_heap_oop(Register s1, RegisterOrConstant s2, Register d, Register tmp, DecoratorSet decorators) {
if (s2.is_constant()) {
access_load_at(T_OBJECT, IN_HEAP | decorators, Address(s1, s2.as_constant()), d, tmp);
} else {
ld_ptr(s1, simm13a, d);
access_load_at(T_OBJECT, IN_HEAP | decorators, Address(s1, s2.as_register()), d, tmp);
}
}
void MacroAssembler::load_heap_oop(Register s1, RegisterOrConstant s2, Register d) {
if (s2.is_constant()) load_heap_oop(s1, s2.as_constant(), d);
else load_heap_oop(s1, s2.as_register(), d);
void MacroAssembler::store_heap_oop(Register d, Register s1, Register s2, Register tmp, DecoratorSet decorators) {
access_store_at(T_OBJECT, IN_HEAP | decorators, d, Address(s1, s2), tmp);
}
void MacroAssembler::store_heap_oop(Register d, Register s1, Register s2) {
if (UseCompressedOops) {
assert(s1 != d && s2 != d, "not enough registers");
encode_heap_oop(d);
st(d, s1, s2);
} else {
st_ptr(d, s1, s2);
}
void MacroAssembler::store_heap_oop(Register d, Register s1, int simm13a, Register tmp, DecoratorSet decorators) {
access_store_at(T_OBJECT, IN_HEAP | decorators, d, Address(s1, simm13a), tmp);
}
void MacroAssembler::store_heap_oop(Register d, Register s1, int simm13a) {
if (UseCompressedOops) {
assert(s1 != d, "not enough registers");
encode_heap_oop(d);
st(d, s1, simm13a);
void MacroAssembler::store_heap_oop(Register d, const Address& a, int offset, Register tmp, DecoratorSet decorators) {
if (a.has_index()) {
assert(!a.has_disp(), "not supported yet");
assert(offset == 0, "not supported yet");
access_store_at(T_OBJECT, IN_HEAP | decorators, d, Address(a.base(), a.index()), tmp);
} else {
st_ptr(d, s1, simm13a);
}
}
void MacroAssembler::store_heap_oop(Register d, const Address& a, int offset) {
if (UseCompressedOops) {
assert(a.base() != d, "not enough registers");
encode_heap_oop(d);
st(d, a, offset);
} else {
st_ptr(d, a, offset);
access_store_at(T_OBJECT, IN_HEAP | decorators, d, Address(a.base(), a.disp() + offset), tmp);
}
}