6537506: Provide a mechanism for specifying Java-level USDT-like dtrace probes

Initial checkin of JSDT code

Reviewed-by: acorn, sbohne
This commit is contained in:
Keith McGuigan 2008-04-17 22:18:15 -04:00
parent 849e0dfc44
commit f072bc9d3f
26 changed files with 2935 additions and 26 deletions

View file

@ -1637,7 +1637,7 @@ static void long_move(MacroAssembler* masm, VMRegPair src, VMRegPair dst) {
}
} else if (dst.is_single_phys_reg()) {
if (src.is_adjacent_aligned_on_stack(2)) {
__ ldd(FP, reg2offset(src.first()) + STACK_BIAS, dst.first()->as_Register());
__ ld_long(FP, reg2offset(src.first()) + STACK_BIAS, dst.first()->as_Register());
} else {
// dst is a single reg.
// Remember lo is low address not msb for stack slots
@ -2501,6 +2501,551 @@ nmethod *SharedRuntime::generate_native_wrapper(MacroAssembler* masm,
}
#ifdef HAVE_DTRACE_H
// ---------------------------------------------------------------------------
// Generate a dtrace nmethod for a given signature. The method takes arguments
// in the Java compiled code convention, marshals them to the native
// abi and then leaves nops at the position you would expect to call a native
// function. When the probe is enabled the nops are replaced with a trap
// instruction that dtrace inserts and the trace will cause a notification
// to dtrace.
//
// The probes are only able to take primitive types and java/lang/String as
// arguments. No other java types are allowed. Strings are converted to utf8
// strings so that from dtrace point of view java strings are converted to C
// strings. There is an arbitrary fixed limit on the total space that a method
// can use for converting the strings. (256 chars per string in the signature).
// So any java string larger than this is truncated.
// File-scope bookkeeping: one int slot per concrete register plus a flag
// recording whether the table has been filled in.
// NOTE(review): neither variable is referenced by the code visible in this
// part of the file -- confirm they are still needed before relying on them.
static int fp_offset[ConcreteRegisterImpl::number_of_registers] = { 0 };
static bool offsets_initialized = false;
// Describe a 64-bit value held in register r as a VMRegPair.
// When the machine word is 8 bytes the value is described by r alone;
// otherwise the pair is built from r and its successor register.
static VMRegPair reg64_to_VMRegPair(Register r) {
  VMRegPair pair;
  if (wordSize != 8) {
    // Narrow words: the value spans r and r->successor().
    pair.set_pair(r->successor()->as_VMReg(), r->as_VMReg());
    return pair;
  }
  // Full-width words: a single register carries the whole value.
  pair.set2(r->as_VMReg());
  return pair;
}
// Generate the dtrace probe nmethod for 'method'.
//
// The emitted code:
//   1. performs an inline cache check and a stack overflow check,
//   2. creates a frame large enough for the outgoing C arguments, a two-slot
//      conversion temp and one max_dtrace_string_size buffer per String arg,
//   3. shuffles the incoming compiled-Java arguments into the locations
//      c_calling_convention() assigns to them (dropping the receiver,
//      unboxing the recognized box types, passing floats/doubles as
//      int/long bit patterns and nulling any other object reference),
//   4. converts each non-null String argument through SharedRuntime::get_utf
//      and passes the address of the buffer instead of the oop,
//   5. emits the nop that is to be patched with the trap, then returns.
//
// masm   - assembler the code is emitted into
// method - the probe method whose signature drives the marshalling
//
// Returns the nmethod created by nmethod::new_dtrace_nmethod().
// The caller must hold AdapterHandlerLibrary_lock.
nmethod *SharedRuntime::generate_dtrace_nmethod(
    MacroAssembler *masm, methodHandle method) {

  // generate_dtrace_nmethod is guarded by a mutex so we are sure to
  // be single threaded in this method.
  assert(AdapterHandlerLibrary_lock->owned_by_self(), "must be");

  // Fill in the signature array, for the calling-convention call.
  int total_args_passed = method->size_of_parameters();

  BasicType* in_sig_bt  = NEW_RESOURCE_ARRAY(BasicType, total_args_passed);
  VMRegPair  *in_regs   = NEW_RESOURCE_ARRAY(VMRegPair, total_args_passed);

  // The signature we are going to use for the trap that dtrace will see
  // java/lang/String is converted. We drop "this" and any other object
  // is converted to NULL.  (A one-slot java/lang/Long object reference
  // is converted to a two-slot long, which is why we double the allocation).
  BasicType* out_sig_bt = NEW_RESOURCE_ARRAY(BasicType, total_args_passed * 2);
  VMRegPair* out_regs   = NEW_RESOURCE_ARRAY(VMRegPair, total_args_passed * 2);

  int i=0;
  int total_strings = 0;
  int first_arg_to_pass = 0;
  int total_c_args = 0;
  int box_offset = java_lang_boxing_object::value_offset_in_bytes();

  // Skip the receiver as dtrace doesn't want to see it
  if( !method->is_static() ) {
    in_sig_bt[i++] = T_OBJECT;
    first_arg_to_pass = 1;
  }

  // Walk the declared parameters, building the Java-side signature
  // (in_sig_bt) and the C-side signature (out_sig_bt) in parallel.
  SignatureStream ss(method->signature());
  for ( ; !ss.at_return_type(); ss.next()) {
    BasicType bt = ss.type();
    in_sig_bt[i++] = bt;  // Collect remaining bits of signature
    out_sig_bt[total_c_args++] = bt;
    if( bt == T_OBJECT) {
      symbolOop s = ss.as_symbol_or_null();
      if (s == vmSymbols::java_lang_String()) {
        // Strings are passed as the address of a converted buffer
        total_strings++;
        out_sig_bt[total_c_args-1] = T_ADDRESS;
      } else if (s == vmSymbols::java_lang_Boolean() ||
                 s == vmSymbols::java_lang_Byte()) {
        out_sig_bt[total_c_args-1] = T_BYTE;
      } else if (s == vmSymbols::java_lang_Character() ||
                 s == vmSymbols::java_lang_Short()) {
        out_sig_bt[total_c_args-1] = T_SHORT;
      } else if (s == vmSymbols::java_lang_Integer() ||
                 s == vmSymbols::java_lang_Float()) {
        out_sig_bt[total_c_args-1] = T_INT;
      } else if (s == vmSymbols::java_lang_Long() ||
                 s == vmSymbols::java_lang_Double()) {
        // One Java slot in, two C slots out
        out_sig_bt[total_c_args-1] = T_LONG;
        out_sig_bt[total_c_args++] = T_VOID;
      }
      // Any other object type keeps T_OBJECT and is nulled out below.
    } else if ( bt == T_LONG || bt == T_DOUBLE ) {
      in_sig_bt[i++] = T_VOID;   // Longs & doubles take 2 Java slots
      // We convert double to long
      out_sig_bt[total_c_args-1] = T_LONG;
      out_sig_bt[total_c_args++] = T_VOID;
    } else if ( bt == T_FLOAT) {
      // We convert float to int
      out_sig_bt[total_c_args-1] = T_INT;
    }
  }

  assert(i==total_args_passed, "validly parsed signature");

  // Now get the compiled-Java layout as input arguments
  int comp_args_on_stack;
  comp_args_on_stack = SharedRuntime::java_calling_convention(
      in_sig_bt, in_regs, total_args_passed, false);

  // We have received a description of where all the java args are located
  // on entry to the wrapper. We need to convert these args to where
  // a native (non-jni) function would expect them. To figure out
  // where they go we convert the java signature to a C signature and remove
  // T_VOID for any long/double we might have received.

  // Now figure out where the args must be stored and how much stack space
  // they require (neglecting out_preserve_stack_slots but space for storing
  // the 1st six register arguments). It's weird see int_stk_helper.
  //
  int out_arg_slots;
  out_arg_slots = c_calling_convention(out_sig_bt, out_regs, total_c_args);

  // Calculate the total number of stack slots we will need.

  // First count the abi requirement plus all of the outgoing args
  int stack_slots = SharedRuntime::out_preserve_stack_slots() + out_arg_slots;

  // Plus a temp for possible conversion of float/double/long register args
  int conversion_temp = stack_slots;
  stack_slots += 2;

  // Now space for the string(s) we must convert
  int string_locs = stack_slots;
  stack_slots += total_strings *
                   (max_dtrace_string_size / VMRegImpl::stack_slot_size);

  // Ok The space we have allocated will look like:
  //
  //
  // FP-> |                     |
  //      |---------------------|
  //      | string[n]           |
  //      |---------------------| <- string_locs[n]
  //      | string[n-1]         |
  //      |---------------------| <- string_locs[n-1]
  //      | ...                 |
  //      | ...                 |
  //      |---------------------| <- string_locs[1]
  //      | string[0]           |
  //      |---------------------| <- string_locs[0]
  //      | temp                |
  //      |---------------------| <- conversion_temp
  //      | outbound memory     |
  //      | based arguments     |
  //      |                     |
  //      |---------------------|
  //      |                     |
  // SP-> | out_preserved_slots |
  //
  //

  // Now compute actual number of stack words we need rounding to make
  // stack properly aligned.
  stack_slots = round_to(stack_slots, 4 * VMRegImpl::slots_per_word);

  int stack_size = stack_slots * VMRegImpl::stack_slot_size;

  intptr_t start = (intptr_t)__ pc();

  // First thing make an ic check to see if we should even be here
  {
    Label L;
    const Register temp_reg = G3_scratch;
    Address ic_miss(temp_reg, SharedRuntime::get_ic_miss_stub());
    __ verify_oop(O0);
    // Compare the receiver's klass with the inline cache register and
    // jump to the ic miss stub when they differ.
    __ ld_ptr(O0, oopDesc::klass_offset_in_bytes(), temp_reg);
    __ cmp(temp_reg, G5_inline_cache_reg);
    __ brx(Assembler::equal, true, Assembler::pt, L);
    __ delayed()->nop();

    __ jump_to(ic_miss, 0);
    __ delayed()->nop();
    __ align(CodeEntryAlignment);
    __ bind(L);
  }

  int vep_offset = ((intptr_t)__ pc()) - start;

  // The instruction at the verified entry point must be 5 bytes or longer
  // because it can be patched on the fly by make_non_entrant. The stack bang
  // instruction fits that requirement.

  // Generate stack overflow check before creating frame
  __ generate_stack_overflow_check(stack_size);

  assert(((intptr_t)__ pc() - start - vep_offset) >= 5,
         "valid size for make_non_entrant");

  // Generate a new frame for the wrapper.
  __ save(SP, -stack_size, SP);

  // Frame is now completed as far a size and linkage.

  int frame_complete = ((intptr_t)__ pc()) - start;

#ifdef ASSERT
  // Track which destination registers have been written so that we can
  // assert no later argument still needs to be read from one of them.
  bool reg_destroyed[RegisterImpl::number_of_registers];
  bool freg_destroyed[FloatRegisterImpl::number_of_registers];
  for ( int r = 0 ; r < RegisterImpl::number_of_registers ; r++ ) {
    reg_destroyed[r] = false;
  }
  for ( int f = 0 ; f < FloatRegisterImpl::number_of_registers ; f++ ) {
    freg_destroyed[f] = false;
  }

#endif /* ASSERT */

  int c_arg, j_arg;

  // conversion_off != noreg means L6 currently holds the (non-simm13)
  // SP-relative offset of the conversion temp.
  Register conversion_off = noreg;

  // Move every Java argument (except the receiver) to its C location.
  for (j_arg = first_arg_to_pass, c_arg = 0 ;
       j_arg < total_args_passed ; j_arg++, c_arg++ ) {

    VMRegPair src = in_regs[j_arg];
    VMRegPair dst = out_regs[c_arg];

#ifdef ASSERT
    if (src.first()->is_Register()) {
      assert(!reg_destroyed[src.first()->as_Register()->encoding()], "ack!");
    } else if (src.first()->is_FloatRegister()) {
      assert(!freg_destroyed[src.first()->as_FloatRegister()->encoding(
                                               FloatRegisterImpl::S)], "ack!");
    }

    if (dst.first()->is_Register()) {
      reg_destroyed[dst.first()->as_Register()->encoding()] = true;
    } else if (dst.first()->is_FloatRegister()) {
      freg_destroyed[dst.first()->as_FloatRegister()->encoding(
                                                 FloatRegisterImpl::S)] = true;
    }
#endif /* ASSERT */

    switch (in_sig_bt[j_arg]) {
      case T_ARRAY:
      case T_OBJECT:
        {
          if (out_sig_bt[c_arg] == T_BYTE  || out_sig_bt[c_arg] == T_SHORT ||
              out_sig_bt[c_arg] == T_INT || out_sig_bt[c_arg] == T_LONG) {
            // need to unbox a one-slot value
            Register in_reg = L0;
            Register tmp = L2;
            if ( src.first()->is_reg() ) {
              in_reg = src.first()->as_Register();
            } else {
              assert(Assembler::is_simm13(reg2offset(src.first()) + STACK_BIAS),
                     "must be");
              __ ld_ptr(FP, reg2offset(src.first()) + STACK_BIAS, in_reg);
            }
            // If the final destination is an acceptable register
            if ( dst.first()->is_reg() ) {
              if ( dst.is_single_phys_reg() || out_sig_bt[c_arg] != T_LONG ) {
                tmp = dst.first()->as_Register();
              }
            }

            // A null box reference yields a zero value.
            Label skipUnbox;
            if ( wordSize == 4 && out_sig_bt[c_arg] == T_LONG ) {
              __ mov(G0, tmp->successor());
            }
            __ br_null(in_reg, true, Assembler::pn, skipUnbox);
            __ delayed()->mov(G0, tmp);

            switch (out_sig_bt[c_arg]) {
              case T_BYTE:
                __ ldub(in_reg, box_offset, tmp); break;
              case T_SHORT:
                __ lduh(in_reg, box_offset, tmp); break;
              case T_INT:
                __ ld(in_reg, box_offset, tmp); break;
              case T_LONG:
                __ ld_long(in_reg, box_offset, tmp); break;
              default: ShouldNotReachHere();
            }

            __ bind(skipUnbox);
            // If tmp wasn't final destination copy to final destination
            if (tmp == L2) {
              VMRegPair tmp_as_VM = reg64_to_VMRegPair(L2);
              if (out_sig_bt[c_arg] == T_LONG) {
                long_move(masm, tmp_as_VM, dst);
              } else {
                move32_64(masm, tmp_as_VM, out_regs[c_arg]);
              }
            }
            if (out_sig_bt[c_arg] == T_LONG) {
              assert(out_sig_bt[c_arg+1] == T_VOID, "must be");
              ++c_arg; // move over the T_VOID to keep the loop indices in sync
            }
          } else if (out_sig_bt[c_arg] == T_ADDRESS) {
            // A String: L2 stands in for whichever side is not a register.
            Register s =
                src.first()->is_reg() ? src.first()->as_Register() : L2;
            Register d =
                dst.first()->is_reg() ? dst.first()->as_Register() : L2;

            // We store the oop now so that the conversion pass can reach
            // while in the inner frame. This will be the only store if
            // the oop is NULL.
            if (s != L2) {
              // src is register
              if (d != L2) {
                // dst is register
                __ mov(s, d);
              } else {
                assert(Assembler::is_simm13(reg2offset(dst.first()) +
                          STACK_BIAS), "must be");
                __ st_ptr(s, SP, reg2offset(dst.first()) + STACK_BIAS);
              }
            } else {
                // src not a register
                assert(Assembler::is_simm13(reg2offset(src.first()) +
                           STACK_BIAS), "must be");
                __ ld_ptr(FP, reg2offset(src.first()) + STACK_BIAS, d);
                if (d == L2) {
                  assert(Assembler::is_simm13(reg2offset(dst.first()) +
                             STACK_BIAS), "must be");
                  __ st_ptr(d, SP, reg2offset(dst.first()) + STACK_BIAS);
                }
            }
          } else if (out_sig_bt[c_arg] != T_VOID) {
            // Convert the arg to NULL
            if (dst.first()->is_reg()) {
              __ mov(G0, dst.first()->as_Register());
            } else {
              assert(Assembler::is_simm13(reg2offset(dst.first()) +
                         STACK_BIAS), "must be");
              __ st_ptr(G0, SP, reg2offset(dst.first()) + STACK_BIAS);
            }
          }
        }
        break;
      case T_VOID:
        break;

      case T_FLOAT:
        if (src.first()->is_stack()) {
          // Stack to stack/reg is simple
          move32_64(masm, src, dst);
        } else {
          if (dst.first()->is_reg()) {
            // freg -> reg
            // Bounce the bits through the conversion temp.
            int off =
              STACK_BIAS + conversion_temp * VMRegImpl::stack_slot_size;
            Register d = dst.first()->as_Register();
            if (Assembler::is_simm13(off)) {
              __ stf(FloatRegisterImpl::S, src.first()->as_FloatRegister(),
                     SP, off);
              __ ld(SP, off, d);
            } else {
              if (conversion_off == noreg) {
                __ set(off, L6);
                conversion_off = L6;
              }
              __ stf(FloatRegisterImpl::S, src.first()->as_FloatRegister(),
                     SP, conversion_off);
              __ ld(SP, conversion_off , d);
            }
          } else {
            // freg -> mem
            int off = STACK_BIAS + reg2offset(dst.first());
            if (Assembler::is_simm13(off)) {
              __ stf(FloatRegisterImpl::S, src.first()->as_FloatRegister(),
                     SP, off);
            } else {
              // This is the offset of the outgoing argument slot, not of the
              // conversion temp, so a value cached in L6 must not be reused
              // here and the value loaded here must not be reused by later
              // conversions: always load it and invalidate the cache.
              __ set(off, L6);
              conversion_off = noreg;
              __ stf(FloatRegisterImpl::S, src.first()->as_FloatRegister(),
                     SP, L6);
            }
          }
        }
        break;

      case T_DOUBLE:
        assert( j_arg + 1 < total_args_passed &&
                in_sig_bt[j_arg + 1] == T_VOID &&
                out_sig_bt[c_arg+1] == T_VOID, "bad arg list");
        if (src.first()->is_stack()) {
          // Stack to stack/reg is simple
          long_move(masm, src, dst);
        } else {
          Register d = dst.first()->is_reg() ? dst.first()->as_Register() : L2;

          // Destination could be an odd reg on 32bit in which case
          // we can't load direct to the destination.

          if (!d->is_even() && wordSize == 4) {
            d = L2;
          }
          // Bounce the bits through the conversion temp.
          int off = STACK_BIAS + conversion_temp * VMRegImpl::stack_slot_size;
          if (Assembler::is_simm13(off)) {
            __ stf(FloatRegisterImpl::D, src.first()->as_FloatRegister(),
                   SP, off);
            __ ld_long(SP, off, d);
          } else {
            if (conversion_off == noreg) {
              __ set(off, L6);
              conversion_off = L6;
            }
            __ stf(FloatRegisterImpl::D, src.first()->as_FloatRegister(),
                   SP, conversion_off);
            __ ld_long(SP, conversion_off, d);
          }
          if (d == L2) {
            long_move(masm, reg64_to_VMRegPair(L2), dst);
          }
        }
        break;

      case T_LONG :
        // 32bit can't do a split move of something like g1 -> O0, O1
        // so use a memory temp
        if (src.is_single_phys_reg() && wordSize == 4) {
          Register tmp = L2;
          if (dst.first()->is_reg() &&
              (wordSize == 8 || dst.first()->as_Register()->is_even())) {
            tmp = dst.first()->as_Register();
          }

          int off = STACK_BIAS + conversion_temp * VMRegImpl::stack_slot_size;
          if (Assembler::is_simm13(off)) {
            __ stx(src.first()->as_Register(), SP, off);
            __ ld_long(SP, off, tmp);
          } else {
            if (conversion_off == noreg) {
              __ set(off, L6);
              conversion_off = L6;
            }
            __ stx(src.first()->as_Register(), SP, conversion_off);
            __ ld_long(SP, conversion_off, tmp);
          }

          if (tmp == L2) {
            long_move(masm, reg64_to_VMRegPair(L2), dst);
          }
        } else {
          long_move(masm, src, dst);
        }
        break;

      // No break after the assert: when the assert is compiled out this
      // case falls through to the default move.
      case T_ADDRESS: assert(false, "found T_ADDRESS in java args");

      default:
        move32_64(masm, src, dst);
    }
  }


  // If we have any strings, convert them now. The outgoing arguments are
  // already in place, so push another frame to protect them while the
  // conversion routine is called.

  if (total_strings > 0 ) {

    // protect all the arg registers
    __ save_frame(0);
    __ mov(G2_thread, L7_thread_cache);
    const Register L2_string_off = L2;

    // Get first string offset
    __ set(string_locs * VMRegImpl::stack_slot_size, L2_string_off);

    for (c_arg = 0 ; c_arg < total_c_args ; c_arg++ ) {
      if (out_sig_bt[c_arg] == T_ADDRESS) {

        VMRegPair dst = out_regs[c_arg];
        // The out arg register as it is named after the save above
        const Register d = dst.first()->is_reg() ?
            dst.first()->as_Register()->after_save() : noreg;

        // It's a string the oop and it was already copied to the out arg
        // position
        if (d != noreg) {
          __ mov(d, O0);
        } else {
          assert(Assembler::is_simm13(reg2offset(dst.first()) + STACK_BIAS),
                 "must be");
          __ ld_ptr(FP,  reg2offset(dst.first()) + STACK_BIAS, O0);
        }
        Label skip;

        // A null string is left as is; otherwise O1 gets the address of
        // this string's buffer (computed in the delay slot).
        __ br_null(O0, false, Assembler::pn, skip);
        __ delayed()->add(FP, L2_string_off, O1);

        // Replace the oop in the out arg position with the buffer address
        if (d != noreg) {
          __ mov(O1, d);
        } else {
          assert(Assembler::is_simm13(reg2offset(dst.first()) + STACK_BIAS),
                 "must be");
          __ st_ptr(O1, FP,  reg2offset(dst.first()) + STACK_BIAS);
        }

        // Call get_utf with O0 = string oop and O1 = buffer; the delay slot
        // advances the offset to the next string buffer.
        __ call(CAST_FROM_FN_PTR(address, SharedRuntime::get_utf),
                relocInfo::runtime_call_type);
        __ delayed()->add(L2_string_off, max_dtrace_string_size, L2_string_off);

        __ bind(skip);

      }

    }
    __ mov(L7_thread_cache, G2_thread);
    __ restore();

  }


  // Ok now we are done. Need to place the nop that dtrace wants in order to
  // patch in the trap

  int patch_offset = ((intptr_t)__ pc()) - start;

  __ nop();


  // Return

  __ ret();
  __ delayed()->restore();

  __ flush();

  nmethod *nm = nmethod::new_dtrace_nmethod(
      method, masm->code(), vep_offset, patch_offset, frame_complete,
      stack_slots / VMRegImpl::slots_per_word);
  return nm;

}
#endif // HAVE_DTRACE_H
// this function returns the adjust size (in number of words) to a c2i adapter
// activation for use during deoptimization
int Deoptimization::last_frame_adjust(int callee_parameters, int callee_locals) {