mirror of
https://github.com/openjdk/jdk.git
synced 2025-09-17 17:44:40 +02:00
Merge
This commit is contained in:
commit
9aea544943
36 changed files with 1627 additions and 207 deletions
|
@ -88,6 +88,7 @@ class Assembler : public AbstractAssembler {
|
|||
orncc_op3 = 0x16,
|
||||
xnorcc_op3 = 0x17,
|
||||
addccc_op3 = 0x18,
|
||||
aes4_op3 = 0x19,
|
||||
umulcc_op3 = 0x1a,
|
||||
smulcc_op3 = 0x1b,
|
||||
subccc_op3 = 0x1c,
|
||||
|
@ -121,6 +122,8 @@ class Assembler : public AbstractAssembler {
|
|||
fpop1_op3 = 0x34,
|
||||
fpop2_op3 = 0x35,
|
||||
impdep1_op3 = 0x36,
|
||||
aes3_op3 = 0x36,
|
||||
flog3_op3 = 0x36,
|
||||
impdep2_op3 = 0x37,
|
||||
jmpl_op3 = 0x38,
|
||||
rett_op3 = 0x39,
|
||||
|
@ -172,41 +175,56 @@ class Assembler : public AbstractAssembler {
|
|||
|
||||
enum opfs {
|
||||
// selected opfs
|
||||
fmovs_opf = 0x01,
|
||||
fmovd_opf = 0x02,
|
||||
fmovs_opf = 0x01,
|
||||
fmovd_opf = 0x02,
|
||||
|
||||
fnegs_opf = 0x05,
|
||||
fnegd_opf = 0x06,
|
||||
fnegs_opf = 0x05,
|
||||
fnegd_opf = 0x06,
|
||||
|
||||
fadds_opf = 0x41,
|
||||
faddd_opf = 0x42,
|
||||
fsubs_opf = 0x45,
|
||||
fsubd_opf = 0x46,
|
||||
fadds_opf = 0x41,
|
||||
faddd_opf = 0x42,
|
||||
fsubs_opf = 0x45,
|
||||
fsubd_opf = 0x46,
|
||||
|
||||
fmuls_opf = 0x49,
|
||||
fmuld_opf = 0x4a,
|
||||
fdivs_opf = 0x4d,
|
||||
fdivd_opf = 0x4e,
|
||||
fmuls_opf = 0x49,
|
||||
fmuld_opf = 0x4a,
|
||||
fdivs_opf = 0x4d,
|
||||
fdivd_opf = 0x4e,
|
||||
|
||||
fcmps_opf = 0x51,
|
||||
fcmpd_opf = 0x52,
|
||||
fcmps_opf = 0x51,
|
||||
fcmpd_opf = 0x52,
|
||||
|
||||
fstox_opf = 0x81,
|
||||
fdtox_opf = 0x82,
|
||||
fxtos_opf = 0x84,
|
||||
fxtod_opf = 0x88,
|
||||
fitos_opf = 0xc4,
|
||||
fdtos_opf = 0xc6,
|
||||
fitod_opf = 0xc8,
|
||||
fstod_opf = 0xc9,
|
||||
fstoi_opf = 0xd1,
|
||||
fdtoi_opf = 0xd2,
|
||||
fstox_opf = 0x81,
|
||||
fdtox_opf = 0x82,
|
||||
fxtos_opf = 0x84,
|
||||
fxtod_opf = 0x88,
|
||||
fitos_opf = 0xc4,
|
||||
fdtos_opf = 0xc6,
|
||||
fitod_opf = 0xc8,
|
||||
fstod_opf = 0xc9,
|
||||
fstoi_opf = 0xd1,
|
||||
fdtoi_opf = 0xd2,
|
||||
|
||||
mdtox_opf = 0x110,
|
||||
mstouw_opf = 0x111,
|
||||
mstosw_opf = 0x113,
|
||||
mxtod_opf = 0x118,
|
||||
mwtos_opf = 0x119
|
||||
mdtox_opf = 0x110,
|
||||
mstouw_opf = 0x111,
|
||||
mstosw_opf = 0x113,
|
||||
mxtod_opf = 0x118,
|
||||
mwtos_opf = 0x119,
|
||||
|
||||
aes_kexpand0_opf = 0x130,
|
||||
aes_kexpand2_opf = 0x131
|
||||
};
|
||||
|
||||
// op5 sub-opcode values for the AES instructions that are emitted with
// op3(aes4_op3): the aes_eround*/aes_dround* emitters and aes_kexpand1.
// Each value is placed into the instruction word by op5(), which is
// u_field(x, 8, 5).
enum op5s {
|
||||
// encryption / decryption round sub-opcodes
aes_eround01_op5 = 0x00,
|
||||
aes_eround23_op5 = 0x01,
|
||||
aes_dround01_op5 = 0x02,
|
||||
aes_dround23_op5 = 0x03,
|
||||
// "_l" forms; the encrypt stub uses the eround*_l emitters for its last round
aes_eround01_l_op5 = 0x04,
|
||||
aes_eround23_l_op5 = 0x05,
|
||||
aes_dround01_l_op5 = 0x06,
|
||||
aes_dround23_l_op5 = 0x07,
|
||||
// key expansion step; its emitter takes a 5-bit immediate instead of a third source register
aes_kexpand1_op5 = 0x08
|
||||
};
|
||||
|
||||
enum RCondition { rc_z = 1, rc_lez = 2, rc_lz = 3, rc_nz = 5, rc_gz = 6, rc_gez = 7, rc_last = rc_gez };
|
||||
|
@ -427,6 +445,7 @@ class Assembler : public AbstractAssembler {
|
|||
static int immed( bool i) { return u_field(i ? 1 : 0, 13, 13); }
|
||||
static int opf_low6( int w) { return u_field(w, 10, 5); }
|
||||
static int opf_low5( int w) { return u_field(w, 9, 5); }
|
||||
static int op5( int x) { return u_field(x, 8, 5); }
|
||||
static int trapcc( CC cc) { return u_field(cc, 12, 11); }
|
||||
static int sx( int i) { return u_field(i, 12, 12); } // shift x=1 means 64-bit
|
||||
static int opf( int x) { return u_field(x, 13, 5); }
|
||||
|
@ -451,6 +470,7 @@ class Assembler : public AbstractAssembler {
|
|||
static int fd( FloatRegister r, FloatRegisterImpl::Width fwa) { return u_field(r->encoding(fwa), 29, 25); };
|
||||
static int fs1(FloatRegister r, FloatRegisterImpl::Width fwa) { return u_field(r->encoding(fwa), 18, 14); };
|
||||
static int fs2(FloatRegister r, FloatRegisterImpl::Width fwa) { return u_field(r->encoding(fwa), 4, 0); };
|
||||
static int fs3(FloatRegister r, FloatRegisterImpl::Width fwa) { return u_field(r->encoding(fwa), 13, 9); };
|
||||
|
||||
// some float instructions use this encoding on the op3 field
|
||||
static int alt_op3(int op, FloatRegisterImpl::Width w) {
|
||||
|
@ -559,6 +579,12 @@ class Assembler : public AbstractAssembler {
|
|||
return x & ((1 << 10) - 1);
|
||||
}
|
||||
|
||||
// AES crypto instructions supported only on certain processors
|
||||
static void aes_only() { assert( VM_Version::has_aes(), "This instruction only works on SPARC with AES instructions support"); }
|
||||
|
||||
// instruction only in VIS1
|
||||
static void vis1_only() { assert( VM_Version::has_vis1(), "This instruction only works on SPARC with VIS1"); }
|
||||
|
||||
// instruction only in VIS3
|
||||
static void vis3_only() { assert( VM_Version::has_vis3(), "This instruction only works on SPARC with VIS3"); }
|
||||
|
||||
|
@ -682,6 +708,24 @@ public:
|
|||
void addccc( Register s1, int simm13a, Register d ) { emit_int32( op(arith_op) | rd(d) | op3(addc_op3 | cc_bit_op3) | rs1(s1) | immed(true) | simm(simm13a, 13) ); }
|
||||
|
||||
|
||||
// 4-operand AES instructions
|
||||
|
||||
void aes_eround01( FloatRegister s1, FloatRegister s2, FloatRegister s3, FloatRegister d ) { aes_only(); emit_int32( op(arith_op) | fd(d, FloatRegisterImpl::D) | op3(aes4_op3) | fs1(s1, FloatRegisterImpl::D) | fs3(s3, FloatRegisterImpl::D) | op5(aes_eround01_op5) | fs2(s2, FloatRegisterImpl::D) ); }
|
||||
void aes_eround23( FloatRegister s1, FloatRegister s2, FloatRegister s3, FloatRegister d ) { aes_only(); emit_int32( op(arith_op) | fd(d, FloatRegisterImpl::D) | op3(aes4_op3) | fs1(s1, FloatRegisterImpl::D) | fs3(s3, FloatRegisterImpl::D) | op5(aes_eround23_op5) | fs2(s2, FloatRegisterImpl::D) ); }
|
||||
void aes_dround01( FloatRegister s1, FloatRegister s2, FloatRegister s3, FloatRegister d ) { aes_only(); emit_int32( op(arith_op) | fd(d, FloatRegisterImpl::D) | op3(aes4_op3) | fs1(s1, FloatRegisterImpl::D) | fs3(s3, FloatRegisterImpl::D) | op5(aes_dround01_op5) | fs2(s2, FloatRegisterImpl::D) ); }
|
||||
void aes_dround23( FloatRegister s1, FloatRegister s2, FloatRegister s3, FloatRegister d ) { aes_only(); emit_int32( op(arith_op) | fd(d, FloatRegisterImpl::D) | op3(aes4_op3) | fs1(s1, FloatRegisterImpl::D) | fs3(s3, FloatRegisterImpl::D) | op5(aes_dround23_op5) | fs2(s2, FloatRegisterImpl::D) ); }
|
||||
void aes_eround01_l( FloatRegister s1, FloatRegister s2, FloatRegister s3, FloatRegister d ) { aes_only(); emit_int32( op(arith_op) | fd(d, FloatRegisterImpl::D) | op3(aes4_op3) | fs1(s1, FloatRegisterImpl::D) | fs3(s3, FloatRegisterImpl::D) | op5(aes_eround01_l_op5) | fs2(s2, FloatRegisterImpl::D) ); }
|
||||
void aes_eround23_l( FloatRegister s1, FloatRegister s2, FloatRegister s3, FloatRegister d ) { aes_only(); emit_int32( op(arith_op) | fd(d, FloatRegisterImpl::D) | op3(aes4_op3) | fs1(s1, FloatRegisterImpl::D) | fs3(s3, FloatRegisterImpl::D) | op5(aes_eround23_l_op5) | fs2(s2, FloatRegisterImpl::D) ); }
|
||||
void aes_dround01_l( FloatRegister s1, FloatRegister s2, FloatRegister s3, FloatRegister d ) { aes_only(); emit_int32( op(arith_op) | fd(d, FloatRegisterImpl::D) | op3(aes4_op3) | fs1(s1, FloatRegisterImpl::D) | fs3(s3, FloatRegisterImpl::D) | op5(aes_dround01_l_op5) | fs2(s2, FloatRegisterImpl::D) ); }
|
||||
void aes_dround23_l( FloatRegister s1, FloatRegister s2, FloatRegister s3, FloatRegister d ) { aes_only(); emit_int32( op(arith_op) | fd(d, FloatRegisterImpl::D) | op3(aes4_op3) | fs1(s1, FloatRegisterImpl::D) | fs3(s3, FloatRegisterImpl::D) | op5(aes_dround23_l_op5) | fs2(s2, FloatRegisterImpl::D) ); }
|
||||
void aes_kexpand1( FloatRegister s1, FloatRegister s2, int imm5a, FloatRegister d ) { aes_only(); emit_int32( op(arith_op) | fd(d, FloatRegisterImpl::D) | op3(aes4_op3) | fs1(s1, FloatRegisterImpl::D) | u_field(imm5a, 13, 9) | op5(aes_kexpand1_op5) | fs2(s2, FloatRegisterImpl::D) ); }
|
||||
|
||||
|
||||
// 3-operand AES instructions
|
||||
|
||||
void aes_kexpand0( FloatRegister s1, FloatRegister s2, FloatRegister d ) { aes_only(); emit_int32( op(arith_op) | fd(d, FloatRegisterImpl::D) | op3(aes3_op3) | fs1(s1, FloatRegisterImpl::D) | opf(aes_kexpand0_opf) | fs2(s2, FloatRegisterImpl::D) ); }
|
||||
void aes_kexpand2( FloatRegister s1, FloatRegister s2, FloatRegister d ) { aes_only(); emit_int32( op(arith_op) | fd(d, FloatRegisterImpl::D) | op3(aes3_op3) | fs1(s1, FloatRegisterImpl::D) | opf(aes_kexpand2_opf) | fs2(s2, FloatRegisterImpl::D) ); }
|
||||
|
||||
// pp 136
|
||||
|
||||
inline void bpr(RCondition c, bool a, Predict p, Register s1, address d, relocInfo::relocType rt = relocInfo::none);
|
||||
|
@ -784,6 +828,10 @@ public:
|
|||
void fmul( FloatRegisterImpl::Width sw, FloatRegisterImpl::Width dw, FloatRegister s1, FloatRegister s2, FloatRegister d ) { emit_int32( op(arith_op) | fd(d, dw) | op3(fpop1_op3) | fs1(s1, sw) | opf(0x60 + sw + dw*4) | fs2(s2, sw)); }
|
||||
void fdiv( FloatRegisterImpl::Width w, FloatRegister s1, FloatRegister s2, FloatRegister d ) { emit_int32( op(arith_op) | fd(d, w) | op3(fpop1_op3) | fs1(s1, w) | opf(0x4c + w) | fs2(s2, w)); }
|
||||
|
||||
// FXORs/FXORd instructions
|
||||
|
||||
void fxor( FloatRegisterImpl::Width w, FloatRegister s1, FloatRegister s2, FloatRegister d ) { vis1_only(); emit_int32( op(arith_op) | fd(d, w) | op3(flog3_op3) | fs1(s1, w) | opf(0x6E - w) | fs2(s2, w)); }
|
||||
|
||||
// pp 164
|
||||
|
||||
void fsqrt( FloatRegisterImpl::Width w, FloatRegister s, FloatRegister d ) { emit_int32( op(arith_op) | fd(d, w) | op3(fpop1_op3) | opf(0x28 + w) | fs2(s, w)); }
|
||||
|
|
|
@ -1315,7 +1315,7 @@ void LIR_Assembler::const2reg(LIR_Opr src, LIR_Opr dest, LIR_PatchCode patch_cod
|
|||
}
|
||||
|
||||
Address LIR_Assembler::as_Address(LIR_Address* addr) {
|
||||
Register reg = addr->base()->as_register();
|
||||
Register reg = addr->base()->as_pointer_register();
|
||||
LIR_Opr index = addr->index();
|
||||
if (index->is_illegal()) {
|
||||
return Address(reg, addr->disp());
|
||||
|
@ -3101,7 +3101,145 @@ void LIR_Assembler::emit_profile_call(LIR_OpProfileCall* op) {
|
|||
}
|
||||
|
||||
void LIR_Assembler::emit_profile_type(LIR_OpProfileType* op) {
|
||||
fatal("Type profiling not implemented on this platform");
|
||||
Register obj = op->obj()->as_register();
|
||||
Register tmp1 = op->tmp()->as_pointer_register();
|
||||
Register tmp2 = G1;
|
||||
Address mdo_addr = as_Address(op->mdp()->as_address_ptr());
|
||||
ciKlass* exact_klass = op->exact_klass();
|
||||
intptr_t current_klass = op->current_klass();
|
||||
bool not_null = op->not_null();
|
||||
bool no_conflict = op->no_conflict();
|
||||
|
||||
Label update, next, none;
|
||||
|
||||
bool do_null = !not_null;
|
||||
bool exact_klass_set = exact_klass != NULL && ciTypeEntries::valid_ciklass(current_klass) == exact_klass;
|
||||
bool do_update = !TypeEntries::is_type_unknown(current_klass) && !exact_klass_set;
|
||||
|
||||
assert(do_null || do_update, "why are we here?");
|
||||
assert(!TypeEntries::was_null_seen(current_klass) || do_update, "why are we here?");
|
||||
|
||||
__ verify_oop(obj);
|
||||
|
||||
if (tmp1 != obj) {
|
||||
__ mov(obj, tmp1);
|
||||
}
|
||||
if (do_null) {
|
||||
__ br_notnull_short(tmp1, Assembler::pt, update);
|
||||
if (!TypeEntries::was_null_seen(current_klass)) {
|
||||
__ ld_ptr(mdo_addr, tmp1);
|
||||
__ or3(tmp1, TypeEntries::null_seen, tmp1);
|
||||
__ st_ptr(tmp1, mdo_addr);
|
||||
}
|
||||
if (do_update) {
|
||||
__ ba(next);
|
||||
__ delayed()->nop();
|
||||
}
|
||||
#ifdef ASSERT
|
||||
} else {
|
||||
__ br_notnull_short(tmp1, Assembler::pt, update);
|
||||
__ stop("unexpect null obj");
|
||||
#endif
|
||||
}
|
||||
|
||||
__ bind(update);
|
||||
|
||||
if (do_update) {
|
||||
#ifdef ASSERT
|
||||
if (exact_klass != NULL) {
|
||||
Label ok;
|
||||
__ load_klass(tmp1, tmp1);
|
||||
metadata2reg(exact_klass->constant_encoding(), tmp2);
|
||||
__ cmp_and_br_short(tmp1, tmp2, Assembler::equal, Assembler::pt, ok);
|
||||
__ stop("exact klass and actual klass differ");
|
||||
__ bind(ok);
|
||||
}
|
||||
#endif
|
||||
|
||||
Label do_update;
|
||||
__ ld_ptr(mdo_addr, tmp2);
|
||||
|
||||
if (!no_conflict) {
|
||||
if (exact_klass == NULL || TypeEntries::is_type_none(current_klass)) {
|
||||
if (exact_klass != NULL) {
|
||||
metadata2reg(exact_klass->constant_encoding(), tmp1);
|
||||
} else {
|
||||
__ load_klass(tmp1, tmp1);
|
||||
}
|
||||
|
||||
__ xor3(tmp1, tmp2, tmp1);
|
||||
__ btst(TypeEntries::type_klass_mask, tmp1);
|
||||
// klass seen before, nothing to do. The unknown bit may have been
|
||||
// set already but no need to check.
|
||||
__ brx(Assembler::zero, false, Assembler::pt, next);
|
||||
__ delayed()->
|
||||
|
||||
btst(TypeEntries::type_unknown, tmp1);
|
||||
// already unknown. Nothing to do anymore.
|
||||
__ brx(Assembler::notZero, false, Assembler::pt, next);
|
||||
|
||||
if (TypeEntries::is_type_none(current_klass)) {
|
||||
__ delayed()->btst(TypeEntries::type_mask, tmp2);
|
||||
__ brx(Assembler::zero, true, Assembler::pt, do_update);
|
||||
// first time here. Set profile type.
|
||||
__ delayed()->or3(tmp2, tmp1, tmp2);
|
||||
} else {
|
||||
__ delayed()->nop();
|
||||
}
|
||||
} else {
|
||||
assert(ciTypeEntries::valid_ciklass(current_klass) != NULL &&
|
||||
ciTypeEntries::valid_ciklass(current_klass) != exact_klass, "conflict only");
|
||||
|
||||
__ btst(TypeEntries::type_unknown, tmp2);
|
||||
// already unknown. Nothing to do anymore.
|
||||
__ brx(Assembler::notZero, false, Assembler::pt, next);
|
||||
__ delayed()->nop();
|
||||
}
|
||||
|
||||
// different than before. Cannot keep accurate profile.
|
||||
__ or3(tmp2, TypeEntries::type_unknown, tmp2);
|
||||
} else {
|
||||
// There's a single possible klass at this profile point
|
||||
assert(exact_klass != NULL, "should be");
|
||||
if (TypeEntries::is_type_none(current_klass)) {
|
||||
metadata2reg(exact_klass->constant_encoding(), tmp1);
|
||||
__ xor3(tmp1, tmp2, tmp1);
|
||||
__ btst(TypeEntries::type_klass_mask, tmp1);
|
||||
__ brx(Assembler::zero, false, Assembler::pt, next);
|
||||
#ifdef ASSERT
|
||||
|
||||
{
|
||||
Label ok;
|
||||
__ delayed()->btst(TypeEntries::type_mask, tmp2);
|
||||
__ brx(Assembler::zero, true, Assembler::pt, ok);
|
||||
__ delayed()->nop();
|
||||
|
||||
__ stop("unexpected profiling mismatch");
|
||||
__ bind(ok);
|
||||
}
|
||||
// first time here. Set profile type.
|
||||
__ or3(tmp2, tmp1, tmp2);
|
||||
#else
|
||||
// first time here. Set profile type.
|
||||
__ delayed()->or3(tmp2, tmp1, tmp2);
|
||||
#endif
|
||||
|
||||
} else {
|
||||
assert(ciTypeEntries::valid_ciklass(current_klass) != NULL &&
|
||||
ciTypeEntries::valid_ciklass(current_klass) != exact_klass, "inconsistent");
|
||||
|
||||
// already unknown. Nothing to do anymore.
|
||||
__ btst(TypeEntries::type_unknown, tmp2);
|
||||
__ brx(Assembler::notZero, false, Assembler::pt, next);
|
||||
__ delayed()->or3(tmp2, TypeEntries::type_unknown, tmp2);
|
||||
}
|
||||
}
|
||||
|
||||
__ bind(do_update);
|
||||
__ st_ptr(tmp2, mdo_addr);
|
||||
|
||||
__ bind(next);
|
||||
}
|
||||
}
|
||||
|
||||
void LIR_Assembler::align_backward_branch_target() {
|
||||
|
@ -3321,9 +3459,14 @@ void LIR_Assembler::unpack64(LIR_Opr src, LIR_Opr dst) {
|
|||
|
||||
void LIR_Assembler::leal(LIR_Opr addr_opr, LIR_Opr dest) {
|
||||
LIR_Address* addr = addr_opr->as_address_ptr();
|
||||
assert(addr->index()->is_illegal() && addr->scale() == LIR_Address::times_1 && Assembler::is_simm13(addr->disp()), "can't handle complex addresses yet");
|
||||
assert(addr->index()->is_illegal() && addr->scale() == LIR_Address::times_1, "can't handle complex addresses yet");
|
||||
|
||||
__ add(addr->base()->as_pointer_register(), addr->disp(), dest->as_pointer_register());
|
||||
if (Assembler::is_simm13(addr->disp())) {
|
||||
__ add(addr->base()->as_pointer_register(), addr->disp(), dest->as_pointer_register());
|
||||
} else {
|
||||
__ set(addr->disp(), G3_scratch);
|
||||
__ add(addr->base()->as_pointer_register(), G3_scratch, dest->as_pointer_register());
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
|
|
@ -1892,6 +1892,220 @@ void InterpreterMacroAssembler::profile_switch_case(Register index,
|
|||
}
|
||||
}
|
||||
|
||||
// Emits code that records the type of the value in 'obj' into the profile
// cell at 'mdo_addr'.
//
//   obj      - value to profile; overwritten (load_klass and xor3 both write it)
//   mdo_addr - address of the profile cell that is read and, if needed, rewritten
//   tmp      - scratch register that holds the cell contents; overwritten
//
// Generated logic:
//   - obj is null: OR TypeEntries::null_seen into the cell and store it.
//   - klass bits of the cell match obj's klass: leave the cell untouched.
//   - cell already has TypeEntries::type_unknown set: leave the cell untouched.
//   - cell has no type bits set yet: OR the klass into the cell and store it.
//   - otherwise: OR TypeEntries::type_unknown into the cell and store it.
void InterpreterMacroAssembler::profile_obj_type(Register obj, const Address& mdo_addr, Register tmp) {
|
||||
Label not_null, do_nothing, do_update;
|
||||

||||
assert_different_registers(obj, mdo_addr.base(), tmp);
|
||||

||||
verify_oop(obj);
|
||||

||||
// tmp = current contents of the profile cell
ld_ptr(mdo_addr, tmp);
|
||||

||||
// null case: only the null_seen bit is recorded
br_notnull_short(obj, pt, not_null);
|
||||
or3(tmp, TypeEntries::null_seen, tmp);
|
||||
ba_short(do_update);
|
||||

||||
bind(not_null);
|
||||
// obj now holds the klass of the object, no longer the object itself
load_klass(obj, obj);
|
||||

||||
// obj = klass ^ cell: the bits under type_klass_mask are zero iff the
// recorded klass equals this klass
xor3(obj, tmp, obj);
|
||||
btst(TypeEntries::type_klass_mask, obj);
|
||||
// klass seen before, nothing to do. The unknown bit may have been
|
||||
// set already but no need to check.
|
||||
brx(zero, false, pt, do_nothing);
|
||||
// the following btst is emitted as the delay-slot instruction of the brx above
delayed()->
|
||||

||||
btst(TypeEntries::type_unknown, obj);
|
||||
// already unknown. Nothing to do anymore.
|
||||
brx(notZero, false, pt, do_nothing);
|
||||
// the following btst is emitted as the delay-slot instruction of the brx above
delayed()->
|
||||

||||
btst(TypeEntries::type_mask, tmp);
|
||||
// NOTE(review): the 'true' argument is presumably the annul bit, so the
// delay-slot or3 below would only take effect when the branch is taken -
// confirm against the brx declaration.
brx(zero, true, pt, do_update);
|
||||
// first time here. Set profile type.
|
||||
delayed()->or3(tmp, obj, tmp);
|
||||

||||
// different than before. Cannot keep accurate profile.
|
||||
or3(tmp, TypeEntries::type_unknown, tmp);
|
||||

||||
bind(do_update);
|
||||
// update profile
|
||||
st_ptr(tmp, mdo_addr);
|
||||

||||
bind(do_nothing);
|
||||
}
|
||||
|
||||
void InterpreterMacroAssembler::profile_arguments_type(Register callee, Register tmp1, Register tmp2, bool is_virtual) {
|
||||
if (!ProfileInterpreter) {
|
||||
return;
|
||||
}
|
||||
|
||||
assert_different_registers(callee, tmp1, tmp2, ImethodDataPtr);
|
||||
|
||||
if (MethodData::profile_arguments() || MethodData::profile_return()) {
|
||||
Label profile_continue;
|
||||
|
||||
test_method_data_pointer(profile_continue);
|
||||
|
||||
int off_to_start = is_virtual ? in_bytes(VirtualCallData::virtual_call_data_size()) : in_bytes(CounterData::counter_data_size());
|
||||
|
||||
ldub(ImethodDataPtr, in_bytes(DataLayout::tag_offset()) - off_to_start, tmp1);
|
||||
cmp_and_br_short(tmp1, is_virtual ? DataLayout::virtual_call_type_data_tag : DataLayout::call_type_data_tag, notEqual, pn, profile_continue);
|
||||
|
||||
if (MethodData::profile_arguments()) {
|
||||
Label done;
|
||||
int off_to_args = in_bytes(TypeEntriesAtCall::args_data_offset());
|
||||
add(ImethodDataPtr, off_to_args, ImethodDataPtr);
|
||||
|
||||
for (int i = 0; i < TypeProfileArgsLimit; i++) {
|
||||
if (i > 0 || MethodData::profile_return()) {
|
||||
// If return value type is profiled we may have no argument to profile
|
||||
ld_ptr(ImethodDataPtr, in_bytes(TypeEntriesAtCall::cell_count_offset())-off_to_args, tmp1);
|
||||
sub(tmp1, i*TypeStackSlotEntries::per_arg_count(), tmp1);
|
||||
cmp_and_br_short(tmp1, TypeStackSlotEntries::per_arg_count(), less, pn, done);
|
||||
}
|
||||
ld_ptr(Address(callee, Method::const_offset()), tmp1);
|
||||
lduh(Address(tmp1, ConstMethod::size_of_parameters_offset()), tmp1);
|
||||
// stack offset o (zero based) from the start of the argument
|
||||
// list, for n arguments translates into offset n - o - 1 from
|
||||
// the end of the argument list. But there's an extra slot at
|
||||
// the top of the stack. So the offset is n - o from Lesp.
|
||||
ld_ptr(ImethodDataPtr, in_bytes(TypeEntriesAtCall::stack_slot_offset(i))-off_to_args, tmp2);
|
||||
sub(tmp1, tmp2, tmp1);
|
||||
|
||||
// Can't use MacroAssembler::argument_address() which needs Gargs to be set up
|
||||
sll(tmp1, Interpreter::logStackElementSize, tmp1);
|
||||
ld_ptr(Lesp, tmp1, tmp1);
|
||||
|
||||
Address mdo_arg_addr(ImethodDataPtr, in_bytes(TypeEntriesAtCall::argument_type_offset(i))-off_to_args);
|
||||
profile_obj_type(tmp1, mdo_arg_addr, tmp2);
|
||||
|
||||
int to_add = in_bytes(TypeStackSlotEntries::per_arg_size());
|
||||
add(ImethodDataPtr, to_add, ImethodDataPtr);
|
||||
off_to_args += to_add;
|
||||
}
|
||||
|
||||
if (MethodData::profile_return()) {
|
||||
ld_ptr(ImethodDataPtr, in_bytes(TypeEntriesAtCall::cell_count_offset())-off_to_args, tmp1);
|
||||
sub(tmp1, TypeProfileArgsLimit*TypeStackSlotEntries::per_arg_count(), tmp1);
|
||||
}
|
||||
|
||||
bind(done);
|
||||
|
||||
if (MethodData::profile_return()) {
|
||||
// We're right after the type profile for the last
|
||||
// argument. tmp1 is the number of cells left in the
|
||||
// CallTypeData/VirtualCallTypeData to reach its end. Non null
|
||||
// if there's a return to profile.
|
||||
assert(ReturnTypeEntry::static_cell_count() < TypeStackSlotEntries::per_arg_count(), "can't move past ret type");
|
||||
sll(tmp1, exact_log2(DataLayout::cell_size), tmp1);
|
||||
add(ImethodDataPtr, tmp1, ImethodDataPtr);
|
||||
}
|
||||
} else {
|
||||
assert(MethodData::profile_return(), "either profile call args or call ret");
|
||||
update_mdp_by_constant(in_bytes(ReturnTypeEntry::size()));
|
||||
}
|
||||
|
||||
// mdp points right after the end of the
|
||||
// CallTypeData/VirtualCallTypeData, right after the cells for the
|
||||
// return value type if there's one.
|
||||
|
||||
bind(profile_continue);
|
||||
}
|
||||
}
|
||||
|
||||
void InterpreterMacroAssembler::profile_return_type(Register ret, Register tmp1, Register tmp2) {
|
||||
assert_different_registers(ret, tmp1, tmp2);
|
||||
if (ProfileInterpreter && MethodData::profile_return()) {
|
||||
Label profile_continue, done;
|
||||
|
||||
test_method_data_pointer(profile_continue);
|
||||
|
||||
if (MethodData::profile_return_jsr292_only()) {
|
||||
// If we don't profile all invoke bytecodes we must make sure
|
||||
// it's a bytecode we indeed profile. We can't go back to the
|
||||
// beginning of the ProfileData we intend to update to check its
|
||||
// type because we're right after it and we don't know its
|
||||
// length.
|
||||
Label do_profile;
|
||||
ldub(Lbcp, 0, tmp1);
|
||||
cmp_and_br_short(tmp1, Bytecodes::_invokedynamic, equal, pn, do_profile);
|
||||
cmp(tmp1, Bytecodes::_invokehandle);
|
||||
br(equal, false, pn, do_profile);
|
||||
delayed()->ldub(Lmethod, Method::intrinsic_id_offset_in_bytes(), tmp1);
|
||||
cmp_and_br_short(tmp1, vmIntrinsics::_compiledLambdaForm, notEqual, pt, profile_continue);
|
||||
|
||||
bind(do_profile);
|
||||
}
|
||||
|
||||
Address mdo_ret_addr(ImethodDataPtr, -in_bytes(ReturnTypeEntry::size()));
|
||||
mov(ret, tmp1);
|
||||
profile_obj_type(tmp1, mdo_ret_addr, tmp2);
|
||||
|
||||
bind(profile_continue);
|
||||
}
|
||||
}
|
||||
|
||||
void InterpreterMacroAssembler::profile_parameters_type(Register tmp1, Register tmp2, Register tmp3, Register tmp4) {
|
||||
if (ProfileInterpreter && MethodData::profile_parameters()) {
|
||||
Label profile_continue, done;
|
||||
|
||||
test_method_data_pointer(profile_continue);
|
||||
|
||||
// Load the offset of the area within the MDO used for
|
||||
// parameters. If it's negative we're not profiling any parameters.
|
||||
lduw(ImethodDataPtr, in_bytes(MethodData::parameters_type_data_di_offset()) - in_bytes(MethodData::data_offset()), tmp1);
|
||||
cmp_and_br_short(tmp1, 0, less, pn, profile_continue);
|
||||
|
||||
// Compute a pointer to the area for parameters from the offset
|
||||
// and move the pointer to the slot for the last
|
||||
// parameter. Collect profiling from last parameter down.
|
||||
// mdo start + parameters offset + array length - 1
|
||||
|
||||
// Pointer to the parameter area in the MDO
|
||||
Register mdp = tmp1;
|
||||
add(ImethodDataPtr, tmp1, mdp);
|
||||
|
||||
// offset of the current profile entry to update
|
||||
Register entry_offset = tmp2;
|
||||
// entry_offset = array len in number of cells
|
||||
ld_ptr(mdp, ArrayData::array_len_offset(), entry_offset);
|
||||
|
||||
int off_base = in_bytes(ParametersTypeData::stack_slot_offset(0));
|
||||
assert(off_base % DataLayout::cell_size == 0, "should be a number of cells");
|
||||
|
||||
// entry_offset (number of cells) = array len - size of 1 entry + offset of the stack slot field
|
||||
sub(entry_offset, TypeStackSlotEntries::per_arg_count() - (off_base / DataLayout::cell_size), entry_offset);
|
||||
// entry_offset in bytes
|
||||
sll(entry_offset, exact_log2(DataLayout::cell_size), entry_offset);
|
||||
|
||||
Label loop;
|
||||
bind(loop);
|
||||
|
||||
// load offset on the stack from the slot for this parameter
|
||||
ld_ptr(mdp, entry_offset, tmp3);
|
||||
sll(tmp3,Interpreter::logStackElementSize, tmp3);
|
||||
neg(tmp3);
|
||||
// read the parameter from the local area
|
||||
ld_ptr(Llocals, tmp3, tmp3);
|
||||
|
||||
// make entry_offset now point to the type field for this parameter
|
||||
int type_base = in_bytes(ParametersTypeData::type_offset(0));
|
||||
assert(type_base > off_base, "unexpected");
|
||||
add(entry_offset, type_base - off_base, entry_offset);
|
||||
|
||||
// profile the parameter
|
||||
Address arg_type(mdp, entry_offset);
|
||||
profile_obj_type(tmp3, arg_type, tmp4);
|
||||
|
||||
// go to next parameter
|
||||
sub(entry_offset, TypeStackSlotEntries::per_arg_count() * DataLayout::cell_size + (type_base - off_base), entry_offset);
|
||||
cmp_and_br_short(entry_offset, off_base, greaterEqual, pt, loop);
|
||||
|
||||
bind(profile_continue);
|
||||
}
|
||||
}
|
||||
|
||||
// add a InterpMonitorElem to stack (see frame_sparc.hpp)
|
||||
|
||||
void InterpreterMacroAssembler::add_monitor_to_stack( bool stack_is_empty,
|
||||
|
|
|
@ -323,6 +323,11 @@ class InterpreterMacroAssembler: public MacroAssembler {
|
|||
Register scratch2,
|
||||
Register scratch3);
|
||||
|
||||
void profile_obj_type(Register obj, const Address& mdo_addr, Register tmp);
|
||||
void profile_arguments_type(Register callee, Register tmp1, Register tmp2, bool is_virtual);
|
||||
void profile_return_type(Register ret, Register tmp1, Register tmp2);
|
||||
void profile_parameters_type(Register tmp1, Register tmp2, Register tmp3, Register tmp4);
|
||||
|
||||
// Debugging
|
||||
void interp_verify_oop(Register reg, TosState state, const char * file, int line); // only if +VerifyOops && state == atos
|
||||
void verify_oop_or_return_address(Register reg, Register rtmp); // for astore
|
||||
|
|
|
@ -1848,6 +1848,12 @@ const bool Matcher::misaligned_vectors_ok() {
|
|||
return false;
|
||||
}
|
||||
|
||||
// Current (2013) SPARC platforms need to read original key
|
||||
// to construct decryption expanded key
|
||||
const bool Matcher::pass_original_key_for_aes() {
|
||||
return true;
|
||||
}
|
||||
|
||||
// USII supports fxtof through the whole range of numbers, USIII doesn't
|
||||
const bool Matcher::convL2FSupported(void) {
|
||||
return VM_Version::has_fast_fxtof();
|
||||
|
|
|
@ -3304,6 +3304,775 @@ class StubGenerator: public StubCodeGenerator {
|
|||
}
|
||||
}
|
||||
|
||||
address generate_aescrypt_encryptBlock() {
|
||||
__ align(CodeEntryAlignment);
|
||||
StubCodeMark mark(this, "StubRoutines", "aesencryptBlock");
|
||||
Label L_doLast128bit, L_storeOutput;
|
||||
address start = __ pc();
|
||||
Register from = O0; // source byte array
|
||||
Register to = O1; // destination byte array
|
||||
Register key = O2; // expanded key array
|
||||
const Register keylen = O4; //reg for storing expanded key array length
|
||||
|
||||
// read expanded key length
|
||||
__ ldsw(Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT)), keylen, 0);
|
||||
|
||||
// load input into F54-F56; F30-F31 used as temp
|
||||
__ ldf(FloatRegisterImpl::S, from, 0, F30);
|
||||
__ ldf(FloatRegisterImpl::S, from, 4, F31);
|
||||
__ fmov(FloatRegisterImpl::D, F30, F54);
|
||||
__ ldf(FloatRegisterImpl::S, from, 8, F30);
|
||||
__ ldf(FloatRegisterImpl::S, from, 12, F31);
|
||||
__ fmov(FloatRegisterImpl::D, F30, F56);
|
||||
|
||||
// load expanded key
|
||||
for ( int i = 0; i <= 38; i += 2 ) {
|
||||
__ ldf(FloatRegisterImpl::D, key, i*4, as_FloatRegister(i));
|
||||
}
|
||||
|
||||
// perform cipher transformation
|
||||
__ fxor(FloatRegisterImpl::D, F0, F54, F54);
|
||||
__ fxor(FloatRegisterImpl::D, F2, F56, F56);
|
||||
// rounds 1 through 8
|
||||
for ( int i = 4; i <= 28; i += 8 ) {
|
||||
__ aes_eround01(as_FloatRegister(i), F54, F56, F58);
|
||||
__ aes_eround23(as_FloatRegister(i+2), F54, F56, F60);
|
||||
__ aes_eround01(as_FloatRegister(i+4), F58, F60, F54);
|
||||
__ aes_eround23(as_FloatRegister(i+6), F58, F60, F56);
|
||||
}
|
||||
__ aes_eround01(F36, F54, F56, F58); //round 9
|
||||
__ aes_eround23(F38, F54, F56, F60);
|
||||
|
||||
// 128-bit original key size
|
||||
__ cmp_and_brx_short(keylen, 44, Assembler::equal, Assembler::pt, L_doLast128bit);
|
||||
|
||||
for ( int i = 40; i <= 50; i += 2 ) {
|
||||
__ ldf(FloatRegisterImpl::D, key, i*4, as_FloatRegister(i) );
|
||||
}
|
||||
__ aes_eround01(F40, F58, F60, F54); //round 10
|
||||
__ aes_eround23(F42, F58, F60, F56);
|
||||
__ aes_eround01(F44, F54, F56, F58); //round 11
|
||||
__ aes_eround23(F46, F54, F56, F60);
|
||||
|
||||
// 192-bit original key size
|
||||
__ cmp_and_brx_short(keylen, 52, Assembler::equal, Assembler::pt, L_storeOutput);
|
||||
|
||||
__ ldf(FloatRegisterImpl::D, key, 208, F52);
|
||||
__ aes_eround01(F48, F58, F60, F54); //round 12
|
||||
__ aes_eround23(F50, F58, F60, F56);
|
||||
__ ldf(FloatRegisterImpl::D, key, 216, F46);
|
||||
__ ldf(FloatRegisterImpl::D, key, 224, F48);
|
||||
__ ldf(FloatRegisterImpl::D, key, 232, F50);
|
||||
__ aes_eround01(F52, F54, F56, F58); //round 13
|
||||
__ aes_eround23(F46, F54, F56, F60);
|
||||
__ br(Assembler::always, false, Assembler::pt, L_storeOutput);
|
||||
__ delayed()->nop();
|
||||
|
||||
__ BIND(L_doLast128bit);
|
||||
__ ldf(FloatRegisterImpl::D, key, 160, F48);
|
||||
__ ldf(FloatRegisterImpl::D, key, 168, F50);
|
||||
|
||||
__ BIND(L_storeOutput);
|
||||
// perform last round of encryption common for all key sizes
|
||||
__ aes_eround01_l(F48, F58, F60, F54); //last round
|
||||
__ aes_eround23_l(F50, F58, F60, F56);
|
||||
|
||||
// store output into the destination array, F0-F1 used as temp
|
||||
__ fmov(FloatRegisterImpl::D, F54, F0);
|
||||
__ stf(FloatRegisterImpl::S, F0, to, 0);
|
||||
__ stf(FloatRegisterImpl::S, F1, to, 4);
|
||||
__ fmov(FloatRegisterImpl::D, F56, F0);
|
||||
__ stf(FloatRegisterImpl::S, F0, to, 8);
|
||||
__ retl();
|
||||
__ delayed()->stf(FloatRegisterImpl::S, F1, to, 12);
|
||||
|
||||
return start;
|
||||
}
|
||||
|
||||
address generate_aescrypt_decryptBlock() {
  // Emits the "aesdecryptBlock" stub, which decrypts one 16-byte AES block.
  //
  // Incoming arguments (leaf routine, caller's register window):
  //   O0 - source byte array
  //   O1 - destination byte array
  //   O2 - expanded key array (only its length is read here)
  //   O3 - original key array
  //
  // The key schedule is rebuilt from the original key with the SPARC
  // aes_kexpand* instructions because the SunJCE decryption-key expansion is
  // not compatible with the SPARC crypto instructions.
  //
  // Returns the entry address of the generated stub.
  __ align(CodeEntryAlignment);
  StubCodeMark mark(this, "StubRoutines", "aesdecryptBlock");
  address start = __ pc();
  Label L_expand192bit, L_expand256bit, L_common_transform;

  Register from = O0;          // source byte array
  Register to = O1;            // destination byte array
  Register key = O2;           // expanded key array
  Register original_key = O3;  // original key array only required during decryption
  const Register keylen = O4;  // reg for storing expanded key array length

  // The array length field sits at a fixed (negative) displacement from the
  // first int element of the expanded key array.
  const int keylen_disp = arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT);
  __ ldsw(Address(key, keylen_disp), keylen, 0);

  // Load the 16 input bytes into F52 and F54, one doubleword at a time,
  // using F30/F31 as a single-precision staging pair.
  for (int dword = 0; dword < 2; dword++) {
    __ ldf(FloatRegisterImpl::S, from, 8 * dword, F30);
    __ ldf(FloatRegisterImpl::S, from, 8 * dword + 4, F31);
    __ fmov(FloatRegisterImpl::D, F30, as_FloatRegister(52 + 2 * dword));
  }

  // First four words of the original key go to F0..F3.
  for (int word = 0; word < 4; word++) {
    __ ldf(FloatRegisterImpl::S, original_key, 4 * word, as_FloatRegister(word));
  }

  // Dispatch on the expanded key length: 60 ints -> 256-bit key,
  // 52 ints -> 192-bit key, otherwise fall through to the 128-bit case.
  __ cmp_and_brx_short(keylen, 60, Assembler::equal, Assembler::pn, L_expand256bit);
  __ cmp_and_brx_short(keylen, 52, Assembler::equal, Assembler::pn, L_expand192bit);

  // ---- 128-bit original key ------------------------------------------------
  // Ten expansion steps, each producing the next 16 bytes of round key.
  for (int step = 0; step < 10; step++) {
    const int base = 4 * step;
    __ aes_kexpand1(as_FloatRegister(base), as_FloatRegister(base + 2), step, as_FloatRegister(base + 4));
    __ aes_kexpand2(as_FloatRegister(base + 2), as_FloatRegister(base + 4), as_FloatRegister(base + 6));
  }

  // Initial add-round-key with the last round key (F40:F42).
  __ fxor(FloatRegisterImpl::D, F42, F54, F54);
  __ fxor(FloatRegisterImpl::D, F40, F52, F52);
  __ br(Assembler::always, false, Assembler::pt, L_common_transform);
  __ delayed()->nop();

  // ---- 192-bit original key ------------------------------------------------
  __ BIND(L_expand192bit);

  // Remaining two words of the 192-bit key.
  __ ldf(FloatRegisterImpl::S, original_key, 16, F4);
  __ ldf(FloatRegisterImpl::S, original_key, 20, F5);

  // Seven full expansion steps of 24 bytes each, then one partial step.
  for (int step = 0; step < 7; step++) {
    const int base = 6 * step;
    __ aes_kexpand1(as_FloatRegister(base), as_FloatRegister(base + 4), step, as_FloatRegister(base + 6));
    __ aes_kexpand2(as_FloatRegister(base + 2), as_FloatRegister(base + 6), as_FloatRegister(base + 8));
    __ aes_kexpand2(as_FloatRegister(base + 4), as_FloatRegister(base + 8), as_FloatRegister(base + 10));
  }
  __ aes_kexpand1(F42, F46, 7, F48);
  __ aes_kexpand2(F44, F48, F50);

  // Initial add-round-key with F48:F50, then the two extra round pairs that
  // the 192-bit key needs before joining the common tail.
  __ fxor(FloatRegisterImpl::D, F50, F54, F54);
  __ fxor(FloatRegisterImpl::D, F48, F52, F52);
  __ aes_dround23(F46, F52, F54, F58);
  __ aes_dround01(F44, F52, F54, F56);
  __ aes_dround23(F42, F56, F58, F54);
  __ aes_dround01(F40, F56, F58, F52);
  __ br(Assembler::always, false, Assembler::pt, L_common_transform);
  __ delayed()->nop();

  // ---- 256-bit original key ------------------------------------------------
  __ BIND(L_expand256bit);

  // Remaining four words of the 256-bit key go to F4..F7.
  for (int word = 4; word < 8; word++) {
    __ ldf(FloatRegisterImpl::S, original_key, 4 * word, as_FloatRegister(word));
  }

  // Six full expansion steps of 32 bytes each, then one partial step.
  // This overwrites F52/F54, so the input block is reloaded further down.
  for (int step = 0; step < 6; step++) {
    const int base = 8 * step;
    __ aes_kexpand1(as_FloatRegister(base), as_FloatRegister(base + 6), step, as_FloatRegister(base + 8));
    __ aes_kexpand2(as_FloatRegister(base + 2), as_FloatRegister(base + 8), as_FloatRegister(base + 10));
    __ aes_kexpand0(as_FloatRegister(base + 4), as_FloatRegister(base + 10), as_FloatRegister(base + 12));
    __ aes_kexpand2(as_FloatRegister(base + 6), as_FloatRegister(base + 12), as_FloatRegister(base + 14));
  }
  __ aes_kexpand1(F48, F54, 6, F56);
  __ aes_kexpand2(F50, F56, F58);

  // Park the last four key doublewords (F58, F56, F54, F52) in F0, F2, F4, F6
  // so that F52..F58 are free for the data block and round temporaries.
  for (int dst = 0; dst < 8; dst += 2) {
    __ fmov(FloatRegisterImpl::D, as_FloatRegister(58 - dst), as_FloatRegister(dst));
  }

  // Reload the input block into F52:F54.
  // NOTE(review): these are doubleword loads, unlike the word-sized loads at
  // the top of the stub; presumably this relies on 'from' being 8-byte
  // aligned -- confirm against the callers.
  __ ldf(FloatRegisterImpl::D, from, 0, F52);
  __ ldf(FloatRegisterImpl::D, from, 8, F54);

  // Initial add-round-key, then the four extra round pairs that the 256-bit
  // key needs before joining the common tail.
  __ fxor(FloatRegisterImpl::D, F0, F54, F54);
  __ fxor(FloatRegisterImpl::D, F2, F52, F52);
  __ aes_dround23(F4, F52, F54, F58);
  __ aes_dround01(F6, F52, F54, F56);
  __ aes_dround23(F50, F56, F58, F54);
  __ aes_dround01(F48, F56, F58, F52);
  __ aes_dround23(F46, F52, F54, F58);
  __ aes_dround01(F44, F52, F54, F56);
  __ aes_dround23(F42, F56, F58, F54);
  __ aes_dround01(F40, F56, F58, F52);

  // F0..F7 were used as scratch above; restore the original key words there
  // because the common tail reads them as round keys.
  for (int word = 0; word < 8; word++) {
    __ ldf(FloatRegisterImpl::S, original_key, 4 * word, as_FloatRegister(word));
  }

  // ---- inverse cipher rounds shared by all key sizes -------------------------
  __ BIND(L_common_transform);
  for (int top = 38; top >= 6; top -= 8) {
    const bool final_pair = (top == 6);
    __ aes_dround23(as_FloatRegister(top), F52, F54, F58);
    __ aes_dround01(as_FloatRegister(top - 2), F52, F54, F56);
    if (final_pair) {
      // The very last round uses the "_l" (last round) instruction forms.
      __ aes_dround23_l(as_FloatRegister(top - 4), F56, F58, F54);
      __ aes_dround01_l(as_FloatRegister(top - 6), F56, F58, F52);
    } else {
      __ aes_dround23(as_FloatRegister(top - 4), F56, F58, F54);
      __ aes_dround01(as_FloatRegister(top - 6), F56, F58, F52);
    }
  }

  // Store the result word by word, staging each doubleword through F0:F1.
  // The final store sits in the delay slot of the return.
  __ fmov(FloatRegisterImpl::D, F52, F0);
  __ stf(FloatRegisterImpl::S, F0, to, 0);
  __ stf(FloatRegisterImpl::S, F1, to, 4);
  __ fmov(FloatRegisterImpl::D, F54, F0);
  __ stf(FloatRegisterImpl::S, F0, to, 8);
  __ retl();
  __ delayed()->stf(FloatRegisterImpl::S, F1, to, 12);

  return start;
}
|
||||
|
||||
address generate_cipherBlockChaining_encryptAESCrypt() {
  // Emits the "cipherBlockChaining_encryptAESCrypt" stub: AES encryption in
  // CBC mode over a sequence of 16-byte blocks.
  //
  // Incoming arguments (leaf routine, caller's register window):
  //   O0 - source byte array
  //   O1 - destination byte array
  //   O2 - expanded key array
  //   O3 - init vector (updated in place with the last cipher block)
  //   O4 - cipher length in bytes
  //
  // The stub returns the original cipher length in O0.
  //
  // Returns the entry address of the generated stub.
  __ align(CodeEntryAlignment);
  StubCodeMark mark(this, "StubRoutines", "cipherBlockChaining_encryptAESCrypt");
  Label L_cbcenc128, L_cbcenc192, L_cbcenc256;
  address start = __ pc();
  Register from = O0;           // source byte array
  Register to = O1;             // destination byte array
  Register key = O2;            // expanded key array
  Register rvec = O3;           // init vector
  const Register len_reg = O4;  // cipher length
  const Register keylen = O5;   // reg for storing expanded key array length
  // This stub never executes a 'save', so the %l registers visible here belong
  // to the caller and must not be written. Keep the cipher length in a global
  // scratch register instead.
  const Register saved_len = G5; // cipher length to return in the end

  // save cipher len to return in the end
  __ mov(len_reg, saved_len);

  // read expanded key length
  __ ldsw(Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT)), keylen, 0);

  // load init vector
  __ ldf(FloatRegisterImpl::D, rvec, 0, F60);
  __ ldf(FloatRegisterImpl::D, rvec, 8, F62);

  // first 16 bytes of the expanded key are kept in integer registers; they
  // are xor-ed into every input block before the rounds
  __ ldx(key,0,G1);
  __ ldx(key,8,G2);

  // start loading expanded key: key bytes 16..175 go to F0..F38
  for ( int i = 0, j = 16; i <= 38; i += 2, j += 8 ) {
    __ ldf(FloatRegisterImpl::D, key, j, as_FloatRegister(i));
  }

  // 128-bit original key size
  __ cmp_and_brx_short(keylen, 44, Assembler::equal, Assembler::pt, L_cbcenc128);

  // key bytes 176..207 go to F40..F46
  for ( int i = 40, j = 176; i <= 46; i += 2, j += 8 ) {
    __ ldf(FloatRegisterImpl::D, key, j, as_FloatRegister(i));
  }

  // 192-bit original key size
  __ cmp_and_brx_short(keylen, 52, Assembler::equal, Assembler::pt, L_cbcenc192);

  // key bytes 208..239 go to F48..F54
  for ( int i = 48, j = 208; i <= 54; i += 2, j += 8 ) {
    __ ldf(FloatRegisterImpl::D, key, j, as_FloatRegister(i));
  }

  // 256-bit original key size
  __ br(Assembler::always, false, Assembler::pt, L_cbcenc256);
  __ delayed()->nop();

  // ---- per-block loop, 128-bit key -------------------------------------------
  __ align(OptoLoopAlignment);
  __ BIND(L_cbcenc128);
  // input block ^ first round key, then ^ running chaining value (F60:F62)
  __ ldx(from,0,G3);
  __ ldx(from,8,G4);
  __ xor3(G1,G3,G3);
  __ xor3(G2,G4,G4);
  __ movxtod(G3,F56);
  __ movxtod(G4,F58);
  __ fxor(FloatRegisterImpl::D, F60, F56, F60);
  __ fxor(FloatRegisterImpl::D, F62, F58, F62);

  // TEN_EROUNDS
  for ( int i = 0; i <= 32; i += 8 ) {
    __ aes_eround01(as_FloatRegister(i), F60, F62, F56);
    __ aes_eround23(as_FloatRegister(i+2), F60, F62, F58);
    if (i != 32 ) {
      __ aes_eround01(as_FloatRegister(i+4), F56, F58, F60);
      __ aes_eround23(as_FloatRegister(i+6), F56, F58, F62);
    } else {
      // last round uses the "_l" instruction forms
      __ aes_eround01_l(as_FloatRegister(i+4), F56, F58, F60);
      __ aes_eround23_l(as_FloatRegister(i+6), F56, F58, F62);
    }
  }

  // F60:F62 is both the cipher block to store and the next chaining value
  __ stf(FloatRegisterImpl::D, F60, to, 0);
  __ stf(FloatRegisterImpl::D, F62, to, 8);
  __ add(from, 16, from);
  __ add(to, 16, to);
  __ subcc(len_reg, 16, len_reg);
  __ br(Assembler::notEqual, false, Assembler::pt, L_cbcenc128);
  __ delayed()->nop();
  // write the last cipher block back as the new init vector and return len
  __ stf(FloatRegisterImpl::D, F60, rvec, 0);
  __ stf(FloatRegisterImpl::D, F62, rvec, 8);
  __ retl();
  __ delayed()->mov(saved_len, O0);

  // ---- per-block loop, 192-bit key -------------------------------------------
  __ align(OptoLoopAlignment);
  __ BIND(L_cbcenc192);
  __ ldx(from,0,G3);
  __ ldx(from,8,G4);
  __ xor3(G1,G3,G3);
  __ xor3(G2,G4,G4);
  __ movxtod(G3,F56);
  __ movxtod(G4,F58);
  __ fxor(FloatRegisterImpl::D, F60, F56, F60);
  __ fxor(FloatRegisterImpl::D, F62, F58, F62);

  // TWELVE_EROUNDS
  for ( int i = 0; i <= 40; i += 8 ) {
    __ aes_eround01(as_FloatRegister(i), F60, F62, F56);
    __ aes_eround23(as_FloatRegister(i+2), F60, F62, F58);
    if (i != 40 ) {
      __ aes_eround01(as_FloatRegister(i+4), F56, F58, F60);
      __ aes_eround23(as_FloatRegister(i+6), F56, F58, F62);
    } else {
      // last round uses the "_l" instruction forms
      __ aes_eround01_l(as_FloatRegister(i+4), F56, F58, F60);
      __ aes_eround23_l(as_FloatRegister(i+6), F56, F58, F62);
    }
  }

  __ stf(FloatRegisterImpl::D, F60, to, 0);
  __ stf(FloatRegisterImpl::D, F62, to, 8);
  __ add(from, 16, from);
  __ subcc(len_reg, 16, len_reg);
  __ add(to, 16, to);
  __ br(Assembler::notEqual, false, Assembler::pt, L_cbcenc192);
  __ delayed()->nop();
  // write the last cipher block back as the new init vector and return len
  __ stf(FloatRegisterImpl::D, F60, rvec, 0);
  __ stf(FloatRegisterImpl::D, F62, rvec, 8);
  __ retl();
  __ delayed()->mov(saved_len, O0);

  // ---- per-block loop, 256-bit key -------------------------------------------
  __ align(OptoLoopAlignment);
  __ BIND(L_cbcenc256);
  __ ldx(from,0,G3);
  __ ldx(from,8,G4);
  __ xor3(G1,G3,G3);
  __ xor3(G2,G4,G4);
  __ movxtod(G3,F56);
  __ movxtod(G4,F58);
  __ fxor(FloatRegisterImpl::D, F60, F56, F60);
  __ fxor(FloatRegisterImpl::D, F62, F58, F62);

  // FOURTEEN_EROUNDS
  for ( int i = 0; i <= 48; i += 8 ) {
    __ aes_eround01(as_FloatRegister(i), F60, F62, F56);
    __ aes_eround23(as_FloatRegister(i+2), F60, F62, F58);
    if (i != 48 ) {
      __ aes_eround01(as_FloatRegister(i+4), F56, F58, F60);
      __ aes_eround23(as_FloatRegister(i+6), F56, F58, F62);
    } else {
      // last round uses the "_l" instruction forms
      __ aes_eround01_l(as_FloatRegister(i+4), F56, F58, F60);
      __ aes_eround23_l(as_FloatRegister(i+6), F56, F58, F62);
    }
  }

  __ stf(FloatRegisterImpl::D, F60, to, 0);
  __ stf(FloatRegisterImpl::D, F62, to, 8);
  __ add(from, 16, from);
  __ subcc(len_reg, 16, len_reg);
  __ add(to, 16, to);
  __ br(Assembler::notEqual, false, Assembler::pt, L_cbcenc256);
  __ delayed()->nop();
  // write the last cipher block back as the new init vector and return len
  __ stf(FloatRegisterImpl::D, F60, rvec, 0);
  __ stf(FloatRegisterImpl::D, F62, rvec, 8);
  __ retl();
  __ delayed()->mov(saved_len, O0);

  return start;
}
|
||||
|
||||
address generate_cipherBlockChaining_decryptAESCrypt_Parallel() {
  // Emits the "cipherBlockChaining_decryptAESCrypt" stub: AES decryption in
  // CBC mode. After an optional leading single block (taken when bit 4 of the
  // length is set), the stub decrypts two 16-byte blocks per loop iteration.
  //
  // Incoming arguments, as seen after the stub's own 'save':
  //   I0 - source byte array
  //   I1 - destination byte array
  //   I2 - expanded key array (only its length is read here)
  //   I3 - init vector (updated in place with the last input cipher block)
  //   I4 - cipher length in bytes
  //   I5 - original key array
  //
  // Register roles inside the stub:
  //   L0:L1 - chaining value (previous cipher block)
  //   L2:L3 - last two doublewords of the expanded key
  //   L4:L5 - most recently loaded cipher block
  //   L6    - expanded key array length
  //   L7    - original cipher length, returned to the caller in O0
  //
  // Returns the entry address of the generated stub.
  __ align(CodeEntryAlignment);
  StubCodeMark mark(this, "StubRoutines", "cipherBlockChaining_decryptAESCrypt");
  Label L_cbcdec_end, L_expand192bit, L_expand256bit, L_dec_first_block_start;
  Label L_dec_first_block128, L_dec_first_block192, L_dec_next2_blocks128, L_dec_next2_blocks192, L_dec_next2_blocks256;
  address start = __ pc();
  Register from = I0;                 // source byte array
  Register to = I1;                   // destination byte array
  Register key = I2;                  // expanded key array
  Register rvec = I3;                 // init vector
  const Register len_reg = I4;        // cipher length
  const Register original_key = I5;   // original key array only required during decryption
  const Register keylen = L6;         // reg for storing expanded key array length
  const Register saved_len = L7;      // cipher length to return in the end

  __ save_frame(0); //args are read from I* registers since we save the frame in the beginning

  // Save cipher len to return in the end. This is done after save_frame so
  // that the write lands in this stub's own register window; writing an %l
  // register before the 'save' would overwrite the caller's local.
  __ mov(len_reg, saved_len);

  // load original key from SunJCE expanded decryption key
  for ( int i = 0; i <= 3; i++ ) {
    __ ldf(FloatRegisterImpl::S, original_key, i*4, as_FloatRegister(i));
  }

  // load initial vector
  __ ldx(rvec,0,L0);
  __ ldx(rvec,8,L1);

  // read expanded key array length
  __ ldsw(Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT)), keylen, 0);

  // 256-bit original key size
  __ cmp_and_brx_short(keylen, 60, Assembler::equal, Assembler::pn, L_expand256bit);

  // 192-bit original key size
  __ cmp_and_brx_short(keylen, 52, Assembler::equal, Assembler::pn, L_expand192bit);

  // 128-bit original key size
  // perform key expansion since SunJCE decryption-key expansion is not compatible with SPARC crypto instructions
  for ( int i = 0; i <= 36; i += 4 ) {
    __ aes_kexpand1(as_FloatRegister(i), as_FloatRegister(i+2), i/4, as_FloatRegister(i+4));
    __ aes_kexpand2(as_FloatRegister(i+2), as_FloatRegister(i+4), as_FloatRegister(i+6));
  }

  // load expanded key[last-1] and key[last] elements
  __ movdtox(F40,L2);
  __ movdtox(F42,L3);

  // if (len & 16) == 0 there is an even number of blocks: go straight to the
  // two-blocks-per-iteration loop, otherwise decrypt one block first
  __ and3(len_reg, 16, L4);
  __ br_null(L4, false, Assembler::pt, L_dec_next2_blocks128);
  __ delayed()->nop();

  __ br(Assembler::always, false, Assembler::pt, L_dec_first_block_start);
  __ delayed()->nop();

  __ BIND(L_expand192bit);
  // load rest of the 192-bit key
  __ ldf(FloatRegisterImpl::S, original_key, 16, F4);
  __ ldf(FloatRegisterImpl::S, original_key, 20, F5);

  // perform key expansion since SunJCE decryption-key expansion is not compatible with SPARC crypto instructions
  for ( int i = 0; i <= 36; i += 6 ) {
    __ aes_kexpand1(as_FloatRegister(i), as_FloatRegister(i+4), i/6, as_FloatRegister(i+6));
    __ aes_kexpand2(as_FloatRegister(i+2), as_FloatRegister(i+6), as_FloatRegister(i+8));
    __ aes_kexpand2(as_FloatRegister(i+4), as_FloatRegister(i+8), as_FloatRegister(i+10));
  }
  __ aes_kexpand1(F42, F46, 7, F48);
  __ aes_kexpand2(F44, F48, F50);

  // load expanded key[last-1] and key[last] elements
  __ movdtox(F48,L2);
  __ movdtox(F50,L3);

  __ and3(len_reg, 16, L4);
  __ br_null(L4, false, Assembler::pt, L_dec_next2_blocks192);
  __ delayed()->nop();

  __ br(Assembler::always, false, Assembler::pt, L_dec_first_block_start);
  __ delayed()->nop();

  __ BIND(L_expand256bit);
  // load rest of the 256-bit key
  for ( int i = 4; i <= 7; i++ ) {
    __ ldf(FloatRegisterImpl::S, original_key, i*4, as_FloatRegister(i));
  }

  // perform key expansion since SunJCE decryption-key expansion is not compatible with SPARC crypto instructions
  for ( int i = 0; i <= 40; i += 8 ) {
    __ aes_kexpand1(as_FloatRegister(i), as_FloatRegister(i+6), i/8, as_FloatRegister(i+8));
    __ aes_kexpand2(as_FloatRegister(i+2), as_FloatRegister(i+8), as_FloatRegister(i+10));
    __ aes_kexpand0(as_FloatRegister(i+4), as_FloatRegister(i+10), as_FloatRegister(i+12));
    __ aes_kexpand2(as_FloatRegister(i+6), as_FloatRegister(i+12), as_FloatRegister(i+14));
  }
  __ aes_kexpand1(F48, F54, 6, F56);
  __ aes_kexpand2(F50, F56, F58);

  // load expanded key[last-1] and key[last] elements
  __ movdtox(F56,L2);
  __ movdtox(F58,L3);

  __ and3(len_reg, 16, L4);
  __ br_null(L4, false, Assembler::pt, L_dec_next2_blocks256);
  __ delayed()->nop();

  // ---- leading single block (odd number of blocks) ---------------------------
  __ BIND(L_dec_first_block_start);
  // cipher block is kept in L4:L5 (it becomes the next chaining value) and,
  // xor-ed with the last round key, placed in F60:F62
  __ ldx(from,0,L4);
  __ ldx(from,8,L5);
  __ xor3(L2,L4,G1);
  __ movxtod(G1,F60);
  __ xor3(L3,L5,G1);
  __ movxtod(G1,F62);

  // 128-bit original key size
  __ cmp_and_brx_short(keylen, 44, Assembler::equal, Assembler::pn, L_dec_first_block128);

  // 192-bit original key size
  __ cmp_and_brx_short(keylen, 52, Assembler::equal, Assembler::pn, L_dec_first_block192);

  // extra rounds needed only for the 256-bit key; falls through into the
  // 192-bit and then the 128-bit rounds
  __ aes_dround23(F54, F60, F62, F58);
  __ aes_dround01(F52, F60, F62, F56);
  __ aes_dround23(F50, F56, F58, F62);
  __ aes_dround01(F48, F56, F58, F60);

  __ BIND(L_dec_first_block192);
  __ aes_dround23(F46, F60, F62, F58);
  __ aes_dround01(F44, F60, F62, F56);
  __ aes_dround23(F42, F56, F58, F62);
  __ aes_dround01(F40, F56, F58, F60);

  __ BIND(L_dec_first_block128);
  for ( int i = 38; i >= 6; i -= 8 ) {
    __ aes_dround23(as_FloatRegister(i), F60, F62, F58);
    __ aes_dround01(as_FloatRegister(i-2), F60, F62, F56);
    if ( i != 6) {
      __ aes_dround23(as_FloatRegister(i-4), F56, F58, F62);
      __ aes_dround01(as_FloatRegister(i-6), F56, F58, F60);
    } else {
      // last round uses the "_l" instruction forms
      __ aes_dround23_l(as_FloatRegister(i-4), F56, F58, F62);
      __ aes_dround01_l(as_FloatRegister(i-6), F56, F58, F60);
    }
  }

  // xor with the chaining value, then make this cipher block the new
  // chaining value
  __ movxtod(L0,F56);
  __ movxtod(L1,F58);
  __ mov(L4,L0);
  __ mov(L5,L1);
  __ fxor(FloatRegisterImpl::D, F56, F60, F60);
  __ fxor(FloatRegisterImpl::D, F58, F62, F62);

  __ stf(FloatRegisterImpl::D, F60, to, 0);
  __ stf(FloatRegisterImpl::D, F62, to, 8);

  __ add(from, 16, from);
  __ add(to, 16, to);
  __ subcc(len_reg, 16, len_reg);
  __ br(Assembler::equal, false, Assembler::pt, L_cbcdec_end);
  __ delayed()->nop();

  // 256-bit original key size
  __ cmp_and_brx_short(keylen, 60, Assembler::equal, Assembler::pn, L_dec_next2_blocks256);

  // 192-bit original key size
  __ cmp_and_brx_short(keylen, 52, Assembler::equal, Assembler::pn, L_dec_next2_blocks192);

  // ---- two blocks per iteration, 128-bit key ---------------------------------
  __ align(OptoLoopAlignment);
  __ BIND(L_dec_next2_blocks128);
  __ nop();

  // F40:F42 used for first 16-bytes
  __ ldx(from,0,G4);
  __ ldx(from,8,G5);
  __ xor3(L2,G4,G1);
  __ movxtod(G1,F40);
  __ xor3(L3,G5,G1);
  __ movxtod(G1,F42);

  // F60:F62 used for next 16-bytes
  __ ldx(from,16,L4);
  __ ldx(from,24,L5);
  __ xor3(L2,L4,G1);
  __ movxtod(G1,F60);
  __ xor3(L3,L5,G1);
  __ movxtod(G1,F62);

  for ( int i = 38; i >= 6; i -= 8 ) {
    __ aes_dround23(as_FloatRegister(i), F40, F42, F44);
    __ aes_dround01(as_FloatRegister(i-2), F40, F42, F46);
    __ aes_dround23(as_FloatRegister(i), F60, F62, F58);
    __ aes_dround01(as_FloatRegister(i-2), F60, F62, F56);
    if (i != 6 ) {
      __ aes_dround23(as_FloatRegister(i-4), F46, F44, F42);
      __ aes_dround01(as_FloatRegister(i-6), F46, F44, F40);
      __ aes_dround23(as_FloatRegister(i-4), F56, F58, F62);
      __ aes_dround01(as_FloatRegister(i-6), F56, F58, F60);
    } else {
      // last round uses the "_l" instruction forms
      __ aes_dround23_l(as_FloatRegister(i-4), F46, F44, F42);
      __ aes_dround01_l(as_FloatRegister(i-6), F46, F44, F40);
      __ aes_dround23_l(as_FloatRegister(i-4), F56, F58, F62);
      __ aes_dround01_l(as_FloatRegister(i-6), F56, F58, F60);
    }
  }

  // first block: xor with the chaining value carried in L0:L1
  __ movxtod(L0,F46);
  __ movxtod(L1,F44);
  __ fxor(FloatRegisterImpl::D, F46, F40, F40);
  __ fxor(FloatRegisterImpl::D, F44, F42, F42);

  __ stf(FloatRegisterImpl::D, F40, to, 0);
  __ stf(FloatRegisterImpl::D, F42, to, 8);

  // second block: xor with the first cipher block (G4:G5); the second cipher
  // block (L4:L5) becomes the new chaining value
  __ movxtod(G4,F56);
  __ movxtod(G5,F58);
  __ mov(L4,L0);
  __ mov(L5,L1);
  __ fxor(FloatRegisterImpl::D, F56, F60, F60);
  __ fxor(FloatRegisterImpl::D, F58, F62, F62);

  __ stf(FloatRegisterImpl::D, F60, to, 16);
  __ stf(FloatRegisterImpl::D, F62, to, 24);

  __ add(from, 32, from);
  __ add(to, 32, to);
  __ subcc(len_reg, 32, len_reg);
  __ br(Assembler::notEqual, false, Assembler::pt, L_dec_next2_blocks128);
  __ delayed()->nop();
  __ br(Assembler::always, false, Assembler::pt, L_cbcdec_end);
  __ delayed()->nop();

  // ---- two blocks per iteration, 192-bit key ---------------------------------
  __ align(OptoLoopAlignment);
  __ BIND(L_dec_next2_blocks192);
  __ nop();

  // F48:F50 used for first 16-bytes
  __ ldx(from,0,G4);
  __ ldx(from,8,G5);
  __ xor3(L2,G4,G1);
  __ movxtod(G1,F48);
  __ xor3(L3,G5,G1);
  __ movxtod(G1,F50);

  // F60:F62 used for next 16-bytes
  __ ldx(from,16,L4);
  __ ldx(from,24,L5);
  __ xor3(L2,L4,G1);
  __ movxtod(G1,F60);
  __ xor3(L3,L5,G1);
  __ movxtod(G1,F62);

  for ( int i = 46; i >= 6; i -= 8 ) {
    __ aes_dround23(as_FloatRegister(i), F48, F50, F52);
    __ aes_dround01(as_FloatRegister(i-2), F48, F50, F54);
    __ aes_dround23(as_FloatRegister(i), F60, F62, F58);
    __ aes_dround01(as_FloatRegister(i-2), F60, F62, F56);
    if (i != 6 ) {
      __ aes_dround23(as_FloatRegister(i-4), F54, F52, F50);
      __ aes_dround01(as_FloatRegister(i-6), F54, F52, F48);
      __ aes_dround23(as_FloatRegister(i-4), F56, F58, F62);
      __ aes_dround01(as_FloatRegister(i-6), F56, F58, F60);
    } else {
      // last round uses the "_l" instruction forms
      __ aes_dround23_l(as_FloatRegister(i-4), F54, F52, F50);
      __ aes_dround01_l(as_FloatRegister(i-6), F54, F52, F48);
      __ aes_dround23_l(as_FloatRegister(i-4), F56, F58, F62);
      __ aes_dround01_l(as_FloatRegister(i-6), F56, F58, F60);
    }
  }

  // first block: xor with the chaining value carried in L0:L1
  __ movxtod(L0,F54);
  __ movxtod(L1,F52);
  __ fxor(FloatRegisterImpl::D, F54, F48, F48);
  __ fxor(FloatRegisterImpl::D, F52, F50, F50);

  __ stf(FloatRegisterImpl::D, F48, to, 0);
  __ stf(FloatRegisterImpl::D, F50, to, 8);

  // second block: xor with the first cipher block (G4:G5); the second cipher
  // block (L4:L5) becomes the new chaining value
  __ movxtod(G4,F56);
  __ movxtod(G5,F58);
  __ mov(L4,L0);
  __ mov(L5,L1);
  __ fxor(FloatRegisterImpl::D, F56, F60, F60);
  __ fxor(FloatRegisterImpl::D, F58, F62, F62);

  __ stf(FloatRegisterImpl::D, F60, to, 16);
  __ stf(FloatRegisterImpl::D, F62, to, 24);

  __ add(from, 32, from);
  __ add(to, 32, to);
  __ subcc(len_reg, 32, len_reg);
  __ br(Assembler::notEqual, false, Assembler::pt, L_dec_next2_blocks192);
  __ delayed()->nop();
  __ br(Assembler::always, false, Assembler::pt, L_cbcdec_end);
  __ delayed()->nop();

  // ---- two blocks per iteration, 256-bit key ---------------------------------
  __ align(OptoLoopAlignment);
  __ BIND(L_dec_next2_blocks256);
  __ nop();

  // F0:F2 used for first 16-bytes
  __ ldx(from,0,G4);
  __ ldx(from,8,G5);
  __ xor3(L2,G4,G1);
  __ movxtod(G1,F0);
  __ xor3(L3,G5,G1);
  __ movxtod(G1,F2);

  // F60:F62 used for next 16-bytes
  __ ldx(from,16,L4);
  __ ldx(from,24,L5);
  __ xor3(L2,L4,G1);
  __ movxtod(G1,F60);
  __ xor3(L3,L5,G1);
  __ movxtod(G1,F62);

  __ aes_dround23(F54, F0, F2, F4);
  __ aes_dround01(F52, F0, F2, F6);
  __ aes_dround23(F54, F60, F62, F58);
  __ aes_dround01(F52, F60, F62, F56);
  __ aes_dround23(F50, F6, F4, F2);
  __ aes_dround01(F48, F6, F4, F0);
  __ aes_dround23(F50, F56, F58, F62);
  __ aes_dround01(F48, F56, F58, F60);
  // save F48:F54 in temp registers
  // NOTE(review): G2 and G6 are used as plain scratch here; presumably the
  // callers tolerate these globals being overwritten -- confirm.
  __ movdtox(F54,G2);
  __ movdtox(F52,G3);
  __ movdtox(F50,G6);
  __ movdtox(F48,G1);
  for ( int i = 46; i >= 14; i -= 8 ) {
    __ aes_dround23(as_FloatRegister(i), F0, F2, F4);
    __ aes_dround01(as_FloatRegister(i-2), F0, F2, F6);
    __ aes_dround23(as_FloatRegister(i), F60, F62, F58);
    __ aes_dround01(as_FloatRegister(i-2), F60, F62, F56);
    __ aes_dround23(as_FloatRegister(i-4), F6, F4, F2);
    __ aes_dround01(as_FloatRegister(i-6), F6, F4, F0);
    __ aes_dround23(as_FloatRegister(i-4), F56, F58, F62);
    __ aes_dround01(as_FloatRegister(i-6), F56, F58, F60);
  }
  // init F48:F54 with F0:F6 values (original key); F0:F6 themselves hold
  // block data at this point, so the key words are re-read from memory
  __ ldf(FloatRegisterImpl::D, original_key, 0, F48);
  __ ldf(FloatRegisterImpl::D, original_key, 8, F50);
  __ ldf(FloatRegisterImpl::D, original_key, 16, F52);
  __ ldf(FloatRegisterImpl::D, original_key, 24, F54);
  __ aes_dround23(F54, F0, F2, F4);
  __ aes_dround01(F52, F0, F2, F6);
  __ aes_dround23(F54, F60, F62, F58);
  __ aes_dround01(F52, F60, F62, F56);
  __ aes_dround23_l(F50, F6, F4, F2);
  __ aes_dround01_l(F48, F6, F4, F0);
  __ aes_dround23_l(F50, F56, F58, F62);
  __ aes_dround01_l(F48, F56, F58, F60);
  // re-init F48:F54 with their original values
  __ movxtod(G2,F54);
  __ movxtod(G3,F52);
  __ movxtod(G6,F50);
  __ movxtod(G1,F48);

  // first block: xor with the chaining value carried in L0:L1
  __ movxtod(L0,F6);
  __ movxtod(L1,F4);
  __ fxor(FloatRegisterImpl::D, F6, F0, F0);
  __ fxor(FloatRegisterImpl::D, F4, F2, F2);

  __ stf(FloatRegisterImpl::D, F0, to, 0);
  __ stf(FloatRegisterImpl::D, F2, to, 8);

  // second block: xor with the first cipher block (G4:G5); the second cipher
  // block (L4:L5) becomes the new chaining value
  __ movxtod(G4,F56);
  __ movxtod(G5,F58);
  __ mov(L4,L0);
  __ mov(L5,L1);
  __ fxor(FloatRegisterImpl::D, F56, F60, F60);
  __ fxor(FloatRegisterImpl::D, F58, F62, F62);

  __ stf(FloatRegisterImpl::D, F60, to, 16);
  __ stf(FloatRegisterImpl::D, F62, to, 24);

  __ add(from, 32, from);
  __ add(to, 32, to);
  __ subcc(len_reg, 32, len_reg);
  __ br(Assembler::notEqual, false, Assembler::pt, L_dec_next2_blocks256);
  __ delayed()->nop();

  __ BIND(L_cbcdec_end);
  // write the last cipher block back as the new init vector
  __ stx(L0, rvec, 0);
  __ stx(L1, rvec, 8);
  // This window's I0 is the caller's O0 once 'restore' has executed, so the
  // return value is placed there before the window is popped.
  __ mov(saved_len, I0);
  __ restore();
  __ retl();
  __ delayed()->nop();

  return start;
}
|
||||
|
||||
void generate_initial() {
|
||||
// Generates all stubs and initializes the entry points
|
||||
|
||||
|
@ -3368,6 +4137,14 @@ class StubGenerator: public StubCodeGenerator {
|
|||
generate_safefetch("SafeFetchN", sizeof(intptr_t), &StubRoutines::_safefetchN_entry,
|
||||
&StubRoutines::_safefetchN_fault_pc,
|
||||
&StubRoutines::_safefetchN_continuation_pc);
|
||||
|
||||
// generate AES intrinsics code
|
||||
if (UseAESIntrinsics) {
|
||||
StubRoutines::_aescrypt_encryptBlock = generate_aescrypt_encryptBlock();
|
||||
StubRoutines::_aescrypt_decryptBlock = generate_aescrypt_decryptBlock();
|
||||
StubRoutines::_cipherBlockChaining_encryptAESCrypt = generate_cipherBlockChaining_encryptAESCrypt();
|
||||
StubRoutines::_cipherBlockChaining_decryptAESCrypt = generate_cipherBlockChaining_decryptAESCrypt_Parallel();
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
|
|
@ -156,6 +156,10 @@ address TemplateInterpreterGenerator::generate_StackOverflowError_handler() {
|
|||
address TemplateInterpreterGenerator::generate_return_entry_for(TosState state, int step, size_t index_size) {
|
||||
address entry = __ pc();
|
||||
|
||||
if (state == atos) {
|
||||
__ profile_return_type(O0, G3_scratch, G1_scratch);
|
||||
}
|
||||
|
||||
#if !defined(_LP64) && defined(COMPILER2)
|
||||
// All return values are where we want them, except for Longs. C2 returns
|
||||
// longs in G1 in the 32-bit build whereas the interpreter wants them in O0/O1.
|
||||
|
@ -1333,6 +1337,7 @@ address InterpreterGenerator::generate_normal_entry(bool synchronized) {
|
|||
__ movbool(true, G3_scratch);
|
||||
__ stbool(G3_scratch, do_not_unlock_if_synchronized);
|
||||
|
||||
__ profile_parameters_type(G1_scratch, G3_scratch, G4_scratch, Lscratch);
|
||||
// increment invocation counter and check for overflow
|
||||
//
|
||||
// Note: checking for negative value instead of overflow
|
||||
|
|
|
@ -2942,12 +2942,12 @@ void TemplateTable::prepare_invoke(int byte_no,
|
|||
|
||||
|
||||
void TemplateTable::generate_vtable_call(Register Rrecv, Register Rindex, Register Rret) {
|
||||
Register Rtemp = G4_scratch;
|
||||
Register Rcall = Rindex;
|
||||
assert_different_registers(Rcall, G5_method, Gargs, Rret);
|
||||
|
||||
// get target Method* & entry point
|
||||
__ lookup_virtual_method(Rrecv, Rindex, G5_method);
|
||||
__ profile_arguments_type(G5_method, Rcall, Gargs, true);
|
||||
__ call_from_interpreter(Rcall, Gargs, Rret);
|
||||
}
|
||||
|
||||
|
@ -3022,6 +3022,7 @@ void TemplateTable::invokevfinal_helper(Register Rscratch, Register Rret) {
|
|||
__ null_check(O0);
|
||||
|
||||
__ profile_final_call(O4);
|
||||
__ profile_arguments_type(G5_method, Rscratch, Gargs, true);
|
||||
|
||||
// get return address
|
||||
AddressLiteral table(Interpreter::invoke_return_entry_table());
|
||||
|
@ -3051,6 +3052,7 @@ void TemplateTable::invokespecial(int byte_no) {
|
|||
|
||||
// do the call
|
||||
__ profile_call(O4);
|
||||
__ profile_arguments_type(G5_method, Rscratch, Gargs, false);
|
||||
__ call_from_interpreter(Rscratch, Gargs, Rret);
|
||||
}
|
||||
|
||||
|
@ -3066,6 +3068,7 @@ void TemplateTable::invokestatic(int byte_no) {
|
|||
|
||||
// do the call
|
||||
__ profile_call(O4);
|
||||
__ profile_arguments_type(G5_method, Rscratch, Gargs, false);
|
||||
__ call_from_interpreter(Rscratch, Gargs, Rret);
|
||||
}
|
||||
|
||||
|
@ -3091,6 +3094,7 @@ void TemplateTable::invokeinterface_object_method(Register RKlass,
|
|||
// do the call - the index (f2) contains the Method*
|
||||
assert_different_registers(G5_method, Gargs, Rcall);
|
||||
__ mov(Rindex, G5_method);
|
||||
__ profile_arguments_type(G5_method, Rcall, Gargs, true);
|
||||
__ call_from_interpreter(Rcall, Gargs, Rret);
|
||||
__ bind(notFinal);
|
||||
|
||||
|
@ -3197,6 +3201,7 @@ void TemplateTable::invokeinterface(int byte_no) {
|
|||
Register Rcall = Rinterface;
|
||||
assert_different_registers(Rcall, G5_method, Gargs, Rret);
|
||||
|
||||
__ profile_arguments_type(G5_method, Rcall, Gargs, true);
|
||||
__ call_from_interpreter(Rcall, Gargs, Rret);
|
||||
}
|
||||
|
||||
|
@ -3226,6 +3231,7 @@ void TemplateTable::invokehandle(int byte_no) {
|
|||
// do the call
|
||||
__ verify_oop(G4_mtype);
|
||||
__ profile_final_call(O4); // FIXME: profile the LambdaForm also
|
||||
__ profile_arguments_type(G5_method, Rscratch, Gargs, true);
|
||||
__ call_from_interpreter(Rscratch, Gargs, Rret);
|
||||
}
|
||||
|
||||
|
@ -3262,6 +3268,7 @@ void TemplateTable::invokedynamic(int byte_no) {
|
|||
|
||||
// do the call
|
||||
__ verify_oop(G4_callsite);
|
||||
__ profile_arguments_type(G5_method, Rscratch, Gargs, false);
|
||||
__ call_from_interpreter(Rscratch, Gargs, Rret);
|
||||
}
|
||||
|
||||
|
|
|
@ -234,7 +234,7 @@ void VM_Version::initialize() {
|
|||
assert((OptoLoopAlignment % relocInfo::addr_unit()) == 0, "alignment is not a multiple of NOP size");
|
||||
|
||||
char buf[512];
|
||||
jio_snprintf(buf, sizeof(buf), "%s%s%s%s%s%s%s%s%s%s%s%s%s%s",
|
||||
jio_snprintf(buf, sizeof(buf), "%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s",
|
||||
(has_v9() ? ", v9" : (has_v8() ? ", v8" : "")),
|
||||
(has_hardware_popc() ? ", popc" : ""),
|
||||
(has_vis1() ? ", vis1" : ""),
|
||||
|
@ -242,6 +242,7 @@ void VM_Version::initialize() {
|
|||
(has_vis3() ? ", vis3" : ""),
|
||||
(has_blk_init() ? ", blk_init" : ""),
|
||||
(has_cbcond() ? ", cbcond" : ""),
|
||||
(has_aes() ? ", aes" : ""),
|
||||
(is_ultra3() ? ", ultra3" : ""),
|
||||
(is_sun4v() ? ", sun4v" : ""),
|
||||
(is_niagara_plus() ? ", niagara_plus" : (is_niagara() ? ", niagara" : "")),
|
||||
|
@ -265,6 +266,41 @@ void VM_Version::initialize() {
|
|||
if (!has_vis1()) // Drop to 0 if no VIS1 support
|
||||
UseVIS = 0;
|
||||
|
||||
// T2 and above should have support for AES instructions
|
||||
if (has_aes()) {
|
||||
if (UseVIS > 0) { // AES intrinsics use FXOR instruction which is VIS1
|
||||
if (FLAG_IS_DEFAULT(UseAES)) {
|
||||
FLAG_SET_DEFAULT(UseAES, true);
|
||||
}
|
||||
if (FLAG_IS_DEFAULT(UseAESIntrinsics)) {
|
||||
FLAG_SET_DEFAULT(UseAESIntrinsics, true);
|
||||
}
|
||||
// we disable both the AES flags if either of them is disabled on the command line
|
||||
if (!UseAES || !UseAESIntrinsics) {
|
||||
FLAG_SET_DEFAULT(UseAES, false);
|
||||
FLAG_SET_DEFAULT(UseAESIntrinsics, false);
|
||||
}
|
||||
} else {
|
||||
if (UseAES || UseAESIntrinsics) {
|
||||
warning("SPARC AES intrinsics require VIS1 instruction support. Intrinsics will be disabled.");
|
||||
if (UseAES) {
|
||||
FLAG_SET_DEFAULT(UseAES, false);
|
||||
}
|
||||
if (UseAESIntrinsics) {
|
||||
FLAG_SET_DEFAULT(UseAESIntrinsics, false);
|
||||
}
|
||||
}
|
||||
}
|
||||
} else if (UseAES || UseAESIntrinsics) {
|
||||
warning("AES instructions are not available on this CPU");
|
||||
if (UseAES) {
|
||||
FLAG_SET_DEFAULT(UseAES, false);
|
||||
}
|
||||
if (UseAESIntrinsics) {
|
||||
FLAG_SET_DEFAULT(UseAESIntrinsics, false);
|
||||
}
|
||||
}
|
||||
|
||||
if (FLAG_IS_DEFAULT(ContendedPaddingWidth) &&
|
||||
(cache_line_size > ContendedPaddingWidth))
|
||||
ContendedPaddingWidth = cache_line_size;
|
||||
|
|
|
@ -48,7 +48,8 @@ protected:
|
|||
sparc64_family = 14,
|
||||
M_family = 15,
|
||||
T_family = 16,
|
||||
T1_model = 17
|
||||
T1_model = 17,
|
||||
aes_instructions = 18
|
||||
};
|
||||
|
||||
enum Feature_Flag_Set {
|
||||
|
@ -73,6 +74,7 @@ protected:
|
|||
M_family_m = 1 << M_family,
|
||||
T_family_m = 1 << T_family,
|
||||
T1_model_m = 1 << T1_model,
|
||||
aes_instructions_m = 1 << aes_instructions,
|
||||
|
||||
generic_v8_m = v8_instructions_m | hardware_mul32_m | hardware_div32_m | hardware_fsmuld_m,
|
||||
generic_v9_m = generic_v8_m | v9_instructions_m,
|
||||
|
@ -123,6 +125,7 @@ public:
|
|||
static bool has_vis3() { return (_features & vis3_instructions_m) != 0; }
|
||||
static bool has_blk_init() { return (_features & blk_init_instructions_m) != 0; }
|
||||
static bool has_cbcond() { return (_features & cbcond_instructions_m) != 0; }
|
||||
static bool has_aes() { return (_features & aes_instructions_m) != 0; }
|
||||
|
||||
static bool supports_compare_and_exchange()
|
||||
{ return has_v9(); }
|
||||
|
|
|
@ -38,6 +38,7 @@
|
|||
#include "nativeInst_x86.hpp"
|
||||
#include "oops/objArrayKlass.hpp"
|
||||
#include "runtime/sharedRuntime.hpp"
|
||||
#include "vmreg_x86.inline.hpp"
|
||||
|
||||
|
||||
// These masks are used to provide 128-bit aligned bitmasks to the XMM
|
||||
|
@ -1006,6 +1007,9 @@ void LIR_Assembler::reg2mem(LIR_Opr src, LIR_Opr dest, BasicType type, LIR_Patch
|
|||
if (UseCompressedOops && !wide) {
|
||||
__ movptr(compressed_src, src->as_register());
|
||||
__ encode_heap_oop(compressed_src);
|
||||
if (patch_code != lir_patch_none) {
|
||||
info->oop_map()->set_narrowoop(compressed_src->as_VMReg());
|
||||
}
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
|
|
@ -941,6 +941,8 @@ void LIRGenerator::do_update_CRC32(Intrinsic* x) {
|
|||
case vmIntrinsics::_updateCRC32: {
|
||||
LIRItem crc(x->argument_at(0), this);
|
||||
LIRItem val(x->argument_at(1), this);
|
||||
// val is destroyed by update_crc32
|
||||
val.set_destroys_register();
|
||||
crc.load_item();
|
||||
val.load_item();
|
||||
__ update_crc32(crc.result(), val.result(), result);
|
||||
|
|
|
@ -127,7 +127,7 @@ void InterpreterMacroAssembler::profile_arguments_type(Register mdp, Register ca
|
|||
|
||||
if (MethodData::profile_return()) {
|
||||
// We're right after the type profile for the last
|
||||
// argument. tmp is the number of cell left in the
|
||||
// argument. tmp is the number of cells left in the
|
||||
// CallTypeData/VirtualCallTypeData to reach its end. Non null
|
||||
// if there's a return to profile.
|
||||
assert(ReturnTypeEntry::static_cell_count() < TypeStackSlotEntries::per_arg_count(), "can't move past ret type");
|
||||
|
@ -198,7 +198,7 @@ void InterpreterMacroAssembler::profile_parameters_type(Register mdp, Register t
|
|||
// parameters. Collect profiling from last parameter down.
|
||||
// mdo start + parameters offset + array length - 1
|
||||
addptr(mdp, tmp1);
|
||||
movptr(tmp1, Address(mdp, in_bytes(ArrayData::array_len_offset())));
|
||||
movptr(tmp1, Address(mdp, ArrayData::array_len_offset()));
|
||||
decrement(tmp1, TypeStackSlotEntries::per_arg_count());
|
||||
|
||||
Label loop;
|
||||
|
|
|
@ -2403,6 +2403,9 @@ class StubGenerator: public StubCodeGenerator {
|
|||
// c_rarg3 - r vector byte array address
|
||||
// c_rarg4 - input length
|
||||
//
|
||||
// Output:
|
||||
// rax - input length
|
||||
//
|
||||
address generate_cipherBlockChaining_encryptAESCrypt() {
|
||||
assert(UseAES, "need AES instructions and misaligned SSE support");
|
||||
__ align(CodeEntryAlignment);
|
||||
|
@ -2483,7 +2486,7 @@ class StubGenerator: public StubCodeGenerator {
|
|||
__ movdqu(Address(rvec, 0), xmm_result); // final value of r stored in rvec of CipherBlockChaining object
|
||||
|
||||
handleSOERegisters(false /*restoring*/);
|
||||
__ movl(rax, 0); // return 0 (why?)
|
||||
__ movptr(rax, len_param); // return length
|
||||
__ leave(); // required for proper stackwalking of RuntimeStub frame
|
||||
__ ret(0);
|
||||
|
||||
|
@ -2557,6 +2560,9 @@ class StubGenerator: public StubCodeGenerator {
|
|||
// c_rarg3 - r vector byte array address
|
||||
// c_rarg4 - input length
|
||||
//
|
||||
// Output:
|
||||
// rax - input length
|
||||
//
|
||||
|
||||
address generate_cipherBlockChaining_decryptAESCrypt() {
|
||||
assert(UseAES, "need AES instructions and misaligned SSE support");
|
||||
|
@ -2650,7 +2656,7 @@ class StubGenerator: public StubCodeGenerator {
|
|||
__ movptr(rvec , rvec_param); // restore this since used in loop
|
||||
__ movdqu(Address(rvec, 0), xmm_temp); // final value of r stored in rvec of CipherBlockChaining object
|
||||
handleSOERegisters(false /*restoring*/);
|
||||
__ movl(rax, 0); // return 0 (why?)
|
||||
__ movptr(rax, len_param); // return length
|
||||
__ leave(); // required for proper stackwalking of RuntimeStub frame
|
||||
__ ret(0);
|
||||
|
||||
|
|
|
@ -3217,6 +3217,9 @@ class StubGenerator: public StubCodeGenerator {
|
|||
// c_rarg3 - r vector byte array address
|
||||
// c_rarg4 - input length
|
||||
//
|
||||
// Output:
|
||||
// rax - input length
|
||||
//
|
||||
address generate_cipherBlockChaining_encryptAESCrypt() {
|
||||
assert(UseAES, "need AES instructions and misaligned SSE support");
|
||||
__ align(CodeEntryAlignment);
|
||||
|
@ -3232,7 +3235,7 @@ class StubGenerator: public StubCodeGenerator {
|
|||
#ifndef _WIN64
|
||||
const Register len_reg = c_rarg4; // src len (must be multiple of blocksize 16)
|
||||
#else
|
||||
const Address len_mem(rsp, 6 * wordSize); // length is on stack on Win64
|
||||
const Address len_mem(rbp, 6 * wordSize); // length is on stack on Win64
|
||||
const Register len_reg = r10; // pick the first volatile windows register
|
||||
#endif
|
||||
const Register pos = rax;
|
||||
|
@ -3259,6 +3262,8 @@ class StubGenerator: public StubCodeGenerator {
|
|||
for (int i = 6; i <= XMM_REG_NUM_KEY_LAST; i++) {
|
||||
__ movdqu(xmm_save(i), as_XMMRegister(i));
|
||||
}
|
||||
#else
|
||||
__ push(len_reg); // Save
|
||||
#endif
|
||||
|
||||
const XMMRegister xmm_key_shuf_mask = xmm_temp; // used temporarily to swap key bytes up front
|
||||
|
@ -3301,8 +3306,10 @@ class StubGenerator: public StubCodeGenerator {
|
|||
for (int i = 6; i <= XMM_REG_NUM_KEY_LAST; i++) {
|
||||
__ movdqu(as_XMMRegister(i), xmm_save(i));
|
||||
}
|
||||
__ movl(rax, len_mem);
|
||||
#else
|
||||
__ pop(rax); // return length
|
||||
#endif
|
||||
__ movl(rax, 0); // return 0 (why?)
|
||||
__ leave(); // required for proper stackwalking of RuntimeStub frame
|
||||
__ ret(0);
|
||||
|
||||
|
@ -3409,6 +3416,9 @@ class StubGenerator: public StubCodeGenerator {
|
|||
// c_rarg3 - r vector byte array address
|
||||
// c_rarg4 - input length
|
||||
//
|
||||
// Output:
|
||||
// rax - input length
|
||||
//
|
||||
|
||||
address generate_cipherBlockChaining_decryptAESCrypt_Parallel() {
|
||||
assert(UseAES, "need AES instructions and misaligned SSE support");
|
||||
|
@ -3427,7 +3437,7 @@ class StubGenerator: public StubCodeGenerator {
|
|||
#ifndef _WIN64
|
||||
const Register len_reg = c_rarg4; // src len (must be multiple of blocksize 16)
|
||||
#else
|
||||
const Address len_mem(rsp, 6 * wordSize); // length is on stack on Win64
|
||||
const Address len_mem(rbp, 6 * wordSize); // length is on stack on Win64
|
||||
const Register len_reg = r10; // pick the first volatile windows register
|
||||
#endif
|
||||
const Register pos = rax;
|
||||
|
@ -3448,7 +3458,10 @@ class StubGenerator: public StubCodeGenerator {
|
|||
for (int i = 6; i <= XMM_REG_NUM_KEY_LAST; i++) {
|
||||
__ movdqu(xmm_save(i), as_XMMRegister(i));
|
||||
}
|
||||
#else
|
||||
__ push(len_reg); // Save
|
||||
#endif
|
||||
|
||||
// the java expanded key ordering is rotated one position from what we want
|
||||
// so we start from 0x10 here and hit 0x00 last
|
||||
const XMMRegister xmm_key_shuf_mask = xmm1; // used temporarily to swap key bytes up front
|
||||
|
@ -3554,8 +3567,10 @@ class StubGenerator: public StubCodeGenerator {
|
|||
for (int i = 6; i <= XMM_REG_NUM_KEY_LAST; i++) {
|
||||
__ movdqu(as_XMMRegister(i), xmm_save(i));
|
||||
}
|
||||
__ movl(rax, len_mem);
|
||||
#else
|
||||
__ pop(rax); // return length
|
||||
#endif
|
||||
__ movl(rax, 0); // return 0 (why?)
|
||||
__ leave(); // required for proper stackwalking of RuntimeStub frame
|
||||
__ ret(0);
|
||||
|
||||
|
|
|
@ -581,6 +581,12 @@ const bool Matcher::misaligned_vectors_ok() {
|
|||
return !AlignVector; // can be changed by flag
|
||||
}
|
||||
|
||||
// x86 AES instructions are compatible with SunJCE expanded
|
||||
// keys, hence we do not need to pass the original key to stubs
|
||||
const bool Matcher::pass_original_key_for_aes() {
|
||||
return false;
|
||||
}
|
||||
|
||||
// Helper methods for MachSpillCopyNode::implementation().
|
||||
static int vec_mov_helper(CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
|
||||
int src_hi, int dst_hi, uint ireg, outputStream* st) {
|
||||
|
|
|
@ -119,6 +119,11 @@ int VM_Version::platform_features(int features) {
|
|||
#endif
|
||||
if (av & AV_SPARC_CBCOND) features |= cbcond_instructions_m;
|
||||
|
||||
#ifndef AV_SPARC_AES
|
||||
#define AV_SPARC_AES 0x00020000 /* aes instrs supported */
|
||||
#endif
|
||||
if (av & AV_SPARC_AES) features |= aes_instructions_m;
|
||||
|
||||
} else {
|
||||
// getisax(2) failed, use the old legacy code.
|
||||
#ifndef PRODUCT
|
||||
|
|
|
@ -3288,7 +3288,10 @@ void LIRGenerator::do_ProfileReturnType(ProfileReturnType* x) {
|
|||
ciSignature* signature_at_call = NULL;
|
||||
x->method()->get_method_at_bci(bci, ignored_will_link, &signature_at_call);
|
||||
|
||||
ciKlass* exact = profile_type(md, 0, md->byte_offset_of_slot(data, ret->type_offset()),
|
||||
// The offset within the MDO of the entry to update may be too large
|
||||
// to be used in load/store instructions on some platforms. So have
|
||||
// profile_type() compute the address of the profile in a register.
|
||||
ciKlass* exact = profile_type(md, md->byte_offset_of_slot(data, ret->type_offset()), 0,
|
||||
ret->type(), x->ret(), mdp,
|
||||
!x->needs_null_check(),
|
||||
signature_at_call->return_type()->as_klass(),
|
||||
|
|
|
@ -787,7 +787,7 @@
|
|||
do_intrinsic(_cipherBlockChaining_decryptAESCrypt, com_sun_crypto_provider_cipherBlockChaining, decrypt_name, byteArray_int_int_byteArray_int_signature, F_R) \
|
||||
do_name( encrypt_name, "encrypt") \
|
||||
do_name( decrypt_name, "decrypt") \
|
||||
do_signature(byteArray_int_int_byteArray_int_signature, "([BII[BI)V") \
|
||||
do_signature(byteArray_int_int_byteArray_int_signature, "([BII[BI)I") \
|
||||
\
|
||||
/* support for java.util.zip */ \
|
||||
do_class(java_util_zip_CRC32, "java/util/zip/CRC32") \
|
||||
|
|
|
@ -596,20 +596,13 @@ void CodeCache::clear_inline_caches() {
|
|||
}
|
||||
|
||||
#ifndef PRODUCT
|
||||
// used to keep track of how much time is spent in mark_for_deoptimization
|
||||
// Keeps track of time spent for checking dependencies
|
||||
static elapsedTimer dependentCheckTime;
|
||||
static int dependentCheckCount = 0;
|
||||
#endif // PRODUCT
|
||||
#endif
|
||||
|
||||
|
||||
int CodeCache::mark_for_deoptimization(DepChange& changes) {
|
||||
MutexLockerEx mu(CodeCache_lock, Mutex::_no_safepoint_check_flag);
|
||||
|
||||
#ifndef PRODUCT
|
||||
dependentCheckTime.start();
|
||||
dependentCheckCount++;
|
||||
#endif // PRODUCT
|
||||
|
||||
int number_of_marked_CodeBlobs = 0;
|
||||
|
||||
// search the hierarchy looking for nmethods which are affected by the loading of this class
|
||||
|
@ -617,32 +610,23 @@ int CodeCache::mark_for_deoptimization(DepChange& changes) {
|
|||
// then search the interfaces this class implements looking for nmethods
|
||||
// which might be dependent on the fact that an interface only had one
|
||||
// implementor.
|
||||
|
||||
{ No_Safepoint_Verifier nsv;
|
||||
for (DepChange::ContextStream str(changes, nsv); str.next(); ) {
|
||||
Klass* d = str.klass();
|
||||
number_of_marked_CodeBlobs += InstanceKlass::cast(d)->mark_dependent_nmethods(changes);
|
||||
}
|
||||
}
|
||||
|
||||
if (VerifyDependencies) {
|
||||
// Turn off dependency tracing while actually testing deps.
|
||||
NOT_PRODUCT( FlagSetting fs(TraceDependencies, false) );
|
||||
FOR_ALL_ALIVE_NMETHODS(nm) {
|
||||
if (!nm->is_marked_for_deoptimization() &&
|
||||
nm->check_all_dependencies()) {
|
||||
ResourceMark rm;
|
||||
tty->print_cr("Should have been marked for deoptimization:");
|
||||
changes.print();
|
||||
nm->print();
|
||||
nm->print_dependencies();
|
||||
}
|
||||
}
|
||||
// nmethod::check_all_dependencies works only correctly, if no safepoint
|
||||
// can happen
|
||||
No_Safepoint_Verifier nsv;
|
||||
for (DepChange::ContextStream str(changes, nsv); str.next(); ) {
|
||||
Klass* d = str.klass();
|
||||
number_of_marked_CodeBlobs += InstanceKlass::cast(d)->mark_dependent_nmethods(changes);
|
||||
}
|
||||
|
||||
#ifndef PRODUCT
|
||||
dependentCheckTime.stop();
|
||||
#endif // PRODUCT
|
||||
if (VerifyDependencies) {
|
||||
// Object pointers are used as unique identifiers for dependency arguments. This
|
||||
// is only possible if no safepoint, i.e., GC occurs during the verification code.
|
||||
dependentCheckTime.start();
|
||||
nmethod::check_all_dependencies(changes);
|
||||
dependentCheckTime.stop();
|
||||
}
|
||||
#endif
|
||||
|
||||
return number_of_marked_CodeBlobs;
|
||||
}
|
||||
|
@ -899,9 +883,7 @@ void CodeCache::print() {
|
|||
}
|
||||
|
||||
tty->print_cr("CodeCache:");
|
||||
|
||||
tty->print_cr("nmethod dependency checking time %f", dependentCheckTime.seconds(),
|
||||
dependentCheckTime.seconds() / dependentCheckCount);
|
||||
tty->print_cr("nmethod dependency checking time %fs", dependentCheckTime.seconds());
|
||||
|
||||
if (!live.is_empty()) {
|
||||
live.print("live");
|
||||
|
|
|
@ -678,6 +678,17 @@ Metadata* Dependencies::DepStream::argument(int i) {
|
|||
return result;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns a unique identifier for each dependency argument.
|
||||
*/
|
||||
uintptr_t Dependencies::DepStream::get_identifier(int i) {
|
||||
if (has_oop_argument()) {
|
||||
return (uintptr_t)(oopDesc*)argument_oop(i);
|
||||
} else {
|
||||
return (uintptr_t)argument(i);
|
||||
}
|
||||
}
|
||||
|
||||
oop Dependencies::DepStream::argument_oop(int i) {
|
||||
oop result = recorded_oop_at(argument_index(i));
|
||||
assert(result == NULL || result->is_oop(), "must be");
|
||||
|
@ -713,6 +724,57 @@ Klass* Dependencies::DepStream::context_type() {
|
|||
return NULL;
|
||||
}
|
||||
|
||||
// ----------------- DependencySignature --------------------------------------
|
||||
bool DependencySignature::equals(const DependencySignature& sig) const {
|
||||
if (type() != sig.type()) {
|
||||
return false;
|
||||
}
|
||||
|
||||
if (args_count() != sig.args_count()) {
|
||||
return false;
|
||||
}
|
||||
|
||||
for (int i = 0; i < sig.args_count(); i++) {
|
||||
if (arg(i) != sig.arg(i)) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
// ----------------- DependencySignatureBuffer --------------------------------------
|
||||
// Allocates (via NEW_RESOURCE_ARRAY) one slot per dependency type and clears
// every slot to NULL; the per-type lists themselves are created on demand by
// add_if_missing().
DependencySignatureBuffer::DependencySignatureBuffer() {
  _signatures = NEW_RESOURCE_ARRAY(GrowableArray<DependencySignature*>*, Dependencies::TYPE_LIMIT);
  // Derive the byte count from the array's own element type
  // (GrowableArray<DependencySignature*>*) rather than from an unrelated
  // pointer type, so the clear stays correct if the element type changes.
  memset(_signatures, 0, sizeof(*_signatures) * Dependencies::TYPE_LIMIT);
}
|
||||
|
||||
/* Check if arguments are identical. Two dependency signatures are considered
|
||||
* identical, if the type as well as all argument identifiers are identical.
|
||||
* If the dependency has not already been checked, the dependency signature is
|
||||
* added to the checked dependencies of the same type. The function returns
|
||||
* false, which causes the dependency to be checked in the caller.
|
||||
*/
|
||||
bool DependencySignatureBuffer::add_if_missing(const DependencySignature& sig) {
|
||||
const int index = sig.type();
|
||||
GrowableArray<DependencySignature*>* buffer = _signatures[index];
|
||||
if (buffer == NULL) {
|
||||
buffer = new GrowableArray<DependencySignature*>();
|
||||
_signatures[index] = buffer;
|
||||
}
|
||||
|
||||
// Check if we have already checked the dependency
|
||||
for (int i = 0; i < buffer->length(); i++) {
|
||||
DependencySignature* checked_signature = buffer->at(i);
|
||||
if (checked_signature->equals(sig)) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
buffer->append((DependencySignature*)&sig);
|
||||
return false;
|
||||
}
|
||||
|
||||
|
||||
/// Checking dependencies:
|
||||
|
||||
// This hierarchy walker inspects subtypes of a given type,
|
||||
|
|
|
@ -480,6 +480,9 @@ class Dependencies: public ResourceObj {
|
|||
bool next();
|
||||
|
||||
DepType type() { return _type; }
|
||||
bool has_oop_argument() { return type() == call_site_target_value; }
|
||||
uintptr_t get_identifier(int i);
|
||||
|
||||
int argument_count() { return dep_args(type()); }
|
||||
int argument_index(int i) { assert(0 <= i && i < argument_count(), "oob");
|
||||
return _xi[i]; }
|
||||
|
@ -523,6 +526,38 @@ class Dependencies: public ResourceObj {
|
|||
};
|
||||
|
||||
|
||||
class DependencySignature : public ResourceObj {
|
||||
private:
|
||||
int _args_count;
|
||||
uintptr_t _argument_hash[Dependencies::max_arg_count];
|
||||
Dependencies::DepType _type;
|
||||
|
||||
|
||||
public:
|
||||
DependencySignature(Dependencies::DepStream& dep) {
|
||||
_args_count = dep.argument_count();
|
||||
_type = dep.type();
|
||||
for (int i = 0; i < _args_count; i++) {
|
||||
_argument_hash[i] = dep.get_identifier(i);
|
||||
}
|
||||
}
|
||||
|
||||
bool equals(const DependencySignature& sig) const;
|
||||
|
||||
int args_count() const { return _args_count; }
|
||||
uintptr_t arg(int idx) const { return _argument_hash[idx]; }
|
||||
Dependencies::DepType type() const { return _type; }
|
||||
};
|
||||
|
||||
class DependencySignatureBuffer : public StackObj {
|
||||
private:
|
||||
GrowableArray<DependencySignature*>** _signatures;
|
||||
|
||||
public:
|
||||
DependencySignatureBuffer();
|
||||
bool add_if_missing(const DependencySignature& sig);
|
||||
};
|
||||
|
||||
// Every particular DepChange is a sub-class of this class.
|
||||
class DepChange : public StackObj {
|
||||
public:
|
||||
|
|
|
@ -2161,16 +2161,41 @@ PcDesc* nmethod::find_pc_desc_internal(address pc, bool approximate) {
|
|||
}
|
||||
|
||||
|
||||
bool nmethod::check_all_dependencies() {
|
||||
bool found_check = false;
|
||||
// wholesale check of all dependencies
|
||||
for (Dependencies::DepStream deps(this); deps.next(); ) {
|
||||
if (deps.check_dependency() != NULL) {
|
||||
found_check = true;
|
||||
NOT_DEBUG(break);
|
||||
void nmethod::check_all_dependencies(DepChange& changes) {
|
||||
// Checked dependencies are allocated into this ResourceMark
|
||||
ResourceMark rm;
|
||||
|
||||
// Turn off dependency tracing while actually testing dependencies.
|
||||
NOT_PRODUCT( FlagSetting fs(TraceDependencies, false) );
|
||||
|
||||
// 'dep_signature_buffers' caches already checked dependencies.
|
||||
DependencySignatureBuffer dep_signature_buffers;
|
||||
|
||||
// Iterate over live nmethods and check dependencies of all nmethods that are not
|
||||
// marked for deoptimization. A particular dependency is only checked once.
|
||||
for(nmethod* nm = CodeCache::alive_nmethod(CodeCache::first()); nm != NULL; nm = CodeCache::alive_nmethod(CodeCache::next(nm))) {
|
||||
if (!nm->is_marked_for_deoptimization()) {
|
||||
for (Dependencies::DepStream deps(nm); deps.next(); ) {
|
||||
// Construct abstraction of a dependency.
|
||||
const DependencySignature* current_sig = new DependencySignature(deps);
|
||||
// Determine if 'deps' is already checked. If it is not checked,
|
||||
// 'add_if_missing()' adds the dependency signature and returns
|
||||
// false.
|
||||
if (!dep_signature_buffers.add_if_missing(*current_sig)) {
|
||||
if (deps.check_dependency() != NULL) {
|
||||
// Dependency checking failed. Print out information about the failed
|
||||
// dependency and finally fail with an assert. We can fail here, since
|
||||
// dependency checking is never done in a product build.
|
||||
ResourceMark rm;
|
||||
changes.print();
|
||||
nm->print();
|
||||
nm->print_dependencies();
|
||||
assert(false, "Should have been marked for deoptimization");
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
return found_check; // tell caller if we found anything
|
||||
}
|
||||
|
||||
bool nmethod::check_dependency_on(DepChange& changes) {
|
||||
|
|
|
@ -679,7 +679,7 @@ public:
|
|||
|
||||
// tells if any of this method's dependencies have been invalidated
|
||||
// (this is expensive!)
|
||||
bool check_all_dependencies();
|
||||
static void check_all_dependencies(DepChange& changes);
|
||||
|
||||
// tells if this compiled method is dependent on the given changes,
|
||||
// and the changes have invalidated it
|
||||
|
|
|
@ -219,58 +219,52 @@ void SATBMarkQueueSet::handle_zero_index_for_thread(JavaThread* t) {
|
|||
}
|
||||
|
||||
#ifdef ASSERT
|
||||
void SATBMarkQueueSet::dump_active_values(JavaThread* first,
|
||||
bool expected_active) {
|
||||
gclog_or_tty->print_cr("SATB queue active values for Java Threads");
|
||||
gclog_or_tty->print_cr(" SATB queue set: active is %s",
|
||||
(is_active()) ? "TRUE" : "FALSE");
|
||||
gclog_or_tty->print_cr(" expected_active is %s",
|
||||
(expected_active) ? "TRUE" : "FALSE");
|
||||
for (JavaThread* t = first; t; t = t->next()) {
|
||||
bool active = t->satb_mark_queue().is_active();
|
||||
gclog_or_tty->print_cr(" thread %s, active is %s",
|
||||
t->name(), (active) ? "TRUE" : "FALSE");
|
||||
void SATBMarkQueueSet::dump_active_states(bool expected_active) {
|
||||
gclog_or_tty->print_cr("Expected SATB active state: %s",
|
||||
expected_active ? "ACTIVE" : "INACTIVE");
|
||||
gclog_or_tty->print_cr("Actual SATB active states:");
|
||||
gclog_or_tty->print_cr(" Queue set: %s", is_active() ? "ACTIVE" : "INACTIVE");
|
||||
for (JavaThread* t = Threads::first(); t; t = t->next()) {
|
||||
gclog_or_tty->print_cr(" Thread \"%s\" queue: %s", t->name(),
|
||||
t->satb_mark_queue().is_active() ? "ACTIVE" : "INACTIVE");
|
||||
}
|
||||
gclog_or_tty->print_cr(" Shared queue: %s",
|
||||
shared_satb_queue()->is_active() ? "ACTIVE" : "INACTIVE");
|
||||
}
|
||||
|
||||
void SATBMarkQueueSet::verify_active_states(bool expected_active) {
|
||||
// Verify queue set state
|
||||
if (is_active() != expected_active) {
|
||||
dump_active_states(expected_active);
|
||||
guarantee(false, "SATB queue set has an unexpected active state");
|
||||
}
|
||||
|
||||
// Verify thread queue states
|
||||
for (JavaThread* t = Threads::first(); t; t = t->next()) {
|
||||
if (t->satb_mark_queue().is_active() != expected_active) {
|
||||
dump_active_states(expected_active);
|
||||
guarantee(false, "Thread SATB queue has an unexpected active state");
|
||||
}
|
||||
}
|
||||
|
||||
// Verify shared queue state
|
||||
if (shared_satb_queue()->is_active() != expected_active) {
|
||||
dump_active_states(expected_active);
|
||||
guarantee(false, "Shared SATB queue has an unexpected active state");
|
||||
}
|
||||
}
|
||||
#endif // ASSERT
|
||||
|
||||
void SATBMarkQueueSet::set_active_all_threads(bool b,
|
||||
bool expected_active) {
|
||||
void SATBMarkQueueSet::set_active_all_threads(bool active, bool expected_active) {
|
||||
assert(SafepointSynchronize::is_at_safepoint(), "Must be at safepoint.");
|
||||
JavaThread* first = Threads::first();
|
||||
|
||||
#ifdef ASSERT
|
||||
if (_all_active != expected_active) {
|
||||
dump_active_values(first, expected_active);
|
||||
|
||||
// I leave this here as a guarantee, instead of an assert, so
|
||||
// that it will still be compiled in if we choose to uncomment
|
||||
// the #ifdef ASSERT in a product build. The whole block is
|
||||
// within an #ifdef ASSERT so the guarantee will not be compiled
|
||||
// in a product build anyway.
|
||||
guarantee(false,
|
||||
"SATB queue set has an unexpected active value");
|
||||
}
|
||||
verify_active_states(expected_active);
|
||||
#endif // ASSERT
|
||||
_all_active = b;
|
||||
|
||||
for (JavaThread* t = first; t; t = t->next()) {
|
||||
#ifdef ASSERT
|
||||
bool active = t->satb_mark_queue().is_active();
|
||||
if (active != expected_active) {
|
||||
dump_active_values(first, expected_active);
|
||||
|
||||
// I leave this here as a guarantee, instead of an assert, so
|
||||
// that it will still be compiled in if we choose to uncomment
|
||||
// the #ifdef ASSERT in a product build. The whole block is
|
||||
// within an #ifdef ASSERT so the guarantee will not be compiled
|
||||
// in a product build anyway.
|
||||
guarantee(false,
|
||||
"thread has an unexpected active value in its SATB queue");
|
||||
}
|
||||
#endif // ASSERT
|
||||
t->satb_mark_queue().set_active(b);
|
||||
_all_active = active;
|
||||
for (JavaThread* t = Threads::first(); t; t = t->next()) {
|
||||
t->satb_mark_queue().set_active(active);
|
||||
}
|
||||
shared_satb_queue()->set_active(active);
|
||||
}
|
||||
|
||||
void SATBMarkQueueSet::filter_thread_buffers() {
|
||||
|
|
|
@ -87,7 +87,8 @@ class SATBMarkQueueSet: public PtrQueueSet {
|
|||
bool apply_closure_to_completed_buffer_work(bool par, int worker);
|
||||
|
||||
#ifdef ASSERT
|
||||
void dump_active_values(JavaThread* first, bool expected_active);
|
||||
void dump_active_states(bool expected_active);
|
||||
void verify_active_states(bool expected_active);
|
||||
#endif // ASSERT
|
||||
|
||||
public:
|
||||
|
@ -99,11 +100,11 @@ public:
|
|||
|
||||
static void handle_zero_index_for_thread(JavaThread* t);
|
||||
|
||||
// Apply "set_active(b)" to all Java threads' SATB queues. It should be
|
||||
// Apply "set_active(active)" to all SATB queues in the set. It should be
|
||||
// called only with the world stopped. The method will assert that the
|
||||
// SATB queues of all threads it visits, as well as the SATB queue
|
||||
// set itself, has an active value same as expected_active.
|
||||
void set_active_all_threads(bool b, bool expected_active);
|
||||
void set_active_all_threads(bool active, bool expected_active);
|
||||
|
||||
// Filter all the currently-active SATB buffers.
|
||||
void filter_thread_buffers();
|
||||
|
|
|
@ -1455,9 +1455,10 @@ void MetaspaceGC::compute_new_size() {
|
|||
|
||||
// No expansion, now see if we want to shrink
|
||||
// We would never want to shrink more than this
|
||||
assert(capacity_until_GC >= minimum_desired_capacity,
|
||||
err_msg(SIZE_FORMAT " >= " SIZE_FORMAT,
|
||||
capacity_until_GC, minimum_desired_capacity));
|
||||
size_t max_shrink_bytes = capacity_until_GC - minimum_desired_capacity;
|
||||
assert(max_shrink_bytes >= 0, err_msg("max_shrink_bytes " SIZE_FORMAT,
|
||||
max_shrink_bytes));
|
||||
|
||||
// Should shrinking be considered?
|
||||
if (MaxMetaspaceFreeRatio < 100) {
|
||||
|
|
|
@ -100,7 +100,6 @@ ReferenceProcessor::ReferenceProcessor(MemRegion span,
|
|||
_enqueuing_is_done(false),
|
||||
_is_alive_non_header(is_alive_non_header),
|
||||
_discovered_list_needs_barrier(discovered_list_needs_barrier),
|
||||
_bs(NULL),
|
||||
_processing_is_mt(mt_processing),
|
||||
_next_id(0)
|
||||
{
|
||||
|
@ -126,10 +125,6 @@ ReferenceProcessor::ReferenceProcessor(MemRegion span,
|
|||
_discovered_refs[i].set_length(0);
|
||||
}
|
||||
|
||||
// If we do barriers, cache a copy of the barrier set.
|
||||
if (discovered_list_needs_barrier) {
|
||||
_bs = Universe::heap()->barrier_set();
|
||||
}
|
||||
setup_policy(false /* default soft ref policy */);
|
||||
}
|
||||
|
||||
|
@ -317,13 +312,9 @@ bool enqueue_discovered_ref_helper(ReferenceProcessor* ref,
|
|||
// Enqueue references that are not made active again, and
|
||||
// clear the decks for the next collection (cycle).
|
||||
ref->enqueue_discovered_reflists((HeapWord*)pending_list_addr, task_executor);
|
||||
// Do the oop-check on pending_list_addr missed in
|
||||
// enqueue_discovered_reflist. We should probably
|
||||
// do a raw oop_check so that future such idempotent
|
||||
// oop_stores relying on the oop-check side-effect
|
||||
// may be elided automatically and safely without
|
||||
// affecting correctness.
|
||||
oop_store(pending_list_addr, oopDesc::load_decode_heap_oop(pending_list_addr));
|
||||
// Do the post-barrier on pending_list_addr missed in
|
||||
// enqueue_discovered_reflist.
|
||||
oopDesc::bs()->write_ref_field(pending_list_addr, oopDesc::load_decode_heap_oop(pending_list_addr));
|
||||
|
||||
// Stop treating discovered references specially.
|
||||
ref->disable_discovery();
|
||||
|
@ -372,15 +363,17 @@ void ReferenceProcessor::enqueue_discovered_reflist(DiscoveredList& refs_list,
|
|||
assert(java_lang_ref_Reference::next(obj) == NULL,
|
||||
"Reference not active; should not be discovered");
|
||||
// Self-loop next, so as to make Ref not active.
|
||||
java_lang_ref_Reference::set_next(obj, obj);
|
||||
// Post-barrier not needed when looping to self.
|
||||
java_lang_ref_Reference::set_next_raw(obj, obj);
|
||||
if (next_d == obj) { // obj is last
|
||||
// Swap refs_list into pending_list_addr and
|
||||
// set obj's discovered to what we read from pending_list_addr.
|
||||
oop old = oopDesc::atomic_exchange_oop(refs_list.head(), pending_list_addr);
|
||||
// Need oop_check on pending_list_addr above;
|
||||
// see special oop-check code at the end of
|
||||
// Need post-barrier on pending_list_addr above;
|
||||
// see special post-barrier code at the end of
|
||||
// enqueue_discovered_reflists() further below.
|
||||
java_lang_ref_Reference::set_discovered(obj, old); // old may be NULL
|
||||
java_lang_ref_Reference::set_discovered_raw(obj, old); // old may be NULL
|
||||
oopDesc::bs()->write_ref_field(java_lang_ref_Reference::discovered_addr(obj), old);
|
||||
}
|
||||
}
|
||||
} else { // Old behaviour
|
||||
|
@ -516,13 +509,11 @@ void DiscoveredListIterator::make_active() {
|
|||
// the reference object and will fail
|
||||
// CT verification.
|
||||
if (UseG1GC) {
|
||||
BarrierSet* bs = oopDesc::bs();
|
||||
HeapWord* next_addr = java_lang_ref_Reference::next_addr(_ref);
|
||||
|
||||
if (UseCompressedOops) {
|
||||
bs->write_ref_field_pre((narrowOop*)next_addr, NULL);
|
||||
oopDesc::bs()->write_ref_field_pre((narrowOop*)next_addr, NULL);
|
||||
} else {
|
||||
bs->write_ref_field_pre((oop*)next_addr, NULL);
|
||||
oopDesc::bs()->write_ref_field_pre((oop*)next_addr, NULL);
|
||||
}
|
||||
java_lang_ref_Reference::set_next_raw(_ref, NULL);
|
||||
} else {
|
||||
|
@ -790,10 +781,9 @@ private:
|
|||
};
|
||||
|
||||
void ReferenceProcessor::set_discovered(oop ref, oop value) {
|
||||
java_lang_ref_Reference::set_discovered_raw(ref, value);
|
||||
if (_discovered_list_needs_barrier) {
|
||||
java_lang_ref_Reference::set_discovered(ref, value);
|
||||
} else {
|
||||
java_lang_ref_Reference::set_discovered_raw(ref, value);
|
||||
oopDesc::bs()->write_ref_field(ref, value);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -1085,7 +1075,7 @@ ReferenceProcessor::add_to_discovered_list_mt(DiscoveredList& refs_list,
|
|||
// so this will expand to nothing. As a result, we have manually
|
||||
// elided this out for G1, but left in the test for some future
|
||||
// collector that might have need for a pre-barrier here, e.g.:-
|
||||
// _bs->write_ref_field_pre((oop* or narrowOop*)discovered_addr, next_discovered);
|
||||
// oopDesc::bs()->write_ref_field_pre((oop* or narrowOop*)discovered_addr, next_discovered);
|
||||
assert(!_discovered_list_needs_barrier || UseG1GC,
|
||||
"Need to check non-G1 collector: "
|
||||
"may need a pre-write-barrier for CAS from NULL below");
|
||||
|
@ -1098,7 +1088,7 @@ ReferenceProcessor::add_to_discovered_list_mt(DiscoveredList& refs_list,
|
|||
refs_list.set_head(obj);
|
||||
refs_list.inc_length(1);
|
||||
if (_discovered_list_needs_barrier) {
|
||||
_bs->write_ref_field((void*)discovered_addr, next_discovered);
|
||||
oopDesc::bs()->write_ref_field((void*)discovered_addr, next_discovered);
|
||||
}
|
||||
|
||||
if (TraceReferenceGC) {
|
||||
|
@ -1260,13 +1250,13 @@ bool ReferenceProcessor::discover_reference(oop obj, ReferenceType rt) {
|
|||
|
||||
// As in the case further above, since we are over-writing a NULL
|
||||
// pre-value, we can safely elide the pre-barrier here for the case of G1.
|
||||
// e.g.:- _bs->write_ref_field_pre((oop* or narrowOop*)discovered_addr, next_discovered);
|
||||
// e.g.:- oopDesc::bs()->write_ref_field_pre((oop* or narrowOop*)discovered_addr, next_discovered);
|
||||
assert(discovered == NULL, "control point invariant");
|
||||
assert(!_discovered_list_needs_barrier || UseG1GC,
|
||||
"For non-G1 collector, may need a pre-write-barrier for CAS from NULL below");
|
||||
oop_store_raw(discovered_addr, next_discovered);
|
||||
if (_discovered_list_needs_barrier) {
|
||||
_bs->write_ref_field((void*)discovered_addr, next_discovered);
|
||||
oopDesc::bs()->write_ref_field((void*)discovered_addr, next_discovered);
|
||||
}
|
||||
list->set_head(obj);
|
||||
list->inc_length(1);
|
||||
|
|
|
@ -235,7 +235,6 @@ class ReferenceProcessor : public CHeapObj<mtGC> {
|
|||
// discovery.)
|
||||
bool _discovered_list_needs_barrier;
|
||||
|
||||
BarrierSet* _bs; // Cached copy of BarrierSet.
|
||||
bool _enqueuing_is_done; // true if all weak references enqueued
|
||||
bool _processing_is_mt; // true during phases when
|
||||
// reference processing is MT.
|
||||
|
@ -420,25 +419,6 @@ class ReferenceProcessor : public CHeapObj<mtGC> {
|
|||
void update_soft_ref_master_clock();
|
||||
|
||||
public:
|
||||
// constructor
|
||||
ReferenceProcessor():
|
||||
_span((HeapWord*)NULL, (HeapWord*)NULL),
|
||||
_discovered_refs(NULL),
|
||||
_discoveredSoftRefs(NULL), _discoveredWeakRefs(NULL),
|
||||
_discoveredFinalRefs(NULL), _discoveredPhantomRefs(NULL),
|
||||
_discovering_refs(false),
|
||||
_discovery_is_atomic(true),
|
||||
_enqueuing_is_done(false),
|
||||
_discovery_is_mt(false),
|
||||
_discovered_list_needs_barrier(false),
|
||||
_bs(NULL),
|
||||
_is_alive_non_header(NULL),
|
||||
_num_q(0),
|
||||
_max_num_q(0),
|
||||
_processing_is_mt(false),
|
||||
_next_id(0)
|
||||
{ }
|
||||
|
||||
// Default parameters give you a vanilla reference processor.
|
||||
ReferenceProcessor(MemRegion span,
|
||||
bool mt_processing = false, uint mt_processing_degree = 1,
|
||||
|
|
|
@ -1018,7 +1018,7 @@ const Type *PhiNode::Value( PhaseTransform *phase ) const {
|
|||
!jtkp->klass_is_exact() && // Keep exact interface klass (6894807)
|
||||
ttkp->is_loaded() && !ttkp->klass()->is_interface() ) {
|
||||
assert(ft == ttkp->cast_to_ptr_type(jtkp->ptr()) ||
|
||||
ft->isa_narrowoop() && ft->make_ptr() == ttkp->cast_to_ptr_type(jtkp->ptr()), "");
|
||||
ft->isa_narrowklass() && ft->make_ptr() == ttkp->cast_to_ptr_type(jtkp->ptr()), "");
|
||||
jt = ft;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -304,6 +304,7 @@ class LibraryCallKit : public GraphKit {
|
|||
bool inline_cipherBlockChaining_AESCrypt(vmIntrinsics::ID id);
|
||||
Node* inline_cipherBlockChaining_AESCrypt_predicate(bool decrypting);
|
||||
Node* get_key_start_from_aescrypt_object(Node* aescrypt_object);
|
||||
Node* get_original_key_start_from_aescrypt_object(Node* aescrypt_object);
|
||||
bool inline_encodeISOArray();
|
||||
bool inline_updateCRC32();
|
||||
bool inline_updateBytesCRC32();
|
||||
|
@ -5936,10 +5937,22 @@ bool LibraryCallKit::inline_aescrypt_Block(vmIntrinsics::ID id) {
|
|||
Node* k_start = get_key_start_from_aescrypt_object(aescrypt_object);
|
||||
if (k_start == NULL) return false;
|
||||
|
||||
// Call the stub.
|
||||
make_runtime_call(RC_LEAF|RC_NO_FP, OptoRuntime::aescrypt_block_Type(),
|
||||
stubAddr, stubName, TypePtr::BOTTOM,
|
||||
src_start, dest_start, k_start);
|
||||
if (Matcher::pass_original_key_for_aes()) {
|
||||
// on SPARC we need to pass the original key since key expansion needs to happen in intrinsics due to
|
||||
// compatibility issues between Java key expansion and SPARC crypto instructions
|
||||
Node* original_k_start = get_original_key_start_from_aescrypt_object(aescrypt_object);
|
||||
if (original_k_start == NULL) return false;
|
||||
|
||||
// Call the stub.
|
||||
make_runtime_call(RC_LEAF|RC_NO_FP, OptoRuntime::aescrypt_block_Type(),
|
||||
stubAddr, stubName, TypePtr::BOTTOM,
|
||||
src_start, dest_start, k_start, original_k_start);
|
||||
} else {
|
||||
// Call the stub.
|
||||
make_runtime_call(RC_LEAF|RC_NO_FP, OptoRuntime::aescrypt_block_Type(),
|
||||
stubAddr, stubName, TypePtr::BOTTOM,
|
||||
src_start, dest_start, k_start);
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
@ -6017,14 +6030,29 @@ bool LibraryCallKit::inline_cipherBlockChaining_AESCrypt(vmIntrinsics::ID id) {
|
|||
if (objRvec == NULL) return false;
|
||||
Node* r_start = array_element_address(objRvec, intcon(0), T_BYTE);
|
||||
|
||||
// Call the stub, passing src_start, dest_start, k_start, r_start and src_len
|
||||
make_runtime_call(RC_LEAF|RC_NO_FP,
|
||||
OptoRuntime::cipherBlockChaining_aescrypt_Type(),
|
||||
stubAddr, stubName, TypePtr::BOTTOM,
|
||||
src_start, dest_start, k_start, r_start, len);
|
||||
Node* cbcCrypt;
|
||||
if (Matcher::pass_original_key_for_aes()) {
|
||||
// on SPARC we need to pass the original key since key expansion needs to happen in intrinsics due to
|
||||
// compatibility issues between Java key expansion and SPARC crypto instructions
|
||||
Node* original_k_start = get_original_key_start_from_aescrypt_object(aescrypt_object);
|
||||
if (original_k_start == NULL) return false;
|
||||
|
||||
// return is void so no result needs to be pushed
|
||||
// Call the stub, passing src_start, dest_start, k_start, r_start, src_len and original_k_start
|
||||
cbcCrypt = make_runtime_call(RC_LEAF|RC_NO_FP,
|
||||
OptoRuntime::cipherBlockChaining_aescrypt_Type(),
|
||||
stubAddr, stubName, TypePtr::BOTTOM,
|
||||
src_start, dest_start, k_start, r_start, len, original_k_start);
|
||||
} else {
|
||||
// Call the stub, passing src_start, dest_start, k_start, r_start and src_len
|
||||
cbcCrypt = make_runtime_call(RC_LEAF|RC_NO_FP,
|
||||
OptoRuntime::cipherBlockChaining_aescrypt_Type(),
|
||||
stubAddr, stubName, TypePtr::BOTTOM,
|
||||
src_start, dest_start, k_start, r_start, len);
|
||||
}
|
||||
|
||||
// return cipher length (int)
|
||||
Node* retvalue = _gvn.transform(new (C) ProjNode(cbcCrypt, TypeFunc::Parms));
|
||||
set_result(retvalue);
|
||||
return true;
|
||||
}
|
||||
|
||||
|
@ -6039,6 +6067,17 @@ Node * LibraryCallKit::get_key_start_from_aescrypt_object(Node *aescrypt_object)
|
|||
return k_start;
|
||||
}
|
||||
|
||||
//------------------------------get_original_key_start_from_aescrypt_object-----------------------
|
||||
Node * LibraryCallKit::get_original_key_start_from_aescrypt_object(Node *aescrypt_object) {
|
||||
Node* objAESCryptKey = load_field_from_object(aescrypt_object, "lastKey", "[B", /*is_exact*/ false);
|
||||
assert (objAESCryptKey != NULL, "wrong version of com.sun.crypto.provider.AESCrypt");
|
||||
if (objAESCryptKey == NULL) return (Node *) NULL;
|
||||
|
||||
// now have the array, need to get the start address of the lastKey array
|
||||
Node* original_k_start = array_element_address(objAESCryptKey, intcon(0), T_BYTE);
|
||||
return original_k_start;
|
||||
}
|
||||
|
||||
//----------------------------inline_cipherBlockChaining_AESCrypt_predicate----------------------------
|
||||
// Return node representing slow path of predicate check.
|
||||
// the pseudo code we want to emulate with this predicate is:
|
||||
|
|
|
@ -286,6 +286,9 @@ public:
|
|||
// CPU supports misaligned vectors store/load.
|
||||
static const bool misaligned_vectors_ok();
|
||||
|
||||
// Should original key array reference be passed to AES stubs
|
||||
static const bool pass_original_key_for_aes();
|
||||
|
||||
// Used to determine a "low complexity" 64-bit constant. (Zero is simple.)
|
||||
// The standard of comparison is one (StoreL ConL) vs. two (StoreI ConI).
|
||||
// Depends on the details of 64-bit constant generation on the CPU.
|
||||
|
|
|
@ -814,12 +814,18 @@ const TypeFunc* OptoRuntime::array_fill_Type() {
|
|||
const TypeFunc* OptoRuntime::aescrypt_block_Type() {
|
||||
// create input type (domain)
|
||||
int num_args = 3;
|
||||
if (Matcher::pass_original_key_for_aes()) {
|
||||
num_args = 4;
|
||||
}
|
||||
int argcnt = num_args;
|
||||
const Type** fields = TypeTuple::fields(argcnt);
|
||||
int argp = TypeFunc::Parms;
|
||||
fields[argp++] = TypePtr::NOTNULL; // src
|
||||
fields[argp++] = TypePtr::NOTNULL; // dest
|
||||
fields[argp++] = TypePtr::NOTNULL; // k array
|
||||
if (Matcher::pass_original_key_for_aes()) {
|
||||
fields[argp++] = TypePtr::NOTNULL; // original k array
|
||||
}
|
||||
assert(argp == TypeFunc::Parms+argcnt, "correct decoding");
|
||||
const TypeTuple* domain = TypeTuple::make(TypeFunc::Parms+argcnt, fields);
|
||||
|
||||
|
@ -856,6 +862,9 @@ const TypeFunc* OptoRuntime::updateBytesCRC32_Type() {
|
|||
const TypeFunc* OptoRuntime::cipherBlockChaining_aescrypt_Type() {
|
||||
// create input type (domain)
|
||||
int num_args = 5;
|
||||
if (Matcher::pass_original_key_for_aes()) {
|
||||
num_args = 6;
|
||||
}
|
||||
int argcnt = num_args;
|
||||
const Type** fields = TypeTuple::fields(argcnt);
|
||||
int argp = TypeFunc::Parms;
|
||||
|
@ -864,13 +873,16 @@ const TypeFunc* OptoRuntime::cipherBlockChaining_aescrypt_Type() {
|
|||
fields[argp++] = TypePtr::NOTNULL; // k array
|
||||
fields[argp++] = TypePtr::NOTNULL; // r array
|
||||
fields[argp++] = TypeInt::INT; // src len
|
||||
if (Matcher::pass_original_key_for_aes()) {
|
||||
fields[argp++] = TypePtr::NOTNULL; // original k array
|
||||
}
|
||||
assert(argp == TypeFunc::Parms+argcnt, "correct decoding");
|
||||
const TypeTuple* domain = TypeTuple::make(TypeFunc::Parms+argcnt, fields);
|
||||
|
||||
// no result type needed
|
||||
// returning cipher len (int)
|
||||
fields = TypeTuple::fields(1);
|
||||
fields[TypeFunc::Parms+0] = NULL; // void
|
||||
const TypeTuple* range = TypeTuple::make(TypeFunc::Parms, fields);
|
||||
fields[TypeFunc::Parms+0] = TypeInt::INT;
|
||||
const TypeTuple* range = TypeTuple::make(TypeFunc::Parms+1, fields);
|
||||
return TypeFunc::make(domain, range);
|
||||
}
|
||||
|
||||
|
|
|
@ -147,6 +147,9 @@ void VM_RedefineClasses::doit() {
|
|||
_scratch_classes[i] = NULL;
|
||||
}
|
||||
|
||||
// Disable any dependent concurrent compilations
|
||||
SystemDictionary::notice_modification();
|
||||
|
||||
// Set flag indicating that some invariants are no longer true.
|
||||
// See jvmtiExport.hpp for detailed explanation.
|
||||
JvmtiExport::set_has_redefined_a_class();
|
||||
|
|
|
@ -3727,10 +3727,6 @@ jint Arguments::apply_ergo() {
|
|||
// Doing the replace in parent maps helps speculation
|
||||
FLAG_SET_DEFAULT(ReplaceInParentMaps, true);
|
||||
}
|
||||
#ifndef X86
|
||||
// Only on x86 for now
|
||||
FLAG_SET_DEFAULT(TypeProfileLevel, 0);
|
||||
#endif
|
||||
#endif
|
||||
|
||||
if (PrintAssembly && FLAG_IS_DEFAULT(DebugNonSafepoints)) {
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
* Copyright (c) 2012, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2012, 2014 Oracle and/or its affiliates. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
|
@ -39,20 +39,32 @@ public class TestAESMain {
|
|||
System.out.println(iters + " iterations");
|
||||
TestAESEncode etest = new TestAESEncode();
|
||||
etest.prepare();
|
||||
// warm-up for 20K iterations
|
||||
System.out.println("Starting encryption warm-up");
|
||||
for (int i=0; i<20000; i++) {
|
||||
etest.run();
|
||||
}
|
||||
System.out.println("Finished encryption warm-up");
|
||||
long start = System.nanoTime();
|
||||
for (int i=0; i<iters; i++) {
|
||||
etest.run();
|
||||
}
|
||||
long end = System.nanoTime();
|
||||
System.out.println("TestAESEncode runtime was " + (double)((end - start)/1000000000.0) + " ms");
|
||||
System.out.println("TestAESEncode runtime was " + (double)((end - start)/1000000.0) + " ms");
|
||||
|
||||
TestAESDecode dtest = new TestAESDecode();
|
||||
dtest.prepare();
|
||||
// warm-up for 20K iterations
|
||||
System.out.println("Starting decryption warm-up");
|
||||
for (int i=0; i<20000; i++) {
|
||||
dtest.run();
|
||||
}
|
||||
System.out.println("Finished decryption warm-up");
|
||||
start = System.nanoTime();
|
||||
for (int i=0; i<iters; i++) {
|
||||
dtest.run();
|
||||
}
|
||||
end = System.nanoTime();
|
||||
System.out.println("TestAESDecode runtime was " + (double)((end - start)/1000000000.0) + " ms");
|
||||
System.out.println("TestAESDecode runtime was " + (double)((end - start)/1000000.0) + " ms");
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue