/*
 * Copyright (c) 2003, 2022, Oracle and/or its affiliates. All rights reserved.
 * Copyright (c) 2014, 2020, Red Hat Inc. All rights reserved.
 * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 */

#include "precompiled.hpp"
#include "asm/macroAssembler.hpp"
#include "asm/macroAssembler.inline.hpp"
#include "compiler/oopMap.hpp"
#include "gc/shared/barrierSet.hpp"
#include "gc/shared/barrierSetAssembler.hpp"
#include "interpreter/interpreter.hpp"
#include "memory/universe.hpp"
#include "nativeInst_riscv.hpp"
#include "oops/instanceOop.hpp"
#include "oops/method.hpp"
#include "oops/objArrayKlass.hpp"
#include "oops/oop.inline.hpp"
#include "prims/methodHandles.hpp"
#include "runtime/frame.inline.hpp"
#include "runtime/handles.inline.hpp"
#include "runtime/javaThread.hpp"
#include "runtime/sharedRuntime.hpp"
#include "runtime/stubCodeGenerator.hpp"
#include "runtime/stubRoutines.hpp"
#include "utilities/align.hpp"
#include "utilities/powerOfTwo.hpp"
#ifdef COMPILER2
#include "opto/runtime.hpp"
#endif
#if INCLUDE_ZGC
#include "gc/z/zThreadLocalData.hpp"
#endif

// Declaration and definition of StubGenerator (no .hpp file).
// For a more detailed description of the stub routine structure
// see the comment in stubRoutines.hpp

#undef __
#define __ _masm->

#ifdef PRODUCT
#define BLOCK_COMMENT(str) /* nothing */
#else
#define BLOCK_COMMENT(str) __ block_comment(str)
#endif

#define BIND(label) bind(label); BLOCK_COMMENT(#label ":")

// Stub Code definitions

class StubGenerator: public StubCodeGenerator {
 private:

#ifdef PRODUCT
#define inc_counter_np(counter) ((void)0)
#else
  void inc_counter_np_(int& counter) {
    __ la(t1, ExternalAddress((address)&counter));
    __ lwu(t0, Address(t1, 0));
    __ addiw(t0, t0, 1);
    __ sw(t0, Address(t1, 0));
  }
#define inc_counter_np(counter) \
  BLOCK_COMMENT("inc_counter " #counter); \
  inc_counter_np_(counter);
#endif
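
  // Note: inc_counter_np_ bumps the counter with a plain lwu/addiw/sw
  // sequence, so concurrent updates may be lost; these counters are
  // only best-effort statistics.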
|
|
|
|
// Call stubs are used to call Java from C
|
|
//
|
|
// Arguments:
|
|
// c_rarg0: call wrapper address address
|
|
// c_rarg1: result address
|
|
// c_rarg2: result type BasicType
|
|
// c_rarg3: method Method*
|
|
// c_rarg4: (interpreter) entry point address
|
|
// c_rarg5: parameters intptr_t*
|
|
// c_rarg6: parameter size (in words) int
|
|
// c_rarg7: thread Thread*
|
|
//
|
|
// There is no return from the stub itself as any Java result
|
|
// is written to result
|
|
//
|
|
  // we save x1 (ra) as the return PC at the base of the frame and save
  // x8 (fp) immediately below it; fp is then set to the incoming sp (x2)
  // so it serves as the frame pointer for this frame.
|
|
//
|
|
// we save x10-x17, which accounts for all the c arguments.
|
|
//
|
|
// TODO: strictly do we need to save them all? they are treated as
|
|
// volatile by C so could we omit saving the ones we are going to
|
|
// place in global registers (thread? method?) or those we only use
|
|
// during setup of the Java call?
|
|
//
|
|
// we don't need to save x5 which C uses as an indirect result location
|
|
// return register.
|
|
//
|
|
// we don't need to save x6-x7 and x28-x31 which both C and Java treat as
|
|
// volatile
|
|
//
|
|
// we save x9, x18-x27, f8-f9, and f18-f27 which Java uses as temporary
|
|
// registers and C expects to be callee-save
|
|
//
|
|
// so the stub frame looks like this when we enter Java code
|
|
//
|
|
// [ return_from_Java ] <--- sp
|
|
// [ argument word n ]
|
|
// ...
|
|
// -34 [ argument word 1 ]
|
|
// -33 [ saved f27 ] <--- sp_after_call
|
|
// -32 [ saved f26 ]
|
|
// -31 [ saved f25 ]
|
|
// -30 [ saved f24 ]
|
|
// -29 [ saved f23 ]
|
|
// -28 [ saved f22 ]
|
|
// -27 [ saved f21 ]
|
|
// -26 [ saved f20 ]
|
|
// -25 [ saved f19 ]
|
|
// -24 [ saved f18 ]
|
|
// -23 [ saved f9 ]
|
|
// -22 [ saved f8 ]
|
|
// -21 [ saved x27 ]
|
|
// -20 [ saved x26 ]
|
|
// -19 [ saved x25 ]
|
|
// -18 [ saved x24 ]
|
|
// -17 [ saved x23 ]
|
|
// -16 [ saved x22 ]
|
|
// -15 [ saved x21 ]
|
|
// -14 [ saved x20 ]
|
|
// -13 [ saved x19 ]
|
|
// -12 [ saved x18 ]
|
|
// -11 [ saved x9 ]
|
|
// -10 [ call wrapper (x10) ]
|
|
// -9 [ result (x11) ]
|
|
// -8 [ result type (x12) ]
|
|
// -7 [ method (x13) ]
|
|
// -6 [ entry point (x14) ]
|
|
// -5 [ parameters (x15) ]
|
|
// -4 [ parameter size (x16) ]
|
|
// -3 [ thread (x17) ]
|
|
// -2 [ saved fp (x8) ]
|
|
// -1 [ saved ra (x1) ]
|
|
// 0 [ ] <--- fp == saved sp (x2)
|
|
|
|
// Call stub stack layout word offsets from fp
|
|
enum call_stub_layout {
|
|
sp_after_call_off = -33,
|
|
|
|
f27_off = -33,
|
|
f26_off = -32,
|
|
f25_off = -31,
|
|
f24_off = -30,
|
|
f23_off = -29,
|
|
f22_off = -28,
|
|
f21_off = -27,
|
|
f20_off = -26,
|
|
f19_off = -25,
|
|
f18_off = -24,
|
|
f9_off = -23,
|
|
f8_off = -22,
|
|
|
|
x27_off = -21,
|
|
x26_off = -20,
|
|
x25_off = -19,
|
|
x24_off = -18,
|
|
x23_off = -17,
|
|
x22_off = -16,
|
|
x21_off = -15,
|
|
x20_off = -14,
|
|
x19_off = -13,
|
|
x18_off = -12,
|
|
x9_off = -11,
|
|
|
|
call_wrapper_off = -10,
|
|
result_off = -9,
|
|
result_type_off = -8,
|
|
method_off = -7,
|
|
entry_point_off = -6,
|
|
parameters_off = -5,
|
|
parameter_size_off = -4,
|
|
thread_off = -3,
|
|
fp_f = -2,
|
|
retaddr_off = -1,
|
|
};
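
  // Each *_off value above is a word offset relative to fp, matching the
  // frame diagram; generate_call_stub() below turns an offset into a slot
  // address as Address(fp, off * wordSize).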
|
|
|
|
address generate_call_stub(address& return_address) {
|
|
assert((int)frame::entry_frame_after_call_words == -(int)sp_after_call_off + 1 &&
|
|
(int)frame::entry_frame_call_wrapper_offset == (int)call_wrapper_off,
|
|
"adjust this code");
|
|
|
|
StubCodeMark mark(this, "StubRoutines", "call_stub");
|
|
address start = __ pc();
|
|
|
|
const Address sp_after_call (fp, sp_after_call_off * wordSize);
|
|
|
|
const Address call_wrapper (fp, call_wrapper_off * wordSize);
|
|
const Address result (fp, result_off * wordSize);
|
|
const Address result_type (fp, result_type_off * wordSize);
|
|
const Address method (fp, method_off * wordSize);
|
|
const Address entry_point (fp, entry_point_off * wordSize);
|
|
const Address parameters (fp, parameters_off * wordSize);
|
|
const Address parameter_size(fp, parameter_size_off * wordSize);
|
|
|
|
const Address thread (fp, thread_off * wordSize);
|
|
|
|
const Address f27_save (fp, f27_off * wordSize);
|
|
const Address f26_save (fp, f26_off * wordSize);
|
|
const Address f25_save (fp, f25_off * wordSize);
|
|
const Address f24_save (fp, f24_off * wordSize);
|
|
const Address f23_save (fp, f23_off * wordSize);
|
|
const Address f22_save (fp, f22_off * wordSize);
|
|
const Address f21_save (fp, f21_off * wordSize);
|
|
const Address f20_save (fp, f20_off * wordSize);
|
|
const Address f19_save (fp, f19_off * wordSize);
|
|
const Address f18_save (fp, f18_off * wordSize);
|
|
const Address f9_save (fp, f9_off * wordSize);
|
|
const Address f8_save (fp, f8_off * wordSize);
|
|
|
|
const Address x27_save (fp, x27_off * wordSize);
|
|
const Address x26_save (fp, x26_off * wordSize);
|
|
const Address x25_save (fp, x25_off * wordSize);
|
|
const Address x24_save (fp, x24_off * wordSize);
|
|
const Address x23_save (fp, x23_off * wordSize);
|
|
const Address x22_save (fp, x22_off * wordSize);
|
|
const Address x21_save (fp, x21_off * wordSize);
|
|
const Address x20_save (fp, x20_off * wordSize);
|
|
const Address x19_save (fp, x19_off * wordSize);
|
|
const Address x18_save (fp, x18_off * wordSize);
|
|
|
|
const Address x9_save (fp, x9_off * wordSize);
|
|
|
|
// stub code
|
|
|
|
address riscv_entry = __ pc();
|
|
|
|
// set up frame and move sp to end of save area
|
|
__ enter();
|
|
__ addi(sp, fp, sp_after_call_off * wordSize);
|
|
|
|
// save register parameters and Java temporary/global registers
|
|
// n.b. we save thread even though it gets installed in
|
|
// xthread because we want to sanity check tp later
|
|
__ sd(c_rarg7, thread);
|
|
__ sw(c_rarg6, parameter_size);
|
|
__ sd(c_rarg5, parameters);
|
|
__ sd(c_rarg4, entry_point);
|
|
__ sd(c_rarg3, method);
|
|
__ sd(c_rarg2, result_type);
|
|
__ sd(c_rarg1, result);
|
|
__ sd(c_rarg0, call_wrapper);
|
|
|
|
__ sd(x9, x9_save);
|
|
|
|
__ sd(x18, x18_save);
|
|
__ sd(x19, x19_save);
|
|
__ sd(x20, x20_save);
|
|
__ sd(x21, x21_save);
|
|
__ sd(x22, x22_save);
|
|
__ sd(x23, x23_save);
|
|
__ sd(x24, x24_save);
|
|
__ sd(x25, x25_save);
|
|
__ sd(x26, x26_save);
|
|
__ sd(x27, x27_save);
|
|
|
|
__ fsd(f8, f8_save);
|
|
__ fsd(f9, f9_save);
|
|
__ fsd(f18, f18_save);
|
|
__ fsd(f19, f19_save);
|
|
__ fsd(f20, f20_save);
|
|
__ fsd(f21, f21_save);
|
|
__ fsd(f22, f22_save);
|
|
__ fsd(f23, f23_save);
|
|
__ fsd(f24, f24_save);
|
|
__ fsd(f25, f25_save);
|
|
__ fsd(f26, f26_save);
|
|
__ fsd(f27, f27_save);
|
|
|
|
// install Java thread in global register now we have saved
|
|
// whatever value it held
|
|
__ mv(xthread, c_rarg7);
|
|
|
|
// And method
|
|
__ mv(xmethod, c_rarg3);
|
|
|
|
// set up the heapbase register
|
|
__ reinit_heapbase();
|
|
|
|
#ifdef ASSERT
|
|
// make sure we have no pending exceptions
|
|
{
|
|
Label L;
|
|
__ ld(t0, Address(xthread, in_bytes(Thread::pending_exception_offset())));
|
|
__ beqz(t0, L);
|
|
__ stop("StubRoutines::call_stub: entered with pending exception");
|
|
__ BIND(L);
|
|
}
|
|
#endif
|
|
// pass parameters if any
|
|
__ mv(esp, sp);
|
|
__ slli(t0, c_rarg6, LogBytesPerWord);
|
|
__ sub(t0, sp, t0); // Move SP out of the way
|
|
__ andi(sp, t0, -2 * wordSize);
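    // esp records the current sp; sp is then moved down by parameter_size
    // words (c_rarg6 << LogBytesPerWord) and rounded down to a 16-byte
    // boundary (andi with -2 * wordSize, i.e. -16 on RV64).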
|
|
|
|
BLOCK_COMMENT("pass parameters if any");
|
|
Label parameters_done;
|
|
// parameter count is still in c_rarg6
|
|
// and parameter pointer identifying param 1 is in c_rarg5
|
|
__ beqz(c_rarg6, parameters_done);
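
    // The loop below walks the parameter array at c_rarg5 from the first
    // element, pushing each word onto the stack until c_rarg6 reaches zero.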
|
|
|
|
address loop = __ pc();
|
|
__ ld(t0, c_rarg5, 0);
|
|
__ addi(c_rarg5, c_rarg5, wordSize);
|
|
__ addi(c_rarg6, c_rarg6, -1);
|
|
__ push_reg(t0);
|
|
__ bgtz(c_rarg6, loop);
|
|
|
|
__ BIND(parameters_done);
|
|
|
|
    // call Java entry -- passing the Method* and the current sp
|
|
// xmethod: Method*
|
|
// x19_sender_sp: sender sp
|
|
BLOCK_COMMENT("call Java function");
|
|
__ mv(x19_sender_sp, sp);
|
|
__ jalr(c_rarg4);
|
|
|
|
// save current address for use by exception handling code
|
|
|
|
return_address = __ pc();
|
|
|
|
// store result depending on type (everything that is not
|
|
// T_OBJECT, T_LONG, T_FLOAT or T_DOUBLE is treated as T_INT)
|
|
// n.b. this assumes Java returns an integral result in x10
|
|
// and a floating result in j_farg0
|
|
__ ld(j_rarg2, result);
|
|
Label is_long, is_float, is_double, exit;
|
|
__ ld(j_rarg1, result_type);
|
|
__ mv(t0, (u1)T_OBJECT);
|
|
__ beq(j_rarg1, t0, is_long);
|
|
__ mv(t0, (u1)T_LONG);
|
|
__ beq(j_rarg1, t0, is_long);
|
|
__ mv(t0, (u1)T_FLOAT);
|
|
__ beq(j_rarg1, t0, is_float);
|
|
__ mv(t0, (u1)T_DOUBLE);
|
|
__ beq(j_rarg1, t0, is_double);
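
    // Anything that is not T_OBJECT/T_LONG/T_FLOAT/T_DOUBLE falls through
    // to the 32-bit store below; T_OBJECT and T_LONG share is_long since
    // both are returned in x10 and stored as a full 64-bit word.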
|
|
|
|
// handle T_INT case
|
|
__ sw(x10, Address(j_rarg2));
|
|
|
|
__ BIND(exit);
|
|
|
|
// pop parameters
|
|
__ addi(esp, fp, sp_after_call_off * wordSize);
|
|
|
|
#ifdef ASSERT
|
|
// verify that threads correspond
|
|
{
|
|
Label L, S;
|
|
__ ld(t0, thread);
|
|
__ bne(xthread, t0, S);
|
|
__ get_thread(t0);
|
|
__ beq(xthread, t0, L);
|
|
__ BIND(S);
|
|
__ stop("StubRoutines::call_stub: threads must correspond");
|
|
__ BIND(L);
|
|
}
|
|
#endif
|
|
|
|
// restore callee-save registers
|
|
__ fld(f27, f27_save);
|
|
__ fld(f26, f26_save);
|
|
__ fld(f25, f25_save);
|
|
__ fld(f24, f24_save);
|
|
__ fld(f23, f23_save);
|
|
__ fld(f22, f22_save);
|
|
__ fld(f21, f21_save);
|
|
__ fld(f20, f20_save);
|
|
__ fld(f19, f19_save);
|
|
__ fld(f18, f18_save);
|
|
__ fld(f9, f9_save);
|
|
__ fld(f8, f8_save);
|
|
|
|
__ ld(x27, x27_save);
|
|
__ ld(x26, x26_save);
|
|
__ ld(x25, x25_save);
|
|
__ ld(x24, x24_save);
|
|
__ ld(x23, x23_save);
|
|
__ ld(x22, x22_save);
|
|
__ ld(x21, x21_save);
|
|
__ ld(x20, x20_save);
|
|
__ ld(x19, x19_save);
|
|
__ ld(x18, x18_save);
|
|
|
|
__ ld(x9, x9_save);
|
|
|
|
__ ld(c_rarg0, call_wrapper);
|
|
__ ld(c_rarg1, result);
|
|
__ ld(c_rarg2, result_type);
|
|
__ ld(c_rarg3, method);
|
|
__ ld(c_rarg4, entry_point);
|
|
__ ld(c_rarg5, parameters);
|
|
__ ld(c_rarg6, parameter_size);
|
|
__ ld(c_rarg7, thread);
|
|
|
|
// leave frame and return to caller
|
|
__ leave();
|
|
__ ret();
|
|
|
|
// handle return types different from T_INT
|
|
|
|
__ BIND(is_long);
|
|
__ sd(x10, Address(j_rarg2, 0));
|
|
__ j(exit);
|
|
|
|
__ BIND(is_float);
|
|
__ fsw(j_farg0, Address(j_rarg2, 0), t0);
|
|
__ j(exit);
|
|
|
|
__ BIND(is_double);
|
|
__ fsd(j_farg0, Address(j_rarg2, 0), t0);
|
|
__ j(exit);
|
|
|
|
return start;
|
|
}
|
|
|
|
// Return point for a Java call if there's an exception thrown in
|
|
// Java code. The exception is caught and transformed into a
|
|
// pending exception stored in JavaThread that can be tested from
|
|
// within the VM.
|
|
//
|
|
// Note: Usually the parameters are removed by the callee. In case
|
|
// of an exception crossing an activation frame boundary, that is
|
|
// not the case if the callee is compiled code => need to setup the
|
|
// sp.
|
|
//
|
|
// x10: exception oop
|
|
|
|
address generate_catch_exception() {
|
|
StubCodeMark mark(this, "StubRoutines", "catch_exception");
|
|
address start = __ pc();
|
|
|
|
// same as in generate_call_stub():
|
|
const Address thread(fp, thread_off * wordSize);
|
|
|
|
#ifdef ASSERT
|
|
// verify that threads correspond
|
|
{
|
|
Label L, S;
|
|
__ ld(t0, thread);
|
|
__ bne(xthread, t0, S);
|
|
__ get_thread(t0);
|
|
__ beq(xthread, t0, L);
|
|
__ bind(S);
|
|
__ stop("StubRoutines::catch_exception: threads must correspond");
|
|
__ bind(L);
|
|
}
|
|
#endif
|
|
|
|
// set pending exception
|
|
__ verify_oop(x10);
|
|
|
|
__ sd(x10, Address(xthread, Thread::pending_exception_offset()));
|
|
__ mv(t0, (address)__FILE__);
|
|
__ sd(t0, Address(xthread, Thread::exception_file_offset()));
|
|
__ mv(t0, (int)__LINE__);
|
|
__ sw(t0, Address(xthread, Thread::exception_line_offset()));
|
|
|
|
// complete return to VM
|
|
assert(StubRoutines::_call_stub_return_address != NULL,
|
|
"_call_stub_return_address must have been generated before");
|
|
__ j(StubRoutines::_call_stub_return_address);
|
|
|
|
return start;
|
|
}
|
|
|
|
// Continuation point for runtime calls returning with a pending
|
|
// exception. The pending exception check happened in the runtime
|
|
// or native call stub. The pending exception in Thread is
|
|
// converted into a Java-level exception.
|
|
//
|
|
// Contract with Java-level exception handlers:
|
|
// x10: exception
|
|
// x13: throwing pc
|
|
//
|
|
// NOTE: At entry of this stub, exception-pc must be in RA !!
|
|
|
|
// NOTE: this is always used as a jump target within generated code
|
|
  // so it just needs to be generated code with no prolog
|
|
|
|
address generate_forward_exception() {
|
|
StubCodeMark mark(this, "StubRoutines", "forward exception");
|
|
address start = __ pc();
|
|
|
|
// Upon entry, RA points to the return address returning into
|
|
// Java (interpreted or compiled) code; i.e., the return address
|
|
// becomes the throwing pc.
|
|
//
|
|
// Arguments pushed before the runtime call are still on the stack
|
|
// but the exception handler will reset the stack pointer ->
|
|
// ignore them. A potential result in registers can be ignored as
|
|
// well.
|
|
|
|
#ifdef ASSERT
|
|
// make sure this code is only executed if there is a pending exception
|
|
{
|
|
Label L;
|
|
__ ld(t0, Address(xthread, Thread::pending_exception_offset()));
|
|
__ bnez(t0, L);
|
|
__ stop("StubRoutines::forward exception: no pending exception (1)");
|
|
__ bind(L);
|
|
}
|
|
#endif
|
|
|
|
// compute exception handler into x9
|
|
|
|
// call the VM to find the handler address associated with the
|
|
// caller address. pass thread in x10 and caller pc (ret address)
|
|
// in x11. n.b. the caller pc is in ra, unlike x86 where it is on
|
|
// the stack.
|
|
__ mv(c_rarg1, ra);
|
|
// ra will be trashed by the VM call so we move it to x9
|
|
// (callee-saved) because we also need to pass it to the handler
|
|
// returned by this call.
|
|
__ mv(x9, ra);
|
|
BLOCK_COMMENT("call exception_handler_for_return_address");
|
|
__ call_VM_leaf(CAST_FROM_FN_PTR(address,
|
|
SharedRuntime::exception_handler_for_return_address),
|
|
xthread, c_rarg1);
|
|
// we should not really care that ra is no longer the callee
|
|
// address. we saved the value the handler needs in x9 so we can
|
|
// just copy it to x13. however, the C2 handler will push its own
|
|
// frame and then calls into the VM and the VM code asserts that
|
|
// the PC for the frame above the handler belongs to a compiled
|
|
// Java method. So, we restore ra here to satisfy that assert.
|
|
__ mv(ra, x9);
|
|
// setup x10 & x13 & clear pending exception
|
|
__ mv(x13, x9);
|
|
__ mv(x9, x10);
|
|
__ ld(x10, Address(xthread, Thread::pending_exception_offset()));
|
|
__ sd(zr, Address(xthread, Thread::pending_exception_offset()));
|
|
|
|
#ifdef ASSERT
|
|
// make sure exception is set
|
|
{
|
|
Label L;
|
|
__ bnez(x10, L);
|
|
__ stop("StubRoutines::forward exception: no pending exception (2)");
|
|
__ bind(L);
|
|
}
|
|
#endif
|
|
|
|
// continue at exception handler
|
|
// x10: exception
|
|
// x13: throwing pc
|
|
// x9: exception handler
|
|
__ verify_oop(x10);
|
|
__ jr(x9);
|
|
|
|
return start;
|
|
}
|
|
|
|
// Non-destructive plausibility checks for oops
|
|
//
|
|
// Arguments:
|
|
// x10: oop to verify
|
|
// t0: error message
|
|
//
|
|
// Stack after saving c_rarg3:
|
|
// [tos + 0]: saved c_rarg3
|
|
// [tos + 1]: saved c_rarg2
|
|
// [tos + 2]: saved ra
|
|
// [tos + 3]: saved t1
|
|
// [tos + 4]: saved x10
|
|
// [tos + 5]: saved t0
|
|
address generate_verify_oop() {
|
|
|
|
StubCodeMark mark(this, "StubRoutines", "verify_oop");
|
|
address start = __ pc();
|
|
|
|
Label exit, error;
|
|
|
|
__ push_reg(RegSet::of(c_rarg2, c_rarg3), sp); // save c_rarg2 and c_rarg3
|
|
|
|
__ la(c_rarg2, ExternalAddress((address) StubRoutines::verify_oop_count_addr()));
|
|
__ ld(c_rarg3, Address(c_rarg2));
|
|
__ add(c_rarg3, c_rarg3, 1);
|
|
__ sd(c_rarg3, Address(c_rarg2));
|
|
|
|
// object is in x10
|
|
// make sure object is 'reasonable'
|
|
__ beqz(x10, exit); // if obj is NULL it is OK
|
|
|
|
#if INCLUDE_ZGC
|
|
if (UseZGC) {
|
|
// Check if mask is good.
|
|
// verifies that ZAddressBadMask & x10 == 0
|
|
__ ld(c_rarg3, Address(xthread, ZThreadLocalData::address_bad_mask_offset()));
|
|
__ andr(c_rarg2, x10, c_rarg3);
|
|
__ bnez(c_rarg2, error);
|
|
}
|
|
#endif
|
|
|
|
// Check if the oop is in the right area of memory
|
|
__ mv(c_rarg3, (intptr_t) Universe::verify_oop_mask());
|
|
__ andr(c_rarg2, x10, c_rarg3);
|
|
__ mv(c_rarg3, (intptr_t) Universe::verify_oop_bits());
|
|
|
|
// Compare c_rarg2 and c_rarg3.
|
|
__ bne(c_rarg2, c_rarg3, error);
|
|
|
|
// make sure klass is 'reasonable', which is not zero.
|
|
__ load_klass(x10, x10); // get klass
|
|
__ beqz(x10, error); // if klass is NULL it is broken
|
|
|
|
// return if everything seems ok
|
|
__ bind(exit);
|
|
|
|
__ pop_reg(RegSet::of(c_rarg2, c_rarg3), sp); // pop c_rarg2 and c_rarg3
|
|
__ ret();
|
|
|
|
// handle errors
|
|
__ bind(error);
|
|
__ pop_reg(RegSet::of(c_rarg2, c_rarg3), sp); // pop c_rarg2 and c_rarg3
|
|
|
|
__ push_reg(RegSet::range(x0, x31), sp);
|
|
// debug(char* msg, int64_t pc, int64_t regs[])
|
|
__ mv(c_rarg0, t0); // pass address of error message
|
|
__ mv(c_rarg1, ra); // pass return address
|
|
__ mv(c_rarg2, sp); // pass address of regs on stack
|
|
#ifndef PRODUCT
|
|
assert(frame::arg_reg_save_area_bytes == 0, "not expecting frame reg save area");
|
|
#endif
|
|
BLOCK_COMMENT("call MacroAssembler::debug");
|
|
int32_t offset = 0;
|
|
__ movptr(t0, CAST_FROM_FN_PTR(address, MacroAssembler::debug64), offset);
|
|
__ jalr(x1, t0, offset);
|
|
__ ebreak();
|
|
|
|
return start;
|
|
}
|
|
|
|
// The inner part of zero_words().
|
|
//
|
|
// Inputs:
|
|
// x28: the HeapWord-aligned base address of an array to zero.
|
|
// x29: the count in HeapWords, x29 > 0.
|
|
//
|
|
// Returns x28 and x29, adjusted for the caller to clear.
|
|
// x28: the base address of the tail of words left to clear.
|
|
// x29: the number of words in the tail.
|
|
// x29 < MacroAssembler::zero_words_block_size.
|
|
|
|
address generate_zero_blocks() {
|
|
Label done;
|
|
|
|
const Register base = x28, cnt = x29;
|
|
|
|
__ align(CodeEntryAlignment);
|
|
StubCodeMark mark(this, "StubRoutines", "zero_blocks");
|
|
address start = __ pc();
|
|
|
|
{
|
|
// Clear the remaining blocks.
|
|
Label loop;
|
|
__ sub(cnt, cnt, MacroAssembler::zero_words_block_size);
|
|
__ bltz(cnt, done);
|
|
__ bind(loop);
|
|
for (int i = 0; i < MacroAssembler::zero_words_block_size; i++) {
|
|
__ sd(zr, Address(base, 0));
|
|
__ add(base, base, 8);
|
|
}
|
|
__ sub(cnt, cnt, MacroAssembler::zero_words_block_size);
|
|
__ bgez(cnt, loop);
|
|
__ bind(done);
|
|
__ add(cnt, cnt, MacroAssembler::zero_words_block_size);
|
|
}
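
    // cnt went negative when fewer than zero_words_block_size words
    // remained, so the final add above re-biases it to the tail count
    // (0 <= cnt < zero_words_block_size) that the caller must still clear.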
|
|
|
|
__ ret();
|
|
|
|
return start;
|
|
}
|
|
|
|
  typedef enum {
    copy_forwards = 1,
    copy_backwards = -1
  } copy_direction;
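
  // The direction value is used directly as a sign on wordSize (unit =
  // wordSize * direction), so the same block-copy code can walk the
  // source and destination either upwards or downwards.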
|
|
|
|
// Bulk copy of blocks of 8 words.
|
|
//
|
|
// count is a count of words.
|
|
//
|
|
// Precondition: count >= 8
|
|
//
|
|
// Postconditions:
|
|
//
|
|
// The least significant bit of count contains the remaining count
|
|
// of words to copy. The rest of count is trash.
|
|
//
|
|
// s and d are adjusted to point to the remaining words to copy
|
|
//
|
|
void generate_copy_longs(Label &start, Register s, Register d, Register count,
|
|
copy_direction direction) {
|
|
int unit = wordSize * direction;
|
|
int bias = wordSize;
|
|
|
|
const Register tmp_reg0 = x13, tmp_reg1 = x14, tmp_reg2 = x15, tmp_reg3 = x16,
|
|
tmp_reg4 = x17, tmp_reg5 = x7, tmp_reg6 = x28, tmp_reg7 = x29;
|
|
|
|
const Register stride = x30;
|
|
|
|
assert_different_registers(t0, tmp_reg0, tmp_reg1, tmp_reg2, tmp_reg3,
|
|
tmp_reg4, tmp_reg5, tmp_reg6, tmp_reg7);
|
|
assert_different_registers(s, d, count, t0);
|
|
|
|
Label again, drain;
|
|
const char* stub_name = NULL;
|
|
if (direction == copy_forwards) {
|
|
stub_name = "forward_copy_longs";
|
|
} else {
|
|
stub_name = "backward_copy_longs";
|
|
}
|
|
StubCodeMark mark(this, "StubRoutines", stub_name);
|
|
__ align(CodeEntryAlignment);
|
|
__ bind(start);
|
|
|
|
if (direction == copy_forwards) {
|
|
__ sub(s, s, bias);
|
|
__ sub(d, d, bias);
|
|
}
|
|
|
|
#ifdef ASSERT
|
|
// Make sure we are never given < 8 words
|
|
{
|
|
Label L;
|
|
|
|
__ mv(t0, 8);
|
|
__ bge(count, t0, L);
|
|
__ stop("genrate_copy_longs called with < 8 words");
|
|
__ bind(L);
|
|
}
|
|
#endif
|
|
|
|
__ ld(tmp_reg0, Address(s, 1 * unit));
|
|
__ ld(tmp_reg1, Address(s, 2 * unit));
|
|
__ ld(tmp_reg2, Address(s, 3 * unit));
|
|
__ ld(tmp_reg3, Address(s, 4 * unit));
|
|
__ ld(tmp_reg4, Address(s, 5 * unit));
|
|
__ ld(tmp_reg5, Address(s, 6 * unit));
|
|
__ ld(tmp_reg6, Address(s, 7 * unit));
|
|
__ ld(tmp_reg7, Address(s, 8 * unit));
|
|
__ addi(s, s, 8 * unit);
|
|
|
|
__ sub(count, count, 16);
|
|
__ bltz(count, drain);
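
    // Eight words are already loaded; biasing count by 16 means the main
    // loop only iterates while another full block can be prefetched, and
    // the drain code below stores the last block still held in registers.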
|
|
|
|
__ bind(again);
|
|
|
|
__ sd(tmp_reg0, Address(d, 1 * unit));
|
|
__ sd(tmp_reg1, Address(d, 2 * unit));
|
|
__ sd(tmp_reg2, Address(d, 3 * unit));
|
|
__ sd(tmp_reg3, Address(d, 4 * unit));
|
|
__ sd(tmp_reg4, Address(d, 5 * unit));
|
|
__ sd(tmp_reg5, Address(d, 6 * unit));
|
|
__ sd(tmp_reg6, Address(d, 7 * unit));
|
|
__ sd(tmp_reg7, Address(d, 8 * unit));
|
|
|
|
__ ld(tmp_reg0, Address(s, 1 * unit));
|
|
__ ld(tmp_reg1, Address(s, 2 * unit));
|
|
__ ld(tmp_reg2, Address(s, 3 * unit));
|
|
__ ld(tmp_reg3, Address(s, 4 * unit));
|
|
__ ld(tmp_reg4, Address(s, 5 * unit));
|
|
__ ld(tmp_reg5, Address(s, 6 * unit));
|
|
__ ld(tmp_reg6, Address(s, 7 * unit));
|
|
__ ld(tmp_reg7, Address(s, 8 * unit));
|
|
|
|
__ addi(s, s, 8 * unit);
|
|
__ addi(d, d, 8 * unit);
|
|
|
|
__ sub(count, count, 8);
|
|
__ bgez(count, again);
|
|
|
|
// Drain
|
|
__ bind(drain);
|
|
|
|
__ sd(tmp_reg0, Address(d, 1 * unit));
|
|
__ sd(tmp_reg1, Address(d, 2 * unit));
|
|
__ sd(tmp_reg2, Address(d, 3 * unit));
|
|
__ sd(tmp_reg3, Address(d, 4 * unit));
|
|
__ sd(tmp_reg4, Address(d, 5 * unit));
|
|
__ sd(tmp_reg5, Address(d, 6 * unit));
|
|
__ sd(tmp_reg6, Address(d, 7 * unit));
|
|
__ sd(tmp_reg7, Address(d, 8 * unit));
|
|
__ addi(d, d, 8 * unit);
|
|
|
|
{
|
|
Label L1, L2;
|
|
__ andi(t0, count, 4);
|
|
__ beqz(t0, L1);
|
|
|
|
__ ld(tmp_reg0, Address(s, 1 * unit));
|
|
__ ld(tmp_reg1, Address(s, 2 * unit));
|
|
__ ld(tmp_reg2, Address(s, 3 * unit));
|
|
__ ld(tmp_reg3, Address(s, 4 * unit));
|
|
__ addi(s, s, 4 * unit);
|
|
|
|
__ sd(tmp_reg0, Address(d, 1 * unit));
|
|
__ sd(tmp_reg1, Address(d, 2 * unit));
|
|
__ sd(tmp_reg2, Address(d, 3 * unit));
|
|
__ sd(tmp_reg3, Address(d, 4 * unit));
|
|
__ addi(d, d, 4 * unit);
|
|
|
|
__ bind(L1);
|
|
|
|
if (direction == copy_forwards) {
|
|
__ addi(s, s, bias);
|
|
__ addi(d, d, bias);
|
|
}
|
|
|
|
__ andi(t0, count, 2);
|
|
__ beqz(t0, L2);
|
|
if (direction == copy_backwards) {
|
|
__ addi(s, s, 2 * unit);
|
|
__ ld(tmp_reg0, Address(s));
|
|
__ ld(tmp_reg1, Address(s, wordSize));
|
|
__ addi(d, d, 2 * unit);
|
|
__ sd(tmp_reg0, Address(d));
|
|
__ sd(tmp_reg1, Address(d, wordSize));
|
|
} else {
|
|
__ ld(tmp_reg0, Address(s));
|
|
__ ld(tmp_reg1, Address(s, wordSize));
|
|
__ addi(s, s, 2 * unit);
|
|
__ sd(tmp_reg0, Address(d));
|
|
__ sd(tmp_reg1, Address(d, wordSize));
|
|
__ addi(d, d, 2 * unit);
|
|
}
|
|
__ bind(L2);
|
|
}
|
|
|
|
__ ret();
|
|
}
|
|
|
|
Label copy_f, copy_b;
|
|
|
|
// All-singing all-dancing memory copy.
|
|
//
|
|
// Copy count units of memory from s to d. The size of a unit is
|
|
// step, which can be positive or negative depending on the direction
|
|
// of copy. If is_aligned is false, we align the source address.
|
|
//
|
|
  /*
   * if (is_aligned) {
   *   goto copy_8_bytes;
   * }
   * bool is_backwards = step < 0;
   * int granularity = uabs(step);
   * count = count * granularity;   * count bytes
   *
   * if (is_backwards) {
   *   s += count;
   *   d += count;
   * }
   *
   * the count limit may be greater than 16, for better performance
   * if (count < 16) {
   *   goto copy_small;
   * }
   *
   * if ((dst % 8) == (src % 8)) {
   *   aligned;
   *   goto copy8;
   * }
   *
   * copy_small:
   *   load element one by one;
   * done;
   */
|
|
|
|
typedef void (MacroAssembler::*copy_insn)(Register Rd, const Address &adr, Register temp);
|
|
|
|
void copy_memory_v(Register s, Register d, Register count, Register tmp, int step) {
|
|
bool is_backward = step < 0;
|
|
int granularity = uabs(step);
|
|
|
|
const Register src = x30, dst = x31, vl = x14, cnt = x15, tmp1 = x16, tmp2 = x17;
|
|
assert_different_registers(s, d, cnt, vl, tmp, tmp1, tmp2);
|
|
Assembler::SEW sew = Assembler::elembytes_to_sew(granularity);
|
|
Label loop_forward, loop_backward, done;
|
|
|
|
__ mv(dst, d);
|
|
__ mv(src, s);
|
|
__ mv(cnt, count);
|
|
|
|
__ bind(loop_forward);
|
|
__ vsetvli(vl, cnt, sew, Assembler::m8);
|
|
if (is_backward) {
|
|
__ bne(vl, cnt, loop_backward);
|
|
}
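
    // A backward (overlapping) copy can only use this forward strip loop
    // while the whole remaining range fits in one strip (vl == cnt);
    // otherwise loop_backward copies strips from the tail towards the
    // front so the destination never overwrites unread source data.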
|
|
|
|
__ vlex_v(v0, src, sew);
|
|
__ sub(cnt, cnt, vl);
|
|
__ slli(vl, vl, (int)sew);
|
|
__ add(src, src, vl);
|
|
|
|
__ vsex_v(v0, dst, sew);
|
|
__ add(dst, dst, vl);
|
|
__ bnez(cnt, loop_forward);
|
|
|
|
if (is_backward) {
|
|
__ j(done);
|
|
|
|
__ bind(loop_backward);
|
|
__ sub(tmp, cnt, vl);
|
|
__ slli(tmp, tmp, sew);
|
|
__ add(tmp1, s, tmp);
|
|
__ vlex_v(v0, tmp1, sew);
|
|
__ add(tmp2, d, tmp);
|
|
__ vsex_v(v0, tmp2, sew);
|
|
__ sub(cnt, cnt, vl);
|
|
__ bnez(cnt, loop_forward);
|
|
__ bind(done);
|
|
}
|
|
}
|
|
|
|
void copy_memory(bool is_aligned, Register s, Register d,
|
|
Register count, Register tmp, int step) {
|
|
if (UseRVV) {
|
|
return copy_memory_v(s, d, count, tmp, step);
|
|
}
|
|
|
|
bool is_backwards = step < 0;
|
|
int granularity = uabs(step);
|
|
|
|
const Register src = x30, dst = x31, cnt = x15, tmp3 = x16, tmp4 = x17;
|
|
|
|
Label same_aligned;
|
|
Label copy8, copy_small, done;
|
|
|
|
copy_insn ld_arr = NULL, st_arr = NULL;
|
|
switch (granularity) {
|
|
case 1 :
|
|
ld_arr = (copy_insn)&MacroAssembler::lbu;
|
|
st_arr = (copy_insn)&MacroAssembler::sb;
|
|
break;
|
|
case 2 :
|
|
ld_arr = (copy_insn)&MacroAssembler::lhu;
|
|
st_arr = (copy_insn)&MacroAssembler::sh;
|
|
break;
|
|
case 4 :
|
|
ld_arr = (copy_insn)&MacroAssembler::lwu;
|
|
st_arr = (copy_insn)&MacroAssembler::sw;
|
|
break;
|
|
case 8 :
|
|
ld_arr = (copy_insn)&MacroAssembler::ld;
|
|
st_arr = (copy_insn)&MacroAssembler::sd;
|
|
break;
|
|
default :
|
|
ShouldNotReachHere();
|
|
}
|
|
|
|
__ beqz(count, done);
|
|
__ slli(cnt, count, exact_log2(granularity));
|
|
if (is_backwards) {
|
|
__ add(src, s, cnt);
|
|
__ add(dst, d, cnt);
|
|
} else {
|
|
__ mv(src, s);
|
|
__ mv(dst, d);
|
|
}
|
|
|
|
if (is_aligned) {
|
|
__ addi(tmp, cnt, -8);
|
|
__ bgez(tmp, copy8);
|
|
__ j(copy_small);
|
|
}
|
|
|
|
__ mv(tmp, 16);
|
|
__ blt(cnt, tmp, copy_small);
|
|
|
|
__ xorr(tmp, src, dst);
|
|
__ andi(tmp, tmp, 0b111);
|
|
__ bnez(tmp, copy_small);
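
    // src and dst have the same alignment modulo 8 here; same_aligned
    // copies single elements until src reaches an 8-byte boundary, after
    // which copy8 moves whole words.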
|
|
|
|
__ bind(same_aligned);
|
|
__ andi(tmp, src, 0b111);
|
|
__ beqz(tmp, copy8);
|
|
if (is_backwards) {
|
|
__ addi(src, src, step);
|
|
__ addi(dst, dst, step);
|
|
}
|
|
(_masm->*ld_arr)(tmp3, Address(src), t0);
|
|
(_masm->*st_arr)(tmp3, Address(dst), t0);
|
|
if (!is_backwards) {
|
|
__ addi(src, src, step);
|
|
__ addi(dst, dst, step);
|
|
}
|
|
__ addi(cnt, cnt, -granularity);
|
|
__ beqz(cnt, done);
|
|
__ j(same_aligned);
|
|
|
|
__ bind(copy8);
|
|
if (is_backwards) {
|
|
__ addi(src, src, -wordSize);
|
|
__ addi(dst, dst, -wordSize);
|
|
}
|
|
__ ld(tmp3, Address(src));
|
|
__ sd(tmp3, Address(dst));
|
|
if (!is_backwards) {
|
|
__ addi(src, src, wordSize);
|
|
__ addi(dst, dst, wordSize);
|
|
}
|
|
__ addi(cnt, cnt, -wordSize);
|
|
__ addi(tmp4, cnt, -8);
|
|
__ bgez(tmp4, copy8); // cnt >= 8, do next loop
|
|
|
|
__ beqz(cnt, done);
|
|
|
|
__ bind(copy_small);
|
|
if (is_backwards) {
|
|
__ addi(src, src, step);
|
|
__ addi(dst, dst, step);
|
|
}
|
|
(_masm->*ld_arr)(tmp3, Address(src), t0);
|
|
(_masm->*st_arr)(tmp3, Address(dst), t0);
|
|
if (!is_backwards) {
|
|
__ addi(src, src, step);
|
|
__ addi(dst, dst, step);
|
|
}
|
|
__ addi(cnt, cnt, -granularity);
|
|
__ bgtz(cnt, copy_small);
|
|
|
|
__ bind(done);
|
|
}
|
|
|
|
// Scan over array at a for count oops, verifying each one.
|
|
// Preserves a and count, clobbers t0 and t1.
|
|
void verify_oop_array(size_t size, Register a, Register count, Register temp) {
|
|
Label loop, end;
|
|
__ mv(t1, zr);
|
|
__ slli(t0, count, exact_log2(size));
|
|
__ bind(loop);
|
|
__ bgeu(t1, t0, end);
|
|
|
|
__ add(temp, a, t1);
|
|
if (size == (size_t)wordSize) {
|
|
__ ld(temp, Address(temp, 0));
|
|
__ verify_oop(temp);
|
|
} else {
|
|
__ lwu(temp, Address(temp, 0));
|
|
__ decode_heap_oop(temp); // calls verify_oop
|
|
}
|
|
__ add(t1, t1, size);
|
|
__ j(loop);
|
|
__ bind(end);
|
|
}
|
|
|
|
// Arguments:
|
|
// aligned - true => Input and output aligned on a HeapWord == 8-byte boundary
|
|
// ignored
|
|
// is_oop - true => oop array, so generate store check code
|
|
// name - stub name string
|
|
//
|
|
// Inputs:
|
|
// c_rarg0 - source array address
|
|
// c_rarg1 - destination array address
|
|
// c_rarg2 - element count, treated as ssize_t, can be zero
|
|
//
|
|
// If 'from' and/or 'to' are aligned on 4-byte boundaries, we let
|
|
// the hardware handle it. The two dwords within qwords that span
|
|
// cache line boundaries will still be loaded and stored atomically.
|
|
//
|
|
// Side Effects:
|
|
// disjoint_int_copy_entry is set to the no-overlap entry point
|
|
// used by generate_conjoint_int_oop_copy().
|
|
//
|
|
address generate_disjoint_copy(size_t size, bool aligned, bool is_oop, address* entry,
|
|
const char* name, bool dest_uninitialized = false) {
|
|
const Register s = c_rarg0, d = c_rarg1, count = c_rarg2;
|
|
RegSet saved_reg = RegSet::of(s, d, count);
|
|
__ align(CodeEntryAlignment);
|
|
StubCodeMark mark(this, "StubRoutines", name);
|
|
address start = __ pc();
|
|
__ enter();
|
|
|
|
if (entry != NULL) {
|
|
*entry = __ pc();
|
|
// caller can pass a 64-bit byte count here (from Unsafe.copyMemory)
|
|
BLOCK_COMMENT("Entry:");
|
|
}
|
|
|
|
DecoratorSet decorators = IN_HEAP | IS_ARRAY | ARRAYCOPY_DISJOINT;
|
|
if (dest_uninitialized) {
|
|
decorators |= IS_DEST_UNINITIALIZED;
|
|
}
|
|
if (aligned) {
|
|
decorators |= ARRAYCOPY_ALIGNED;
|
|
}
|
|
|
|
BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler();
|
|
bs->arraycopy_prologue(_masm, decorators, is_oop, s, d, count, saved_reg);
|
|
|
|
if (is_oop) {
|
|
// save regs before copy_memory
|
|
__ push_reg(RegSet::of(d, count), sp);
|
|
}
|
|
|
|
{
|
|
// UnsafeCopyMemory page error: continue after ucm
|
|
bool add_entry = !is_oop && (!aligned || sizeof(jlong) == size);
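      // Only primitive (non-oop) copies can be reached from
      // Unsafe.copyMemory, so only those are wrapped in an
      // UnsafeCopyMemoryMark; presumably this lets a fault in the raw copy
      // be continued past the marked region rather than crashing the VM.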
|
|
UnsafeCopyMemoryMark ucmm(this, add_entry, true);
|
|
copy_memory(aligned, s, d, count, t0, size);
|
|
}
|
|
|
|
if (is_oop) {
|
|
__ pop_reg(RegSet::of(d, count), sp);
|
|
if (VerifyOops) {
|
|
verify_oop_array(size, d, count, t2);
|
|
}
|
|
}
|
|
|
|
bs->arraycopy_epilogue(_masm, decorators, is_oop, d, count, t0, RegSet());
|
|
|
|
__ leave();
|
|
__ mv(x10, zr); // return 0
|
|
__ ret();
|
|
return start;
|
|
}
|
|
|
|
// Arguments:
|
|
// aligned - true => Input and output aligned on a HeapWord == 8-byte boundary
|
|
// ignored
|
|
// is_oop - true => oop array, so generate store check code
|
|
// name - stub name string
|
|
//
|
|
// Inputs:
|
|
// c_rarg0 - source array address
|
|
// c_rarg1 - destination array address
|
|
// c_rarg2 - element count, treated as ssize_t, can be zero
|
|
//
|
|
// If 'from' and/or 'to' are aligned on 4-byte boundaries, we let
|
|
// the hardware handle it. The two dwords within qwords that span
|
|
// cache line boundaries will still be loaded and stored atomically.
|
|
//
|
|
address generate_conjoint_copy(size_t size, bool aligned, bool is_oop, address nooverlap_target,
|
|
address* entry, const char* name,
|
|
bool dest_uninitialized = false) {
|
|
const Register s = c_rarg0, d = c_rarg1, count = c_rarg2;
|
|
RegSet saved_regs = RegSet::of(s, d, count);
|
|
StubCodeMark mark(this, "StubRoutines", name);
|
|
address start = __ pc();
|
|
__ enter();
|
|
|
|
if (entry != NULL) {
|
|
*entry = __ pc();
|
|
// caller can pass a 64-bit byte count here (from Unsafe.copyMemory)
|
|
BLOCK_COMMENT("Entry:");
|
|
}
|
|
|
|
// use fwd copy when (d-s) above_equal (count*size)
|
|
__ sub(t0, d, s);
|
|
__ slli(t1, count, exact_log2(size));
|
|
__ bgeu(t0, t1, nooverlap_target);
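
    // The unsigned compare also covers d < s: the subtraction wraps to a
    // large unsigned value, so any destination below the source (or at
    // least count*size above it) is treated as non-overlapping and takes
    // the forward copy.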
|
|
|
|
DecoratorSet decorators = IN_HEAP | IS_ARRAY;
|
|
if (dest_uninitialized) {
|
|
decorators |= IS_DEST_UNINITIALIZED;
|
|
}
|
|
if (aligned) {
|
|
decorators |= ARRAYCOPY_ALIGNED;
|
|
}
|
|
|
|
BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler();
|
|
bs->arraycopy_prologue(_masm, decorators, is_oop, s, d, count, saved_regs);
|
|
|
|
if (is_oop) {
|
|
// save regs before copy_memory
|
|
__ push_reg(RegSet::of(d, count), sp);
|
|
}
|
|
|
|
{
|
|
// UnsafeCopyMemory page error: continue after ucm
|
|
bool add_entry = !is_oop && (!aligned || sizeof(jlong) == size);
|
|
UnsafeCopyMemoryMark ucmm(this, add_entry, true);
|
|
copy_memory(aligned, s, d, count, t0, -size);
|
|
}
|
|
|
|
if (is_oop) {
|
|
__ pop_reg(RegSet::of(d, count), sp);
|
|
if (VerifyOops) {
|
|
verify_oop_array(size, d, count, t2);
|
|
}
|
|
}
|
|
bs->arraycopy_epilogue(_masm, decorators, is_oop, d, count, t0, RegSet());
|
|
__ leave();
|
|
__ mv(x10, zr); // return 0
|
|
__ ret();
|
|
return start;
|
|
}
|
|
|
|
// Arguments:
|
|
// aligned - true => Input and output aligned on a HeapWord == 8-byte boundary
|
|
// ignored
|
|
// name - stub name string
|
|
//
|
|
// Inputs:
|
|
// c_rarg0 - source array address
|
|
// c_rarg1 - destination array address
|
|
// c_rarg2 - element count, treated as ssize_t, can be zero
|
|
//
|
|
// If 'from' and/or 'to' are aligned on 4-, 2-, or 1-byte boundaries,
|
|
// we let the hardware handle it. The one to eight bytes within words,
|
|
// dwords or qwords that span cache line boundaries will still be loaded
|
|
// and stored atomically.
|
|
//
|
|
// Side Effects:
|
|
// disjoint_byte_copy_entry is set to the no-overlap entry point
|
|
// used by generate_conjoint_byte_copy().
|
|
//
|
|
address generate_disjoint_byte_copy(bool aligned, address* entry, const char* name) {
|
|
const bool not_oop = false;
|
|
return generate_disjoint_copy(sizeof (jbyte), aligned, not_oop, entry, name);
|
|
}
|
|
|
|
// Arguments:
|
|
// aligned - true => Input and output aligned on a HeapWord == 8-byte boundary
|
|
// ignored
|
|
// name - stub name string
|
|
//
|
|
// Inputs:
|
|
// c_rarg0 - source array address
|
|
// c_rarg1 - destination array address
|
|
// c_rarg2 - element count, treated as ssize_t, can be zero
|
|
//
|
|
// If 'from' and/or 'to' are aligned on 4-, 2-, or 1-byte boundaries,
|
|
// we let the hardware handle it. The one to eight bytes within words,
|
|
// dwords or qwords that span cache line boundaries will still be loaded
|
|
// and stored atomically.
|
|
//
|
|
address generate_conjoint_byte_copy(bool aligned, address nooverlap_target,
|
|
address* entry, const char* name) {
|
|
const bool not_oop = false;
|
|
return generate_conjoint_copy(sizeof (jbyte), aligned, not_oop, nooverlap_target, entry, name);
|
|
}
|
|
|
|
// Arguments:
|
|
// aligned - true => Input and output aligned on a HeapWord == 8-byte boundary
|
|
// ignored
|
|
// name - stub name string
|
|
//
|
|
// Inputs:
|
|
// c_rarg0 - source array address
|
|
// c_rarg1 - destination array address
|
|
// c_rarg2 - element count, treated as ssize_t, can be zero
|
|
//
|
|
// If 'from' and/or 'to' are aligned on 4- or 2-byte boundaries, we
|
|
// let the hardware handle it. The two or four words within dwords
|
|
// or qwords that span cache line boundaries will still be loaded
|
|
// and stored atomically.
|
|
//
|
|
// Side Effects:
|
|
// disjoint_short_copy_entry is set to the no-overlap entry point
|
|
// used by generate_conjoint_short_copy().
|
|
//
|
|
address generate_disjoint_short_copy(bool aligned,
|
|
address* entry, const char* name) {
|
|
const bool not_oop = false;
|
|
return generate_disjoint_copy(sizeof (jshort), aligned, not_oop, entry, name);
|
|
}
|
|
|
|
// Arguments:
|
|
// aligned - true => Input and output aligned on a HeapWord == 8-byte boundary
|
|
// ignored
|
|
// name - stub name string
|
|
//
|
|
// Inputs:
|
|
// c_rarg0 - source array address
|
|
// c_rarg1 - destination array address
|
|
// c_rarg2 - element count, treated as ssize_t, can be zero
|
|
//
|
|
// If 'from' and/or 'to' are aligned on 4- or 2-byte boundaries, we
|
|
// let the hardware handle it. The two or four words within dwords
|
|
// or qwords that span cache line boundaries will still be loaded
|
|
// and stored atomically.
|
|
//
|
|
address generate_conjoint_short_copy(bool aligned, address nooverlap_target,
|
|
address* entry, const char* name) {
|
|
const bool not_oop = false;
|
|
return generate_conjoint_copy(sizeof (jshort), aligned, not_oop, nooverlap_target, entry, name);
|
|
}
|
|
|
|
// Arguments:
|
|
// aligned - true => Input and output aligned on a HeapWord == 8-byte boundary
|
|
// ignored
|
|
// name - stub name string
|
|
//
|
|
// Inputs:
|
|
// c_rarg0 - source array address
|
|
// c_rarg1 - destination array address
|
|
// c_rarg2 - element count, treated as ssize_t, can be zero
|
|
//
|
|
// If 'from' and/or 'to' are aligned on 4-byte boundaries, we let
|
|
// the hardware handle it. The two dwords within qwords that span
|
|
// cache line boundaries will still be loaded and stored atomically.
|
|
//
|
|
// Side Effects:
|
|
// disjoint_int_copy_entry is set to the no-overlap entry point
|
|
// used by generate_conjoint_int_oop_copy().
|
|
//
|
|
address generate_disjoint_int_copy(bool aligned, address* entry,
|
|
const char* name, bool dest_uninitialized = false) {
|
|
const bool not_oop = false;
|
|
return generate_disjoint_copy(sizeof (jint), aligned, not_oop, entry, name);
|
|
}
|
|
|
|
// Arguments:
|
|
// aligned - true => Input and output aligned on a HeapWord == 8-byte boundary
|
|
// ignored
|
|
// name - stub name string
|
|
//
|
|
// Inputs:
|
|
// c_rarg0 - source array address
|
|
// c_rarg1 - destination array address
|
|
// c_rarg2 - element count, treated as ssize_t, can be zero
|
|
//
|
|
// If 'from' and/or 'to' are aligned on 4-byte boundaries, we let
|
|
// the hardware handle it. The two dwords within qwords that span
|
|
// cache line boundaries will still be loaded and stored atomically.
|
|
//
|
|
address generate_conjoint_int_copy(bool aligned, address nooverlap_target,
|
|
address* entry, const char* name,
|
|
bool dest_uninitialized = false) {
|
|
const bool not_oop = false;
|
|
return generate_conjoint_copy(sizeof (jint), aligned, not_oop, nooverlap_target, entry, name);
|
|
}
|
|
|
|
|
|
// Arguments:
|
|
// aligned - true => Input and output aligned on a HeapWord boundary == 8 bytes
|
|
// ignored
|
|
// name - stub name string
|
|
//
|
|
// Inputs:
|
|
// c_rarg0 - source array address
|
|
// c_rarg1 - destination array address
|
|
// c_rarg2 - element count, treated as size_t, can be zero
|
|
//
|
|
// Side Effects:
|
|
// disjoint_oop_copy_entry or disjoint_long_copy_entry is set to the
|
|
// no-overlap entry point used by generate_conjoint_long_oop_copy().
|
|
//
|
|
address generate_disjoint_long_copy(bool aligned, address* entry,
|
|
const char* name, bool dest_uninitialized = false) {
|
|
const bool not_oop = false;
|
|
return generate_disjoint_copy(sizeof (jlong), aligned, not_oop, entry, name);
|
|
}
|
|
|
|
// Arguments:
|
|
// aligned - true => Input and output aligned on a HeapWord boundary == 8 bytes
|
|
// ignored
|
|
// name - stub name string
|
|
//
|
|
// Inputs:
|
|
// c_rarg0 - source array address
|
|
// c_rarg1 - destination array address
|
|
// c_rarg2 - element count, treated as size_t, can be zero
|
|
//
|
|
address generate_conjoint_long_copy(bool aligned,
|
|
address nooverlap_target, address* entry,
|
|
const char* name, bool dest_uninitialized = false) {
|
|
const bool not_oop = false;
|
|
return generate_conjoint_copy(sizeof (jlong), aligned, not_oop, nooverlap_target, entry, name);
|
|
}
|
|
|
|
// Arguments:
|
|
// aligned - true => Input and output aligned on a HeapWord boundary == 8 bytes
|
|
// ignored
|
|
// name - stub name string
|
|
//
|
|
// Inputs:
|
|
// c_rarg0 - source array address
|
|
// c_rarg1 - destination array address
|
|
// c_rarg2 - element count, treated as size_t, can be zero
|
|
//
|
|
// Side Effects:
|
|
// disjoint_oop_copy_entry or disjoint_long_copy_entry is set to the
|
|
// no-overlap entry point used by generate_conjoint_long_oop_copy().
|
|
//
|
|
address generate_disjoint_oop_copy(bool aligned, address* entry,
|
|
const char* name, bool dest_uninitialized) {
|
|
const bool is_oop = true;
|
|
const size_t size = UseCompressedOops ? sizeof (jint) : sizeof (jlong);
|
|
return generate_disjoint_copy(size, aligned, is_oop, entry, name, dest_uninitialized);
|
|
}
|
|
|
|
// Arguments:
|
|
// aligned - true => Input and output aligned on a HeapWord boundary == 8 bytes
|
|
// ignored
|
|
// name - stub name string
|
|
//
|
|
// Inputs:
|
|
// c_rarg0 - source array address
|
|
// c_rarg1 - destination array address
|
|
// c_rarg2 - element count, treated as size_t, can be zero
|
|
//
|
|
address generate_conjoint_oop_copy(bool aligned,
|
|
address nooverlap_target, address* entry,
|
|
const char* name, bool dest_uninitialized) {
|
|
const bool is_oop = true;
|
|
const size_t size = UseCompressedOops ? sizeof (jint) : sizeof (jlong);
|
|
return generate_conjoint_copy(size, aligned, is_oop, nooverlap_target, entry,
|
|
name, dest_uninitialized);
|
|
}
|
|
|
|
// Helper for generating a dynamic type check.
|
|
// Smashes t0, t1.
|
|
void generate_type_check(Register sub_klass,
|
|
Register super_check_offset,
|
|
Register super_klass,
|
|
Label& L_success) {
|
|
assert_different_registers(sub_klass, super_check_offset, super_klass);
|
|
|
|
BLOCK_COMMENT("type_check:");
|
|
|
|
Label L_miss;
|
|
|
|
__ check_klass_subtype_fast_path(sub_klass, super_klass, noreg, &L_success, &L_miss, NULL, super_check_offset);
|
|
__ check_klass_subtype_slow_path(sub_klass, super_klass, noreg, noreg, &L_success, NULL);
|
|
|
|
// Fall through on failure!
|
|
__ BIND(L_miss);
|
|
}
|
|
|
|
//
|
|
// Generate checkcasting array copy stub
|
|
//
|
|
// Input:
|
|
// c_rarg0 - source array address
|
|
// c_rarg1 - destination array address
|
|
// c_rarg2 - element count, treated as ssize_t, can be zero
|
|
// c_rarg3 - size_t ckoff (super_check_offset)
|
|
// c_rarg4 - oop ckval (super_klass)
|
|
//
|
|
// Output:
|
|
// x10 == 0 - success
|
|
// x10 == -1^K - failure, where K is partial transfer count
|
|
//
|
|
address generate_checkcast_copy(const char* name, address* entry,
|
|
bool dest_uninitialized = false) {
|
|
Label L_load_element, L_store_element, L_do_card_marks, L_done, L_done_pop;
|
|
|
|
// Input registers (after setup_arg_regs)
|
|
const Register from = c_rarg0; // source array address
|
|
const Register to = c_rarg1; // destination array address
|
|
    const Register count = c_rarg2; // elements count
|
|
const Register ckoff = c_rarg3; // super_check_offset
|
|
const Register ckval = c_rarg4; // super_klass
|
|
|
|
RegSet wb_pre_saved_regs = RegSet::range(c_rarg0, c_rarg4);
|
|
RegSet wb_post_saved_regs = RegSet::of(count);
|
|
|
|
// Registers used as temps (x7, x9, x18 are save-on-entry)
|
|
    const Register count_save = x19; // orig elements count
|
|
const Register start_to = x18; // destination array start address
|
|
const Register copied_oop = x7; // actual oop copied
|
|
const Register r9_klass = x9; // oop._klass
|
|
|
|
//---------------------------------------------------------------
|
|
// Assembler stub will be used for this call to arraycopy
|
|
// if the two arrays are subtypes of Object[] but the
|
|
// destination array type is not equal to or a supertype
|
|
// of the source type. Each element must be separately
|
|
// checked.
|
|
|
|
assert_different_registers(from, to, count, ckoff, ckval, start_to,
|
|
copied_oop, r9_klass, count_save);
|
|
|
|
__ align(CodeEntryAlignment);
|
|
StubCodeMark mark(this, "StubRoutines", name);
|
|
address start = __ pc();
|
|
|
|
__ enter(); // required for proper stackwalking of RuntimeStub frame
|
|
|
|
// Caller of this entry point must set up the argument registers.
|
|
if (entry != NULL) {
|
|
*entry = __ pc();
|
|
BLOCK_COMMENT("Entry:");
|
|
}
|
|
|
|
// Empty array: Nothing to do
|
|
__ beqz(count, L_done);
|
|
|
|
__ push_reg(RegSet::of(x7, x9, x18, x19), sp);
|
|
|
|
#ifdef ASSERT
|
|
BLOCK_COMMENT("assert consistent ckoff/ckval");
|
|
// The ckoff and ckval must be mutually consistent,
|
|
// even though caller generates both.
|
|
{ Label L;
|
|
int sco_offset = in_bytes(Klass::super_check_offset_offset());
|
|
__ lwu(start_to, Address(ckval, sco_offset));
|
|
__ beq(ckoff, start_to, L);
|
|
__ stop("super_check_offset inconsistent");
|
|
__ bind(L);
|
|
}
|
|
#endif //ASSERT
|
|
|
|
DecoratorSet decorators = IN_HEAP | IS_ARRAY | ARRAYCOPY_CHECKCAST | ARRAYCOPY_DISJOINT;
|
|
bool is_oop = true;
|
|
if (dest_uninitialized) {
|
|
decorators |= IS_DEST_UNINITIALIZED;
|
|
}
|
|
|
|
BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler();
|
|
bs->arraycopy_prologue(_masm, decorators, is_oop, from, to, count, wb_pre_saved_regs);
|
|
|
|
// save the original count
|
|
__ mv(count_save, count);
|
|
|
|
// Copy from low to high addresses
|
|
__ mv(start_to, to); // Save destination array start address
|
|
__ j(L_load_element);
|
|
|
|
// ======== begin loop ========
|
|
// (Loop is rotated; its entry is L_load_element.)
|
|
// Loop control:
|
|
// for count to 0 do
|
|
// copied_oop = load_heap_oop(from++)
|
|
// ... generate_type_check ...
|
|
// store_heap_oop(to++, copied_oop)
|
|
// end
|
|
|
|
__ align(OptoLoopAlignment);
|
|
|
|
__ BIND(L_store_element);
|
|
__ store_heap_oop(Address(to, 0), copied_oop, noreg, noreg, noreg, AS_RAW); // store the oop
|
|
__ add(to, to, UseCompressedOops ? 4 : 8);
|
|
__ sub(count, count, 1);
|
|
__ beqz(count, L_do_card_marks);
|
|
|
|
// ======== loop entry is here ========
|
|
__ BIND(L_load_element);
|
|
__ load_heap_oop(copied_oop, Address(from, 0), noreg, noreg, AS_RAW); // load the oop
|
|
__ add(from, from, UseCompressedOops ? 4 : 8);
|
|
__ beqz(copied_oop, L_store_element);
|
|
|
|
__ load_klass(r9_klass, copied_oop);// query the object klass
|
|
generate_type_check(r9_klass, ckoff, ckval, L_store_element);
|
|
// ======== end loop ========
|
|
|
|
// It was a real error; we must depend on the caller to finish the job.
|
|
// Register count = remaining oops, count_orig = total oops.
|
|
// Emit GC store barriers for the oops we have copied and report
|
|
// their number to the caller.
|
|
|
|
__ sub(count, count_save, count); // K = partially copied oop count
|
|
__ xori(count, count, -1); // report (-1^K) to caller
|
|
__ beqz(count, L_done_pop);
|
|
|
|
__ BIND(L_do_card_marks);
|
|
bs->arraycopy_epilogue(_masm, decorators, is_oop, start_to, count_save, t0, wb_post_saved_regs);
|
|
|
|
__ bind(L_done_pop);
|
|
__ pop_reg(RegSet::of(x7, x9, x18, x19), sp);
|
|
inc_counter_np(SharedRuntime::_checkcast_array_copy_ctr);
|
|
|
|
__ bind(L_done);
|
|
__ mv(x10, count);
|
|
__ leave();
|
|
__ ret();
|
|
|
|
return start;
|
|
}
|
|
|
|
// Perform range checks on the proposed arraycopy.
|
|
// Kills temp, but nothing else.
|
|
// Also, clean the sign bits of src_pos and dst_pos.
|
|
void arraycopy_range_checks(Register src, // source array oop (c_rarg0)
|
|
Register src_pos, // source position (c_rarg1)
|
|
                              Register dst,     // destination array oop (c_rarg2)
|
|
Register dst_pos, // destination position (c_rarg3)
|
|
Register length,
|
|
Register temp,
|
|
Label& L_failed) {
|
|
BLOCK_COMMENT("arraycopy_range_checks:");
|
|
|
|
assert_different_registers(t0, temp);
|
|
|
|
// if [src_pos + length > arrayOop(src)->length()] then FAIL
|
|
__ lwu(t0, Address(src, arrayOopDesc::length_offset_in_bytes()));
|
|
__ addw(temp, length, src_pos);
|
|
__ bgtu(temp, t0, L_failed);
|
|
|
|
// if [dst_pos + length > arrayOop(dst)->length()] then FAIL
|
|
__ lwu(t0, Address(dst, arrayOopDesc::length_offset_in_bytes()));
|
|
__ addw(temp, length, dst_pos);
|
|
__ bgtu(temp, t0, L_failed);
|
|
|
|
// Have to clean up high 32 bits of 'src_pos' and 'dst_pos'.
|
|
__ zero_extend(src_pos, src_pos, 32);
|
|
__ zero_extend(dst_pos, dst_pos, 32);
|
|
|
|
BLOCK_COMMENT("arraycopy_range_checks done");
|
|
}
|
|
|
|
//
|
|
// Generate 'unsafe' array copy stub
|
|
// Though just as safe as the other stubs, it takes an unscaled
|
|
// size_t argument instead of an element count.
|
|
//
|
|
// Input:
|
|
// c_rarg0 - source array address
|
|
// c_rarg1 - destination array address
|
|
// c_rarg2 - byte count, treated as ssize_t, can be zero
|
|
//
|
|
// Examines the alignment of the operands and dispatches
|
|
// to a long, int, short, or byte copy loop.
|
|
//
|
|
address generate_unsafe_copy(const char* name,
|
|
address byte_copy_entry,
|
|
address short_copy_entry,
|
|
address int_copy_entry,
|
|
address long_copy_entry) {
|
|
assert_cond(byte_copy_entry != NULL && short_copy_entry != NULL &&
|
|
int_copy_entry != NULL && long_copy_entry != NULL);
|
|
Label L_long_aligned, L_int_aligned, L_short_aligned;
|
|
const Register s = c_rarg0, d = c_rarg1, count = c_rarg2;
|
|
|
|
__ align(CodeEntryAlignment);
|
|
StubCodeMark mark(this, "StubRoutines", name);
|
|
address start = __ pc();
|
|
__ enter(); // required for proper stackwalking of RuntimeStub frame
|
|
|
|
// bump this on entry, not on exit:
|
|
inc_counter_np(SharedRuntime::_unsafe_array_copy_ctr);
|
|
|
|
__ orr(t0, s, d);
|
|
__ orr(t0, t0, count);
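
    // t0 is the OR of source, destination and byte count; its low bits
    // give the best alignment common to all three, which selects the
    // widest copy loop below (long, int, short or byte).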
|
|
|
|
__ andi(t0, t0, BytesPerLong - 1);
|
|
__ beqz(t0, L_long_aligned);
|
|
__ andi(t0, t0, BytesPerInt - 1);
|
|
__ beqz(t0, L_int_aligned);
|
|
__ andi(t0, t0, 1);
|
|
__ beqz(t0, L_short_aligned);
|
|
__ j(RuntimeAddress(byte_copy_entry));
|
|
|
|
__ BIND(L_short_aligned);
|
|
__ srli(count, count, LogBytesPerShort); // size => short_count
|
|
__ j(RuntimeAddress(short_copy_entry));
|
|
__ BIND(L_int_aligned);
|
|
__ srli(count, count, LogBytesPerInt); // size => int_count
|
|
__ j(RuntimeAddress(int_copy_entry));
|
|
__ BIND(L_long_aligned);
|
|
__ srli(count, count, LogBytesPerLong); // size => long_count
|
|
__ j(RuntimeAddress(long_copy_entry));
|
|
|
|
return start;
|
|
}
|
|
|
|
//
|
|
// Generate generic array copy stubs
|
|
//
|
|
// Input:
|
|
// c_rarg0 - src oop
|
|
// c_rarg1 - src_pos (32-bits)
|
|
// c_rarg2 - dst oop
|
|
// c_rarg3 - dst_pos (32-bits)
|
|
// c_rarg4 - element count (32-bits)
|
|
//
|
|
// Output:
|
|
// x10 == 0 - success
|
|
// x10 == -1^K - failure, where K is partial transfer count
|
|
//
|
|
address generate_generic_copy(const char* name,
|
|
address byte_copy_entry, address short_copy_entry,
|
|
address int_copy_entry, address oop_copy_entry,
|
|
address long_copy_entry, address checkcast_copy_entry) {
|
|
assert_cond(byte_copy_entry != NULL && short_copy_entry != NULL &&
|
|
int_copy_entry != NULL && oop_copy_entry != NULL &&
|
|
long_copy_entry != NULL && checkcast_copy_entry != NULL);
|
|
Label L_failed, L_failed_0, L_objArray;
|
|
Label L_copy_bytes, L_copy_shorts, L_copy_ints, L_copy_longs;
|
|
|
|
// Input registers
|
|
const Register src = c_rarg0; // source array oop
|
|
const Register src_pos = c_rarg1; // source position
|
|
const Register dst = c_rarg2; // destination array oop
|
|
const Register dst_pos = c_rarg3; // destination position
|
|
const Register length = c_rarg4;
|
|
|
|
// Registers used as temps
|
|
const Register dst_klass = c_rarg5;
|
|
|
|
__ align(CodeEntryAlignment);
|
|
|
|
StubCodeMark mark(this, "StubRoutines", name);
|
|
|
|
address start = __ pc();
|
|
|
|
__ enter(); // required for proper stackwalking of RuntimeStub frame
|
|
|
|
// bump this on entry, not on exit:
|
|
inc_counter_np(SharedRuntime::_generic_array_copy_ctr);
|
|
|
|
//-----------------------------------------------------------------------
|
|
// Assembler stub will be used for this call to arraycopy
|
|
// if the following conditions are met:
|
|
//
|
|
// (1) src and dst must not be null.
|
|
// (2) src_pos must not be negative.
|
|
// (3) dst_pos must not be negative.
|
|
// (4) length must not be negative.
|
|
// (5) src klass and dst klass should be the same and not NULL.
|
|
// (6) src and dst should be arrays.
|
|
// (7) src_pos + length must not exceed length of src.
|
|
// (8) dst_pos + length must not exceed length of dst.
|
|
//
|
|
|
|
// if [src == NULL] then return -1
|
|
__ beqz(src, L_failed);
|
|
|
|
// if [src_pos < 0] then return -1
|
|
// i.e. sign bit set
|
|
__ andi(t0, src_pos, 1UL << 31);
|
|
__ bnez(t0, L_failed);
|
|
|
|
// if [dst == NULL] then return -1
|
|
__ beqz(dst, L_failed);
|
|
|
|
// if [dst_pos < 0] then return -1
|
|
// i.e. sign bit set
|
|
__ andi(t0, dst_pos, 1UL << 31);
|
|
__ bnez(t0, L_failed);
|
|
|
|
// registers used as temp
|
|
const Register scratch_length = x28; // elements count to copy
|
|
const Register scratch_src_klass = x29; // array klass
|
|
const Register lh = x30; // layout helper
|
|
|
|
// if [length < 0] then return -1
|
|
__ addw(scratch_length, length, zr); // length (elements count, 32-bits value)
|
|
// i.e. sign bit set
|
|
__ andi(t0, scratch_length, 1UL << 31);
|
|
__ bnez(t0, L_failed);
|
|
|
|
__ load_klass(scratch_src_klass, src);
|
|
#ifdef ASSERT
|
|
{
|
|
BLOCK_COMMENT("assert klasses not null {");
|
|
Label L1, L2;
|
|
__ bnez(scratch_src_klass, L2); // it is broken if klass is NULL
|
|
__ bind(L1);
|
|
__ stop("broken null klass");
|
|
__ bind(L2);
|
|
__ load_klass(t0, dst);
|
|
__ beqz(t0, L1); // this would be broken also
|
|
BLOCK_COMMENT("} assert klasses not null done");
|
|
}
|
|
#endif
|
|
|
|
    // Load layout helper (32-bits)
    //
    //  |array_tag|     | header_size | element_type |     |log2_element_size|
    // 32        30    24            16              8     2                 0
    //
    //   array_tag: typeArray = 0x3, objArray = 0x2, non-array = 0x0
    //
|
|
|
|
const int lh_offset = in_bytes(Klass::layout_helper_offset());
|
|
|
|
// Handle objArrays completely differently...
|
|
const jint objArray_lh = Klass::array_layout_helper(T_OBJECT);
|
|
__ lw(lh, Address(scratch_src_klass, lh_offset));
|
|
__ mvw(t0, objArray_lh);
|
|
__ beq(lh, t0, L_objArray);
|
|
|
|
// if [src->klass() != dst->klass()] then return -1
|
|
__ load_klass(t1, dst);
|
|
__ bne(t1, scratch_src_klass, L_failed);
|
|
|
|
    // if [!src->is_Array()] then return -1
|
|
// i.e. (lh >= 0)
|
|
__ andi(t0, lh, 1UL << 31);
|
|
__ beqz(t0, L_failed);
|
|
|
|
// At this point, it is known to be a typeArray (array_tag 0x3).
|
|
#ifdef ASSERT
|
|
{
|
|
BLOCK_COMMENT("assert primitive array {");
|
|
Label L;
|
|
__ mvw(t1, Klass::_lh_array_tag_type_value << Klass::_lh_array_tag_shift);
|
|
__ bge(lh, t1, L);
|
|
__ stop("must be a primitive array");
|
|
__ bind(L);
|
|
BLOCK_COMMENT("} assert primitive array done");
|
|
}
|
|
#endif
|
|
|
|
arraycopy_range_checks(src, src_pos, dst, dst_pos, scratch_length,
|
|
t1, L_failed);
|
|
|
|
// TypeArrayKlass
|
|
//
|
|
// src_addr = (src + array_header_in_bytes()) + (src_pos << log2elemsize)
|
|
// dst_addr = (dst + array_header_in_bytes()) + (dst_pos << log2elemsize)
|
|
//
|
|
|
|
const Register t0_offset = t0; // array offset
|
|
const Register x22_elsize = lh; // element size
|
|
|
|
// Get array_header_in_bytes()
|
|
int lh_header_size_width = exact_log2(Klass::_lh_header_size_mask + 1);
|
|
int lh_header_size_msb = Klass::_lh_header_size_shift + lh_header_size_width;
|
|
__ slli(t0_offset, lh, XLEN - lh_header_size_msb); // left shift to remove 24 ~ 32;
|
|
__ srli(t0_offset, t0_offset, XLEN - lh_header_size_width); // array_offset
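    // The slli/srli pair above isolates the header_size field of the layout helper,
    // i.e. the size of the array header in bytes, which is also the offset of element 0.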
|
|
|
|
__ add(src, src, t0_offset); // src array offset
|
|
__ add(dst, dst, t0_offset); // dst array offset
|
|
BLOCK_COMMENT("choose copy loop based on element size");
|
|
|
|
// next registers should be set before the jump to corresponding stub
|
|
const Register from = c_rarg0; // source array address
|
|
const Register to = c_rarg1; // destination array address
|
|
const Register count = c_rarg2; // elements count
|
|
|
|
// 'from', 'to', 'count' registers should be set in such order
|
|
// since they are the same as 'src', 'src_pos', 'dst'.
|
|
|
|
assert(Klass::_lh_log2_element_size_shift == 0, "fix this code");
|
|
|
|
// The possible values of elsize are 0-3, i.e. exact_log2(element
|
|
// size in bytes). We do a simple bitwise binary search.
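    // Bit 1 of elsize separates {byte, short} (clear) from {int, long} (set);
    // bit 0 then selects the narrower or wider element type within each pair.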
|
|
__ BIND(L_copy_bytes);
|
|
__ andi(t0, x22_elsize, 2);
|
|
__ bnez(t0, L_copy_ints);
|
|
__ andi(t0, x22_elsize, 1);
|
|
__ bnez(t0, L_copy_shorts);
|
|
__ add(from, src, src_pos); // src_addr
|
|
__ add(to, dst, dst_pos); // dst_addr
|
|
__ addw(count, scratch_length, zr); // length
|
|
__ j(RuntimeAddress(byte_copy_entry));
|
|
|
|
__ BIND(L_copy_shorts);
|
|
__ shadd(from, src_pos, src, t0, 1); // src_addr
|
|
__ shadd(to, dst_pos, dst, t0, 1); // dst_addr
|
|
__ addw(count, scratch_length, zr); // length
|
|
__ j(RuntimeAddress(short_copy_entry));
|
|
|
|
__ BIND(L_copy_ints);
|
|
__ andi(t0, x22_elsize, 1);
|
|
__ bnez(t0, L_copy_longs);
|
|
__ shadd(from, src_pos, src, t0, 2); // src_addr
|
|
__ shadd(to, dst_pos, dst, t0, 2); // dst_addr
|
|
__ addw(count, scratch_length, zr); // length
|
|
__ j(RuntimeAddress(int_copy_entry));
|
|
|
|
__ BIND(L_copy_longs);
|
|
#ifdef ASSERT
|
|
{
|
|
BLOCK_COMMENT("assert long copy {");
|
|
Label L;
|
|
__ andi(lh, lh, Klass::_lh_log2_element_size_mask); // lh -> x22_elsize
|
|
__ addw(lh, lh, zr);
|
|
__ mvw(t0, LogBytesPerLong);
|
|
__ beq(x22_elsize, t0, L);
|
|
__ stop("must be long copy, but elsize is wrong");
|
|
__ bind(L);
|
|
BLOCK_COMMENT("} assert long copy done");
|
|
}
|
|
#endif
|
|
__ shadd(from, src_pos, src, t0, 3); // src_addr
|
|
__ shadd(to, dst_pos, dst, t0, 3); // dst_addr
|
|
__ addw(count, scratch_length, zr); // length
|
|
__ j(RuntimeAddress(long_copy_entry));
|
|
|
|
// ObjArrayKlass
|
|
__ BIND(L_objArray);
|
|
// live at this point: scratch_src_klass, scratch_length, src[_pos], dst[_pos]
|
|
|
|
Label L_plain_copy, L_checkcast_copy;
|
|
// test array classes for subtyping
|
|
__ load_klass(t2, dst);
|
|
__ bne(scratch_src_klass, t2, L_checkcast_copy); // usual case is exact equality
|
|
|
|
// Identically typed arrays can be copied without element-wise checks.
|
|
arraycopy_range_checks(src, src_pos, dst, dst_pos, scratch_length,
|
|
t1, L_failed);
|
|
|
|
__ shadd(from, src_pos, src, t0, LogBytesPerHeapOop);
|
|
__ add(from, from, arrayOopDesc::base_offset_in_bytes(T_OBJECT));
|
|
__ shadd(to, dst_pos, dst, t0, LogBytesPerHeapOop);
|
|
__ add(to, to, arrayOopDesc::base_offset_in_bytes(T_OBJECT));
|
|
__ addw(count, scratch_length, zr); // length
|
|
__ BIND(L_plain_copy);
|
|
__ j(RuntimeAddress(oop_copy_entry));
|
|
|
|
__ BIND(L_checkcast_copy);
|
|
// live at this point: scratch_src_klass, scratch_length, t2 (dst_klass)
|
|
{
|
|
// Before looking at dst.length, make sure dst is also an objArray.
|
|
__ lwu(t0, Address(t2, lh_offset));
|
|
__ mvw(t1, objArray_lh);
|
|
__ bne(t0, t1, L_failed);
|
|
|
|
// It is safe to examine both src.length and dst.length.
|
|
arraycopy_range_checks(src, src_pos, dst, dst_pos, scratch_length,
|
|
t2, L_failed);
|
|
|
|
__ load_klass(dst_klass, dst); // reload
|
|
|
|
// Marshal the base address arguments now, freeing registers.
|
|
__ shadd(from, src_pos, src, t0, LogBytesPerHeapOop);
|
|
__ add(from, from, arrayOopDesc::base_offset_in_bytes(T_OBJECT));
|
|
__ shadd(to, dst_pos, dst, t0, LogBytesPerHeapOop);
|
|
__ add(to, to, arrayOopDesc::base_offset_in_bytes(T_OBJECT));
|
|
__ addw(count, length, zr); // length (reloaded)
|
|
const Register sco_temp = c_rarg3; // this register is free now
|
|
assert_different_registers(from, to, count, sco_temp,
|
|
dst_klass, scratch_src_klass);
|
|
|
|
// Generate the type check.
|
|
const int sco_offset = in_bytes(Klass::super_check_offset_offset());
|
|
__ lwu(sco_temp, Address(dst_klass, sco_offset));
|
|
|
|
// Smashes t0, t1
|
|
generate_type_check(scratch_src_klass, sco_temp, dst_klass, L_plain_copy);
|
|
|
|
// Fetch destination element klass from the ObjArrayKlass header.
|
|
int ek_offset = in_bytes(ObjArrayKlass::element_klass_offset());
|
|
__ ld(dst_klass, Address(dst_klass, ek_offset));
|
|
__ lwu(sco_temp, Address(dst_klass, sco_offset));
|
|
|
|
// the checkcast_copy loop needs two extra arguments:
|
|
assert(c_rarg3 == sco_temp, "#3 already in place");
|
|
// Set up arguments for checkcast_copy_entry.
|
|
__ mv(c_rarg4, dst_klass); // dst.klass.element_klass
|
|
__ j(RuntimeAddress(checkcast_copy_entry));
|
|
}
|
|
|
|
__ BIND(L_failed);
|
|
__ mv(x10, -1);
|
|
__ leave(); // required for proper stackwalking of RuntimeStub frame
|
|
__ ret();
|
|
|
|
return start;
|
|
}
|
|
|
|
//
|
|
// Generate stub for array fill. If "aligned" is true, the
|
|
// "to" address is assumed to be heapword aligned.
|
|
//
|
|
// Arguments for generated stub:
|
|
// to: c_rarg0
|
|
// value: c_rarg1
|
|
// count: c_rarg2 treated as signed
|
|
//
|
|
address generate_fill(BasicType t, bool aligned, const char* name) {
|
|
__ align(CodeEntryAlignment);
|
|
StubCodeMark mark(this, "StubRoutines", name);
|
|
address start = __ pc();
|
|
|
|
BLOCK_COMMENT("Entry:");
|
|
|
|
    const Register to = c_rarg0; // destination array address
|
|
const Register value = c_rarg1; // value
|
|
const Register count = c_rarg2; // elements count
|
|
|
|
const Register bz_base = x28; // base for block_zero routine
|
|
const Register cnt_words = x29; // temp register
|
|
const Register tmp_reg = t1;
|
|
|
|
__ enter();
|
|
|
|
Label L_fill_elements, L_exit1;
|
|
|
|
int shift = -1;
|
|
switch (t) {
|
|
case T_BYTE:
|
|
shift = 0;
|
|
|
|
// Zero extend value
|
|
// 8 bit -> 16 bit
|
|
__ andi(value, value, 0xff);
|
|
__ mv(tmp_reg, value);
|
|
__ slli(tmp_reg, tmp_reg, 8);
|
|
__ orr(value, value, tmp_reg);
|
|
|
|
// 16 bit -> 32 bit
|
|
__ mv(tmp_reg, value);
|
|
__ slli(tmp_reg, tmp_reg, 16);
|
|
__ orr(value, value, tmp_reg);
|
|
|
|
__ mv(tmp_reg, 8 >> shift); // Short arrays (< 8 bytes) fill by element
|
|
__ bltu(count, tmp_reg, L_fill_elements);
|
|
break;
|
|
case T_SHORT:
|
|
shift = 1;
|
|
// Zero extend value
|
|
// 16 bit -> 32 bit
|
|
__ andi(value, value, 0xffff);
|
|
__ mv(tmp_reg, value);
|
|
__ slli(tmp_reg, tmp_reg, 16);
|
|
__ orr(value, value, tmp_reg);
|
|
|
|
// Short arrays (< 8 bytes) fill by element
|
|
__ mv(tmp_reg, 8 >> shift);
|
|
__ bltu(count, tmp_reg, L_fill_elements);
|
|
break;
|
|
case T_INT:
|
|
shift = 2;
|
|
|
|
// Short arrays (< 8 bytes) fill by element
|
|
__ mv(tmp_reg, 8 >> shift);
|
|
__ bltu(count, tmp_reg, L_fill_elements);
|
|
break;
|
|
default: ShouldNotReachHere();
|
|
}
|
|
|
|
    // Align the fill destination to an 8-byte boundary.
|
|
Label L_skip_align1, L_skip_align2, L_skip_align4;
|
|
if (!aligned) {
|
|
switch (t) {
|
|
case T_BYTE:
|
|
// One byte misalignment happens only for byte arrays.
|
|
__ andi(t0, to, 1);
|
|
__ beqz(t0, L_skip_align1);
|
|
__ sb(value, Address(to, 0));
|
|
__ addi(to, to, 1);
|
|
__ addiw(count, count, -1);
|
|
__ bind(L_skip_align1);
|
|
// Fallthrough
|
|
case T_SHORT:
|
|
// Two bytes misalignment happens only for byte and short (char) arrays.
|
|
__ andi(t0, to, 2);
|
|
__ beqz(t0, L_skip_align2);
|
|
__ sh(value, Address(to, 0));
|
|
__ addi(to, to, 2);
|
|
__ addiw(count, count, -(2 >> shift));
|
|
__ bind(L_skip_align2);
|
|
// Fallthrough
|
|
case T_INT:
|
|
// Align to 8 bytes, we know we are 4 byte aligned to start.
|
|
__ andi(t0, to, 4);
|
|
__ beqz(t0, L_skip_align4);
|
|
__ sw(value, Address(to, 0));
|
|
__ addi(to, to, 4);
|
|
__ addiw(count, count, -(4 >> shift));
|
|
__ bind(L_skip_align4);
|
|
break;
|
|
default: ShouldNotReachHere();
|
|
}
|
|
}
|
|
|
|
//
|
|
// Fill large chunks
|
|
//
|
|
__ srliw(cnt_words, count, 3 - shift); // number of words
|
|
|
|
// 32 bit -> 64 bit
|
|
__ andi(value, value, 0xffffffff);
|
|
__ mv(tmp_reg, value);
|
|
__ slli(tmp_reg, tmp_reg, 32);
|
|
__ orr(value, value, tmp_reg);
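    // value now holds the fill pattern replicated across all 64 bits, e.g. a
    // T_BYTE fill value of 0xAB has become 0xABABABABABABABAB.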
|
|
|
|
__ slli(tmp_reg, cnt_words, 3 - shift);
|
|
__ subw(count, count, tmp_reg);
|
|
{
|
|
__ fill_words(to, cnt_words, value);
|
|
}
|
|
|
|
// Remaining count is less than 8 bytes. Fill it by a single store.
|
|
// Note that the total length is no less than 8 bytes.
|
|
if (t == T_BYTE || t == T_SHORT) {
|
|
__ beqz(count, L_exit1);
|
|
__ shadd(to, count, to, tmp_reg, shift); // points to the end
|
|
__ sd(value, Address(to, -8)); // overwrite some elements
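    // The 8-byte store above may rewrite a few already-filled bytes, but it stores
    // the same replicated pattern, so the overlap is harmless.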
|
|
__ bind(L_exit1);
|
|
__ leave();
|
|
__ ret();
|
|
}
|
|
|
|
// Handle copies less than 8 bytes.
|
|
Label L_fill_2, L_fill_4, L_exit2;
|
|
__ bind(L_fill_elements);
|
|
switch (t) {
|
|
case T_BYTE:
|
|
__ andi(t0, count, 1);
|
|
__ beqz(t0, L_fill_2);
|
|
__ sb(value, Address(to, 0));
|
|
__ addi(to, to, 1);
|
|
__ bind(L_fill_2);
|
|
__ andi(t0, count, 2);
|
|
__ beqz(t0, L_fill_4);
|
|
__ sh(value, Address(to, 0));
|
|
__ addi(to, to, 2);
|
|
__ bind(L_fill_4);
|
|
__ andi(t0, count, 4);
|
|
__ beqz(t0, L_exit2);
|
|
__ sw(value, Address(to, 0));
|
|
break;
|
|
case T_SHORT:
|
|
__ andi(t0, count, 1);
|
|
__ beqz(t0, L_fill_4);
|
|
__ sh(value, Address(to, 0));
|
|
__ addi(to, to, 2);
|
|
__ bind(L_fill_4);
|
|
__ andi(t0, count, 2);
|
|
__ beqz(t0, L_exit2);
|
|
__ sw(value, Address(to, 0));
|
|
break;
|
|
case T_INT:
|
|
__ beqz(count, L_exit2);
|
|
__ sw(value, Address(to, 0));
|
|
break;
|
|
default: ShouldNotReachHere();
|
|
}
|
|
__ bind(L_exit2);
|
|
__ leave();
|
|
__ ret();
|
|
return start;
|
|
}
|
|
|
|
void generate_arraycopy_stubs() {
|
|
address entry = NULL;
|
|
address entry_jbyte_arraycopy = NULL;
|
|
address entry_jshort_arraycopy = NULL;
|
|
address entry_jint_arraycopy = NULL;
|
|
address entry_oop_arraycopy = NULL;
|
|
address entry_jlong_arraycopy = NULL;
|
|
address entry_checkcast_arraycopy = NULL;
|
|
|
|
generate_copy_longs(copy_f, c_rarg0, c_rarg1, t1, copy_forwards);
|
|
generate_copy_longs(copy_b, c_rarg0, c_rarg1, t1, copy_backwards);
|
|
|
|
StubRoutines::riscv::_zero_blocks = generate_zero_blocks();
|
|
|
|
//*** jbyte
|
|
// Always need aligned and unaligned versions
|
|
StubRoutines::_jbyte_disjoint_arraycopy = generate_disjoint_byte_copy(false, &entry,
|
|
"jbyte_disjoint_arraycopy");
|
|
StubRoutines::_jbyte_arraycopy = generate_conjoint_byte_copy(false, entry,
|
|
&entry_jbyte_arraycopy,
|
|
"jbyte_arraycopy");
|
|
StubRoutines::_arrayof_jbyte_disjoint_arraycopy = generate_disjoint_byte_copy(true, &entry,
|
|
"arrayof_jbyte_disjoint_arraycopy");
|
|
StubRoutines::_arrayof_jbyte_arraycopy = generate_conjoint_byte_copy(true, entry, NULL,
|
|
"arrayof_jbyte_arraycopy");
|
|
|
|
//*** jshort
|
|
// Always need aligned and unaligned versions
|
|
StubRoutines::_jshort_disjoint_arraycopy = generate_disjoint_short_copy(false, &entry,
|
|
"jshort_disjoint_arraycopy");
|
|
StubRoutines::_jshort_arraycopy = generate_conjoint_short_copy(false, entry,
|
|
&entry_jshort_arraycopy,
|
|
"jshort_arraycopy");
|
|
StubRoutines::_arrayof_jshort_disjoint_arraycopy = generate_disjoint_short_copy(true, &entry,
|
|
"arrayof_jshort_disjoint_arraycopy");
|
|
StubRoutines::_arrayof_jshort_arraycopy = generate_conjoint_short_copy(true, entry, NULL,
|
|
"arrayof_jshort_arraycopy");
|
|
|
|
//*** jint
|
|
// Aligned versions
|
|
StubRoutines::_arrayof_jint_disjoint_arraycopy = generate_disjoint_int_copy(true, &entry,
|
|
"arrayof_jint_disjoint_arraycopy");
|
|
StubRoutines::_arrayof_jint_arraycopy = generate_conjoint_int_copy(true, entry, &entry_jint_arraycopy,
|
|
"arrayof_jint_arraycopy");
|
|
// In 64 bit we need both aligned and unaligned versions of jint arraycopy.
|
|
// entry_jint_arraycopy always points to the unaligned version
|
|
StubRoutines::_jint_disjoint_arraycopy = generate_disjoint_int_copy(false, &entry,
|
|
"jint_disjoint_arraycopy");
|
|
StubRoutines::_jint_arraycopy = generate_conjoint_int_copy(false, entry,
|
|
&entry_jint_arraycopy,
|
|
"jint_arraycopy");
|
|
|
|
//*** jlong
|
|
// It is always aligned
|
|
StubRoutines::_arrayof_jlong_disjoint_arraycopy = generate_disjoint_long_copy(true, &entry,
|
|
"arrayof_jlong_disjoint_arraycopy");
|
|
StubRoutines::_arrayof_jlong_arraycopy = generate_conjoint_long_copy(true, entry, &entry_jlong_arraycopy,
|
|
"arrayof_jlong_arraycopy");
|
|
StubRoutines::_jlong_disjoint_arraycopy = StubRoutines::_arrayof_jlong_disjoint_arraycopy;
|
|
StubRoutines::_jlong_arraycopy = StubRoutines::_arrayof_jlong_arraycopy;
|
|
|
|
//*** oops
|
|
{
|
|
// With compressed oops we need unaligned versions; notice that
|
|
// we overwrite entry_oop_arraycopy.
|
|
bool aligned = !UseCompressedOops;
|
|
|
|
StubRoutines::_arrayof_oop_disjoint_arraycopy
|
|
= generate_disjoint_oop_copy(aligned, &entry, "arrayof_oop_disjoint_arraycopy",
|
|
/*dest_uninitialized*/false);
|
|
StubRoutines::_arrayof_oop_arraycopy
|
|
= generate_conjoint_oop_copy(aligned, entry, &entry_oop_arraycopy, "arrayof_oop_arraycopy",
|
|
/*dest_uninitialized*/false);
|
|
// Aligned versions without pre-barriers
|
|
StubRoutines::_arrayof_oop_disjoint_arraycopy_uninit
|
|
= generate_disjoint_oop_copy(aligned, &entry, "arrayof_oop_disjoint_arraycopy_uninit",
|
|
/*dest_uninitialized*/true);
|
|
StubRoutines::_arrayof_oop_arraycopy_uninit
|
|
= generate_conjoint_oop_copy(aligned, entry, NULL, "arrayof_oop_arraycopy_uninit",
|
|
/*dest_uninitialized*/true);
|
|
}
|
|
|
|
StubRoutines::_oop_disjoint_arraycopy = StubRoutines::_arrayof_oop_disjoint_arraycopy;
|
|
StubRoutines::_oop_arraycopy = StubRoutines::_arrayof_oop_arraycopy;
|
|
StubRoutines::_oop_disjoint_arraycopy_uninit = StubRoutines::_arrayof_oop_disjoint_arraycopy_uninit;
|
|
StubRoutines::_oop_arraycopy_uninit = StubRoutines::_arrayof_oop_arraycopy_uninit;
|
|
|
|
StubRoutines::_checkcast_arraycopy = generate_checkcast_copy("checkcast_arraycopy", &entry_checkcast_arraycopy);
|
|
StubRoutines::_checkcast_arraycopy_uninit = generate_checkcast_copy("checkcast_arraycopy_uninit", NULL,
|
|
/*dest_uninitialized*/true);
|
|
|
|
|
|
StubRoutines::_unsafe_arraycopy = generate_unsafe_copy("unsafe_arraycopy",
|
|
entry_jbyte_arraycopy,
|
|
entry_jshort_arraycopy,
|
|
entry_jint_arraycopy,
|
|
entry_jlong_arraycopy);
|
|
|
|
StubRoutines::_generic_arraycopy = generate_generic_copy("generic_arraycopy",
|
|
entry_jbyte_arraycopy,
|
|
entry_jshort_arraycopy,
|
|
entry_jint_arraycopy,
|
|
entry_oop_arraycopy,
|
|
entry_jlong_arraycopy,
|
|
entry_checkcast_arraycopy);
|
|
|
|
StubRoutines::_jbyte_fill = generate_fill(T_BYTE, false, "jbyte_fill");
|
|
StubRoutines::_jshort_fill = generate_fill(T_SHORT, false, "jshort_fill");
|
|
StubRoutines::_jint_fill = generate_fill(T_INT, false, "jint_fill");
|
|
StubRoutines::_arrayof_jbyte_fill = generate_fill(T_BYTE, true, "arrayof_jbyte_fill");
|
|
StubRoutines::_arrayof_jshort_fill = generate_fill(T_SHORT, true, "arrayof_jshort_fill");
|
|
StubRoutines::_arrayof_jint_fill = generate_fill(T_INT, true, "arrayof_jint_fill");
|
|
}
|
|
|
|
// code for comparing 16 bytes of strings with same encoding
|
|
void compare_string_16_bytes_same(Label &DIFF1, Label &DIFF2) {
|
|
const Register result = x10, str1 = x11, cnt1 = x12, str2 = x13, tmp1 = x28, tmp2 = x29, tmp4 = x7, tmp5 = x31;
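    // Software-pipelined: each call advances both strings by 16 bytes, comparing the
    // pair of words loaded by the caller/previous call (tmp1, tmp2) together with the
    // pair loaded here (tmp5, cnt1).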
|
|
__ ld(tmp5, Address(str1));
|
|
__ addi(str1, str1, 8);
|
|
__ xorr(tmp4, tmp1, tmp2);
|
|
__ ld(cnt1, Address(str2));
|
|
__ addi(str2, str2, 8);
|
|
__ bnez(tmp4, DIFF1);
|
|
__ ld(tmp1, Address(str1));
|
|
__ addi(str1, str1, 8);
|
|
__ xorr(tmp4, tmp5, cnt1);
|
|
__ ld(tmp2, Address(str2));
|
|
__ addi(str2, str2, 8);
|
|
__ bnez(tmp4, DIFF2);
|
|
}
|
|
|
|
// code for comparing 8 characters of strings with Latin1 and Utf16 encoding
|
|
void compare_string_8_x_LU(Register tmpL, Register tmpU, Label &DIFF1,
|
|
Label &DIFF2) {
|
|
const Register strU = x12, curU = x7, strL = x29, tmp = x30;
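    // Each call consumes 8 Latin-1 bytes (tmpL) and 16 UTF-16 bytes: the low half of
    // tmpL is inflated and compared against the previously loaded curU, the high half
    // against the tmpU word loaded here.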
|
|
__ ld(tmpL, Address(strL));
|
|
__ addi(strL, strL, 8);
|
|
__ ld(tmpU, Address(strU));
|
|
__ addi(strU, strU, 8);
|
|
__ inflate_lo32(tmp, tmpL);
|
|
__ mv(t0, tmp);
|
|
__ xorr(tmp, curU, t0);
|
|
__ bnez(tmp, DIFF2);
|
|
|
|
__ ld(curU, Address(strU));
|
|
__ addi(strU, strU, 8);
|
|
__ inflate_hi32(tmp, tmpL);
|
|
__ mv(t0, tmp);
|
|
__ xorr(tmp, tmpU, t0);
|
|
__ bnez(tmp, DIFF1);
|
|
}
|
|
|
|
// x10 = result
|
|
// x11 = str1
|
|
// x12 = cnt1
|
|
// x13 = str2
|
|
// x14 = cnt2
|
|
// x28 = tmp1
|
|
// x29 = tmp2
|
|
// x30 = tmp3
|
|
address generate_compare_long_string_different_encoding(bool isLU) {
|
|
__ align(CodeEntryAlignment);
|
|
StubCodeMark mark(this, "StubRoutines", isLU ? "compare_long_string_different_encoding LU" : "compare_long_string_different_encoding UL");
|
|
address entry = __ pc();
|
|
Label SMALL_LOOP, TAIL, TAIL_LOAD_16, LOAD_LAST, DIFF1, DIFF2,
|
|
DONE, CALCULATE_DIFFERENCE;
|
|
const Register result = x10, str1 = x11, cnt1 = x12, str2 = x13, cnt2 = x14,
|
|
tmp1 = x28, tmp2 = x29, tmp3 = x30, tmp4 = x7, tmp5 = x31;
|
|
RegSet spilled_regs = RegSet::of(tmp4, tmp5);
|
|
|
|
// cnt2 == amount of characters left to compare
|
|
// Check already loaded first 4 symbols
|
|
__ inflate_lo32(tmp3, isLU ? tmp1 : tmp2);
|
|
__ mv(isLU ? tmp1 : tmp2, tmp3);
|
|
__ addi(str1, str1, isLU ? wordSize / 2 : wordSize);
|
|
__ addi(str2, str2, isLU ? wordSize : wordSize / 2);
|
|
__ sub(cnt2, cnt2, 8); // Already loaded 4 symbols. Last 4 is special case.
|
|
__ push_reg(spilled_regs, sp);
|
|
|
|
if (isLU) {
|
|
__ add(str1, str1, cnt2);
|
|
__ shadd(str2, cnt2, str2, t0, 1);
|
|
} else {
|
|
__ shadd(str1, cnt2, str1, t0, 1);
|
|
__ add(str2, str2, cnt2);
|
|
}
|
|
__ xorr(tmp3, tmp1, tmp2);
|
|
__ mv(tmp5, tmp2);
|
|
__ bnez(tmp3, CALCULATE_DIFFERENCE);
|
|
|
|
Register strU = isLU ? str2 : str1,
|
|
strL = isLU ? str1 : str2,
|
|
tmpU = isLU ? tmp5 : tmp1, // where to keep U for comparison
|
|
tmpL = isLU ? tmp1 : tmp5; // where to keep L for comparison
|
|
|
|
__ sub(tmp2, strL, cnt2); // strL pointer to load from
|
|
__ slli(t0, cnt2, 1);
|
|
__ sub(cnt1, strU, t0); // strU pointer to load from
|
|
|
|
__ ld(tmp4, Address(cnt1));
|
|
__ addi(cnt1, cnt1, 8);
|
|
__ beqz(cnt2, LOAD_LAST); // no characters left except last load
|
|
__ sub(cnt2, cnt2, 16);
|
|
__ bltz(cnt2, TAIL);
|
|
__ bind(SMALL_LOOP); // smaller loop
|
|
__ sub(cnt2, cnt2, 16);
|
|
compare_string_8_x_LU(tmpL, tmpU, DIFF1, DIFF2);
|
|
compare_string_8_x_LU(tmpL, tmpU, DIFF1, DIFF2);
|
|
__ bgez(cnt2, SMALL_LOOP);
|
|
__ addi(t0, cnt2, 16);
|
|
__ beqz(t0, LOAD_LAST);
|
|
__ bind(TAIL); // 1..15 characters left until last load (last 4 characters)
|
|
// Address of 8 bytes before last 4 characters in UTF-16 string
|
|
__ shadd(cnt1, cnt2, cnt1, t0, 1);
|
|
// Address of 16 bytes before last 4 characters in Latin1 string
|
|
__ add(tmp2, tmp2, cnt2);
|
|
__ ld(tmp4, Address(cnt1, -8));
|
|
// last 16 characters before last load
|
|
compare_string_8_x_LU(tmpL, tmpU, DIFF1, DIFF2);
|
|
compare_string_8_x_LU(tmpL, tmpU, DIFF1, DIFF2);
|
|
__ j(LOAD_LAST);
|
|
__ bind(DIFF2);
|
|
__ mv(tmpU, tmp4);
|
|
__ bind(DIFF1);
|
|
__ mv(tmpL, t0);
|
|
__ j(CALCULATE_DIFFERENCE);
|
|
__ bind(LOAD_LAST);
|
|
// Last 4 UTF-16 characters are already pre-loaded into tmp4 by compare_string_8_x_LU.
|
|
    // No need to load them again
|
|
__ mv(tmpU, tmp4);
|
|
__ ld(tmpL, Address(strL));
|
|
__ inflate_lo32(tmp3, tmpL);
|
|
__ mv(tmpL, tmp3);
|
|
__ xorr(tmp3, tmpU, tmpL);
|
|
__ beqz(tmp3, DONE);
|
|
|
|
// Find the first different characters in the longwords and
|
|
// compute their difference.
|
|
__ bind(CALCULATE_DIFFERENCE);
|
|
__ ctzc_bit(tmp4, tmp3);
|
|
__ srl(tmp1, tmp1, tmp4);
|
|
__ srl(tmp5, tmp5, tmp4);
|
|
__ andi(tmp1, tmp1, 0xFFFF);
|
|
__ andi(tmp5, tmp5, 0xFFFF);
|
|
__ sub(result, tmp1, tmp5);
|
|
__ bind(DONE);
|
|
__ pop_reg(spilled_regs, sp);
|
|
__ ret();
|
|
return entry;
|
|
}
|
|
|
|
address generate_method_entry_barrier() {
|
|
__ align(CodeEntryAlignment);
|
|
StubCodeMark mark(this, "StubRoutines", "nmethod_entry_barrier");
|
|
|
|
Label deoptimize_label;
|
|
|
|
address start = __ pc();
|
|
|
|
BarrierSetAssembler* bs_asm = BarrierSet::barrier_set()->barrier_set_assembler();
|
|
|
|
if (bs_asm->nmethod_patching_type() == NMethodPatchingType::conc_instruction_and_data_patch) {
|
|
BarrierSetNMethod* bs_nm = BarrierSet::barrier_set()->barrier_set_nmethod();
|
|
Address thread_epoch_addr(xthread, in_bytes(bs_nm->thread_disarmed_offset()) + 4);
|
|
__ la(t1, ExternalAddress(bs_asm->patching_epoch_addr()));
|
|
__ lwu(t1, t1);
|
|
__ sw(t1, thread_epoch_addr);
|
|
__ membar(__ LoadLoad);
|
|
}
|
|
|
|
__ set_last_Java_frame(sp, fp, ra, t0);
|
|
|
|
__ enter();
|
|
__ add(t1, sp, wordSize);
|
|
|
|
__ sub(sp, sp, 4 * wordSize);
|
|
|
|
__ push_call_clobbered_registers();
|
|
|
|
__ mv(c_rarg0, t1);
|
|
__ call_VM_leaf(CAST_FROM_FN_PTR(address, BarrierSetNMethod::nmethod_stub_entry_barrier), 1);
|
|
|
|
__ reset_last_Java_frame(true);
|
|
|
|
__ mv(t0, x10);
|
|
|
|
__ pop_call_clobbered_registers();
|
|
|
|
__ bnez(t0, deoptimize_label);
|
|
|
|
__ leave();
|
|
__ ret();
|
|
|
|
__ BIND(deoptimize_label);
|
|
|
|
__ ld(t0, Address(sp, 0));
|
|
__ ld(fp, Address(sp, wordSize));
|
|
__ ld(ra, Address(sp, wordSize * 2));
|
|
__ ld(t1, Address(sp, wordSize * 3));
|
|
|
|
__ mv(sp, t0);
|
|
__ jr(t1);
|
|
|
|
return start;
|
|
}
|
|
|
|
// x10 = result
|
|
// x11 = str1
|
|
// x12 = cnt1
|
|
// x13 = str2
|
|
// x14 = cnt2
|
|
// x28 = tmp1
|
|
// x29 = tmp2
|
|
// x30 = tmp3
|
|
// x31 = tmp4
|
|
address generate_compare_long_string_same_encoding(bool isLL) {
|
|
__ align(CodeEntryAlignment);
|
|
StubCodeMark mark(this, "StubRoutines", isLL ?
|
|
"compare_long_string_same_encoding LL" : "compare_long_string_same_encoding UU");
|
|
address entry = __ pc();
|
|
Label SMALL_LOOP, CHECK_LAST, DIFF2, TAIL,
|
|
LENGTH_DIFF, DIFF, LAST_CHECK_AND_LENGTH_DIFF;
|
|
const Register result = x10, str1 = x11, cnt1 = x12, str2 = x13, cnt2 = x14,
|
|
tmp1 = x28, tmp2 = x29, tmp3 = x30, tmp4 = x7, tmp5 = x31;
|
|
RegSet spilled_regs = RegSet::of(tmp4, tmp5);
|
|
|
|
    // cnt1/cnt2 contain the number of characters to compare. cnt1 can be re-used
|
|
// update cnt2 counter with already loaded 8 bytes
|
|
__ sub(cnt2, cnt2, wordSize / (isLL ? 1 : 2));
|
|
// update pointers, because of previous read
|
|
__ add(str1, str1, wordSize);
|
|
__ add(str2, str2, wordSize);
|
|
// less than 16 bytes left?
|
|
__ sub(cnt2, cnt2, isLL ? 16 : 8);
|
|
__ push_reg(spilled_regs, sp);
|
|
__ bltz(cnt2, TAIL);
|
|
__ bind(SMALL_LOOP);
|
|
compare_string_16_bytes_same(DIFF, DIFF2);
|
|
__ sub(cnt2, cnt2, isLL ? 16 : 8);
|
|
__ bgez(cnt2, SMALL_LOOP);
|
|
__ bind(TAIL);
|
|
__ addi(cnt2, cnt2, isLL ? 16 : 8);
|
|
__ beqz(cnt2, LAST_CHECK_AND_LENGTH_DIFF);
|
|
__ sub(cnt2, cnt2, isLL ? 8 : 4);
|
|
__ blez(cnt2, CHECK_LAST);
|
|
__ xorr(tmp4, tmp1, tmp2);
|
|
__ bnez(tmp4, DIFF);
|
|
__ ld(tmp1, Address(str1));
|
|
__ addi(str1, str1, 8);
|
|
__ ld(tmp2, Address(str2));
|
|
__ addi(str2, str2, 8);
|
|
__ sub(cnt2, cnt2, isLL ? 8 : 4);
|
|
__ bind(CHECK_LAST);
|
|
if (!isLL) {
|
|
__ add(cnt2, cnt2, cnt2); // now in bytes
|
|
}
|
|
__ xorr(tmp4, tmp1, tmp2);
|
|
__ bnez(tmp4, DIFF);
|
|
__ add(str1, str1, cnt2);
|
|
__ ld(tmp5, Address(str1));
|
|
__ add(str2, str2, cnt2);
|
|
__ ld(cnt1, Address(str2));
|
|
__ xorr(tmp4, tmp5, cnt1);
|
|
__ beqz(tmp4, LENGTH_DIFF);
|
|
// Find the first different characters in the longwords and
|
|
// compute their difference.
|
|
__ bind(DIFF2);
|
|
__ ctzc_bit(tmp3, tmp4, isLL); // count zero from lsb to msb
|
|
__ srl(tmp5, tmp5, tmp3);
|
|
__ srl(cnt1, cnt1, tmp3);
|
|
if (isLL) {
|
|
__ andi(tmp5, tmp5, 0xFF);
|
|
__ andi(cnt1, cnt1, 0xFF);
|
|
} else {
|
|
__ andi(tmp5, tmp5, 0xFFFF);
|
|
__ andi(cnt1, cnt1, 0xFFFF);
|
|
}
|
|
__ sub(result, tmp5, cnt1);
|
|
__ j(LENGTH_DIFF);
|
|
__ bind(DIFF);
|
|
__ ctzc_bit(tmp3, tmp4, isLL); // count zero from lsb to msb
|
|
__ srl(tmp1, tmp1, tmp3);
|
|
__ srl(tmp2, tmp2, tmp3);
|
|
if (isLL) {
|
|
__ andi(tmp1, tmp1, 0xFF);
|
|
__ andi(tmp2, tmp2, 0xFF);
|
|
} else {
|
|
__ andi(tmp1, tmp1, 0xFFFF);
|
|
__ andi(tmp2, tmp2, 0xFFFF);
|
|
}
|
|
__ sub(result, tmp1, tmp2);
|
|
__ j(LENGTH_DIFF);
|
|
__ bind(LAST_CHECK_AND_LENGTH_DIFF);
|
|
__ xorr(tmp4, tmp1, tmp2);
|
|
__ bnez(tmp4, DIFF);
|
|
__ bind(LENGTH_DIFF);
|
|
__ pop_reg(spilled_regs, sp);
|
|
__ ret();
|
|
return entry;
|
|
}
|
|
|
|
void generate_compare_long_strings() {
|
|
StubRoutines::riscv::_compare_long_string_LL = generate_compare_long_string_same_encoding(true);
|
|
StubRoutines::riscv::_compare_long_string_UU = generate_compare_long_string_same_encoding(false);
|
|
StubRoutines::riscv::_compare_long_string_LU = generate_compare_long_string_different_encoding(true);
|
|
StubRoutines::riscv::_compare_long_string_UL = generate_compare_long_string_different_encoding(false);
|
|
}
|
|
|
|
// x10 result
|
|
// x11 src
|
|
// x12 src count
|
|
// x13 pattern
|
|
// x14 pattern count
|
|
address generate_string_indexof_linear(bool needle_isL, bool haystack_isL)
|
|
{
|
|
const char* stubName = needle_isL
|
|
? (haystack_isL ? "indexof_linear_ll" : "indexof_linear_ul")
|
|
: "indexof_linear_uu";
|
|
__ align(CodeEntryAlignment);
|
|
StubCodeMark mark(this, "StubRoutines", stubName);
|
|
address entry = __ pc();
|
|
|
|
int needle_chr_size = needle_isL ? 1 : 2;
|
|
int haystack_chr_size = haystack_isL ? 1 : 2;
|
|
int needle_chr_shift = needle_isL ? 0 : 1;
|
|
int haystack_chr_shift = haystack_isL ? 0 : 1;
|
|
bool isL = needle_isL && haystack_isL;
|
|
// parameters
|
|
Register result = x10, haystack = x11, haystack_len = x12, needle = x13, needle_len = x14;
|
|
// temporary registers
|
|
Register mask1 = x20, match_mask = x21, first = x22, trailing_zeros = x23, mask2 = x24, tmp = x25;
|
|
// redefinitions
|
|
Register ch1 = x28, ch2 = x29;
|
|
RegSet spilled_regs = RegSet::range(x20, x25) + RegSet::range(x28, x29);
|
|
|
|
__ push_reg(spilled_regs, sp);
|
|
|
|
Label L_LOOP, L_LOOP_PROCEED, L_SMALL, L_HAS_ZERO,
|
|
L_HAS_ZERO_LOOP, L_CMP_LOOP, L_CMP_LOOP_NOMATCH, L_SMALL_PROCEED,
|
|
L_SMALL_HAS_ZERO_LOOP, L_SMALL_CMP_LOOP_NOMATCH, L_SMALL_CMP_LOOP,
|
|
L_POST_LOOP, L_CMP_LOOP_LAST_CMP, L_HAS_ZERO_LOOP_NOMATCH,
|
|
L_SMALL_CMP_LOOP_LAST_CMP, L_SMALL_CMP_LOOP_LAST_CMP2,
|
|
L_CMP_LOOP_LAST_CMP2, DONE, NOMATCH;
|
|
|
|
__ ld(ch1, Address(needle));
|
|
__ ld(ch2, Address(haystack));
|
|
// src.length - pattern.length
|
|
__ sub(haystack_len, haystack_len, needle_len);
|
|
|
|
// first is needle[0]
|
|
__ andi(first, ch1, needle_isL ? 0xFF : 0xFFFF, first);
|
|
uint64_t mask0101 = UCONST64(0x0101010101010101);
|
|
uint64_t mask0001 = UCONST64(0x0001000100010001);
|
|
__ mv(mask1, haystack_isL ? mask0101 : mask0001);
|
|
__ mul(first, first, mask1);
|
|
uint64_t mask7f7f = UCONST64(0x7f7f7f7f7f7f7f7f);
|
|
uint64_t mask7fff = UCONST64(0x7fff7fff7fff7fff);
|
|
__ mv(mask2, haystack_isL ? mask7f7f : mask7fff);
|
|
if (needle_isL != haystack_isL) {
|
|
__ mv(tmp, ch1);
|
|
}
|
|
__ sub(haystack_len, haystack_len, wordSize / haystack_chr_size - 1);
|
|
__ blez(haystack_len, L_SMALL);
|
|
|
|
if (needle_isL != haystack_isL) {
|
|
__ inflate_lo32(ch1, tmp, match_mask, trailing_zeros);
|
|
}
|
|
// xorr, sub, orr, notr, andr
|
|
// compare and set match_mask[i] with 0x80/0x8000 (Latin1/UTF16) if ch2[i] == first[i]
|
|
// eg:
|
|
// first: aa aa aa aa aa aa aa aa
|
|
// ch2: aa aa li nx jd ka aa aa
|
|
// match_mask: 80 80 00 00 00 00 80 80
|
|
__ compute_match_mask(ch2, first, match_mask, mask1, mask2);
|
|
|
|
// search first char of needle, if success, goto L_HAS_ZERO;
|
|
__ bnez(match_mask, L_HAS_ZERO);
|
|
__ sub(haystack_len, haystack_len, wordSize / haystack_chr_size);
|
|
__ add(result, result, wordSize / haystack_chr_size);
|
|
__ add(haystack, haystack, wordSize);
|
|
__ bltz(haystack_len, L_POST_LOOP);
|
|
|
|
__ bind(L_LOOP);
|
|
__ ld(ch2, Address(haystack));
|
|
__ compute_match_mask(ch2, first, match_mask, mask1, mask2);
|
|
__ bnez(match_mask, L_HAS_ZERO);
|
|
|
|
__ bind(L_LOOP_PROCEED);
|
|
__ sub(haystack_len, haystack_len, wordSize / haystack_chr_size);
|
|
__ add(haystack, haystack, wordSize);
|
|
__ add(result, result, wordSize / haystack_chr_size);
|
|
__ bgez(haystack_len, L_LOOP);
|
|
|
|
__ bind(L_POST_LOOP);
|
|
__ mv(ch2, -wordSize / haystack_chr_size);
|
|
__ ble(haystack_len, ch2, NOMATCH); // no extra characters to check
|
|
__ ld(ch2, Address(haystack));
|
|
__ slli(haystack_len, haystack_len, LogBitsPerByte + haystack_chr_shift);
|
|
__ neg(haystack_len, haystack_len);
|
|
__ xorr(ch2, first, ch2);
|
|
__ sub(match_mask, ch2, mask1);
|
|
__ orr(ch2, ch2, mask2);
|
|
__ mv(trailing_zeros, -1); // all bits set
|
|
__ j(L_SMALL_PROCEED);
|
|
|
|
__ align(OptoLoopAlignment);
|
|
__ bind(L_SMALL);
|
|
__ slli(haystack_len, haystack_len, LogBitsPerByte + haystack_chr_shift);
|
|
__ neg(haystack_len, haystack_len);
|
|
if (needle_isL != haystack_isL) {
|
|
__ inflate_lo32(ch1, tmp, match_mask, trailing_zeros);
|
|
}
|
|
__ xorr(ch2, first, ch2);
|
|
__ sub(match_mask, ch2, mask1);
|
|
__ orr(ch2, ch2, mask2);
|
|
__ mv(trailing_zeros, -1); // all bits set
|
|
|
|
__ bind(L_SMALL_PROCEED);
|
|
__ srl(trailing_zeros, trailing_zeros, haystack_len); // mask. zeroes on useless bits.
|
|
__ notr(ch2, ch2);
|
|
__ andr(match_mask, match_mask, ch2);
|
|
__ andr(match_mask, match_mask, trailing_zeros); // clear useless bits and check
|
|
__ beqz(match_mask, NOMATCH);
|
|
|
|
__ bind(L_SMALL_HAS_ZERO_LOOP);
|
|
__ ctzc_bit(trailing_zeros, match_mask, haystack_isL, ch2, tmp); // count trailing zeros
|
|
__ addi(trailing_zeros, trailing_zeros, haystack_isL ? 7 : 15);
|
|
__ mv(ch2, wordSize / haystack_chr_size);
|
|
__ ble(needle_len, ch2, L_SMALL_CMP_LOOP_LAST_CMP2);
|
|
__ compute_index(haystack, trailing_zeros, match_mask, result, ch2, tmp, haystack_isL);
|
|
__ mv(trailing_zeros, wordSize / haystack_chr_size);
|
|
__ bne(ch1, ch2, L_SMALL_CMP_LOOP_NOMATCH);
|
|
|
|
__ bind(L_SMALL_CMP_LOOP);
|
|
__ shadd(first, trailing_zeros, needle, first, needle_chr_shift);
|
|
__ shadd(ch2, trailing_zeros, haystack, ch2, haystack_chr_shift);
|
|
needle_isL ? __ lbu(first, Address(first)) : __ lhu(first, Address(first));
|
|
haystack_isL ? __ lbu(ch2, Address(ch2)) : __ lhu(ch2, Address(ch2));
|
|
__ add(trailing_zeros, trailing_zeros, 1);
|
|
__ bge(trailing_zeros, needle_len, L_SMALL_CMP_LOOP_LAST_CMP);
|
|
__ beq(first, ch2, L_SMALL_CMP_LOOP);
|
|
|
|
__ bind(L_SMALL_CMP_LOOP_NOMATCH);
|
|
__ beqz(match_mask, NOMATCH);
|
|
__ ctzc_bit(trailing_zeros, match_mask, haystack_isL, tmp, ch2);
|
|
__ addi(trailing_zeros, trailing_zeros, haystack_isL ? 7 : 15);
|
|
__ add(result, result, 1);
|
|
__ add(haystack, haystack, haystack_chr_size);
|
|
__ j(L_SMALL_HAS_ZERO_LOOP);
|
|
|
|
__ align(OptoLoopAlignment);
|
|
__ bind(L_SMALL_CMP_LOOP_LAST_CMP);
|
|
__ bne(first, ch2, L_SMALL_CMP_LOOP_NOMATCH);
|
|
__ j(DONE);
|
|
|
|
__ align(OptoLoopAlignment);
|
|
__ bind(L_SMALL_CMP_LOOP_LAST_CMP2);
|
|
__ compute_index(haystack, trailing_zeros, match_mask, result, ch2, tmp, haystack_isL);
|
|
__ bne(ch1, ch2, L_SMALL_CMP_LOOP_NOMATCH);
|
|
__ j(DONE);
|
|
|
|
__ align(OptoLoopAlignment);
|
|
__ bind(L_HAS_ZERO);
|
|
__ ctzc_bit(trailing_zeros, match_mask, haystack_isL, tmp, ch2);
|
|
__ addi(trailing_zeros, trailing_zeros, haystack_isL ? 7 : 15);
|
|
__ slli(needle_len, needle_len, BitsPerByte * wordSize / 2);
|
|
__ orr(haystack_len, haystack_len, needle_len); // restore needle_len(32bits)
|
|
__ sub(result, result, 1); // array index from 0, so result -= 1
|
|
|
|
__ bind(L_HAS_ZERO_LOOP);
|
|
__ mv(needle_len, wordSize / haystack_chr_size);
|
|
__ srli(ch2, haystack_len, BitsPerByte * wordSize / 2);
|
|
__ bge(needle_len, ch2, L_CMP_LOOP_LAST_CMP2);
|
|
// load next 8 bytes from haystack, and increase result index
|
|
__ compute_index(haystack, trailing_zeros, match_mask, result, ch2, tmp, haystack_isL);
|
|
__ add(result, result, 1);
|
|
__ mv(trailing_zeros, wordSize / haystack_chr_size);
|
|
__ bne(ch1, ch2, L_CMP_LOOP_NOMATCH);
|
|
|
|
// compare one char
|
|
__ bind(L_CMP_LOOP);
|
|
__ shadd(needle_len, trailing_zeros, needle, needle_len, needle_chr_shift);
|
|
needle_isL ? __ lbu(needle_len, Address(needle_len)) : __ lhu(needle_len, Address(needle_len));
|
|
__ shadd(ch2, trailing_zeros, haystack, ch2, haystack_chr_shift);
|
|
haystack_isL ? __ lbu(ch2, Address(ch2)) : __ lhu(ch2, Address(ch2));
|
|
__ add(trailing_zeros, trailing_zeros, 1); // next char index
|
|
__ srli(tmp, haystack_len, BitsPerByte * wordSize / 2);
|
|
__ bge(trailing_zeros, tmp, L_CMP_LOOP_LAST_CMP);
|
|
__ beq(needle_len, ch2, L_CMP_LOOP);
|
|
|
|
__ bind(L_CMP_LOOP_NOMATCH);
|
|
__ beqz(match_mask, L_HAS_ZERO_LOOP_NOMATCH);
|
|
__ ctzc_bit(trailing_zeros, match_mask, haystack_isL, needle_len, ch2); // find next "first" char index
|
|
__ addi(trailing_zeros, trailing_zeros, haystack_isL ? 7 : 15);
|
|
__ add(haystack, haystack, haystack_chr_size);
|
|
__ j(L_HAS_ZERO_LOOP);
|
|
|
|
__ align(OptoLoopAlignment);
|
|
__ bind(L_CMP_LOOP_LAST_CMP);
|
|
__ bne(needle_len, ch2, L_CMP_LOOP_NOMATCH);
|
|
__ j(DONE);
|
|
|
|
__ align(OptoLoopAlignment);
|
|
__ bind(L_CMP_LOOP_LAST_CMP2);
|
|
__ compute_index(haystack, trailing_zeros, match_mask, result, ch2, tmp, haystack_isL);
|
|
__ add(result, result, 1);
|
|
__ bne(ch1, ch2, L_CMP_LOOP_NOMATCH);
|
|
__ j(DONE);
|
|
|
|
__ align(OptoLoopAlignment);
|
|
__ bind(L_HAS_ZERO_LOOP_NOMATCH);
|
|
// 1) Restore "result" index. Index was wordSize/str2_chr_size * N until
|
|
// L_HAS_ZERO block. Byte octet was analyzed in L_HAS_ZERO_LOOP,
|
|
// so, result was increased at max by wordSize/str2_chr_size - 1, so,
|
|
// respective high bit wasn't changed. L_LOOP_PROCEED will increase
|
|
// result by analyzed characters value, so, we can just reset lower bits
|
|
// in result here. Clear 2 lower bits for UU/UL and 3 bits for LL
|
|
// 2) restore needle_len and haystack_len values from "compressed" haystack_len
|
|
// 3) advance haystack value to represent next haystack octet. result & 7/3 is
|
|
    // index of the last analyzed substring inside the current octet. So, haystack is at the
|
|
// respective start address. We need to advance it to next octet
|
|
__ andi(match_mask, result, wordSize / haystack_chr_size - 1);
|
|
__ srli(needle_len, haystack_len, BitsPerByte * wordSize / 2);
|
|
__ andi(result, result, haystack_isL ? -8 : -4);
|
|
__ slli(tmp, match_mask, haystack_chr_shift);
|
|
__ sub(haystack, haystack, tmp);
|
|
__ addw(haystack_len, haystack_len, zr);
|
|
__ j(L_LOOP_PROCEED);
|
|
|
|
__ align(OptoLoopAlignment);
|
|
__ bind(NOMATCH);
|
|
__ mv(result, -1);
|
|
|
|
__ bind(DONE);
|
|
__ pop_reg(spilled_regs, sp);
|
|
__ ret();
|
|
return entry;
|
|
}
|
|
|
|
void generate_string_indexof_stubs()
|
|
{
|
|
StubRoutines::riscv::_string_indexof_linear_ll = generate_string_indexof_linear(true, true);
|
|
StubRoutines::riscv::_string_indexof_linear_uu = generate_string_indexof_linear(false, false);
|
|
StubRoutines::riscv::_string_indexof_linear_ul = generate_string_indexof_linear(true, false);
|
|
}
|
|
|
|
#ifdef COMPILER2
|
|
address generate_mulAdd()
|
|
{
|
|
__ align(CodeEntryAlignment);
|
|
StubCodeMark mark(this, "StubRoutines", "mulAdd");
|
|
|
|
address entry = __ pc();
|
|
|
|
const Register out = x10;
|
|
const Register in = x11;
|
|
const Register offset = x12;
|
|
const Register len = x13;
|
|
const Register k = x14;
|
|
const Register tmp = x28;
|
|
|
|
BLOCK_COMMENT("Entry:");
|
|
__ enter();
|
|
__ mul_add(out, in, offset, len, k, tmp);
|
|
__ leave();
|
|
__ ret();
|
|
|
|
return entry;
|
|
}
|
|
|
|
/**
|
|
* Arguments:
|
|
*
|
|
* Input:
|
|
* c_rarg0 - x address
|
|
* c_rarg1 - x length
|
|
* c_rarg2 - y address
|
|
* c_rarg3 - y length
|
|
* c_rarg4 - z address
|
|
* c_rarg5 - z length
|
|
*/
|
|
address generate_multiplyToLen()
|
|
{
|
|
__ align(CodeEntryAlignment);
|
|
StubCodeMark mark(this, "StubRoutines", "multiplyToLen");
|
|
address entry = __ pc();
|
|
|
|
const Register x = x10;
|
|
const Register xlen = x11;
|
|
const Register y = x12;
|
|
const Register ylen = x13;
|
|
const Register z = x14;
|
|
const Register zlen = x15;
|
|
|
|
const Register tmp1 = x16;
|
|
const Register tmp2 = x17;
|
|
const Register tmp3 = x7;
|
|
const Register tmp4 = x28;
|
|
const Register tmp5 = x29;
|
|
const Register tmp6 = x30;
|
|
const Register tmp7 = x31;
|
|
|
|
BLOCK_COMMENT("Entry:");
|
|
__ enter(); // required for proper stackwalking of RuntimeStub frame
|
|
__ multiply_to_len(x, xlen, y, ylen, z, zlen, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7);
|
|
__ leave(); // required for proper stackwalking of RuntimeStub frame
|
|
__ ret();
|
|
|
|
return entry;
|
|
}
|
|
|
|
address generate_squareToLen()
|
|
{
|
|
__ align(CodeEntryAlignment);
|
|
StubCodeMark mark(this, "StubRoutines", "squareToLen");
|
|
address entry = __ pc();
|
|
|
|
const Register x = x10;
|
|
const Register xlen = x11;
|
|
const Register z = x12;
|
|
const Register zlen = x13;
|
|
const Register y = x14; // == x
|
|
const Register ylen = x15; // == xlen
|
|
|
|
const Register tmp1 = x16;
|
|
const Register tmp2 = x17;
|
|
const Register tmp3 = x7;
|
|
const Register tmp4 = x28;
|
|
const Register tmp5 = x29;
|
|
const Register tmp6 = x30;
|
|
const Register tmp7 = x31;
|
|
|
|
BLOCK_COMMENT("Entry:");
|
|
__ enter();
|
|
__ mv(y, x);
|
|
__ mv(ylen, xlen);
|
|
__ multiply_to_len(x, xlen, y, ylen, z, zlen, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7);
|
|
__ leave();
|
|
__ ret();
|
|
|
|
return entry;
|
|
}
|
|
|
|
// Arguments:
|
|
//
|
|
// Input:
|
|
// c_rarg0 - newArr address
|
|
// c_rarg1 - oldArr address
|
|
// c_rarg2 - newIdx
|
|
// c_rarg3 - shiftCount
|
|
// c_rarg4 - numIter
|
|
//
|
|
address generate_bigIntegerLeftShift() {
|
|
__ align(CodeEntryAlignment);
|
|
StubCodeMark mark(this, "StubRoutines", "bigIntegerLeftShiftWorker");
|
|
address entry = __ pc();
|
|
|
|
Label loop, exit;
|
|
|
|
Register newArr = c_rarg0;
|
|
Register oldArr = c_rarg1;
|
|
Register newIdx = c_rarg2;
|
|
Register shiftCount = c_rarg3;
|
|
Register numIter = c_rarg4;
|
|
|
|
Register shiftRevCount = c_rarg5;
|
|
Register oldArrNext = t1;
|
|
|
|
__ beqz(numIter, exit);
|
|
__ shadd(newArr, newIdx, newArr, t0, 2);
|
|
|
|
__ mv(shiftRevCount, 32);
|
|
__ sub(shiftRevCount, shiftRevCount, shiftCount);
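    // Each 32-bit result limb is (oldArr[i] << shiftCount) | (oldArr[i + 1] >>> shiftRevCount);
    // t0 receives the number of limbs handled per vector iteration from vsetvli.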
|
|
|
|
__ bind(loop);
|
|
__ addi(oldArrNext, oldArr, 4);
|
|
__ vsetvli(t0, numIter, Assembler::e32, Assembler::m4);
|
|
__ vle32_v(v0, oldArr);
|
|
__ vle32_v(v4, oldArrNext);
|
|
__ vsll_vx(v0, v0, shiftCount);
|
|
__ vsrl_vx(v4, v4, shiftRevCount);
|
|
__ vor_vv(v0, v0, v4);
|
|
__ vse32_v(v0, newArr);
|
|
__ sub(numIter, numIter, t0);
|
|
__ shadd(oldArr, t0, oldArr, t1, 2);
|
|
__ shadd(newArr, t0, newArr, t1, 2);
|
|
__ bnez(numIter, loop);
|
|
|
|
__ bind(exit);
|
|
__ ret();
|
|
|
|
return entry;
|
|
}
|
|
|
|
// Arguments:
|
|
//
|
|
// Input:
|
|
// c_rarg0 - newArr address
|
|
// c_rarg1 - oldArr address
|
|
// c_rarg2 - newIdx
|
|
// c_rarg3 - shiftCount
|
|
// c_rarg4 - numIter
|
|
//
|
|
address generate_bigIntegerRightShift() {
|
|
__ align(CodeEntryAlignment);
|
|
StubCodeMark mark(this, "StubRoutines", "bigIntegerRightShiftWorker");
|
|
address entry = __ pc();
|
|
|
|
Label loop, exit;
|
|
|
|
Register newArr = c_rarg0;
|
|
Register oldArr = c_rarg1;
|
|
Register newIdx = c_rarg2;
|
|
Register shiftCount = c_rarg3;
|
|
Register numIter = c_rarg4;
|
|
Register idx = numIter;
|
|
|
|
Register shiftRevCount = c_rarg5;
|
|
Register oldArrNext = c_rarg6;
|
|
Register newArrCur = t0;
|
|
Register oldArrCur = t1;
|
|
|
|
__ beqz(idx, exit);
|
|
__ shadd(newArr, newIdx, newArr, t0, 2);
|
|
|
|
__ mv(shiftRevCount, 32);
|
|
__ sub(shiftRevCount, shiftRevCount, shiftCount);
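    // Processes the limbs from the high end of the array downwards; each 32-bit result
    // limb is (oldArr[j] << shiftRevCount) | (oldArr[j + 1] >>> shiftCount).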
|
|
|
|
__ bind(loop);
|
|
__ vsetvli(t0, idx, Assembler::e32, Assembler::m4);
|
|
__ sub(idx, idx, t0);
|
|
__ shadd(oldArrNext, idx, oldArr, t1, 2);
|
|
__ shadd(newArrCur, idx, newArr, t1, 2);
|
|
__ addi(oldArrCur, oldArrNext, 4);
|
|
__ vle32_v(v0, oldArrCur);
|
|
__ vle32_v(v4, oldArrNext);
|
|
__ vsrl_vx(v0, v0, shiftCount);
|
|
__ vsll_vx(v4, v4, shiftRevCount);
|
|
__ vor_vv(v0, v0, v4);
|
|
__ vse32_v(v0, newArrCur);
|
|
__ bnez(idx, loop);
|
|
|
|
__ bind(exit);
|
|
__ ret();
|
|
|
|
return entry;
|
|
}
|
|
#endif
|
|
|
|
#ifdef COMPILER2
|
|
class MontgomeryMultiplyGenerator : public MacroAssembler {
|
|
|
|
Register Pa_base, Pb_base, Pn_base, Pm_base, inv, Rlen, Ra, Rb, Rm, Rn,
|
|
Pa, Pb, Pn, Pm, Rhi_ab, Rlo_ab, Rhi_mn, Rlo_mn, tmp0, tmp1, tmp2, Ri, Rj;
|
|
|
|
RegSet _toSave;
|
|
bool _squaring;
|
|
|
|
public:
|
|
MontgomeryMultiplyGenerator (Assembler *as, bool squaring)
|
|
: MacroAssembler(as->code()), _squaring(squaring) {
|
|
|
|
// Register allocation
|
|
|
|
RegSetIterator<Register> regs = RegSet::range(x10, x26).begin();
|
|
Pa_base = *regs; // Argument registers
|
|
if (squaring) {
|
|
Pb_base = Pa_base;
|
|
} else {
|
|
Pb_base = *++regs;
|
|
}
|
|
Pn_base = *++regs;
|
|
      Rlen = *++regs;
|
|
inv = *++regs;
|
|
Pm_base = *++regs;
|
|
|
|
// Working registers:
|
|
Ra = *++regs; // The current digit of a, b, n, and m.
|
|
Rb = *++regs;
|
|
Rm = *++regs;
|
|
Rn = *++regs;
|
|
|
|
Pa = *++regs; // Pointers to the current/next digit of a, b, n, and m.
|
|
Pb = *++regs;
|
|
Pm = *++regs;
|
|
Pn = *++regs;
|
|
|
|
tmp0 = *++regs; // Three registers which form a
|
|
      tmp1 = *++regs; // triple-precision accumulator.
|
|
tmp2 = *++regs;
|
|
|
|
Ri = x6; // Inner and outer loop indexes.
|
|
Rj = x7;
|
|
|
|
Rhi_ab = x28; // Product registers: low and high parts
|
|
Rlo_ab = x29; // of a*b and m*n.
|
|
Rhi_mn = x30;
|
|
Rlo_mn = x31;
|
|
|
|
// x18 and up are callee-saved.
|
|
_toSave = RegSet::range(x18, *regs) + Pm_base;
|
|
}
|
|
|
|
private:
|
|
void save_regs() {
|
|
push_reg(_toSave, sp);
|
|
}
|
|
|
|
void restore_regs() {
|
|
pop_reg(_toSave, sp);
|
|
}
|
|
|
|
template <typename T>
|
|
void unroll_2(Register count, T block) {
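      // Runs 'block' exactly 'count' times, two invocations per unrolled iteration;
      // an odd count enters at 'odd' so only one invocation runs on the first pass.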
|
|
Label loop, end, odd;
|
|
beqz(count, end);
|
|
andi(t0, count, 0x1);
|
|
bnez(t0, odd);
|
|
align(16);
|
|
bind(loop);
|
|
(this->*block)();
|
|
bind(odd);
|
|
(this->*block)();
|
|
addi(count, count, -2);
|
|
bgtz(count, loop);
|
|
bind(end);
|
|
}
|
|
|
|
template <typename T>
|
|
void unroll_2(Register count, T block, Register d, Register s, Register tmp) {
|
|
Label loop, end, odd;
|
|
beqz(count, end);
|
|
andi(tmp, count, 0x1);
|
|
bnez(tmp, odd);
|
|
align(16);
|
|
bind(loop);
|
|
(this->*block)(d, s, tmp);
|
|
bind(odd);
|
|
(this->*block)(d, s, tmp);
|
|
addi(count, count, -2);
|
|
bgtz(count, loop);
|
|
bind(end);
|
|
}
|
|
|
|
void pre1(RegisterOrConstant i) {
|
|
block_comment("pre1");
|
|
// Pa = Pa_base;
|
|
// Pb = Pb_base + i;
|
|
// Pm = Pm_base;
|
|
// Pn = Pn_base + i;
|
|
// Ra = *Pa;
|
|
// Rb = *Pb;
|
|
// Rm = *Pm;
|
|
// Rn = *Pn;
|
|
if (i.is_register()) {
|
|
slli(t0, i.as_register(), LogBytesPerWord);
|
|
} else {
|
|
mv(t0, i.as_constant());
|
|
slli(t0, t0, LogBytesPerWord);
|
|
}
|
|
|
|
mv(Pa, Pa_base);
|
|
add(Pb, Pb_base, t0);
|
|
mv(Pm, Pm_base);
|
|
add(Pn, Pn_base, t0);
|
|
|
|
ld(Ra, Address(Pa));
|
|
ld(Rb, Address(Pb));
|
|
ld(Rm, Address(Pm));
|
|
ld(Rn, Address(Pn));
|
|
|
|
// Zero the m*n result.
|
|
mv(Rhi_mn, zr);
|
|
mv(Rlo_mn, zr);
|
|
}
|
|
|
|
// The core multiply-accumulate step of a Montgomery
|
|
// multiplication. The idea is to schedule operations as a
|
|
// pipeline so that instructions with long latencies (loads and
|
|
// multiplies) have time to complete before their results are
|
|
// used. This most benefits in-order implementations of the
|
|
// architecture but out-of-order ones also benefit.
|
|
void step() {
|
|
block_comment("step");
|
|
// MACC(Ra, Rb, tmp0, tmp1, tmp2);
|
|
// Ra = *++Pa;
|
|
// Rb = *--Pb;
|
|
mulhu(Rhi_ab, Ra, Rb);
|
|
mul(Rlo_ab, Ra, Rb);
|
|
addi(Pa, Pa, wordSize);
|
|
ld(Ra, Address(Pa));
|
|
addi(Pb, Pb, -wordSize);
|
|
ld(Rb, Address(Pb));
|
|
acc(Rhi_mn, Rlo_mn, tmp0, tmp1, tmp2); // The pending m*n from the
|
|
// previous iteration.
|
|
// MACC(Rm, Rn, tmp0, tmp1, tmp2);
|
|
// Rm = *++Pm;
|
|
// Rn = *--Pn;
|
|
mulhu(Rhi_mn, Rm, Rn);
|
|
mul(Rlo_mn, Rm, Rn);
|
|
addi(Pm, Pm, wordSize);
|
|
ld(Rm, Address(Pm));
|
|
addi(Pn, Pn, -wordSize);
|
|
ld(Rn, Address(Pn));
|
|
acc(Rhi_ab, Rlo_ab, tmp0, tmp1, tmp2);
|
|
}
|
|
|
|
void post1() {
|
|
block_comment("post1");
|
|
|
|
// MACC(Ra, Rb, tmp0, tmp1, tmp2);
|
|
// Ra = *++Pa;
|
|
// Rb = *--Pb;
|
|
mulhu(Rhi_ab, Ra, Rb);
|
|
mul(Rlo_ab, Ra, Rb);
|
|
acc(Rhi_mn, Rlo_mn, tmp0, tmp1, tmp2); // The pending m*n
|
|
acc(Rhi_ab, Rlo_ab, tmp0, tmp1, tmp2);
|
|
|
|
// *Pm = Rm = tmp0 * inv;
|
|
mul(Rm, tmp0, inv);
|
|
sd(Rm, Address(Pm));
|
|
|
|
// MACC(Rm, Rn, tmp0, tmp1, tmp2);
|
|
// tmp0 = tmp1; tmp1 = tmp2; tmp2 = 0;
|
|
mulhu(Rhi_mn, Rm, Rn);
|
|
|
|
#ifndef PRODUCT
|
|
// assert(m[i] * n[0] + tmp0 == 0, "broken Montgomery multiply");
|
|
{
|
|
mul(Rlo_mn, Rm, Rn);
|
|
add(Rlo_mn, tmp0, Rlo_mn);
|
|
Label ok;
|
|
beqz(Rlo_mn, ok);
|
|
stop("broken Montgomery multiply");
|
|
bind(ok);
|
|
}
|
|
#endif
|
|
// We have very carefully set things up so that
|
|
// m[i]*n[0] + tmp0 == 0 (mod b), so we don't have to calculate
|
|
// the lower half of Rm * Rn because we know the result already:
|
|
// it must be -tmp0. tmp0 + (-tmp0) must generate a carry iff
|
|
    // tmp0 != 0. So, rather than do a mul and a cad we just set
|
|
// the carry flag iff tmp0 is nonzero.
|
|
//
|
|
// mul(Rlo_mn, Rm, Rn);
|
|
// cad(zr, tmp0, Rlo_mn);
|
|
addi(t0, tmp0, -1);
|
|
sltu(t0, t0, tmp0); // Set carry iff tmp0 is nonzero
|
|
cadc(tmp0, tmp1, Rhi_mn, t0);
|
|
adc(tmp1, tmp2, zr, t0);
|
|
mv(tmp2, zr);
|
|
}
|
|
|
|
void pre2(Register i, Register len) {
|
|
block_comment("pre2");
|
|
// Pa = Pa_base + i-len;
|
|
// Pb = Pb_base + len;
|
|
// Pm = Pm_base + i-len;
|
|
// Pn = Pn_base + len;
|
|
|
|
sub(Rj, i, len);
|
|
// Rj == i-len
|
|
|
|
// Ra as temp register
|
|
slli(Ra, Rj, LogBytesPerWord);
|
|
add(Pa, Pa_base, Ra);
|
|
add(Pm, Pm_base, Ra);
|
|
slli(Ra, len, LogBytesPerWord);
|
|
add(Pb, Pb_base, Ra);
|
|
add(Pn, Pn_base, Ra);
|
|
|
|
// Ra = *++Pa;
|
|
// Rb = *--Pb;
|
|
// Rm = *++Pm;
|
|
// Rn = *--Pn;
|
|
add(Pa, Pa, wordSize);
|
|
ld(Ra, Address(Pa));
|
|
add(Pb, Pb, -wordSize);
|
|
ld(Rb, Address(Pb));
|
|
add(Pm, Pm, wordSize);
|
|
ld(Rm, Address(Pm));
|
|
add(Pn, Pn, -wordSize);
|
|
ld(Rn, Address(Pn));
|
|
|
|
mv(Rhi_mn, zr);
|
|
mv(Rlo_mn, zr);
|
|
}
|
|
|
|
void post2(Register i, Register len) {
|
|
block_comment("post2");
|
|
sub(Rj, i, len);
|
|
|
|
cad(tmp0, tmp0, Rlo_mn, t0); // The pending m*n, low part
|
|
|
|
// As soon as we know the least significant digit of our result,
|
|
// store it.
|
|
// Pm_base[i-len] = tmp0;
|
|
// Rj as temp register
|
|
slli(Rj, Rj, LogBytesPerWord);
|
|
add(Rj, Pm_base, Rj);
|
|
sd(tmp0, Address(Rj));
|
|
|
|
// tmp0 = tmp1; tmp1 = tmp2; tmp2 = 0;
|
|
cadc(tmp0, tmp1, Rhi_mn, t0); // The pending m*n, high part
|
|
adc(tmp1, tmp2, zr, t0);
|
|
mv(tmp2, zr);
|
|
}
|
|
|
|
// A carry in tmp0 after Montgomery multiplication means that we
|
|
// should subtract multiples of n from our result in m. We'll
|
|
// keep doing that until there is no carry.
|
|
void normalize(Register len) {
|
|
block_comment("normalize");
|
|
// while (tmp0)
|
|
// tmp0 = sub(Pm_base, Pn_base, tmp0, len);
|
|
Label loop, post, again;
|
|
Register cnt = tmp1, i = tmp2; // Re-use registers; we're done with them now
|
|
beqz(tmp0, post); {
|
|
bind(again); {
|
|
mv(i, zr);
|
|
mv(cnt, len);
|
|
slli(Rn, i, LogBytesPerWord);
|
|
add(Rm, Pm_base, Rn);
|
|
ld(Rm, Address(Rm));
|
|
add(Rn, Pn_base, Rn);
|
|
ld(Rn, Address(Rn));
|
|
mv(t0, 1); // set carry flag, i.e. no borrow
|
|
align(16);
|
|
bind(loop); {
|
|
notr(Rn, Rn);
|
|
add(Rm, Rm, t0);
|
|
add(Rm, Rm, Rn);
|
|
sltu(t0, Rm, Rn);
|
|
slli(Rn, i, LogBytesPerWord); // Rn as temp register
|
|
add(Rn, Pm_base, Rn);
|
|
sd(Rm, Address(Rn));
|
|
add(i, i, 1);
|
|
slli(Rn, i, LogBytesPerWord);
|
|
add(Rm, Pm_base, Rn);
|
|
ld(Rm, Address(Rm));
|
|
add(Rn, Pn_base, Rn);
|
|
ld(Rn, Address(Rn));
|
|
sub(cnt, cnt, 1);
|
|
} bnez(cnt, loop);
|
|
addi(tmp0, tmp0, -1);
|
|
add(tmp0, tmp0, t0);
|
|
} bnez(tmp0, again);
|
|
} bind(post);
|
|
}
|
|
|
|
// Move memory at s to d, reversing words.
|
|
// Increments d to end of copied memory
|
|
// Destroys tmp1, tmp2
|
|
// Preserves len
|
|
// Leaves s pointing to the address which was in d at start
|
|
void reverse(Register d, Register s, Register len, Register tmp1, Register tmp2) {
|
|
assert(tmp1->encoding() < x28->encoding(), "register corruption");
|
|
assert(tmp2->encoding() < x28->encoding(), "register corruption");
|
|
|
|
slli(tmp1, len, LogBytesPerWord);
|
|
add(s, s, tmp1);
|
|
mv(tmp1, len);
|
|
unroll_2(tmp1, &MontgomeryMultiplyGenerator::reverse1, d, s, tmp2);
|
|
slli(tmp1, len, LogBytesPerWord);
|
|
sub(s, d, tmp1);
|
|
}
|
|
// [63...0] -> [31...0][63...32]
|
|
void reverse1(Register d, Register s, Register tmp) {
|
|
addi(s, s, -wordSize);
|
|
ld(tmp, Address(s));
|
|
ror_imm(tmp, tmp, 32, t0);
|
|
sd(tmp, Address(d));
|
|
addi(d, d, wordSize);
|
|
}
|
|
|
|
void step_squaring() {
|
|
// An extra ACC
|
|
step();
|
|
acc(Rhi_ab, Rlo_ab, tmp0, tmp1, tmp2);
|
|
}
|
|
|
|
void last_squaring(Register i) {
|
|
Label dont;
|
|
// if ((i & 1) == 0) {
|
|
andi(t0, i, 0x1);
|
|
bnez(t0, dont); {
|
|
// MACC(Ra, Rb, tmp0, tmp1, tmp2);
|
|
// Ra = *++Pa;
|
|
// Rb = *--Pb;
|
|
mulhu(Rhi_ab, Ra, Rb);
|
|
mul(Rlo_ab, Ra, Rb);
|
|
acc(Rhi_ab, Rlo_ab, tmp0, tmp1, tmp2);
|
|
} bind(dont);
|
|
}
|
|
|
|
void extra_step_squaring() {
|
|
acc(Rhi_mn, Rlo_mn, tmp0, tmp1, tmp2); // The pending m*n
|
|
|
|
// MACC(Rm, Rn, tmp0, tmp1, tmp2);
|
|
// Rm = *++Pm;
|
|
// Rn = *--Pn;
|
|
mulhu(Rhi_mn, Rm, Rn);
|
|
mul(Rlo_mn, Rm, Rn);
|
|
addi(Pm, Pm, wordSize);
|
|
ld(Rm, Address(Pm));
|
|
addi(Pn, Pn, -wordSize);
|
|
ld(Rn, Address(Pn));
|
|
}
|
|
|
|
void post1_squaring() {
|
|
acc(Rhi_mn, Rlo_mn, tmp0, tmp1, tmp2); // The pending m*n
|
|
|
|
// *Pm = Rm = tmp0 * inv;
|
|
mul(Rm, tmp0, inv);
|
|
sd(Rm, Address(Pm));
|
|
|
|
// MACC(Rm, Rn, tmp0, tmp1, tmp2);
|
|
// tmp0 = tmp1; tmp1 = tmp2; tmp2 = 0;
|
|
mulhu(Rhi_mn, Rm, Rn);
|
|
|
|
#ifndef PRODUCT
|
|
// assert(m[i] * n[0] + tmp0 == 0, "broken Montgomery multiply");
|
|
{
|
|
mul(Rlo_mn, Rm, Rn);
|
|
add(Rlo_mn, tmp0, Rlo_mn);
|
|
Label ok;
|
|
beqz(Rlo_mn, ok); {
|
|
stop("broken Montgomery multiply");
|
|
} bind(ok);
|
|
}
|
|
#endif
|
|
// We have very carefully set things up so that
|
|
// m[i]*n[0] + tmp0 == 0 (mod b), so we don't have to calculate
|
|
// the lower half of Rm * Rn because we know the result already:
|
|
// it must be -tmp0. tmp0 + (-tmp0) must generate a carry iff
|
|
// tmp0 != 0. So, rather than do a mul and a cad we just set
|
|
// the carry flag iff tmp0 is nonzero.
|
|
//
|
|
// mul(Rlo_mn, Rm, Rn);
|
|
// cad(zr, tmp, Rlo_mn);
|
|
addi(t0, tmp0, -1);
|
|
sltu(t0, t0, tmp0); // Set carry iff tmp0 is nonzero
|
|
cadc(tmp0, tmp1, Rhi_mn, t0);
|
|
adc(tmp1, tmp2, zr, t0);
|
|
mv(tmp2, zr);
|
|
}
|
|
|
|
// use t0 as carry
|
|
void acc(Register Rhi, Register Rlo,
|
|
Register tmp0, Register tmp1, Register tmp2) {
|
|
cad(tmp0, tmp0, Rlo, t0);
|
|
cadc(tmp1, tmp1, Rhi, t0);
|
|
adc(tmp2, tmp2, zr, t0);
|
|
}
|
|
|
|
public:
|
|
/**
|
|
* Fast Montgomery multiplication. The derivation of the
|
|
* algorithm is in A Cryptographic Library for the Motorola
|
|
* DSP56000, Dusse and Kaliski, Proc. EUROCRYPT 90, pp. 230-237.
|
|
*
|
|
* Arguments:
|
|
*
|
|
* Inputs for multiplication:
|
|
* c_rarg0 - int array elements a
|
|
* c_rarg1 - int array elements b
|
|
* c_rarg2 - int array elements n (the modulus)
|
|
* c_rarg3 - int length
|
|
* c_rarg4 - int inv
|
|
* c_rarg5 - int array elements m (the result)
|
|
*
|
|
* Inputs for squaring:
|
|
* c_rarg0 - int array elements a
|
|
* c_rarg1 - int array elements n (the modulus)
|
|
* c_rarg2 - int length
|
|
* c_rarg3 - int inv
|
|
* c_rarg4 - int array elements m (the result)
|
|
*
|
|
*/
|
|
address generate_multiply() {
|
|
Label argh, nothing;
|
|
bind(argh);
|
|
stop("MontgomeryMultiply total_allocation must be <= 8192");
|
|
|
|
align(CodeEntryAlignment);
|
|
address entry = pc();
|
|
|
|
beqz(Rlen, nothing);
|
|
|
|
enter();
|
|
|
|
// Make room.
|
|
mv(Ra, 512);
|
|
bgt(Rlen, Ra, argh);
|
|
slli(Ra, Rlen, exact_log2(4 * sizeof(jint)));
|
|
sub(Ra, sp, Ra);
|
|
andi(sp, Ra, -2 * wordSize);
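    // sp now points at a 16-byte aligned scratch area big enough for 4 * len jints
    // (len was bounded by 512 above).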
|
|
|
|
srliw(Rlen, Rlen, 1); // length in longwords = len/2
|
|
|
|
{
|
|
// Copy input args, reversing as we go. We use Ra as a
|
|
// temporary variable.
|
|
reverse(Ra, Pa_base, Rlen, Ri, Rj);
|
|
if (!_squaring)
|
|
reverse(Ra, Pb_base, Rlen, Ri, Rj);
|
|
reverse(Ra, Pn_base, Rlen, Ri, Rj);
|
|
}
|
|
|
|
// Push all call-saved registers and also Pm_base which we'll need
|
|
// at the end.
|
|
save_regs();
|
|
|
|
#ifndef PRODUCT
|
|
// assert(inv * n[0] == -1UL, "broken inverse in Montgomery multiply");
|
|
{
|
|
ld(Rn, Address(Pn_base));
|
|
mul(Rlo_mn, Rn, inv);
|
|
mv(t0, -1);
|
|
Label ok;
|
|
beq(Rlo_mn, t0, ok);
|
|
stop("broken inverse in Montgomery multiply");
|
|
bind(ok);
|
|
}
|
|
#endif
|
|
|
|
mv(Pm_base, Ra);
|
|
|
|
mv(tmp0, zr);
|
|
mv(tmp1, zr);
|
|
mv(tmp2, zr);
|
|
|
|
block_comment("for (int i = 0; i < len; i++) {");
|
|
mv(Ri, zr); {
|
|
Label loop, end;
|
|
bge(Ri, Rlen, end);
|
|
|
|
bind(loop);
|
|
pre1(Ri);
|
|
|
|
block_comment(" for (j = i; j; j--) {"); {
|
|
mv(Rj, Ri);
|
|
unroll_2(Rj, &MontgomeryMultiplyGenerator::step);
|
|
} block_comment(" } // j");
|
|
|
|
post1();
|
|
addw(Ri, Ri, 1);
|
|
blt(Ri, Rlen, loop);
|
|
bind(end);
|
|
block_comment("} // i");
|
|
}
|
|
|
|
block_comment("for (int i = len; i < 2*len; i++) {");
|
|
mv(Ri, Rlen); {
|
|
Label loop, end;
|
|
slli(t0, Rlen, 1);
|
|
bge(Ri, t0, end);
|
|
|
|
bind(loop);
|
|
pre2(Ri, Rlen);
|
|
|
|
block_comment(" for (j = len*2-i-1; j; j--) {"); {
|
|
slliw(Rj, Rlen, 1);
|
|
subw(Rj, Rj, Ri);
|
|
subw(Rj, Rj, 1);
|
|
unroll_2(Rj, &MontgomeryMultiplyGenerator::step);
|
|
} block_comment(" } // j");
|
|
|
|
post2(Ri, Rlen);
|
|
addw(Ri, Ri, 1);
|
|
slli(t0, Rlen, 1);
|
|
blt(Ri, t0, loop);
|
|
bind(end);
|
|
}
|
|
block_comment("} // i");
|
|
|
|
normalize(Rlen);

      mv(Ra, Pm_base); // Save Pm_base in Ra
      restore_regs(); // Restore caller's Pm_base

      // Copy our result into caller's Pm_base
      reverse(Pm_base, Ra, Rlen, Ri, Rj);

      leave();
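      // A zero length branches straight to nothing from the entry check,
      // bypassing enter(), so it skips the leave() above as well.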
      bind(nothing);
      ret();

      return entry;
    }

    /**
     * Fast Montgomery squaring.
     *
     * Arguments:
     *
     * Inputs:
     *   c_rarg0 - int array elements a
     *   c_rarg1 - int array elements n (the modulus)
     *   c_rarg2 - int length
     *   c_rarg3 - int inv
     *   c_rarg4 - int array elements m (the result)
     *
     */
    address generate_square() {
      Label argh;
      bind(argh);
      stop("MontgomeryMultiply total_allocation must be <= 8192");

      align(CodeEntryAlignment);
      address entry = pc();

      enter();

      // Make room.
      mv(Ra, 512);
      bgt(Rlen, Ra, argh);
      slli(Ra, Rlen, exact_log2(4 * sizeof(jint)));
      sub(Ra, sp, Ra);
      andi(sp, Ra, -2 * wordSize);

      srliw(Rlen, Rlen, 1); // length in longwords = len/2

      {
        // Copy input args, reversing as we go. We use Ra as a
        // temporary variable.
        reverse(Ra, Pa_base, Rlen, Ri, Rj);
        reverse(Ra, Pn_base, Rlen, Ri, Rj);
      }

      // Push all call-saved registers and also Pm_base which we'll need
      // at the end.
      save_regs();

      mv(Pm_base, Ra);

      mv(tmp0, zr);
      mv(tmp1, zr);
      mv(tmp2, zr);

      block_comment("for (int i = 0; i < len; i++) {");
      mv(Ri, zr); {
        Label loop, end;
        bind(loop);
        bge(Ri, Rlen, end);

        pre1(Ri);

        block_comment("for (j = (i+1)/2; j; j--) {"); {
          addi(Rj, Ri, 1);
          srliw(Rj, Rj, 1);
          unroll_2(Rj, &MontgomeryMultiplyGenerator::step_squaring);
        } block_comment(" } // j");

        last_squaring(Ri);

        block_comment(" for (j = i/2; j; j--) {"); {
          srliw(Rj, Ri, 1);
          unroll_2(Rj, &MontgomeryMultiplyGenerator::extra_step_squaring);
        } block_comment(" } // j");

        post1_squaring();
        addi(Ri, Ri, 1);
        blt(Ri, Rlen, loop);

        bind(end);
        block_comment("} // i");
      }
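      // Second outer pass over the upper len columns, mirroring the
      // multiply case above.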

      block_comment("for (int i = len; i < 2*len; i++) {");
      mv(Ri, Rlen); {
        Label loop, end;
        bind(loop);
        slli(t0, Rlen, 1);
        bge(Ri, t0, end);

        pre2(Ri, Rlen);

        block_comment(" for (j = (2*len-i-1)/2; j; j--) {"); {
          slli(Rj, Rlen, 1);
          sub(Rj, Rj, Ri);
          sub(Rj, Rj, 1);
          srliw(Rj, Rj, 1);
          unroll_2(Rj, &MontgomeryMultiplyGenerator::step_squaring);
        } block_comment(" } // j");

        last_squaring(Ri);

        block_comment(" for (j = (2*len-i)/2; j; j--) {"); {
          slli(Rj, Rlen, 1);
          sub(Rj, Rj, Ri);
          srliw(Rj, Rj, 1);
          unroll_2(Rj, &MontgomeryMultiplyGenerator::extra_step_squaring);
        } block_comment(" } // j");

        post2(Ri, Rlen);
        addi(Ri, Ri, 1);
        slli(t0, Rlen, 1);
        blt(Ri, t0, loop);

        bind(end);
        block_comment("} // i");
      }

      normalize(Rlen);

      mv(Ra, Pm_base); // Save Pm_base in Ra
      restore_regs(); // Restore caller's Pm_base

      // Copy our result into caller's Pm_base
      reverse(Pm_base, Ra, Rlen, Ri, Rj);

      leave();
      ret();

      return entry;
    }
  };
#endif // COMPILER2

  // Continuation point for throwing of implicit exceptions that are
  // not handled in the current activation. Fabricates an exception
  // oop and initiates normal exception dispatching in this
  // frame. Since we need to preserve callee-saved values (currently
  // only for C2, but done for C1 as well) we need a callee-saved oop
  // map and therefore have to make these stubs into RuntimeStubs
  // rather than BufferBlobs. If the compiler needs all registers to
  // be preserved between the fault point and the exception handler
  // then it must assume responsibility for that in
  // AbstractCompiler::continuation_for_implicit_null_exception or
  // continuation_for_implicit_division_by_zero_exception. All other
  // implicit exceptions (e.g., NullPointerException or
  // AbstractMethodError on entry) are either at call sites or
  // otherwise assume that stack unwinding will be initiated, so
  // caller saved registers were assumed volatile in the compiler.

#undef __
#define __ masm->

  address generate_throw_exception(const char* name,
                                   address runtime_entry,
                                   Register arg1 = noreg,
                                   Register arg2 = noreg) {
    // Information about frame layout at time of blocking runtime call.
    // Note that we only have to preserve callee-saved registers since
    // the compilers are responsible for supplying a continuation point
    // if they expect all registers to be preserved.
    // n.b. riscv asserts that frame::arg_reg_save_area_bytes == 0
    assert_cond(runtime_entry != NULL);
    enum layout {
      fp_off = 0,
      fp_off2,
      return_off,
      return_off2,
      framesize // inclusive of return address
    };
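    // Four 32-bit slots: saved fp (2 slots) plus the return address (2
    // slots), i.e. a 16-byte frame (framesize * BytesPerInt).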

    const int insts_size = 512;
    const int locs_size = 64;

    CodeBuffer code(name, insts_size, locs_size);
    OopMapSet* oop_maps = new OopMapSet();
    MacroAssembler* masm = new MacroAssembler(&code);
    assert_cond(oop_maps != NULL && masm != NULL);

    address start = __ pc();

    // This is an inlined and slightly modified version of call_VM
    // which has the ability to fetch the return PC out of
    // thread-local storage and also sets up last_Java_sp slightly
    // differently than the real call_VM

    __ enter(); // Save FP and RA before call

    assert(is_even(framesize / 2), "sp not 16-byte aligned");

    // ra and fp are already in place
    __ addi(sp, fp, 0 - ((unsigned)framesize << LogBytesPerInt)); // prolog

    int frame_complete = __ pc() - start;

    // Set up last_Java_sp and last_Java_fp
    address the_pc = __ pc();
    __ set_last_Java_frame(sp, fp, the_pc, t0);

    // Call runtime
    if (arg1 != noreg) {
      assert(arg2 != c_rarg1, "clobbered");
      __ mv(c_rarg1, arg1);
    }
    if (arg2 != noreg) {
      __ mv(c_rarg2, arg2);
    }
    __ mv(c_rarg0, xthread);
    BLOCK_COMMENT("call runtime_entry");
    int32_t offset = 0;
    __ movptr(t0, runtime_entry, offset);
    __ jalr(x1, t0, offset);
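    // movptr materializes the upper bits of runtime_entry in t0 and hands
    // the low 12 bits back in offset, which jalr folds into the call;
    // x1 (ra) receives the return address.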

    // Generate oop map
    OopMap* map = new OopMap(framesize, 0);
    assert_cond(map != NULL);

    oop_maps->add_gc_map(the_pc - start, map);

    __ reset_last_Java_frame(true);

    __ leave();

    // check for pending exceptions
#ifdef ASSERT
    Label L;
    __ ld(t0, Address(xthread, Thread::pending_exception_offset()));
    __ bnez(t0, L);
    __ should_not_reach_here();
    __ bind(L);
#endif // ASSERT
    __ far_jump(RuntimeAddress(StubRoutines::forward_exception_entry()));
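    // Control never returns here: the forward_exception stub dispatches the
    // pending exception to the caller's exception handler.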


    // codeBlob framesize is in words (not VMRegImpl::slot_size)
    RuntimeStub* stub =
      RuntimeStub::new_runtime_stub(name,
                                    &code,
                                    frame_complete,
                                    (framesize >> (LogBytesPerWord - LogBytesPerInt)),
                                    oop_maps, false);
    assert(stub != NULL, "create runtime stub fail!");
    return stub->entry_point();
  }

  address generate_cont_thaw() {
    if (!Continuations::enabled()) return nullptr;
    Unimplemented();
    return nullptr;
  }

  address generate_cont_returnBarrier() {
    if (!Continuations::enabled()) return nullptr;
    Unimplemented();
    return nullptr;
  }

  address generate_cont_returnBarrier_exception() {
    if (!Continuations::enabled()) return nullptr;
    Unimplemented();
    return nullptr;
  }

#if INCLUDE_JFR

#undef __
#define __ _masm->

  static void jfr_prologue(address the_pc, MacroAssembler* _masm, Register thread) {
    __ set_last_Java_frame(sp, fp, the_pc, t0);
    __ mv(c_rarg0, thread);
  }

  static void jfr_epilogue(MacroAssembler* _masm) {
    __ reset_last_Java_frame(true);
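    // The runtime call leaves a jobject handle (or null) in x10; resolve it
    // to the event writer oop through the native-access load barrier below.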
    Label null_jobject;
    __ beqz(x10, null_jobject);
    DecoratorSet decorators = ACCESS_READ | IN_NATIVE;
    BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler();
    bs->load_at(_masm, decorators, T_OBJECT, x10, Address(x10, 0), t0, t1);
    __ bind(null_jobject);
  }

  // For c2: c_rarg0 is junk, call to runtime to write a checkpoint.
  // It returns a jobject handle to the event writer.
  // The handle is dereferenced and the return value is the event writer oop.
  static RuntimeStub* generate_jfr_write_checkpoint() {
    enum layout {
      fp_off,
      fp_off2,
      return_off,
      return_off2,
      framesize // inclusive of return address
    };

    int insts_size = 512;
    int locs_size = 64;
    CodeBuffer code("jfr_write_checkpoint", insts_size, locs_size);
    OopMapSet* oop_maps = new OopMapSet();
    MacroAssembler* masm = new MacroAssembler(&code);
    MacroAssembler* _masm = masm;

    address start = __ pc();
    __ enter();
    int frame_complete = __ pc() - start;
    address the_pc = __ pc();
    jfr_prologue(the_pc, _masm, xthread);
    __ call_VM_leaf(CAST_FROM_FN_PTR(address, JfrIntrinsicSupport::write_checkpoint), 1);
    jfr_epilogue(_masm);
    __ leave();
    __ ret();

    OopMap* map = new OopMap(framesize, 1);
    oop_maps->add_gc_map(the_pc - start, map);

    RuntimeStub* stub = // codeBlob framesize is in words (not VMRegImpl::slot_size)
      RuntimeStub::new_runtime_stub("jfr_write_checkpoint", &code, frame_complete,
                                    (framesize >> (LogBytesPerWord - LogBytesPerInt)),
                                    oop_maps, false);
    return stub;
  }

#undef __

#endif // INCLUDE_JFR

  // Initialization
  void generate_initial() {
    // Generate initial stubs and initialize the entry points

    // entry points that exist in all platforms. Note: This is code
    // that could be shared among different platforms - however the
    // benefit seems to be smaller than the disadvantage of having a
    // much more complicated generator structure. See also comment in
    // stubRoutines.hpp.

    StubRoutines::_forward_exception_entry = generate_forward_exception();

    StubRoutines::_call_stub_entry =
      generate_call_stub(StubRoutines::_call_stub_return_address);

    // is referenced by megamorphic call
    StubRoutines::_catch_exception_entry = generate_catch_exception();

    // Build this early so it's available for the interpreter.
    StubRoutines::_throw_StackOverflowError_entry =
      generate_throw_exception("StackOverflowError throw_exception",
                               CAST_FROM_FN_PTR(address,
                                                SharedRuntime::throw_StackOverflowError));
    StubRoutines::_throw_delayed_StackOverflowError_entry =
      generate_throw_exception("delayed StackOverflowError throw_exception",
                               CAST_FROM_FN_PTR(address,
                                                SharedRuntime::throw_delayed_StackOverflowError));
  }

  void generate_phase1() {
    // Continuation stubs:
    StubRoutines::_cont_thaw = generate_cont_thaw();
    StubRoutines::_cont_returnBarrier = generate_cont_returnBarrier();
    StubRoutines::_cont_returnBarrierExc = generate_cont_returnBarrier_exception();

    JFR_ONLY(StubRoutines::_jfr_write_checkpoint_stub = generate_jfr_write_checkpoint();)
    JFR_ONLY(StubRoutines::_jfr_write_checkpoint = StubRoutines::_jfr_write_checkpoint_stub == nullptr ? nullptr
                                                   : StubRoutines::_jfr_write_checkpoint_stub->entry_point();)
  }

  void generate_all() {
    // support for verify_oop (must happen after universe_init)
    if (VerifyOops) {
      StubRoutines::_verify_oop_subroutine_entry = generate_verify_oop();
    }
    StubRoutines::_throw_AbstractMethodError_entry =
      generate_throw_exception("AbstractMethodError throw_exception",
                               CAST_FROM_FN_PTR(address,
                                                SharedRuntime::
                                                throw_AbstractMethodError));

    StubRoutines::_throw_IncompatibleClassChangeError_entry =
      generate_throw_exception("IncompatibleClassChangeError throw_exception",
                               CAST_FROM_FN_PTR(address,
                                                SharedRuntime::
                                                throw_IncompatibleClassChangeError));

    StubRoutines::_throw_NullPointerException_at_call_entry =
      generate_throw_exception("NullPointerException at call throw_exception",
                               CAST_FROM_FN_PTR(address,
                                                SharedRuntime::
                                                throw_NullPointerException_at_call));
    // arraycopy stubs used by compilers
    generate_arraycopy_stubs();

#ifdef COMPILER2
    if (UseMulAddIntrinsic) {
      StubRoutines::_mulAdd = generate_mulAdd();
    }

    if (UseMultiplyToLenIntrinsic) {
      StubRoutines::_multiplyToLen = generate_multiplyToLen();
    }

    if (UseSquareToLenIntrinsic) {
      StubRoutines::_squareToLen = generate_squareToLen();
    }

    if (UseMontgomeryMultiplyIntrinsic) {
      StubCodeMark mark(this, "StubRoutines", "montgomeryMultiply");
      MontgomeryMultiplyGenerator g(_masm, /*squaring*/false);
      StubRoutines::_montgomeryMultiply = g.generate_multiply();
    }

    if (UseMontgomerySquareIntrinsic) {
      StubCodeMark mark(this, "StubRoutines", "montgomerySquare");
      MontgomeryMultiplyGenerator g(_masm, /*squaring*/true);
      StubRoutines::_montgomerySquare = g.generate_square();
    }

    if (UseRVVForBigIntegerShiftIntrinsics) {
      StubRoutines::_bigIntegerLeftShiftWorker = generate_bigIntegerLeftShift();
      StubRoutines::_bigIntegerRightShiftWorker = generate_bigIntegerRightShift();
    }
#endif

    generate_compare_long_strings();

    generate_string_indexof_stubs();

    BarrierSetNMethod* bs_nm = BarrierSet::barrier_set()->barrier_set_nmethod();
    if (bs_nm != NULL) {
      StubRoutines::riscv::_method_entry_barrier = generate_method_entry_barrier();
    }

    StubRoutines::riscv::set_completed();
  }

 public:
  StubGenerator(CodeBuffer* code, int phase) : StubCodeGenerator(code) {
    if (phase == 0) {
      generate_initial();
    } else if (phase == 1) {
      generate_phase1(); // stubs that must be available for the interpreter
    } else {
      generate_all();
    }
  }

  ~StubGenerator() {}
}; // end class declaration

#define UCM_TABLE_MAX_ENTRIES 8
void StubGenerator_generate(CodeBuffer* code, int phase) {
  if (UnsafeCopyMemory::_table == NULL) {
    UnsafeCopyMemory::create_table(UCM_TABLE_MAX_ENTRIES);
  }

  StubGenerator g(code, phase);
}