8167065: Add intrinsic support for double precision shifting on x86_64

Reviewed-by: kvn
2025-08-27 23:04:50 +02:00 · 2019-12-23 14:42:21 -08:00 · 2019-12-23 14:42:21 -08:00 · 995da6eb2a
commit 995da6eb2a
parent f4af0eadb6
22 changed files with 628 additions and 50 deletions
--- a/src/hotspot/cpu/x86/assembler_x86.cpp
+++ b/src/hotspot/cpu/x86/assembler_x86.cpp
@ -4257,8 +4257,8 @@ void Assembler::pshufd(XMMRegister dst, XMMRegister src, int mode) {
 void Assembler::vpshufd(XMMRegister dst, XMMRegister src, int mode, int vector_len) {
  assert(vector_len == AVX_128bit? VM_Version::supports_avx() :
-         vector_len == AVX_256bit? VM_Version::supports_avx2() :
+         (vector_len == AVX_256bit? VM_Version::supports_avx2() :
-         0, "");
+         (vector_len == AVX_512bit? VM_Version::supports_evex() : 0)), "");
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
  int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
@ -4737,6 +4737,36 @@ void Assembler::shrl(Register dst) {
  emit_int8((unsigned char)(0xE8 | encode));
 }
 // SHLD r/m32, r32, CL (opcode 0F A5 /r): shifts dst left and fills the
 // vacated low bits from the high bits of src. The count is taken from CL.
 void Assembler::shldl(Register dst, Register src) {
  // src is passed as the reg operand and dst as the r/m operand; the helper
  // presumably also emits any prefix the two encodings require.
  const int reg_rm = prefix_and_encode(src->encoding(), dst->encoding());
  emit_int8(0x0F);
  emit_int8((unsigned char)0xA5);
  // 0xC0 selects the register-direct ModRM form.
  emit_int8((unsigned char)(0xC0 | reg_rm));
 }
 // SHLD r/m32, r32, imm8 (opcode 0F A4 /r ib): same double shift as the CL
 // form, but with the count supplied as a trailing immediate byte.
 void Assembler::shldl(Register dst, Register src, int8_t imm8) {
  // src is passed as the reg operand and dst as the r/m operand.
  const int reg_rm = prefix_and_encode(src->encoding(), dst->encoding());
  emit_int8(0x0F);
  emit_int8((unsigned char)0xA4);
  emit_int8((unsigned char)(0xC0 | reg_rm));
  // Shift count immediate follows the ModRM byte.
  emit_int8(imm8);
 }
 // SHRD r/m32, r32, CL (opcode 0F AD /r): shifts dst right and fills the
 // vacated high bits from the low bits of src. The count is taken from CL.
 void Assembler::shrdl(Register dst, Register src) {
  // src is passed as the reg operand and dst as the r/m operand; the helper
  // presumably also emits any prefix the two encodings require.
  const int reg_rm = prefix_and_encode(src->encoding(), dst->encoding());
  emit_int8(0x0F);
  emit_int8((unsigned char)0xAD);
  // 0xC0 selects the register-direct ModRM form.
  emit_int8((unsigned char)(0xC0 | reg_rm));
 }
 // SHRD r/m32, r32, imm8 (opcode 0F AC /r ib): same double shift as the CL
 // form, but with the count supplied as a trailing immediate byte.
 void Assembler::shrdl(Register dst, Register src, int8_t imm8) {
  // src is passed as the reg operand and dst as the r/m operand.
  const int reg_rm = prefix_and_encode(src->encoding(), dst->encoding());
  emit_int8(0x0F);
  emit_int8((unsigned char)0xAC);
  emit_int8((unsigned char)(0xC0 | reg_rm));
  // Shift count immediate follows the ModRM byte.
  emit_int8(imm8);
 }
 // copies a single word from [esi] to [edi]
 void Assembler::smovl() {
  emit_int8((unsigned char)0xA5);
@ -6513,6 +6543,23 @@ void Assembler::vpandq(XMMRegister dst, XMMRegister nds, XMMRegister src, int ve
  emit_int8((unsigned char)(0xC0 | encode));
 }
 // VPSHLDVD (EVEX.66.0F38.W0 71 /r): per-dword funnel shift left. Each dword of
 // dst is shifted left by the matching dword of 'shift', with the vacated bits
 // filled from src. Requires AVX512_VBMI2.
 void Assembler::vpshldvd(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len) {
  assert(VM_Version::supports_vbmi2(), "requires vbmi2");
  InstructionAttr evex_attrs(vector_len,
                             /* vex_w */ false,
                             /* legacy_mode */ false,
                             /* no_mask_reg */ true,
                             /* uses_vl */ true);
  // There is no VEX form of this instruction; force the EVEX encoding.
  evex_attrs.set_is_evex_instruction();
  const int reg_rm = vex_prefix_and_encode(dst->encoding(), src->encoding(), shift->encoding(),
                                           VEX_SIMD_66, VEX_OPCODE_0F_38, &evex_attrs);
  emit_int8(0x71);
  emit_int8((unsigned char)(0xC0 | reg_rm));
 }
 // VPSHRDVD (EVEX.66.0F38.W0 73 /r): per-dword funnel shift right. Each dword
 // of dst is shifted right by the matching dword of 'shift', with the vacated
 // bits filled from src. Requires AVX512_VBMI2.
 void Assembler::vpshrdvd(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len) {
  assert(VM_Version::supports_vbmi2(), "requires vbmi2");
  InstructionAttr evex_attrs(vector_len,
                             /* vex_w */ false,
                             /* legacy_mode */ false,
                             /* no_mask_reg */ true,
                             /* uses_vl */ true);
  // There is no VEX form of this instruction; force the EVEX encoding.
  evex_attrs.set_is_evex_instruction();
  const int reg_rm = vex_prefix_and_encode(dst->encoding(), src->encoding(), shift->encoding(),
                                           VEX_SIMD_66, VEX_OPCODE_0F_38, &evex_attrs);
  emit_int8(0x73);
  emit_int8((unsigned char)(0xC0 | reg_rm));
 }
 void Assembler::pandn(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
@ -8109,26 +8156,6 @@ void Assembler::set_byte_if_not_zero(Register dst) {
  emit_int8((unsigned char)(0xE0 | dst->encoding()));
 }
 // 0F A5 / r  -- SHLD dst, src, CL (register-direct form).
 // The ModRM byte is built straight from the raw encodings; no prefix is emitted.
 void Assembler::shldl(Register dst, Register src) {
  const int modrm = 0xC0 | (src->encoding() << 3) | dst->encoding();
  emit_int8(0x0F);
  emit_int8((unsigned char)0xA5);
  emit_int8((unsigned char)modrm);
 }
 // 0F A4 / r ib  -- SHLD dst, src, imm8 (register-direct form).
 // The ModRM byte is built straight from the raw encodings; no prefix is emitted.
 void Assembler::shldl(Register dst, Register src, int8_t imm8) {
  const int modrm = 0xC0 | (src->encoding() << 3) | dst->encoding();
  emit_int8(0x0F);
  emit_int8((unsigned char)0xA4);
  emit_int8((unsigned char)modrm);
  // Shift count immediate follows the ModRM byte.
  emit_int8(imm8);
 }
 // 0F AD / r  -- SHRD dst, src, CL (register-direct form).
 // The ModRM byte is built straight from the raw encodings; no prefix is emitted.
 void Assembler::shrdl(Register dst, Register src) {
  const int modrm = 0xC0 | (src->encoding() << 3) | dst->encoding();
  emit_int8(0x0F);
  emit_int8((unsigned char)0xAD);
  emit_int8((unsigned char)modrm);
 }
 #else // LP64
 void Assembler::set_byte_if_not_zero(Register dst) {
--- a/src/hotspot/cpu/x86/assembler_x86.hpp
+++ b/src/hotspot/cpu/x86/assembler_x86.hpp
@ -1838,6 +1838,8 @@ private:
  void shldl(Register dst, Register src);
  void shldl(Register dst, Register src, int8_t imm8);
  void shrdl(Register dst, Register src);
  void shrdl(Register dst, Register src, int8_t imm8);
  void shll(Register dst, int imm8);
  void shll(Register dst);
@ -1845,8 +1847,6 @@ private:
  void shlq(Register dst, int imm8);
  void shlq(Register dst);
  void shrdl(Register dst, Register src);
  void shrl(Register dst, int imm8);
  void shrl(Register dst);
@ -2140,6 +2140,9 @@ private:
  void evpsraq(XMMRegister dst, XMMRegister src, int shift, int vector_len);
  void evpsraq(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);
  void vpshldvd(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);
  void vpshrdvd(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);
  // And packed integers
  void pand(XMMRegister dst, XMMRegister src);
  void vpand(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
--- a/src/hotspot/cpu/x86/stubGenerator_x86_64.cpp
+++ b/src/hotspot/cpu/x86/stubGenerator_x86_64.cpp
@ -5694,6 +5694,247 @@ address generate_avx_ghash_processBlocks() {
    return start;
  }
  /**
   *  Stub behind the "bigIntegerRightShiftWorker" intrinsic.
   *
   *  For every i in [0, numIter), processed from the highest i downwards, the
   *  generated code stores (all elements are 32-bit):
   *    newArr[newIdx + i] = (oldArr[i + 1] >>> shiftCount) | (oldArr[i] << (32 - shiftCount))
   *  so it reads oldArr[0 .. numIter] and writes newArr[newIdx .. newIdx + numIter - 1].
   *
   *  Arguments:
   *
   *  Input:
   *    c_rarg0   - newArr address
   *    c_rarg1   - oldArr address
   *    c_rarg2   - newIdx
   *    c_rarg3   - shiftCount
   * not Win64
   *    c_rarg4   - numIter
   * Win64
   *    stack     - numIter (loaded from the caller's frame after enter())
   */
  address generate_bigIntegerRightShift() {
    __ align(CodeEntryAlignment);
    StubCodeMark mark(this, "StubRoutines", "bigIntegerRightShiftWorker");
    address start = __ pc();
    Label Shift512Loop, ShiftTwo, ShiftTwoLoop, ShiftOne, Exit;
    // For Unix, the arguments are as follows: rdi, rsi, rdx, rcx, r8.
    const Register newArr = rdi;
    const Register oldArr = rsi;
    const Register newIdx = rdx;
    const Register shiftCount = rcx;  // It was intentional to have shiftCount in rcx since it is used implicitly for shift.
    const Register totalNumIter = r8;
    // For windows, we use r9 and r10 as temps to save rdi and rsi. Thus we cannot allocate them for our temps.
    // For everything else, we prefer using r9 and r10 since we do not have to save them before use.
    const Register tmp1 = r11;                    // Caller save.
    const Register tmp2 = rax;                    // Caller save.
    const Register tmp3 = WINDOWS_ONLY(r12) NOT_WINDOWS(r9);   // Windows: Callee save. Linux: Caller save.
    const Register tmp4 = WINDOWS_ONLY(r13) NOT_WINDOWS(r10);  // Windows: Callee save. Linux: Caller save.
    const Register tmp5 = r14;                    // Callee save.
    const XMMRegister x0 = xmm0;
    const XMMRegister x1 = xmm1;
    const XMMRegister x2 = xmm2;
    BLOCK_COMMENT("Entry:");
    __ enter(); // required for proper stackwalking of RuntimeStub frame
 #ifdef _WINDOWS
    setup_arg_regs(4);
    // For windows, since last argument is on stack, we need to move it to the appropriate register.
    __ movl(totalNumIter, Address(rsp, 6 * wordSize));
    // Save callee save registers.
    __ push(tmp3);
    __ push(tmp4);
 #endif
    __ push(tmp5);
    // Rename temps used throughout the code.
    const Register idx = tmp1;   // index into oldArr, counts down from totalNumIter
    const Register nIdx = tmp2;  // index into newArr, always idx + newIdx
    // Start right shift from end of the array.
    // For example, if #iteration = 4 and newIdx = 1
    // then dest[4] = (src[4] >>> shiftCount) | (src[3] << (32 - shiftCount))
    // if #iteration = 4 and newIdx = 0
    // then dest[3] = (src[4] >>> shiftCount) | (src[3] << (32 - shiftCount))
    __ movl(idx, totalNumIter);
    __ movl(nIdx, idx);
    __ addl(nIdx, newIdx);
    // If vectorization is available, use the 512-bit loop only when the number of
    // iterations is at least AVX3Threshold/64 and at least 16 (one full vector of ints).
    // Otherwise go to ShiftTwo, which processes 2 iterations at a time.
    if (VM_Version::supports_vbmi2()) {
      // totalNumIter is a 32-bit quantity (it is only ever moved/compared with
      // 32-bit instructions elsewhere in this stub), so compare it with cmpl and
      // never look at the upper half of the register.
      __ cmpl(totalNumIter, (AVX3Threshold/64));
      __ jcc(Assembler::less, ShiftTwo);
      if (AVX3Threshold < 16 * 64) {
        __ cmpl(totalNumIter, 16);
        __ jcc(Assembler::less, ShiftTwo);
      }
      // Broadcast the shift count to all 16 dword lanes.
      __ evpbroadcastd(x0, shiftCount, Assembler::AVX_512bit);
      __ subl(idx, 16);
      __ subl(nIdx, 16);
      __ BIND(Shift512Loop);
      // x2 = oldArr[idx+1 .. idx+16], x1 = oldArr[idx .. idx+15]
      __ evmovdqul(x2, Address(oldArr, idx, Address::times_4, 4), Assembler::AVX_512bit);
      __ evmovdqul(x1, Address(oldArr, idx, Address::times_4), Assembler::AVX_512bit);
      __ vpshrdvd(x2, x1, x0, Assembler::AVX_512bit);
      __ evmovdqul(Address(newArr, nIdx, Address::times_4), x2, Assembler::AVX_512bit);
      __ subl(nIdx, 16);
      __ subl(idx, 16);
      __ jcc(Assembler::greaterEqual, Shift512Loop);
      // The last subtraction went below zero; undo it so idx/nIdx hold the remaining count again.
      __ addl(idx, 16);
      __ addl(nIdx, 16);
    }
    __ BIND(ShiftTwo);
    __ cmpl(idx, 2);
    __ jcc(Assembler::less, ShiftOne);
    __ subl(idx, 2);
    __ subl(nIdx, 2);
    __ BIND(ShiftTwoLoop);
    // Three consecutive source words produce two destination words.
    __ movl(tmp5, Address(oldArr, idx, Address::times_4, 8));
    __ movl(tmp4, Address(oldArr, idx, Address::times_4, 4));
    __ movl(tmp3, Address(oldArr, idx, Address::times_4));
    __ shrdl(tmp5, tmp4);
    __ shrdl(tmp4, tmp3);
    __ movl(Address(newArr, nIdx, Address::times_4, 4), tmp5);
    __ movl(Address(newArr, nIdx, Address::times_4), tmp4);
    __ subl(nIdx, 2);
    __ subl(idx, 2);
    __ jcc(Assembler::greaterEqual, ShiftTwoLoop);
    // Undo the final subtraction that took idx below zero.
    __ addl(idx, 2);
    __ addl(nIdx, 2);
    // Do the last iteration
    __ BIND(ShiftOne);
    __ cmpl(idx, 1);
    __ jcc(Assembler::less, Exit);
    __ subl(idx, 1);
    __ subl(nIdx, 1);
    __ movl(tmp4, Address(oldArr, idx, Address::times_4, 4));
    __ movl(tmp3, Address(oldArr, idx, Address::times_4));
    __ shrdl(tmp4, tmp3);
    __ movl(Address(newArr, nIdx, Address::times_4), tmp4);
    __ BIND(Exit);
    // Restore callee save registers.
    __ pop(tmp5);
 #ifdef _WINDOWS
    __ pop(tmp4);
    __ pop(tmp3);
    restore_arg_regs();
 #endif
    __ leave(); // required for proper stackwalking of RuntimeStub frame
    __ ret(0);
    return start;
  }
   /**
   *  Stub behind the "bigIntegerLeftShiftWorker" intrinsic.
   *
   *  For every i in [0, numIter), processed from i = 0 upwards, the generated
   *  code stores (all elements are 32-bit):
   *    newArr[newIdx + i] = (oldArr[i] << shiftCount) | (oldArr[i + 1] >>> (32 - shiftCount))
   *  so it reads oldArr[0 .. numIter] and writes newArr[newIdx .. newIdx + numIter - 1].
   *
   *  Arguments:
   *
   *  Input:
   *    c_rarg0   - newArr address
   *    c_rarg1   - oldArr address
   *    c_rarg2   - newIdx
   *    c_rarg3   - shiftCount
   * not Win64
   *    c_rarg4   - numIter
   * Win64
   *    rsp40    - numIter
   */
  address generate_bigIntegerLeftShift() {
    __ align(CodeEntryAlignment);
    StubCodeMark mark(this,  "StubRoutines", "bigIntegerLeftShiftWorker");
    address start = __ pc();
    Label Shift512Loop, ShiftTwo, ShiftTwoLoop, ShiftOne, Exit;
    // For Unix, the arguments are as follows: rdi, rsi, rdx, rcx, r8.
    const Register newArr = rdi;
    const Register oldArr = rsi;
    const Register newIdx = rdx;
    const Register shiftCount = rcx;  // It was intentional to have shiftCount in rcx since it is used implicitly for shift.
    const Register totalNumIter = r8;
    // For windows, we use r9 and r10 as temps to save rdi and rsi. Thus we cannot allocate them for our temps.
    // For everything else, we prefer using r9 and r10 since we do not have to save them before use.
    const Register tmp1 = r11;                    // Caller save.
    const Register tmp2 = rax;                    // Caller save.
    const Register tmp3 = WINDOWS_ONLY(r12) NOT_WINDOWS(r9);   // Windows: Callee save. Linux: Caller save.
    const Register tmp4 = WINDOWS_ONLY(r13) NOT_WINDOWS(r10);  // Windows: Callee save. Linux: Caller save.
    const Register tmp5 = r14;                    // Callee save.
    const XMMRegister x0 = xmm0;
    const XMMRegister x1 = xmm1;
    const XMMRegister x2 = xmm2;
    BLOCK_COMMENT("Entry:");
    __ enter(); // required for proper stackwalking of RuntimeStub frame
 #ifdef _WINDOWS
    setup_arg_regs(4);
    // For windows, since last argument is on stack, we need to move it to the appropriate register.
    __ movl(totalNumIter, Address(rsp, 6 * wordSize));
    // Save callee save registers.
    __ push(tmp3);
    __ push(tmp4);
 #endif
    __ push(tmp5);
    // Rename temps used throughout the code
    const Register idx = tmp1;         // shared element index into oldArr and the adjusted newArr
    const Register numIterTmp = tmp2;  // iterations still to do
    // Start idx from zero.
    __ xorl(idx, idx);
    // Compute interior pointer for new array. We do this so that we can use same index for both old and new arrays.
    __ lea(newArr, Address(newArr, newIdx, Address::times_4));
    __ movl(numIterTmp, totalNumIter);
    // If vectorization is available, use the 512-bit loop only when the number of
    // iterations is at least AVX3Threshold/64 and at least 16 (one full vector of ints).
    // If not, then go to ShiftTwo shifting two numbers at a time
    if (VM_Version::supports_vbmi2()) {
      __ cmpl(totalNumIter, (AVX3Threshold/64));
      __ jcc(Assembler::less, ShiftTwo);
      if (AVX3Threshold < 16 * 64) {
        __ cmpl(totalNumIter, 16);
        __ jcc(Assembler::less, ShiftTwo);
      }
      // Broadcast the shift count to all 16 dword lanes.
      __ evpbroadcastd(x0, shiftCount, Assembler::AVX_512bit);
      __ subl(numIterTmp, 16);
      __ BIND(Shift512Loop);
      // x1 = oldArr[idx .. idx+15], x2 = oldArr[idx+1 .. idx+16]
      __ evmovdqul(x1, Address(oldArr, idx, Address::times_4), Assembler::AVX_512bit);
      __ evmovdqul(x2, Address(oldArr, idx, Address::times_4, 0x4), Assembler::AVX_512bit);
      __ vpshldvd(x1, x2, x0, Assembler::AVX_512bit);
      __ evmovdqul(Address(newArr, idx, Address::times_4), x1, Assembler::AVX_512bit);
      __ addl(idx, 16);
      __ subl(numIterTmp, 16);
      __ jcc(Assembler::greaterEqual, Shift512Loop);
      // The last subtraction went below zero; undo it so numIterTmp is the remaining count.
      __ addl(numIterTmp, 16);
    }
    __ BIND(ShiftTwo);
    // Nothing at all to do when the caller asked for fewer than one iteration.
    __ cmpl(totalNumIter, 1);
    __ jcc(Assembler::less, Exit);
    // tmp3 always holds oldArr[idx] on entry to ShiftTwoLoop and ShiftOne.
    __ movl(tmp3, Address(oldArr, idx, Address::times_4));
    __ subl(numIterTmp, 2);
    __ jcc(Assembler::less, ShiftOne);
    __ BIND(ShiftTwoLoop);
    // Three consecutive source words produce two destination words.
    __ movl(tmp4, Address(oldArr, idx, Address::times_4, 0x4));
    __ movl(tmp5, Address(oldArr, idx, Address::times_4, 0x8));
    __ shldl(tmp3, tmp4);
    __ shldl(tmp4, tmp5);
    __ movl(Address(newArr, idx, Address::times_4), tmp3);
    __ movl(Address(newArr, idx, Address::times_4, 0x4), tmp4);
    // Carry the last loaded word over as the first word of the next pair.
    __ movl(tmp3, tmp5);
    __ addl(idx, 2);
    __ subl(numIterTmp, 2);
    __ jcc(Assembler::greaterEqual, ShiftTwoLoop);
    // Do the last iteration
    __ BIND(ShiftOne);
    // Undo the subtraction of 2 that brought us here; 0 or 1 iterations remain.
    __ addl(numIterTmp, 2);
    __ cmpl(numIterTmp, 1);
    __ jcc(Assembler::less, Exit);
    __ movl(tmp4, Address(oldArr, idx, Address::times_4, 0x4));
    __ shldl(tmp3, tmp4);
    __ movl(Address(newArr, idx, Address::times_4), tmp3);
    __ BIND(Exit);
    // Restore callee save registers.
    __ pop(tmp5);
 #ifdef _WINDOWS
    __ pop(tmp4);
    __ pop(tmp3);
    restore_arg_regs();
 #endif
    __ leave(); // required for proper stackwalking of RuntimeStub frame
    __ ret(0);
    return start;
  }
  address generate_libmExp() {
    StubCodeMark mark(this, "StubRoutines", "libmExp");
@ -6314,6 +6555,10 @@ address generate_avx_ghash_processBlocks() {
    if (UseMulAddIntrinsic) {
      StubRoutines::_mulAdd = generate_mulAdd();
    }
    if (VM_Version::supports_vbmi2()) {
      StubRoutines::_bigIntegerRightShiftWorker = generate_bigIntegerRightShift();
      StubRoutines::_bigIntegerLeftShiftWorker = generate_bigIntegerLeftShift();
    }
 #ifndef _WINDOWS
    if (UseMontgomeryMultiplyIntrinsic) {
      StubRoutines::_montgomeryMultiply
--- a/src/hotspot/cpu/x86/vm_version_x86.cpp
+++ b/src/hotspot/cpu/x86/vm_version_x86.cpp
@ -694,6 +694,7 @@ void VM_Version::get_processor_features() {
    _features &= ~CPU_AVX512_VPCLMULQDQ;
    _features &= ~CPU_VAES;
    _features &= ~CPU_VNNI;
    _features &= ~CPU_VBMI2;
  }
  if (UseAVX < 2)
@ -716,7 +717,7 @@ void VM_Version::get_processor_features() {
  }
  char buf[256];
-  jio_snprintf(buf, sizeof(buf), "(%u cores per cpu, %u threads per core) family %d model %d stepping %d%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s",
+  jio_snprintf(buf, sizeof(buf), "(%u cores per cpu, %u threads per core) family %d model %d stepping %d%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s",
               cores_per_cpu(), threads_per_core(),
               cpu_family(), _model, _stepping,
               (supports_cmov() ? ", cmov" : ""),
@ -749,7 +750,10 @@ void VM_Version::get_processor_features() {
               (supports_adx() ? ", adx" : ""),
               (supports_evex() ? ", evex" : ""),
               (supports_sha() ? ", sha" : ""),
-               (supports_fma() ? ", fma" : ""));
+               (supports_fma() ? ", fma" : ""),
               (supports_vbmi2() ? ", vbmi2" : ""),
               (supports_vaes() ? ", vaes" : ""),
               (supports_vnni() ? ", vnni" : ""));
  _features_string = os::strdup(buf);
  // UseSSE is set to the smaller of what hardware supports and what
--- a/src/hotspot/cpu/x86/vm_version_x86.hpp
+++ b/src/hotspot/cpu/x86/vm_version_x86.hpp
@ -345,6 +345,8 @@ protected:
 #define CPU_FLUSH ((uint64_t)UCONST64(0x20000000000))  // flush instruction
 #define CPU_FLUSHOPT ((uint64_t)UCONST64(0x40000000000)) // flushopt instruction
 #define CPU_CLWB ((uint64_t)UCONST64(0x80000000000))   // clwb instruction
 #define CPU_VBMI2 ((uint64_t)UCONST64(0x100000000000))   // VBMI2 shift left double instructions
 enum Extended_Family {
    // AMD
@ -567,6 +569,8 @@ enum Extended_Family {
          result |= CPU_VAES;
        if (_cpuid_info.sef_cpuid7_ecx.bits.avx512_vnni != 0)
          result |= CPU_VNNI;
        if (_cpuid_info.sef_cpuid7_ecx.bits.avx512_vbmi2 != 0)
          result |= CPU_VBMI2;
      }
    }
    if (_cpuid_info.sef_cpuid7_ebx.bits.bmi1 != 0)
@ -858,6 +862,7 @@ public:
  static bool supports_avx512_vpclmulqdq() { return (_features & CPU_AVX512_VPCLMULQDQ) != 0; }
  static bool supports_vaes()       { return (_features & CPU_VAES) != 0; }
  static bool supports_vnni()       { return (_features & CPU_VNNI) != 0; }
  static bool supports_vbmi2()      { return (_features & CPU_VBMI2) != 0; }
  // Intel features
  static bool is_intel_family_core() { return is_intel() &&
--- a/src/hotspot/share/aot/aotCodeHeap.cpp
+++ b/src/hotspot/share/aot/aotCodeHeap.cpp
@ -555,6 +555,8 @@ void AOTCodeHeap::link_stub_routines_symbols() {
    SET_AOT_GLOBAL_SYMBOL_VALUE("_aot_stub_routines_montgomeryMultiply",  address, StubRoutines::_montgomeryMultiply);
    SET_AOT_GLOBAL_SYMBOL_VALUE("_aot_stub_routines_montgomerySquare", address, StubRoutines::_montgomerySquare);
    SET_AOT_GLOBAL_SYMBOL_VALUE("_aot_stub_routines_vectorizedMismatch", address, StubRoutines::_vectorizedMismatch);
    SET_AOT_GLOBAL_SYMBOL_VALUE("_aot_stub_routines_bigIntegerRightShiftWorker", address, StubRoutines::_bigIntegerRightShiftWorker);
    SET_AOT_GLOBAL_SYMBOL_VALUE("_aot_stub_routines_bigIntegerLeftShiftWorker", address, StubRoutines::_bigIntegerLeftShiftWorker);
    SET_AOT_GLOBAL_SYMBOL_VALUE("_aot_stub_routines_throw_delayed_StackOverflowError_entry", address, StubRoutines::_throw_delayed_StackOverflowError_entry);
--- a/src/hotspot/share/classfile/vmSymbols.cpp
+++ b/src/hotspot/share/classfile/vmSymbols.cpp
@ -837,6 +837,9 @@ bool vmIntrinsics::is_disabled_by_flags(vmIntrinsics::ID id) {
  case vmIntrinsics::_montgomerySquare:
    if (!UseMontgomerySquareIntrinsic) return true;
    break;
  case vmIntrinsics::_bigIntegerRightShiftWorker:
  case vmIntrinsics::_bigIntegerLeftShiftWorker:
    break;
  case vmIntrinsics::_addExactI:
  case vmIntrinsics::_addExactL:
  case vmIntrinsics::_decrementExactI:
--- a/src/hotspot/share/classfile/vmSymbols.hpp
+++ b/src/hotspot/share/classfile/vmSymbols.hpp
@ -565,6 +565,7 @@
  template(char_StringBuffer_signature,               "(C)Ljava/lang/StringBuffer;")                              \
  template(int_String_signature,                      "(I)Ljava/lang/String;")                                    \
  template(boolean_boolean_int_signature,             "(ZZ)I")                                                    \
  template(big_integer_shift_worker_signature,        "([I[IIII)V")                                               \
  template(reflect_method_signature,                  "Ljava/lang/reflect/Method;")                                                    \
  /* signature symbols needed by intrinsics */                                                                    \
  VM_INTRINSICS_DO(VM_INTRINSIC_IGNORE, VM_SYMBOL_IGNORE, VM_SYMBOL_IGNORE, template, VM_ALIAS_IGNORE)            \
@ -1007,6 +1008,12 @@
   do_name(     montgomerySquare_name,                             "implMontgomerySquare")                              \
   do_signature(montgomerySquare_signature,                        "([I[IIJ[I)[I")                                      \
                                                                                                                        \
  do_intrinsic(_bigIntegerRightShiftWorker, java_math_BigInteger, rightShift_name, big_integer_shift_worker_signature, F_S) \
   do_name(     rightShift_name,                                 "shiftRightImplWorker")                                \
                                                                                                                        \
  do_intrinsic(_bigIntegerLeftShiftWorker, java_math_BigInteger, leftShift_name, big_integer_shift_worker_signature, F_S) \
   do_name(     leftShift_name,                                 "shiftLeftImplWorker")                                  \
                                                                                                                        \
  do_class(jdk_internal_util_ArraysSupport, "jdk/internal/util/ArraysSupport")                                                          \
  do_intrinsic(_vectorizedMismatch, jdk_internal_util_ArraysSupport, vectorizedMismatch_name, vectorizedMismatch_signature, F_S)\
   do_name(vectorizedMismatch_name, "vectorizedMismatch")                                                               \
--- a/src/hotspot/share/jvmci/vmStructs_jvmci.cpp
+++ b/src/hotspot/share/jvmci/vmStructs_jvmci.cpp
@ -322,6 +322,8 @@
  static_field(StubRoutines,                _montgomeryMultiply,                              address)                               \
  static_field(StubRoutines,                _montgomerySquare,                                address)                               \
  static_field(StubRoutines,                _vectorizedMismatch,                              address)                               \
  static_field(StubRoutines,                _bigIntegerRightShiftWorker,                      address)                               \
  static_field(StubRoutines,                _bigIntegerLeftShiftWorker,                       address)                               \
                                                                                                                                     \
  nonstatic_field(Thread,                   _tlab,                                            ThreadLocalAllocBuffer)                \
  nonstatic_field(Thread,                   _allocated_bytes,                                 jlong)                                 \
--- a/src/hotspot/share/opto/c2compiler.cpp
+++ b/src/hotspot/share/opto/c2compiler.cpp
@ -628,6 +628,8 @@ bool C2Compiler::is_intrinsic_supported(const methodHandle& method, bool is_virt
  case vmIntrinsics::_mulAdd:
  case vmIntrinsics::_montgomeryMultiply:
  case vmIntrinsics::_montgomerySquare:
  case vmIntrinsics::_bigIntegerRightShiftWorker:
  case vmIntrinsics::_bigIntegerLeftShiftWorker:
  case vmIntrinsics::_vectorizedMismatch:
  case vmIntrinsics::_ghash_processBlocks:
  case vmIntrinsics::_base64_encodeBlock:
--- a/src/hotspot/share/opto/escape.cpp
+++ b/src/hotspot/share/opto/escape.cpp
@ -1006,6 +1006,8 @@ void ConnectionGraph::process_call_arguments(CallNode *call) {
                  strcmp(call->as_CallLeaf()->_name, "mulAdd") == 0 ||
                  strcmp(call->as_CallLeaf()->_name, "montgomery_multiply") == 0 ||
                  strcmp(call->as_CallLeaf()->_name, "montgomery_square") == 0 ||
                  strcmp(call->as_CallLeaf()->_name, "bigIntegerRightShiftWorker") == 0 ||
                  strcmp(call->as_CallLeaf()->_name, "bigIntegerLeftShiftWorker") == 0 ||
                  strcmp(call->as_CallLeaf()->_name, "vectorizedMismatch") == 0)
                 ))) {
            call->dump();
--- a/src/hotspot/share/opto/library_call.cpp
+++ b/src/hotspot/share/opto/library_call.cpp
@ -327,6 +327,7 @@ class LibraryCallKit : public GraphKit {
  bool inline_mulAdd();
  bool inline_montgomeryMultiply();
  bool inline_montgomerySquare();
  bool inline_bigIntegerShift(bool isRightShift);
  bool inline_vectorizedMismatch();
  bool inline_fma(vmIntrinsics::ID id);
  bool inline_character_compare(vmIntrinsics::ID id);
@ -845,6 +846,11 @@ bool LibraryCallKit::try_to_inline(int predicate) {
  case vmIntrinsics::_montgomerySquare:
    return inline_montgomerySquare();
  case vmIntrinsics::_bigIntegerRightShiftWorker:
    return inline_bigIntegerShift(true);
  case vmIntrinsics::_bigIntegerLeftShiftWorker:
    return inline_bigIntegerShift(false);
  case vmIntrinsics::_vectorizedMismatch:
    return inline_vectorizedMismatch();
@ -5253,6 +5259,60 @@ bool LibraryCallKit::inline_montgomerySquare() {
  return true;
 }
 //-------------inline_bigIntegerShift---------------------------------
 // Replaces a call to one of the BigInteger shift workers with a leaf call to
 // the matching stub. Returns false (no intrinsic) when the stub is absent or
 // when either array argument is not statically known to be an int[].
 bool LibraryCallKit::inline_bigIntegerShift(bool isRightShift) {
  address stubAddr = isRightShift ? StubRoutines::bigIntegerRightShift()
                                  : StubRoutines::bigIntegerLeftShift();
  if (stubAddr == NULL) {
    return false; // Intrinsic's stub is not implemented on this platform
  }
  const char* stubName = isRightShift ? "bigIntegerRightShiftWorker" : "bigIntegerLeftShiftWorker";

  assert(callee()->signature()->size() == 5, "expected 5 arguments");
  Node* newArr     = argument(0);
  Node* oldArr     = argument(1);
  Node* newIdx     = argument(2);
  Node* shiftCount = argument(3);
  Node* numIter    = argument(4);

  // Both arrays must have a known array type with a loaded klass.
  const TypeAryPtr* top_newArr = newArr->Value(&_gvn)->isa_aryptr();
  const TypeAryPtr* top_oldArr = oldArr->Value(&_gvn)->isa_aryptr();
  if (top_newArr == NULL || top_newArr->klass() == NULL) {
    return false;
  }
  if (top_oldArr == NULL || top_oldArr->klass() == NULL) {
    return false;
  }

  // The stubs operate on 32-bit elements only.
  BasicType newArr_elem = top_newArr->klass()->as_array_klass()->element_type()->basic_type();
  BasicType oldArr_elem = top_oldArr->klass()->as_array_klass()->element_type()->basic_type();
  if (newArr_elem != T_INT || oldArr_elem != T_INT) {
    return false;
  }

  // Make the call, passing the address of element 0 of each array.
  Node* newArr_start = array_element_address(newArr, intcon(0), newArr_elem);
  Node* oldArr_start = array_element_address(oldArr, intcon(0), oldArr_elem);
  make_runtime_call(RC_LEAF,
                    OptoRuntime::bigIntegerShift_Type(),
                    stubAddr,
                    stubName,
                    TypePtr::BOTTOM,
                    newArr_start,
                    oldArr_start,
                    newIdx,
                    shiftCount,
                    numIter);
  return true;
 }
 //-------------inline_vectorizedMismatch------------------------------
 bool LibraryCallKit::inline_vectorizedMismatch() {
  assert(UseVectorizedMismatchIntrinsic, "not implementated on this platform");
--- a/src/hotspot/share/opto/runtime.cpp
+++ b/src/hotspot/share/opto/runtime.cpp
@ -1111,6 +1111,25 @@ const TypeFunc* OptoRuntime::montgomerySquare_Type() {
  return TypeFunc::make(domain, range);
 }
 // Call signature shared by the BigInteger left/right shift stubs:
 // two non-null pointers followed by three ints, with no return value.
 const TypeFunc * OptoRuntime::bigIntegerShift_Type() {
  // create input type (domain)
  const int num_args = 5;
  const Type** fields = TypeTuple::fields(num_args);
  int slot = TypeFunc::Parms;
  fields[slot++] = TypePtr::NOTNULL;    // newArr
  fields[slot++] = TypePtr::NOTNULL;    // oldArr
  fields[slot++] = TypeInt::INT;        // newIdx
  fields[slot++] = TypeInt::INT;        // shiftCount
  fields[slot++] = TypeInt::INT;        // numIter
  assert(slot == TypeFunc::Parms + num_args, "correct decoding");
  const TypeTuple* domain = TypeTuple::make(TypeFunc::Parms + num_args, fields);

  // create result type (range): no result type needed
  fields = TypeTuple::fields(1);
  fields[TypeFunc::Parms + 0] = NULL;
  const TypeTuple* range = TypeTuple::make(TypeFunc::Parms, fields);

  return TypeFunc::make(domain, range);
 }
 const TypeFunc* OptoRuntime::vectorizedMismatch_Type() {
  // create input type (domain)
  int num_args = 4;
--- a/src/hotspot/share/opto/runtime.hpp
+++ b/src/hotspot/share/opto/runtime.hpp
@ -289,6 +289,8 @@ private:
  static const TypeFunc* mulAdd_Type();
  static const TypeFunc* bigIntegerShift_Type();
  static const TypeFunc* vectorizedMismatch_Type();
  static const TypeFunc* ghash_processBlocks_Type();
--- a/src/hotspot/share/runtime/stubRoutines.cpp
+++ b/src/hotspot/share/runtime/stubRoutines.cpp
@ -157,6 +157,8 @@ address StubRoutines::_squareToLen = NULL;
 address StubRoutines::_mulAdd = NULL;
 address StubRoutines::_montgomeryMultiply = NULL;
 address StubRoutines::_montgomerySquare = NULL;
 address StubRoutines::_bigIntegerRightShiftWorker = NULL;
 address StubRoutines::_bigIntegerLeftShiftWorker = NULL;
 address StubRoutines::_vectorizedMismatch = NULL;
--- a/src/hotspot/share/runtime/stubRoutines.hpp
+++ b/src/hotspot/share/runtime/stubRoutines.hpp
@ -239,6 +239,8 @@ class StubRoutines: AllStatic {
  static address _mulAdd;
  static address _montgomeryMultiply;
  static address _montgomerySquare;
  static address _bigIntegerRightShiftWorker;
  static address _bigIntegerLeftShiftWorker;
  static address _vectorizedMismatch;
@ -414,6 +416,8 @@ class StubRoutines: AllStatic {
  static address mulAdd()              { return _mulAdd; }
  static address montgomeryMultiply()  { return _montgomeryMultiply; }
  static address montgomerySquare()    { return _montgomerySquare; }
  static address bigIntegerRightShift() { return _bigIntegerRightShiftWorker; }
  static address bigIntegerLeftShift()  { return _bigIntegerLeftShiftWorker; }
  static address vectorizedMismatch()  { return _vectorizedMismatch; }
--- a/src/hotspot/share/runtime/vmStructs.cpp
+++ b/src/hotspot/share/runtime/vmStructs.cpp
@ -602,6 +602,8 @@ typedef HashtableEntry<InstanceKlass*, mtClass>  KlassHashtableEntry;
     static_field(StubRoutines,                _updateBytesCRC32C,                            address)                               \
     static_field(StubRoutines,                _multiplyToLen,                                address)                               \
     static_field(StubRoutines,                _squareToLen,                                  address)                               \
     static_field(StubRoutines,                _bigIntegerRightShiftWorker,                   address)                               \
     static_field(StubRoutines,                _bigIntegerLeftShiftWorker,                    address)                               \
     static_field(StubRoutines,                _mulAdd,                                       address)                               \
     static_field(StubRoutines,                _dexp,                                         address)                               \
     static_field(StubRoutines,                _dlog,                                         address)                               \
--- a/src/java.base/share/classes/java/math/BigInteger.java
+++ b/src/java.base/share/classes/java/math/BigInteger.java
@ -42,6 +42,7 @@ import jdk.internal.math.DoubleConsts;
 import jdk.internal.math.FloatConsts;
 import jdk.internal.HotSpotIntrinsicCandidate;
 import jdk.internal.vm.annotation.Stable;
 import jdk.internal.vm.annotation.ForceInline;
 /**
 * Immutable arbitrary-precision integers.  All operations behave as if
@ -2621,12 +2622,8 @@ public class BigInteger extends Number implements Comparable<BigInteger> {
    // shifts a up to len right n bits assumes no leading zeros, 0<n<32
    static void primitiveRightShift(int[] a, int len, int n) {
-        int n2 = 32 - n;
+        Objects.checkFromToIndex(0, len, a.length);
-        for (int i=len-1, c=a[i]; i > 0; i--) {
+        shiftRightImplWorker(a, a, 1, n, len-1);
            int b = c;
            c = a[i-1];
            a[i] = (c << n2) | (b >>> n);
        }
        a[0] >>>= n;
    }
@ -2634,13 +2631,8 @@ public class BigInteger extends Number implements Comparable<BigInteger> {
    static void primitiveLeftShift(int[] a, int len, int n) {
        if (len == 0 || n == 0)
            return;
-
+        Objects.checkFromToIndex(0, len, a.length);
-        int n2 = 32 - n;
+        shiftLeftImplWorker(a, a, 0, n, len-1);
        for (int i=0, c=a[i], m=i+len-1; i < m; i++) {
            int b = c;
            c = a[i+1];
            a[i] = (b << n) | (c >>> n2);
        }
        a[len-1] <<= n;
    }
@ -3353,14 +3345,25 @@ public class BigInteger extends Number implements Comparable<BigInteger> {
            } else {
                newMag = new int[magLen + nInts];
            }
-            int j=0;
+            int numIter = magLen - 1;
-            while (j < magLen-1)
+            Objects.checkFromToIndex(0, numIter + 1, mag.length);
-                newMag[i++] = mag[j++] << nBits | mag[j] >>> nBits2;
+            Objects.checkFromToIndex(i, numIter + i + 1, newMag.length);
-            newMag[i] = mag[j] << nBits;
+            shiftLeftImplWorker(newMag, mag, i, nBits, numIter);
            newMag[numIter + i] = mag[numIter] << nBits;
        }
        return newMag;
    }
    @ForceInline
    @HotSpotIntrinsicCandidate
    private static void shiftLeftImplWorker(int[] newArr, int[] oldArr, int newIdx, int shiftCount, int numIter) {
        // Produces numIter output words. Output word k (stored at
        // newArr[newIdx + k]) is oldArr[k] shifted left by shiftCount, with
        // the vacated low bits filled from the top bits of oldArr[k + 1].
        // Both source words are read before the store, so the method also
        // works when newArr and oldArr are the same array.
        final int carryShift = 32 - shiftCount;
        for (int src = 0; src < numIter; src++) {
            int shifted = oldArr[src] << shiftCount;
            int carriedIn = oldArr[src + 1] >>> carryShift;
            newArr[newIdx + src] = shifted | carriedIn;
        }
    }
    /**
     * Returns a BigInteger whose value is {@code (this >> n)}.  Sign
     * extension is performed.  The shift distance, {@code n}, may be
@ -3415,11 +3418,10 @@ public class BigInteger extends Number implements Comparable<BigInteger> {
            } else {
                newMag = new int[magLen - nInts -1];
            }
-
+            int numIter = magLen - nInts - 1;
-            int nBits2 = 32 - nBits;
+            Objects.checkFromToIndex(0, numIter + 1, mag.length);
-            int j=0;
+            Objects.checkFromToIndex(i, numIter + i, newMag.length);
-            while (j < magLen - nInts - 1)
+            shiftRightImplWorker(newMag, mag, i, nBits, numIter);
                newMag[i++] = (mag[j++] << nBits2) | (mag[j] >>> nBits);
        }
        if (signum < 0) {
@ -3437,6 +3439,17 @@ public class BigInteger extends Number implements Comparable<BigInteger> {
        return new BigInteger(newMag, signum);
    }
    @ForceInline
    @HotSpotIntrinsicCandidate
    private static void shiftRightImplWorker(int[] newArr, int[] oldArr, int newIdx, int shiftCount, int numIter) {
        // Walks from the least significant word towards index newIdx. Each
        // output word is oldArr[src] shifted right by shiftCount, with the
        // vacated high bits filled from the low bits of oldArr[src - 1].
        // The source cursor always starts at numIter; the destination cursor
        // starts one slot lower when newIdx is 0.
        final int carryShift = 32 - shiftCount;
        int src = numIter;
        int dst = numIter;
        if (newIdx == 0) {
            dst = numIter - 1;
        }
        for (; dst >= newIdx; dst--, src--) {
            int shifted = oldArr[src] >>> shiftCount;
            int carriedIn = oldArr[src - 1] << carryShift;
            newArr[dst] = shifted | carriedIn;
        }
    }
    int[] javaIncrement(int[] val) {
        int lastSum = 0;
        for (int i=val.length-1;  i >= 0 && lastSum == 0; i--)
--- a/src/jdk.aot/share/classes/jdk.tools.jaotc.binformat/src/jdk/tools/jaotc/binformat/BinaryContainer.java
+++ b/src/jdk.aot/share/classes/jdk.tools.jaotc.binformat/src/jdk/tools/jaotc/binformat/BinaryContainer.java
@ -229,6 +229,8 @@ public final class BinaryContainer implements SymbolTable {
        {"StubRoutines::_montgomeryMultiply", "_aot_stub_routines_montgomeryMultiply" },
        {"StubRoutines::_montgomerySquare", "_aot_stub_routines_montgomerySquare" },
        {"StubRoutines::_vectorizedMismatch", "_aot_stub_routines_vectorizedMismatch" },
        {"StubRoutines::_bigIntegerRightShiftWorker", "_aot_stub_routines_bigIntegerRightShiftWorker" },
        {"StubRoutines::_bigIntegerLeftShiftWorker", "_aot_stub_routines_bigIntegerLeftShiftWorker" },
        {"StubRoutines::_throw_delayed_StackOverflowError_entry", "_aot_stub_routines_throw_delayed_StackOverflowError_entry" },
--- a/src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.hotspot.test/src/org/graalvm/compiler/hotspot/test/CheckGraalIntrinsics.java
+++ b/src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.hotspot.test/src/org/graalvm/compiler/hotspot/test/CheckGraalIntrinsics.java
@ -416,7 +416,9 @@ public class CheckGraalIntrinsics extends GraalTest {
        if (isJDK14OrHigher()) {
            add(toBeInvestigated,
                            "com/sun/crypto/provider/ElectronicCodeBook.implECBDecrypt([BII[BI)I",
-                            "com/sun/crypto/provider/ElectronicCodeBook.implECBEncrypt([BII[BI)I");
+                            "com/sun/crypto/provider/ElectronicCodeBook.implECBEncrypt([BII[BI)I",
                            "java/math/BigInteger.shiftLeftImplWorker([I[IIII)V",
                            "java/math/BigInteger.shiftRightImplWorker([I[IIII)V");
        }
        if (!config.inlineNotify()) {
--- a/test/hotspot/jtreg/compiler/intrinsics/bigInteger/TestShift.java
+++ b/test/hotspot/jtreg/compiler/intrinsics/bigInteger/TestShift.java
@ -0,0 +1,130 @@
 /*
 * Copyright (c) 2019, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 */
 /**
 * @test
 * @bug 8234692
 * @summary Add C2 x86 intrinsic for BigInteger::shiftLeft() and BigInteger::shiftRight() method
 * @requires vm.compiler2.enabled
 *
 * @run main/othervm/timeout=600 -XX:-TieredCompilation -Xbatch
 *      -XX:CompileCommand=exclude,compiler.intrinsics.bigInteger.TestShift::main
 *      -XX:CompileCommand=option,compiler.intrinsics.bigInteger.TestShift::base_left_shift,ccstr,DisableIntrinsic,_bigIntegerLeftShiftWorker
 *      -XX:CompileCommand=option,compiler.intrinsics.bigInteger.TestShift::base_right_shift,ccstr,DisableIntrinsic,_bigIntegerRightShiftWorker
 *      -XX:CompileCommand=inline,java.math.BigInteger::shiftLeft
 *      -XX:CompileCommand=inline,java.math.BigInteger::shiftRight
 *      compiler.intrinsics.bigInteger.TestShift
 *
 * @run main/othervm/timeout=600
 *      -XX:CompileCommand=exclude,compiler.intrinsics.bigInteger.TestShift::main
 *      -XX:CompileCommand=option,compiler.intrinsics.bigInteger.TestShift::base_left_shift,ccstr,DisableIntrinsic,_bigIntegerLeftShiftWorker
 *      -XX:CompileCommand=option,compiler.intrinsics.bigInteger.TestShift::base_right_shift,ccstr,DisableIntrinsic,_bigIntegerRightShiftWorker
 *      -XX:CompileCommand=inline,java.math.BigInteger::shiftLeft
 *      -XX:CompileCommand=inline,java.math.BigInteger::shiftRight
 *      compiler.intrinsics.bigInteger.TestShift
 *
 */
 package compiler.intrinsics.bigInteger;
 import java.math.BigInteger;
 import java.util.Arrays;
 import java.util.Random;
 public class TestShift {
    public static BigInteger base_left_shift(BigInteger op1, int shift) {
      return op1.shiftLeft(shift);
    }
    public static BigInteger new_left_shift(BigInteger op1, int shift) {
      return op1.shiftLeft(shift);
    }
    public static BigInteger base_right_shift(BigInteger op1, int shift) {
      return op1.shiftRight(shift);
    }
    public static BigInteger new_right_shift(BigInteger op1, int shift) {
      return op1.shiftRight(shift);
    }
    public static boolean bytecompare(BigInteger b1, BigInteger b2) {
      byte[] data1 = b1.toByteArray();
      byte[] data2 = b2.toByteArray();
      if (data1.length != data2.length)
        return false;
      for (int i = 0; i < data1.length; i++) {
        if (data1[i] != data2[i])
          return false;
      }
      return true;
    }
    public static String stringify(BigInteger b) {
      String strout= "";
      byte [] data = b.toByteArray();
      for (int i = 0; i < data.length; i++) {
        strout += (String.format("%02x",data[i]) + " ");
      }
      return strout;
    }
    public static void main(String args[]) throws Exception {
      BigInteger [] inputbuffer = new BigInteger[10];
      BigInteger [] oldLeftShiftResult = new BigInteger[10];
      BigInteger [] newLeftShiftResult = new BigInteger[10];
      BigInteger [] oldRightShiftResult = new BigInteger[10];
      BigInteger [] newRightShiftResult = new BigInteger[10];
      Random rand = new Random();
      long seed = System.nanoTime();
      rand.setSeed(seed);
      int shiftCount = rand.nextInt(30) + 1;
      for(int i = 0; i < inputbuffer.length; i++) {
        int numbits = rand.nextInt(4096)+32;
        inputbuffer[i] = new BigInteger(numbits, rand);
      }
      for (int j = 0; j < 100000; j++) {
        for(int i = 0; i < inputbuffer.length; i++) {
           oldLeftShiftResult[i] = base_left_shift(inputbuffer[i], shiftCount);
           newLeftShiftResult[i] = new_left_shift(inputbuffer[i], shiftCount);
           if (!bytecompare(oldLeftShiftResult[i], newLeftShiftResult[i])) {
            System.out.println("mismatch for input:" + stringify(inputbuffer[i]) + "\n" + "expected left shift result:" + stringify(oldLeftShiftResult[i]) + "\n" +
                               "calculated left shift result:" + stringify(newLeftShiftResult[i]));
            throw new Exception("Failed");
          }
          oldRightShiftResult[i] = base_right_shift(inputbuffer[i], shiftCount);
          newRightShiftResult[i] = new_right_shift(inputbuffer[i], shiftCount);
          if (!bytecompare(oldRightShiftResult[i], newRightShiftResult[i])) {
            System.out.println("mismatch for input:" + stringify(inputbuffer[i]) + "\n" + "expected right shift result:" + stringify(oldRightShiftResult[i]) + "\n" +
                               "calculated right shift result:" + stringify(newRightShiftResult[i]));
            throw new Exception("Failed");
          }
        }
      }
    }
 }
--- a/test/micro/org/openjdk/bench/java/math/BigIntegers.java
+++ b/test/micro/org/openjdk/bench/java/math/BigIntegers.java
@ -45,7 +45,7 @@ import java.util.concurrent.TimeUnit;
@State(Scope.Thread)
 public class BigIntegers {
-    private BigInteger[] hugeArray, largeArray, smallArray;
+    private BigInteger[] hugeArray, largeArray, smallArray, shiftArray;
    public String[] dummyStringArray;
    public Object[] dummyArr;
    private static final int TESTSIZE = 1000;
@ -53,6 +53,7 @@ public class BigIntegers {
    @Setup
    public void setup() {
        Random r = new Random(1123);
        int numbits = r.nextInt(16384);
        hugeArray = new BigInteger[TESTSIZE]; /*
         * Huge numbers larger than
@ -67,6 +68,10 @@ public class BigIntegers {
         * Small number less than
         * MAX_INT
         */
        shiftArray = new BigInteger[TESTSIZE]; /*
         * Each array entry is at most 16k bits
         * in size
         */
        dummyStringArray = new String[TESTSIZE];
        dummyArr = new Object[TESTSIZE];
@ -78,6 +83,7 @@ public class BigIntegers {
                    + ((long) value + (long) Integer.MAX_VALUE));
            largeArray[i] = new BigInteger("" + ((long) value + (long) Integer.MAX_VALUE));
            smallArray[i] = new BigInteger("" + ((long) value / 1000));
            shiftArray[i] = new BigInteger(numbits, r);
        }
    }
@ -137,4 +143,38 @@ public class BigIntegers {
        }
        bh.consume(tmp);
    }
    /**
     * Invokes the shiftLeft method of BigInteger on every entry of shiftArray,
     * using one random shift distance in the range 1..30 per invocation.
     */
    @Benchmark
    @OperationsPerInvocation(TESTSIZE)
    public void testLeftShift(Blackhole bh) {
        Random rand = new Random();
        int shift = rand.nextInt(30) + 1;
        for (BigInteger s : shiftArray) {
            // Shift each operand on its own rather than re-shifting one
            // accumulated value: the operand size then stays the same for
            // every operation, and the loop performs exactly the number of
            // shifts declared in @OperationsPerInvocation.
            bh.consume(s.shiftLeft(shift));
        }
    }
    /**
     * Invokes the shiftRight method of BigInteger on every entry of shiftArray,
     * using one random shift distance in the range 1..30 per invocation.
     */
    @Benchmark
    @OperationsPerInvocation(TESTSIZE)
    public void testRightShift(Blackhole bh) {
        Random rand = new Random();
        int shift = rand.nextInt(30) + 1;
        for (BigInteger s : shiftArray) {
            // Shift each operand on its own: repeatedly right-shifting one
            // accumulated value shrinks it on every step (possibly to zero),
            // so later iterations would no longer measure a multi-word shift.
            bh.consume(s.shiftRight(shift));
        }
    }
 }