8223347: Integration of Vector API (Incubator)

Co-authored-by: Vivek Deshpande <vdeshpande@openjdk.org>
Co-authored-by: Qi Feng <qfeng@openjdk.org>
Co-authored-by: Ian Graves <igraves@openjdk.org>
Co-authored-by: Jean-Philippe Halimi <jphalimi@openjdk.org>
Co-authored-by: Vladimir Ivanov <vlivanov@openjdk.org>
Co-authored-by: Ningsheng Jian <njian@openjdk.org>
Co-authored-by: Razvan Lupusoru <rlupusoru@openjdk.org>
Co-authored-by: Smita Kamath <svkamath@openjdk.org>
Co-authored-by: Rahul Kandu <rkandu@openjdk.org>
Co-authored-by: Kishor Kharbas <kkharbas@openjdk.org>
Co-authored-by: Eric Liu <Eric.Liu2@arm.com>
Co-authored-by: Aaloan Miftah <someusername3@gmail.com>
Co-authored-by: John R Rose <jrose@openjdk.org>
Co-authored-by: Shravya Rukmannagari <srukmannagar@openjdk.org>
Co-authored-by: Paul Sandoz <psandoz@openjdk.org>
Co-authored-by: Sandhya Viswanathan <sviswanathan@openjdk.org>
Co-authored-by: Lauren Walkowski <lauren.walkowski@arm.com>
Co-authored-by: Yang Zhang <Yang.Zhang@arm.com>
Co-authored-by: Joshua Zhu <jzhu@openjdk.org>
Co-authored-by: Wang Zhuo <wzhuo@openjdk.org>
Co-authored-by: Jatin Bhateja <jbhateja@openjdk.org>
Reviewed-by: erikj, chegar, kvn, darcy, forax, briangoetz, aph, epavlova, coleenp
Paul Sandoz 2020-10-14 20:02:46 +00:00
parent 386e7e8b73
commit 0c99b19258
336 changed files with 293978 additions and 2083 deletions


@ -59,6 +59,7 @@ BOOT_MODULES += \
java.security.sasl \
java.xml \
jdk.incubator.foreign \
jdk.incubator.vector \
jdk.internal.vm.ci \
jdk.jfr \
jdk.management \
@ -145,6 +146,7 @@ DOCS_MODULES += \
jdk.hotspot.agent \
jdk.httpserver \
jdk.incubator.jpackage \
jdk.incubator.vector \
jdk.jartool \
jdk.javadoc \
jdk.jcmd \


@ -138,6 +138,7 @@ ifeq ($(call check-jvm-feature, compiler2), true)
ifeq ($(HOTSPOT_TARGET_CPU_ARCH), aarch64)
AD_SRC_FILES += $(call uniq, $(wildcard $(foreach d, $(AD_SRC_ROOTS), \
$d/cpu/$(HOTSPOT_TARGET_CPU_ARCH)/$(HOTSPOT_TARGET_CPU_ARCH)_neon.ad \
$d/cpu/$(HOTSPOT_TARGET_CPU_ARCH)/$(HOTSPOT_TARGET_CPU_ARCH)_sve.ad \
)))
endif


@ -106,7 +106,7 @@ public class Spp {
static final String LNSEP = System.getProperty("line.separator");
static final String KEY = "([a-zA-Z0-9]+)";
static final String VAR = "([a-zA-Z0-9_\\-]+)";
static final String TEXT = "([a-zA-Z0-9&;,.<>/#() \\?\\[\\]\\$]+)"; // $ -- hack embedded $var$
static final String TEXT = "([\\p{Print}&&[^{#:}]]+)";
static final int GN_NOT = 1;
static final int GN_KEY = 2;
@ -140,6 +140,10 @@ public class Spp {
}
}
}
if (repl == null) {
System.err.println("Error: undefined variable in line " + ln);
System.exit(-1);
}
vardef.appendReplacement(buf, repl);
}
vardef.appendTail(buf);
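
As an aside, the hunk above hardens Spp's $var$ substitution: the TEXT character class is widened to any printable character except the template metacharacters, and an undefined variable now aborts the run instead of silently producing broken output. A minimal, self-contained Java sketch of the Matcher.appendReplacement/appendTail loop involved (the template string, the vars map and the class name are illustrative assumptions, not Spp code):

import java.util.HashMap;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

public class VarSubstDemo {
    public static void main(String[] args) {
        Map<String, String> vars = new HashMap<>();
        vars.put("type", "Float");
        // $var$ tokens, using the same VAR character class as Spp
        Pattern p = Pattern.compile("\\$([a-zA-Z0-9_\\-]+)\\$");
        Matcher m = p.matcher("abstract class $type$Vector {}");
        StringBuffer buf = new StringBuffer();
        while (m.find()) {
            String repl = vars.get(m.group(1));
            if (repl == null) {                      // mirrors the new undefined-variable check
                System.err.println("Error: undefined variable");
                System.exit(-1);
            }
            m.appendReplacement(buf, repl);
        }
        m.appendTail(buf);
        System.out.println(buf);                     // abstract class FloatVector {}
    }
}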


@ -1,4 +1,7 @@
import os
import random
import subprocess
import sys
AARCH64_AS = "as"
AARCH64_OBJDUMP = "objdump"
@ -129,6 +132,8 @@ class OperandFactory:
_modes = {'x' : GeneralRegister,
'w' : GeneralRegister,
'b' : FloatRegister,
'h' : FloatRegister,
's' : FloatRegister,
'd' : FloatRegister,
'z' : FloatZero,
@ -198,16 +203,16 @@ class InstructionWithModes(Instruction):
self.isFloat = (mode == 'd') | (mode == 's')
if self.isFloat:
self.isWord = mode != 'd'
self.asmRegPrefix = ["d", "s"][self.isWord]
self.asmRegPrefix = ["d", "s"][self.isWord]
else:
self.isWord = mode != 'x'
self.asmRegPrefix = ["x", "w"][self.isWord]
def name(self):
return self._name + (self.mode if self.mode != 'x' else '')
def aname(self):
return (self._name+mode if (mode == 'b' or mode == 'h')
return (self._name+mode if (mode == 'b' or mode == 'h')
else self._name)
class ThreeRegInstruction(Instruction):
@ -220,17 +225,17 @@ class ThreeRegInstruction(Instruction):
def cstr(self):
return (super(ThreeRegInstruction, self).cstr()
+ ('%s, %s, %s'
+ ('%s, %s, %s'
% (self.reg[0],
self.reg[1], self.reg[2])))
def astr(self):
prefix = self.asmRegPrefix
return (super(ThreeRegInstruction, self).astr()
+ ('%s, %s, %s'
+ ('%s, %s, %s'
% (self.reg[0].astr(prefix),
self.reg[1].astr(prefix), self.reg[2].astr(prefix))))
class FourRegInstruction(ThreeRegInstruction):
def generate(self):
@ -241,12 +246,12 @@ class FourRegInstruction(ThreeRegInstruction):
def cstr(self):
return (super(FourRegInstruction, self).cstr()
+ (', %s' % self.reg[3]))
def astr(self):
prefix = self.asmRegPrefix
return (super(FourRegInstruction, self).astr()
+ (', %s' % self.reg[3].astr(prefix)))
class TwoRegInstruction(Instruction):
def generate(self):
@ -261,17 +266,17 @@ class TwoRegInstruction(Instruction):
def astr(self):
prefix = self.asmRegPrefix
return (super(TwoRegInstruction, self).astr()
+ ('%s, %s'
+ ('%s, %s'
% (self.reg[0].astr(prefix),
self.reg[1].astr(prefix))))
class TwoRegImmedInstruction(TwoRegInstruction):
def generate(self):
super(TwoRegImmedInstruction, self).generate()
self.immed = random.randint(0, 1<<11 -1)
return self
def cstr(self):
return (super(TwoRegImmedInstruction, self).cstr()
+ ', %su' % self.immed)
@ -301,9 +306,9 @@ class ArithOp(ThreeRegInstruction):
self.kind = ShiftKind().generate()
self.distance = random.randint(0, (1<<5)-1 if self.isWord else (1<<6)-1)
return self
def cstr(self):
return ('%s, Assembler::%s, %s);'
return ('%s, Assembler::%s, %s);'
% (ThreeRegInstruction.cstr(self),
self.kind.cstr(), self.distance))
@ -314,9 +319,9 @@ class ArithOp(ThreeRegInstruction):
self.distance))
class AddSubCarryOp(ThreeRegInstruction):
def cstr(self):
return ('%s);'
return ('%s);'
% (ThreeRegInstruction.cstr(self)))
class AddSubExtendedOp(ThreeRegInstruction):
@ -332,76 +337,75 @@ class AddSubExtendedOp(ThreeRegInstruction):
def cstr(self):
return (super(AddSubExtendedOp, self).cstr()
+ (", ext::" + AddSubExtendedOp.optNames[self.option]
+ (", ext::" + AddSubExtendedOp.optNames[self.option]
+ ", " + str(self.amount) + ");"))
def astr(self):
return (super(AddSubExtendedOp, self).astr()
+ (", " + AddSubExtendedOp.optNames[self.option]
+ (", " + AddSubExtendedOp.optNames[self.option]
+ " #" + str(self.amount)))
class AddSubImmOp(TwoRegImmedInstruction):
def cstr(self):
return super(AddSubImmOp, self).cstr() + ");"
class LogicalImmOp(AddSubImmOp):
# These tables are legal immediate logical operands
immediates32 \
= [0x1, 0x3f, 0x1f0, 0x7e0,
0x1c00, 0x3ff0, 0x8000, 0x1e000,
0x3e000, 0x78000, 0xe0000, 0x100000,
0x1fffe0, 0x3fe000, 0x780000, 0x7ffff8,
0xff8000, 0x1800180, 0x1fffc00, 0x3c003c0,
0x3ffff00, 0x7c00000, 0x7fffe00, 0xf000f00,
0xfffe000, 0x18181818, 0x1ffc0000, 0x1ffffffe,
0x3f003f00, 0x3fffe000, 0x60006000, 0x7f807f80,
0x7ffffc00, 0x800001ff, 0x803fffff, 0x9f9f9f9f,
0xc0000fff, 0xc0c0c0c0, 0xe0000000, 0xe003e003,
0xe3ffffff, 0xf0000fff, 0xf0f0f0f0, 0xf80000ff,
0xf83ff83f, 0xfc00007f, 0xfc1fffff, 0xfe0001ff,
0xfe3fffff, 0xff003fff, 0xff800003, 0xff87ff87,
0xffc00fff, 0xffe0000f, 0xffefffef, 0xfff1fff1,
0xfff83fff, 0xfffc0fff, 0xfffe0fff, 0xffff3fff,
0xffffc007, 0xffffe1ff, 0xfffff80f, 0xfffffe07,
= [0x1, 0x3f, 0x1f0, 0x7e0,
0x1c00, 0x3ff0, 0x8000, 0x1e000,
0x3e000, 0x78000, 0xe0000, 0x100000,
0x1fffe0, 0x3fe000, 0x780000, 0x7ffff8,
0xff8000, 0x1800180, 0x1fffc00, 0x3c003c0,
0x3ffff00, 0x7c00000, 0x7fffe00, 0xf000f00,
0xfffe000, 0x18181818, 0x1ffc0000, 0x1ffffffe,
0x3f003f00, 0x3fffe000, 0x60006000, 0x7f807f80,
0x7ffffc00, 0x800001ff, 0x803fffff, 0x9f9f9f9f,
0xc0000fff, 0xc0c0c0c0, 0xe0000000, 0xe003e003,
0xe3ffffff, 0xf0000fff, 0xf0f0f0f0, 0xf80000ff,
0xf83ff83f, 0xfc00007f, 0xfc1fffff, 0xfe0001ff,
0xfe3fffff, 0xff003fff, 0xff800003, 0xff87ff87,
0xffc00fff, 0xffe0000f, 0xffefffef, 0xfff1fff1,
0xfff83fff, 0xfffc0fff, 0xfffe0fff, 0xffff3fff,
0xffffc007, 0xffffe1ff, 0xfffff80f, 0xfffffe07,
0xffffffbf, 0xfffffffd]
immediates \
= [0x1, 0x1f80, 0x3fff0, 0x3ffffc,
0x3fe0000, 0x1ffc0000, 0xf8000000, 0x3ffffc000,
0xffffffe00, 0x3ffffff800, 0xffffc00000, 0x3f000000000,
0x7fffffff800, 0x1fe000001fe0, 0x3ffffff80000, 0xc00000000000,
0x1ffc000000000, 0x3ffff0003ffff, 0x7ffffffe00000, 0xfffffffffc000,
0x1ffffffffffc00, 0x3fffffffffff00, 0x7ffffffffffc00, 0xffffffffff8000,
0x1ffffffff800000, 0x3fffffc03fffffc, 0x7fffc0000000000, 0xff80ff80ff80ff8,
0x1c00000000000000, 0x1fffffffffff0000, 0x3fffff803fffff80, 0x7fc000007fc00000,
0x8000000000000000, 0x803fffff803fffff, 0xc000007fc000007f, 0xe00000000000ffff,
0xe3ffffffffffffff, 0xf007f007f007f007, 0xf80003ffffffffff, 0xfc000003fc000003,
0xfe000000007fffff, 0xff00000000007fff, 0xff800000000003ff, 0xffc00000000000ff,
0xffe00000000003ff, 0xfff0000000003fff, 0xfff80000001fffff, 0xfffc0000fffc0000,
0xfffe003fffffffff, 0xffff3fffffffffff, 0xffffc0000007ffff, 0xffffe01fffffe01f,
0xfffff800000007ff, 0xfffffc0fffffffff, 0xffffff00003fffff, 0xffffffc0000007ff,
0xfffffff0000001ff, 0xfffffffc00003fff, 0xffffffff07ffffff, 0xffffffffe003ffff,
= [0x1, 0x1f80, 0x3fff0, 0x3ffffc,
0x3fe0000, 0x1ffc0000, 0xf8000000, 0x3ffffc000,
0xffffffe00, 0x3ffffff800, 0xffffc00000, 0x3f000000000,
0x7fffffff800, 0x1fe000001fe0, 0x3ffffff80000, 0xc00000000000,
0x1ffc000000000, 0x3ffff0003ffff, 0x7ffffffe00000, 0xfffffffffc000,
0x1ffffffffffc00, 0x3fffffffffff00, 0x7ffffffffffc00, 0xffffffffff8000,
0x1ffffffff800000, 0x3fffffc03fffffc, 0x7fffc0000000000, 0xff80ff80ff80ff8,
0x1c00000000000000, 0x1fffffffffff0000, 0x3fffff803fffff80, 0x7fc000007fc00000,
0x8000000000000000, 0x803fffff803fffff, 0xc000007fc000007f, 0xe00000000000ffff,
0xe3ffffffffffffff, 0xf007f007f007f007, 0xf80003ffffffffff, 0xfc000003fc000003,
0xfe000000007fffff, 0xff00000000007fff, 0xff800000000003ff, 0xffc00000000000ff,
0xffe00000000003ff, 0xfff0000000003fff, 0xfff80000001fffff, 0xfffc0000fffc0000,
0xfffe003fffffffff, 0xffff3fffffffffff, 0xffffc0000007ffff, 0xffffe01fffffe01f,
0xfffff800000007ff, 0xfffffc0fffffffff, 0xffffff00003fffff, 0xffffffc0000007ff,
0xfffffff0000001ff, 0xfffffffc00003fff, 0xffffffff07ffffff, 0xffffffffe003ffff,
0xfffffffffc01ffff, 0xffffffffffc00003, 0xfffffffffffc000f, 0xffffffffffffe07f]
def generate(self):
AddSubImmOp.generate(self)
self.immed = \
self.immediates32[random.randint(0, len(self.immediates32)-1)] \
if self.isWord \
else \
self.immediates[random.randint(0, len(self.immediates)-1)]
if self.isWord else \
self.immediates[random.randint(0, len(self.immediates)-1)]
return self
def astr(self):
return (super(TwoRegImmedInstruction, self).astr()
+ ', #0x%x' % self.immed)
def cstr(self):
return super(AddSubImmOp, self).cstr() + "ll);"
class MultiOp():
def multipleForms(self):
@ -422,9 +426,9 @@ class AbsOp(MultiOp, Instruction):
return Instruction.astr(self) + "%s"
class RegAndAbsOp(MultiOp, Instruction):
def multipleForms(self):
if self.name() == "adrp":
if self.name() == "adrp":
# We can only test one form of adrp because anything other
# than "adrp ." requires relocs in the assembler output
return 1
@ -434,11 +438,11 @@ class RegAndAbsOp(MultiOp, Instruction):
Instruction.generate(self)
self.reg = GeneralRegister().generate()
return self
def cstr(self):
if self.name() == "adrp":
return "__ _adrp(" + "%s, %s);" % (self.reg, "%s")
return (super(RegAndAbsOp, self).cstr()
return (super(RegAndAbsOp, self).cstr()
+ "%s, %s);" % (self.reg, "%s"))
def astr(self):
@ -446,14 +450,14 @@ class RegAndAbsOp(MultiOp, Instruction):
+ self.reg.astr(self.asmRegPrefix) + ", %s")
class RegImmAbsOp(RegAndAbsOp):
def cstr(self):
return (Instruction.cstr(self)
+ "%s, %s, %s);" % (self.reg, self.immed, "%s"))
def astr(self):
return (Instruction.astr(self)
+ ("%s, #%s, %s"
+ ("%s, #%s, %s"
% (self.reg.astr(self.asmRegPrefix), self.immed, "%s")))
def generate(self):
@ -462,7 +466,7 @@ class RegImmAbsOp(RegAndAbsOp):
return self
class MoveWideImmOp(RegImmAbsOp):
def multipleForms(self):
return 0
@ -472,8 +476,8 @@ class MoveWideImmOp(RegImmAbsOp):
def astr(self):
return (Instruction.astr(self)
+ ("%s, #%s, lsl %s"
% (self.reg.astr(self.asmRegPrefix),
+ ("%s, #%s, lsl %s"
% (self.reg.astr(self.asmRegPrefix),
self.immed, self.shift)))
def generate(self):
@ -486,7 +490,7 @@ class MoveWideImmOp(RegImmAbsOp):
return self
class BitfieldOp(TwoRegInstruction):
def cstr(self):
return (Instruction.cstr(self)
+ ("%s, %s, %s, %s);"
@ -513,16 +517,16 @@ class ExtractOp(ThreeRegInstruction):
def cstr(self):
return (ThreeRegInstruction.cstr(self)
+ (", %s);" % self.lsb))
def astr(self):
return (ThreeRegInstruction.astr(self)
+ (", #%s" % self.lsb))
class CondBranchOp(MultiOp, Instruction):
def cstr(self):
return "__ br(Assembler::" + self.name() + ", %s);"
def astr(self):
return "b." + self.name() + "\t%s"
@ -530,10 +534,10 @@ class ImmOp(Instruction):
def cstr(self):
return "%s%s);" % (Instruction.cstr(self), self.immed)
def astr(self):
return Instruction.astr(self) + "#" + str(self.immed)
def generate(self):
self.immed = random.randint(0, 1<<16 -1)
return self
@ -542,6 +546,8 @@ class Op(Instruction):
def cstr(self):
return Instruction.cstr(self) + ");"
def astr(self):
return self.aname();
class SystemOp(Instruction):
@ -573,11 +579,11 @@ class ConditionalCompareOp(TwoRegImmedInstruction):
return self
def cstr(self):
return (super(ConditionalCompareOp, self).cstr() + ", "
return (super(ConditionalCompareOp, self).cstr() + ", "
+ "Assembler::" + conditionCodes[self.cond] + ");")
def astr(self):
return (super(ConditionalCompareOp, self).astr() +
return (super(ConditionalCompareOp, self).astr() +
", " + conditionCodes[self.cond])
class ConditionalCompareImmedOp(Instruction):
@ -596,33 +602,33 @@ class ConditionalCompareImmedOp(Instruction):
+ "Assembler::" + conditionCodes[self.cond] + ");")
def astr(self):
return (Instruction.astr(self)
+ self.reg.astr(self.asmRegPrefix)
return (Instruction.astr(self)
+ self.reg.astr(self.asmRegPrefix)
+ ", #" + str(self.immed)
+ ", #" + str(self.immed2)
+ ", " + conditionCodes[self.cond])
class TwoRegOp(TwoRegInstruction):
def cstr(self):
return TwoRegInstruction.cstr(self) + ");"
class ThreeRegOp(ThreeRegInstruction):
def cstr(self):
return ThreeRegInstruction.cstr(self) + ");"
class FourRegMulOp(FourRegInstruction):
def cstr(self):
return FourRegInstruction.cstr(self) + ");"
def astr(self):
isMaddsub = self.name().startswith("madd") | self.name().startswith("msub")
midPrefix = self.asmRegPrefix if isMaddsub else "w"
return (Instruction.astr(self)
+ self.reg[0].astr(self.asmRegPrefix)
+ ", " + self.reg[1].astr(midPrefix)
return (Instruction.astr(self)
+ self.reg[0].astr(self.asmRegPrefix)
+ ", " + self.reg[1].astr(midPrefix)
+ ", " + self.reg[2].astr(midPrefix)
+ ", " + self.reg[3].astr(self.asmRegPrefix))
@ -638,8 +644,8 @@ class ConditionalSelectOp(ThreeRegInstruction):
+ "Assembler::" + conditionCodes[self.cond] + ");")
def astr(self):
return (ThreeRegInstruction.astr(self)
+ ", " + conditionCodes[self.cond])
return (ThreeRegInstruction.astr(self)
+ ", " + conditionCodes[self.cond])
class LoadStoreExclusiveOp(InstructionWithModes):
@ -651,7 +657,7 @@ class LoadStoreExclusiveOp(InstructionWithModes):
result = self.aname() + '\t'
regs = list(self.regs)
index = regs.pop() # The last reg is the index register
prefix = ('x' if (self.mode == 'x')
prefix = ('x' if (self.mode == 'x')
& ((self.name().startswith("ld"))
| (self.name().startswith("stlr"))) # Ewww :-(
else 'w')
@ -698,17 +704,17 @@ class LoadStoreExclusiveOp(InstructionWithModes):
return self._name
class Address(object):
base_plus_unscaled_offset, pre, post, base_plus_reg, \
base_plus_scaled_offset, pcrel, post_reg, base_only = range(8)
kinds = ["base_plus_unscaled_offset", "pre", "post", "base_plus_reg",
kinds = ["base_plus_unscaled_offset", "pre", "post", "base_plus_reg",
"base_plus_scaled_offset", "pcrel", "post_reg", "base_only"]
extend_kinds = ["uxtw", "lsl", "sxtw", "sxtx"]
@classmethod
def kindToStr(cls, i):
return cls.kinds[i]
def generate(self, kind, shift_distance):
self.kind = kind
self.base = GeneralRegister().generate()
@ -738,7 +744,7 @@ class Address(object):
Address.pcrel: "",
Address.base_plus_reg: "Address(%s, %s, Address::%s(%s))" \
% (self.base, self.index, self.extend_kind, self.shift_distance),
Address.base_plus_scaled_offset:
Address.base_plus_scaled_offset:
"Address(%s, %s)" % (self.base, self.offset) } [self.kind]
if (self.kind == Address.pcrel):
result = ["__ pc()", "back", "forth"][self.offset]
@ -758,7 +764,7 @@ class Address(object):
Address.base_only: "[%s]" % (self.base.astr(prefix)),
Address.pcrel: "",
Address.base_plus_reg: "[%s, %s, %s #%s]" \
% (self.base.astr(prefix), self.index.astr(extend_prefix),
% (self.base.astr(prefix), self.index.astr(extend_prefix),
self.extend_kind, self.shift_distance),
Address.base_plus_scaled_offset: \
"[%s, %s]" \
@ -767,7 +773,7 @@ class Address(object):
if (self.kind == Address.pcrel):
result = [".", "back", "forth"][self.offset]
return result
class LoadStoreOp(InstructionWithModes):
def __init__(self, args):
@ -822,14 +828,14 @@ class LoadStoreOp(InstructionWithModes):
class LoadStorePairOp(InstructionWithModes):
numRegs = 2
def __init__(self, args):
name, self.asmname, self.kind, mode = args
InstructionWithModes.__init__(self, name, mode)
self.offset = random.randint(-1<<4, 1<<4-1) << 4
def generate(self):
self.reg = [OperandFactory.create(self.mode).generate()
self.reg = [OperandFactory.create(self.mode).generate()
for i in range(self.numRegs)]
self.base = OperandFactory.create('x').generate()
kindStr = Address.kindToStr(self.kind);
@ -846,8 +852,8 @@ class LoadStorePairOp(InstructionWithModes):
address = ["[%s, #%s]", "[%s, #%s]!", "[%s], #%s"][self.kind]
address = address % (self.base.astr('x'), self.offset)
result = "%s\t%s, %s, %s" \
% (self.asmname,
self.reg[0].astr(self.asmRegPrefix),
% (self.asmname,
self.reg[0].astr(self.asmRegPrefix),
self.reg[1].astr(self.asmRegPrefix), address)
return result
@ -875,7 +881,7 @@ class FloatInstruction(Instruction):
Instruction.__init__(self, name)
def generate(self):
self.reg = [OperandFactory.create(self.modes[i]).generate()
self.reg = [OperandFactory.create(self.modes[i]).generate()
for i in range(self.numRegs)]
return self
@ -884,7 +890,7 @@ class FloatInstruction(Instruction):
return (formatStr
% tuple([Instruction.cstr(self)] +
[str(self.reg[i]) for i in range(self.numRegs)])) # Yowza
def astr(self):
formatStr = "%s%s" + ''.join([", %s" for i in range(1, self.numRegs)])
return (formatStr
@ -985,7 +991,7 @@ class SVEReductionOp(Instruction):
moreReg +
[str(self.reg[2]) + self._width.astr()])
class LdStSIMDOp(Instruction):
class LdStNEONOp(Instruction):
def __init__(self, args):
self._name, self.regnum, self.arrangement, self.addresskind = args
@ -1004,7 +1010,7 @@ class LdStSIMDOp(Instruction):
return self
def cstr(self):
buf = super(LdStSIMDOp, self).cstr() + str(self._firstSIMDreg)
buf = super(LdStNEONOp, self).cstr() + str(self._firstSIMDreg)
current = self._firstSIMDreg
for cnt in range(1, self.regnum):
buf = '%s, %s' % (buf, current.nextReg())
@ -1022,6 +1028,57 @@ class LdStSIMDOp(Instruction):
def aname(self):
return self._name
class NEONReduceInstruction(Instruction):
def __init__(self, args):
self._name, self.insname, self.arrangement = args
def generate(self):
current = FloatRegister().generate()
self.dstSIMDreg = current
self.srcSIMDreg = current.nextReg()
return self
def cstr(self):
buf = Instruction.cstr(self) + str(self.dstSIMDreg)
buf = '%s, __ T%s, %s);' % (buf, self.arrangement, self.srcSIMDreg)
return buf
def astr(self):
buf = '%s\t%s' % (self.insname, self.dstSIMDreg.astr(self.arrangement[-1].lower()))
buf = '%s, %s.%s' % (buf, self.srcSIMDreg, self.arrangement)
return buf
def aname(self):
return self._name
class CommonNEONInstruction(Instruction):
def __init__(self, args):
self._name, self.insname, self.arrangement = args
def generate(self):
self._firstSIMDreg = FloatRegister().generate()
return self
def cstr(self):
buf = Instruction.cstr(self) + str(self._firstSIMDreg)
buf = '%s, __ T%s' % (buf, self.arrangement)
current = self._firstSIMDreg
for cnt in range(1, self.numRegs):
buf = '%s, %s' % (buf, current.nextReg())
current = current.nextReg()
return '%s);' % (buf)
def astr(self):
buf = '%s\t%s.%s' % (self.insname, self._firstSIMDreg, self.arrangement)
current = self._firstSIMDreg
for cnt in range(1, self.numRegs):
buf = '%s, %s.%s' % (buf, current.nextReg(), self.arrangement)
current = current.nextReg()
return buf
def aname(self):
return self._name
class SHA512SIMDOp(Instruction):
def generate(self):
@ -1097,6 +1154,12 @@ class FloatConvertOp(TwoRegFloatOp):
def cname(self):
return self._cname
class TwoRegNEONOp(CommonNEONInstruction):
numRegs = 2
class ThreeRegNEONOp(TwoRegNEONOp):
numRegs = 3
class SpecialCases(Instruction):
def __init__(self, data):
self._name = data[0]
@ -1129,6 +1192,7 @@ def generate(kind, names):
outfile = open("aarch64ops.s", "w")
# To minimize the changes of assembler test code
random.seed(0)
print "// BEGIN Generated code -- do not edit"
@ -1139,18 +1203,18 @@ print " __ bind(back);"
outfile.write("back:\n")
generate (ArithOp,
generate (ArithOp,
[ "add", "sub", "adds", "subs",
"addw", "subw", "addsw", "subsw",
"and", "orr", "eor", "ands",
"andw", "orrw", "eorw", "andsw",
"bic", "orn", "eon", "bics",
"andw", "orrw", "eorw", "andsw",
"bic", "orn", "eon", "bics",
"bicw", "ornw", "eonw", "bicsw" ])
generate (AddSubImmOp,
generate (AddSubImmOp,
[ "addw", "addsw", "subw", "subsw",
"add", "adds", "sub", "subs"])
generate (LogicalImmOp,
generate (LogicalImmOp,
[ "andw", "orrw", "eorw", "andsw",
"and", "orr", "eor", "ands"])
@ -1191,26 +1255,26 @@ for mode in 'xw':
["stxp", mode, 4], ["stlxp", mode, 4]])
for kind in range(6):
print "\n// " + Address.kindToStr(kind),
sys.stdout.write("\n// " + Address.kindToStr(kind))
if kind != Address.pcrel:
generate (LoadStoreOp,
[["str", "str", kind, "x"], ["str", "str", kind, "w"],
generate (LoadStoreOp,
[["str", "str", kind, "x"], ["str", "str", kind, "w"],
["str", "strb", kind, "b"], ["str", "strh", kind, "h"],
["ldr", "ldr", kind, "x"], ["ldr", "ldr", kind, "w"],
["ldr", "ldr", kind, "x"], ["ldr", "ldr", kind, "w"],
["ldr", "ldrb", kind, "b"], ["ldr", "ldrh", kind, "h"],
["ldrsb", "ldrsb", kind, "x"], ["ldrsh", "ldrsh", kind, "x"],
["ldrsb", "ldrsb", kind, "x"], ["ldrsh", "ldrsh", kind, "x"],
["ldrsh", "ldrsh", kind, "w"], ["ldrsw", "ldrsw", kind, "x"],
["ldr", "ldr", kind, "d"], ["ldr", "ldr", kind, "s"],
["str", "str", kind, "d"], ["str", "str", kind, "s"],
["ldr", "ldr", kind, "d"], ["ldr", "ldr", kind, "s"],
["str", "str", kind, "d"], ["str", "str", kind, "s"],
])
else:
generate (LoadStoreOp,
generate (LoadStoreOp,
[["ldr", "ldr", kind, "x"], ["ldr", "ldr", kind, "w"]])
for kind in (Address.base_plus_unscaled_offset, Address.pcrel, Address.base_plus_reg, \
Address.base_plus_scaled_offset):
generate (LoadStoreOp,
generate (LoadStoreOp,
[["prfm", "prfm\tPLDL1KEEP,", kind, "x"]])
generate(AddSubCarryOp, ["adcw", "adcsw", "sbcw", "sbcsw", "adc", "adcs", "sbc", "sbcs"])
@ -1219,32 +1283,32 @@ generate(AddSubExtendedOp, ["addw", "addsw", "sub", "subsw", "add", "adds", "sub
generate(ConditionalCompareOp, ["ccmnw", "ccmpw", "ccmn", "ccmp"])
generate(ConditionalCompareImmedOp, ["ccmnw", "ccmpw", "ccmn", "ccmp"])
generate(ConditionalSelectOp,
generate(ConditionalSelectOp,
["cselw", "csincw", "csinvw", "csnegw", "csel", "csinc", "csinv", "csneg"])
generate(TwoRegOp,
["rbitw", "rev16w", "revw", "clzw", "clsw", "rbit",
generate(TwoRegOp,
["rbitw", "rev16w", "revw", "clzw", "clsw", "rbit",
"rev16", "rev32", "rev", "clz", "cls"])
generate(ThreeRegOp,
["udivw", "sdivw", "lslvw", "lsrvw", "asrvw", "rorvw", "udiv", "sdiv",
generate(ThreeRegOp,
["udivw", "sdivw", "lslvw", "lsrvw", "asrvw", "rorvw", "udiv", "sdiv",
"lslv", "lsrv", "asrv", "rorv", "umulh", "smulh"])
generate(FourRegMulOp,
generate(FourRegMulOp,
["maddw", "msubw", "madd", "msub", "smaddl", "smsubl", "umaddl", "umsubl"])
generate(ThreeRegFloatOp,
[["fmuls", "sss"], ["fdivs", "sss"], ["fadds", "sss"], ["fsubs", "sss"],
generate(ThreeRegFloatOp,
[["fmuls", "sss"], ["fdivs", "sss"], ["fadds", "sss"], ["fsubs", "sss"],
["fmuls", "sss"],
["fmuld", "ddd"], ["fdivd", "ddd"], ["faddd", "ddd"], ["fsubd", "ddd"],
["fmuld", "ddd"], ["fdivd", "ddd"], ["faddd", "ddd"], ["fsubd", "ddd"],
["fmuld", "ddd"]])
generate(FourRegFloatOp,
[["fmadds", "ssss"], ["fmsubs", "ssss"], ["fnmadds", "ssss"], ["fnmadds", "ssss"],
generate(FourRegFloatOp,
[["fmadds", "ssss"], ["fmsubs", "ssss"], ["fnmadds", "ssss"], ["fnmadds", "ssss"],
["fmaddd", "dddd"], ["fmsubd", "dddd"], ["fnmaddd", "dddd"], ["fnmaddd", "dddd"],])
generate(TwoRegFloatOp,
[["fmovs", "ss"], ["fabss", "ss"], ["fnegs", "ss"], ["fsqrts", "ss"],
generate(TwoRegFloatOp,
[["fmovs", "ss"], ["fabss", "ss"], ["fnegs", "ss"], ["fsqrts", "ss"],
["fcvts", "ds"],
["fmovd", "dd"], ["fabsd", "dd"], ["fnegd", "dd"], ["fsqrtd", "dd"],
["fmovd", "dd"], ["fabsd", "dd"], ["fnegd", "dd"], ["fsqrtd", "dd"],
["fcvtd", "sd"],
])
@ -1255,18 +1319,18 @@ generate(FloatConvertOp, [["fcvtzsw", "fcvtzs", "ws"], ["fcvtzs", "fcvtzs", "xs"
["fmovs", "fmov", "ws"], ["fmovd", "fmov", "xd"],
["fmovs", "fmov", "sw"], ["fmovd", "fmov", "dx"]])
generate(TwoRegFloatOp, [["fcmps", "ss"], ["fcmpd", "dd"],
generate(TwoRegFloatOp, [["fcmps", "ss"], ["fcmpd", "dd"],
["fcmps", "sz"], ["fcmpd", "dz"]])
for kind in range(3):
generate(LoadStorePairOp, [["stp", "stp", kind, "w"], ["ldp", "ldp", kind, "w"],
["ldpsw", "ldpsw", kind, "x"],
["ldpsw", "ldpsw", kind, "x"],
["stp", "stp", kind, "x"], ["ldp", "ldp", kind, "x"]
])
generate(LoadStorePairOp, [["stnp", "stnp", 0, "w"], ["ldnp", "ldnp", 0, "w"],
["stnp", "stnp", 0, "x"], ["ldnp", "ldnp", 0, "x"]])
generate(LdStSIMDOp, [["ld1", 1, "8B", Address.base_only],
generate(LdStNEONOp, [["ld1", 1, "8B", Address.base_only],
["ld1", 2, "16B", Address.post],
["ld1", 3, "1D", Address.post_reg],
["ld1", 4, "8H", Address.post],
@ -1290,6 +1354,93 @@ generate(LdStSIMDOp, [["ld1", 1, "8B", Address.base_only],
["ld4r", 4, "2S", Address.post_reg],
])
generate(NEONReduceInstruction,
[["addv", "addv", "8B"], ["addv", "addv", "16B"],
["addv", "addv", "4H"], ["addv", "addv", "8H"],
["addv", "addv", "4S"],
["smaxv", "smaxv", "8B"], ["smaxv", "smaxv", "16B"],
["smaxv", "smaxv", "4H"], ["smaxv", "smaxv", "8H"],
["smaxv", "smaxv", "4S"], ["fmaxv", "fmaxv", "4S"],
["sminv", "sminv", "8B"], ["sminv", "sminv", "16B"],
["sminv", "sminv", "4H"], ["sminv", "sminv", "8H"],
["sminv", "sminv", "4S"], ["fminv", "fminv", "4S"],
])
generate(TwoRegNEONOp,
[["absr", "abs", "8B"], ["absr", "abs", "16B"],
["absr", "abs", "4H"], ["absr", "abs", "8H"],
["absr", "abs", "2S"], ["absr", "abs", "4S"],
["absr", "abs", "2D"],
["fabs", "fabs", "2S"], ["fabs", "fabs", "4S"],
["fabs", "fabs", "2D"],
["fneg", "fneg", "2S"], ["fneg", "fneg", "4S"],
["fneg", "fneg", "2D"],
["fsqrt", "fsqrt", "2S"], ["fsqrt", "fsqrt", "4S"],
["fsqrt", "fsqrt", "2D"],
["notr", "not", "8B"], ["notr", "not", "16B"],
])
generate(ThreeRegNEONOp,
[["andr", "and", "8B"], ["andr", "and", "16B"],
["orr", "orr", "8B"], ["orr", "orr", "16B"],
["eor", "eor", "8B"], ["eor", "eor", "16B"],
["addv", "add", "8B"], ["addv", "add", "16B"],
["addv", "add", "4H"], ["addv", "add", "8H"],
["addv", "add", "2S"], ["addv", "add", "4S"],
["addv", "add", "2D"],
["fadd", "fadd", "2S"], ["fadd", "fadd", "4S"],
["fadd", "fadd", "2D"],
["subv", "sub", "8B"], ["subv", "sub", "16B"],
["subv", "sub", "4H"], ["subv", "sub", "8H"],
["subv", "sub", "2S"], ["subv", "sub", "4S"],
["subv", "sub", "2D"],
["fsub", "fsub", "2S"], ["fsub", "fsub", "4S"],
["fsub", "fsub", "2D"],
["mulv", "mul", "8B"], ["mulv", "mul", "16B"],
["mulv", "mul", "4H"], ["mulv", "mul", "8H"],
["mulv", "mul", "2S"], ["mulv", "mul", "4S"],
["fmul", "fmul", "2S"], ["fmul", "fmul", "4S"],
["fmul", "fmul", "2D"],
["mlav", "mla", "4H"], ["mlav", "mla", "8H"],
["mlav", "mla", "2S"], ["mlav", "mla", "4S"],
["fmla", "fmla", "2S"], ["fmla", "fmla", "4S"],
["fmla", "fmla", "2D"],
["mlsv", "mls", "4H"], ["mlsv", "mls", "8H"],
["mlsv", "mls", "2S"], ["mlsv", "mls", "4S"],
["fmls", "fmls", "2S"], ["fmls", "fmls", "4S"],
["fmls", "fmls", "2D"],
["fdiv", "fdiv", "2S"], ["fdiv", "fdiv", "4S"],
["fdiv", "fdiv", "2D"],
["maxv", "smax", "8B"], ["maxv", "smax", "16B"],
["maxv", "smax", "4H"], ["maxv", "smax", "8H"],
["maxv", "smax", "2S"], ["maxv", "smax", "4S"],
["fmax", "fmax", "2S"], ["fmax", "fmax", "4S"],
["fmax", "fmax", "2D"],
["minv", "smin", "8B"], ["minv", "smin", "16B"],
["minv", "smin", "4H"], ["minv", "smin", "8H"],
["minv", "smin", "2S"], ["minv", "smin", "4S"],
["fmin", "fmin", "2S"], ["fmin", "fmin", "4S"],
["fmin", "fmin", "2D"],
["cmeq", "cmeq", "8B"], ["cmeq", "cmeq", "16B"],
["cmeq", "cmeq", "4H"], ["cmeq", "cmeq", "8H"],
["cmeq", "cmeq", "2S"], ["cmeq", "cmeq", "4S"],
["cmeq", "cmeq", "2D"],
["fcmeq", "fcmeq", "2S"], ["fcmeq", "fcmeq", "4S"],
["fcmeq", "fcmeq", "2D"],
["cmgt", "cmgt", "8B"], ["cmgt", "cmgt", "16B"],
["cmgt", "cmgt", "4H"], ["cmgt", "cmgt", "8H"],
["cmgt", "cmgt", "2S"], ["cmgt", "cmgt", "4S"],
["cmgt", "cmgt", "2D"],
["fcmgt", "fcmgt", "2S"], ["fcmgt", "fcmgt", "4S"],
["fcmgt", "fcmgt", "2D"],
["cmge", "cmge", "8B"], ["cmge", "cmge", "16B"],
["cmge", "cmge", "4H"], ["cmge", "cmge", "8H"],
["cmge", "cmge", "2S"], ["cmge", "cmge", "4S"],
["cmge", "cmge", "2D"],
["fcmge", "fcmge", "2S"], ["fcmge", "fcmge", "4S"],
["fcmge", "fcmge", "2D"],
])
generate(SHA512SIMDOp, ["sha512h", "sha512h2", "sha512su0", "sha512su1"])
generate(SpecialCases, [["ccmn", "__ ccmn(zr, zr, 3u, Assembler::LE);", "ccmn\txzr, xzr, #3, LE"],
@ -1344,9 +1495,9 @@ generate(SpecialCases, [["ccmn", "__ ccmn(zr, zr, 3u, Assembler::LE);",
])
print "\n// FloatImmediateOp"
for float in ("2.0", "2.125", "4.0", "4.25", "8.0", "8.5", "16.0", "17.0", "0.125",
"0.1328125", "0.25", "0.265625", "0.5", "0.53125", "1.0", "1.0625",
"-2.0", "-2.125", "-4.0", "-4.25", "-8.0", "-8.5", "-16.0", "-17.0",
for float in ("2.0", "2.125", "4.0", "4.25", "8.0", "8.5", "16.0", "17.0", "0.125",
"0.1328125", "0.25", "0.265625", "0.5", "0.53125", "1.0", "1.0625",
"-2.0", "-2.125", "-4.0", "-4.25", "-8.0", "-8.5", "-16.0", "-17.0",
"-0.125", "-0.1328125", "-0.25", "-0.265625", "-0.5", "-0.53125", "-1.0", "-1.0625"):
astr = "fmov d0, #" + float
cstr = "__ fmovd(v0, " + float + ");"
@ -1414,16 +1565,11 @@ outfile.write("forth:\n")
outfile.close()
import subprocess
import sys
# compile for sve with 8.1 and sha2 because of lse atomics and sha512 crypto extension.
subprocess.check_call([AARCH64_AS, "-march=armv8.1-a+sha2+sve", "aarch64ops.s", "-o", "aarch64ops.o"])
print
print "/*",
sys.stdout.flush()
subprocess.check_call([AARCH64_OBJDUMP, "-d", "aarch64ops.o"])
print "/*"
print "*/"
subprocess.check_call([AARCH64_OBJCOPY, "-O", "binary", "-j", ".text", "aarch64ops.o", "aarch64ops.bin"])
@ -1444,4 +1590,7 @@ while i < len(bytes):
print "\n };"
print "// END Generated code -- do not edit"
infile.close()
for f in ["aarch64ops.s", "aarch64ops.o", "aarch64ops.bin"]:
os.remove(f)


@ -2410,6 +2410,12 @@ const bool Matcher::match_rule_supported_vector(int opcode, int vlen, BasicType
break;
case Op_MulVL:
return false;
case Op_VectorLoadShuffle:
case Op_VectorRearrange:
if (vlen < 4) {
return false;
}
break;
default:
break;
}
@ -2421,6 +2427,10 @@ const bool Matcher::has_predicated_vectors(void) {
return UseSVE > 0;
}
bool Matcher::supports_vector_variable_shifts(void) {
return true;
}
const int Matcher::float_pressure(int default_pressure_threshold) {
return default_pressure_threshold;
}
@ -2466,11 +2476,18 @@ const int Matcher::min_vector_size(const BasicType bt) {
if ((UseSVE > 0) && (MaxVectorSize >= 16)) {
// Currently vector length less than SVE vector register size is not supported.
return max_size;
} else {
// For the moment limit the vector size to 8 bytes with NEON.
} else { // NEON
// Limit the vector size to 8 bytes
int size = 8 / type2aelembytes(bt);
if (bt == T_BYTE) {
// To support vector api shuffle/rearrange.
size = 4;
} else if (bt == T_BOOLEAN) {
// To support vector api load/store mask.
size = 2;
}
if (size < 2) size = 2;
return size;
return MIN2(size,max_size);
}
}
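
The NEON branch above derives a per-type minimum lane count from an 8-byte vector, with explicit floors for byte (shuffle/rearrange) and boolean (mask load/store). A small illustrative Java sketch of the floors this rule produces; the method names and the MaxVectorSize = 16 byte assumption are mine, not HotSpot code:

public class MinVectorSizeDemo {
    // Mirrors the NEON branch: size = 8 / element bytes, floored at 4 for byte and 2 for boolean,
    // never below 2, then clamped to the maximum lane count for the type.
    static int minLanes(String type, int elemBytes, int maxLanes) {
        int size = 8 / elemBytes;
        if (type.equals("byte"))    size = 4;   // vector API shuffle/rearrange support
        if (type.equals("boolean")) size = 2;   // vector API load/store mask support
        if (size < 2) size = 2;
        return Math.min(size, maxLanes);
    }
    public static void main(String[] args) {
        System.out.println(minLanes("byte",    1, 16));  // 4
        System.out.println(minLanes("boolean", 1, 16));  // 2
        System.out.println(minLanes("int",     4,  4));  // 2
        System.out.println(minLanes("double",  8,  2));  // 2
    }
}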
@ -2489,6 +2506,9 @@ const uint Matcher::vector_ideal_reg(int len) {
return Op_VecA;
}
switch(len) {
// For 16-bit/32-bit mask vector, reuse VecD.
case 2:
case 4:
case 8: return Op_VecD;
case 16: return Op_VecX;
}
@ -3131,6 +3151,12 @@ encode %{
// END Non-volatile memory access
// Vector loads and stores
enc_class aarch64_enc_ldrvH(vecD dst, memory mem) %{
FloatRegister dst_reg = as_FloatRegister($dst$$reg);
loadStore(C2_MacroAssembler(&cbuf), &MacroAssembler::ldr, dst_reg, MacroAssembler::H,
$mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
%}
enc_class aarch64_enc_ldrvS(vecD dst, memory mem) %{
FloatRegister dst_reg = as_FloatRegister($dst$$reg);
loadStore(C2_MacroAssembler(&cbuf), &MacroAssembler::ldr, dst_reg, MacroAssembler::S,
@ -3149,6 +3175,12 @@ encode %{
$mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
%}
enc_class aarch64_enc_strvH(vecD src, memory mem) %{
FloatRegister src_reg = as_FloatRegister($src$$reg);
loadStore(C2_MacroAssembler(&cbuf), &MacroAssembler::str, src_reg, MacroAssembler::H,
$mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
%}
enc_class aarch64_enc_strvS(vecD src, memory mem) %{
FloatRegister src_reg = as_FloatRegister($src$$reg);
loadStore(C2_MacroAssembler(&cbuf), &MacroAssembler::str, src_reg, MacroAssembler::S,
@ -4252,6 +4284,26 @@ operand immI_31()
interface(CONST_INTER);
%}
operand immI_2()
%{
predicate(n->get_int() == 2);
match(ConI);
op_cost(0);
format %{ %}
interface(CONST_INTER);
%}
operand immI_4()
%{
predicate(n->get_int() == 4);
match(ConI);
op_cost(0);
format %{ %}
interface(CONST_INTER);
%}
operand immI_8()
%{
predicate(n->get_int() == 8);
@ -11222,6 +11274,7 @@ instruct rShiftL_reg_imm(iRegLNoSp dst, iRegL src1, immI src2) %{
%}
// BEGIN This section of the file is automatically generated. Do not edit --------------
// This section is generated from aarch64_ad.m4
// This pattern is automatically generated from aarch64_ad.m4
@ -16848,6 +16901,7 @@ instruct replicate2D(vecX dst, vRegD src)
instruct reduce_add2I(iRegINoSp dst, iRegIorL2I isrc, vecD vsrc, iRegINoSp tmp, iRegINoSp tmp2)
%{
predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT);
match(Set dst (AddReductionVI isrc vsrc));
ins_cost(INSN_COST);
effect(TEMP tmp, TEMP tmp2);
@ -16867,6 +16921,7 @@ instruct reduce_add2I(iRegINoSp dst, iRegIorL2I isrc, vecD vsrc, iRegINoSp tmp,
instruct reduce_add4I(iRegINoSp dst, iRegIorL2I isrc, vecX vsrc, vecX vtmp, iRegINoSp itmp)
%{
predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT);
match(Set dst (AddReductionVI isrc vsrc));
ins_cost(INSN_COST);
effect(TEMP vtmp, TEMP itmp);
@ -16885,6 +16940,7 @@ instruct reduce_add4I(iRegINoSp dst, iRegIorL2I isrc, vecX vsrc, vecX vtmp, iReg
instruct reduce_mul2I(iRegINoSp dst, iRegIorL2I isrc, vecD vsrc, iRegINoSp tmp)
%{
predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT);
match(Set dst (MulReductionVI isrc vsrc));
ins_cost(INSN_COST);
effect(TEMP tmp, TEMP dst);
@ -16904,6 +16960,7 @@ instruct reduce_mul2I(iRegINoSp dst, iRegIorL2I isrc, vecD vsrc, iRegINoSp tmp)
instruct reduce_mul4I(iRegINoSp dst, iRegIorL2I isrc, vecX vsrc, vecX vtmp, iRegINoSp itmp)
%{
predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT);
match(Set dst (MulReductionVI isrc vsrc));
ins_cost(INSN_COST);
effect(TEMP vtmp, TEMP itmp, TEMP dst);
@ -17985,8 +18042,7 @@ instruct vabs2F(vecD dst, vecD src)
ins_cost(INSN_COST * 3);
format %{ "fabs $dst,$src\t# vector (2S)" %}
ins_encode %{
__ fabs(as_FloatRegister($dst$$reg), __ T2S,
as_FloatRegister($src$$reg));
__ fabs(as_FloatRegister($dst$$reg), __ T2S, as_FloatRegister($src$$reg));
%}
ins_pipe(vunop_fp64);
%}
@ -17998,8 +18054,7 @@ instruct vabs4F(vecX dst, vecX src)
ins_cost(INSN_COST * 3);
format %{ "fabs $dst,$src\t# vector (4S)" %}
ins_encode %{
__ fabs(as_FloatRegister($dst$$reg), __ T4S,
as_FloatRegister($src$$reg));
__ fabs(as_FloatRegister($dst$$reg), __ T4S, as_FloatRegister($src$$reg));
%}
ins_pipe(vunop_fp128);
%}
@ -18011,8 +18066,7 @@ instruct vabs2D(vecX dst, vecX src)
ins_cost(INSN_COST * 3);
format %{ "fabs $dst,$src\t# vector (2D)" %}
ins_encode %{
__ fabs(as_FloatRegister($dst$$reg), __ T2D,
as_FloatRegister($src$$reg));
__ fabs(as_FloatRegister($dst$$reg), __ T2D, as_FloatRegister($src$$reg));
%}
ins_pipe(vunop_fp128);
%}
@ -18153,7 +18207,8 @@ instruct vxor16B(vecX dst, vecX src1, vecX src2)
// ------------------------------ Shift ---------------------------------------
instruct vshiftcnt8B(vecD dst, iRegIorL2I cnt) %{
predicate(n->as_Vector()->length_in_bytes() == 8);
predicate(n->as_Vector()->length_in_bytes() == 4 ||
n->as_Vector()->length_in_bytes() == 8);
match(Set dst (LShiftCntV cnt));
match(Set dst (RShiftCntV cnt));
format %{ "dup $dst, $cnt\t# shift count vector (8B)" %}
@ -18977,12 +19032,12 @@ instruct vpopcount4I(vecX dst, vecX src) %{
"uaddlp $dst, $dst\t# vector (8H)"
%}
ins_encode %{
__ cnt(as_FloatRegister($dst$$reg), __ T16B,
as_FloatRegister($src$$reg));
__ uaddlp(as_FloatRegister($dst$$reg), __ T16B,
as_FloatRegister($dst$$reg));
__ uaddlp(as_FloatRegister($dst$$reg), __ T8H,
as_FloatRegister($dst$$reg));
__ cnt(as_FloatRegister($dst$$reg), __ T16B,
as_FloatRegister($src$$reg));
__ uaddlp(as_FloatRegister($dst$$reg), __ T16B,
as_FloatRegister($dst$$reg));
__ uaddlp(as_FloatRegister($dst$$reg), __ T8H,
as_FloatRegister($dst$$reg));
%}
ins_pipe(pipe_class_default);
%}
@ -18996,12 +19051,12 @@ instruct vpopcount2I(vecD dst, vecD src) %{
"uaddlp $dst, $dst\t# vector (4H)"
%}
ins_encode %{
__ cnt(as_FloatRegister($dst$$reg), __ T8B,
as_FloatRegister($src$$reg));
__ uaddlp(as_FloatRegister($dst$$reg), __ T8B,
as_FloatRegister($dst$$reg));
__ uaddlp(as_FloatRegister($dst$$reg), __ T4H,
as_FloatRegister($dst$$reg));
__ cnt(as_FloatRegister($dst$$reg), __ T8B,
as_FloatRegister($src$$reg));
__ uaddlp(as_FloatRegister($dst$$reg), __ T8B,
as_FloatRegister($dst$$reg));
__ uaddlp(as_FloatRegister($dst$$reg), __ T4H,
as_FloatRegister($dst$$reg));
%}
ins_pipe(pipe_class_default);
%}


@ -1,4 +1,4 @@
dnl Copyright (c) 2014, 2020, Red Hat Inc. All rights reserved.
dnl Copyright (c) 2019, 2020, Red Hat Inc. All rights reserved.
dnl DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
dnl
dnl This code is free software; you can redistribute it and/or modify it
@ -19,10 +19,14 @@ dnl Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
dnl or visit www.oracle.com if you need additional information or have any
dnl questions.
dnl
dnl
dnl Process this file with m4 aarch64_ad.m4 to generate the arithmetic
dnl and shift patterns patterns used in aarch64.ad.
dnl
dnl Process this file with m4 aarch64_ad.m4 to generate instructions used in
dnl aarch64.ad:
dnl 1. the arithmetic
dnl 2. shift patterns
dnl
// BEGIN This section of the file is automatically generated. Do not edit --------------
// This section is generated from aarch64_ad.m4
dnl
define(`ORL2I', `ifelse($1,I,orL2I)')
dnl

File diff suppressed because it is too large

File diff suppressed because it is too large

File diff suppressed because it is too large


@ -1371,6 +1371,21 @@ public:
#undef INSN
#define INSN(NAME, size, opc) \
void NAME(FloatRegister Rt, Register Rn) { \
starti; \
f(size, 31, 30), f(0b111100, 29, 24), f(opc, 23, 22), f(0, 21); \
f(0, 20, 12), f(0b01, 11, 10); \
rf(Rn, 5), rf((Register)Rt, 0); \
}
INSN(ldrs, 0b10, 0b01);
INSN(ldrd, 0b11, 0b01);
INSN(ldrq, 0b00, 0b11);
#undef INSN
#define INSN(NAME, opc, V) \
void NAME(address dest, prfop op = PLDL1KEEP) { \
int64_t offset = (dest - pc()) >> 2; \
@ -1508,6 +1523,21 @@ public:
#undef INSN
/* SIMD extensions
*
* We just use FloatRegister in the following. They are exactly the same
* as SIMD registers.
*/
public:
enum SIMD_Arrangement {
T8B, T16B, T4H, T8H, T2S, T4S, T1D, T2D, T1Q
};
enum SIMD_RegVariant {
B, H, S, D, Q
};
enum shift_kind { LSL, LSR, ASR, ROR };
void op_shifted_reg(unsigned decode,
@ -1887,6 +1917,30 @@ public:
i_fmovs(Vd, Vn);
}
private:
void _fcvt_narrow_extend(FloatRegister Vd, SIMD_Arrangement Ta,
FloatRegister Vn, SIMD_Arrangement Tb, bool do_extend) {
assert((do_extend && (Tb >> 1) + 1 == (Ta >> 1))
|| (!do_extend && (Ta >> 1) + 1 == (Tb >> 1)), "Incompatible arrangement");
starti;
int op30 = (do_extend ? Tb : Ta) & 1;
int op22 = ((do_extend ? Ta : Tb) >> 1) & 1;
f(0, 31), f(op30, 30), f(0b0011100, 29, 23), f(op22, 22);
f(0b100001011, 21, 13), f(do_extend ? 1 : 0, 12), f(0b10, 11, 10);
rf(Vn, 5), rf(Vd, 0);
}
public:
void fcvtl(FloatRegister Vd, SIMD_Arrangement Ta, FloatRegister Vn, SIMD_Arrangement Tb) {
assert(Tb == T4H || Tb == T8H|| Tb == T2S || Tb == T4S, "invalid arrangement");
_fcvt_narrow_extend(Vd, Ta, Vn, Tb, true);
}
void fcvtn(FloatRegister Vd, SIMD_Arrangement Ta, FloatRegister Vn, SIMD_Arrangement Tb) {
assert(Ta == T4H || Ta == T8H|| Ta == T2S || Ta == T4S, "invalid arrangement");
_fcvt_narrow_extend(Vd, Ta, Vn, Tb, false);
}
#undef INSN
// Floating-point data-processing (2 source)
@ -2023,6 +2077,43 @@ public:
#undef INSN
enum sign_kind { SIGNED, UNSIGNED };
private:
void _xcvtf_scalar_integer(sign_kind sign, unsigned sz,
FloatRegister Rd, FloatRegister Rn) {
starti;
f(0b01, 31, 30), f(sign == SIGNED ? 0 : 1, 29);
f(0b111100, 27, 23), f((sz >> 1) & 1, 22), f(0b100001110110, 21, 10);
rf(Rn, 5), rf(Rd, 0);
}
public:
#define INSN(NAME, sign, sz) \
void NAME(FloatRegister Rd, FloatRegister Rn) { \
_xcvtf_scalar_integer(sign, sz, Rd, Rn); \
}
INSN(scvtfs, SIGNED, 0);
INSN(scvtfd, SIGNED, 1);
#undef INSN
private:
void _xcvtf_vector_integer(sign_kind sign, SIMD_Arrangement T,
FloatRegister Rd, FloatRegister Rn) {
assert(T == T2S || T == T4S || T == T2D, "invalid arrangement");
starti;
f(0, 31), f(T & 1, 30), f(sign == SIGNED ? 0 : 1, 29);
f(0b011100, 28, 23), f((T >> 1) & 1, 22), f(0b100001110110, 21, 10);
rf(Rn, 5), rf(Rd, 0);
}
public:
void scvtfv(SIMD_Arrangement T, FloatRegister Rd, FloatRegister Rn) {
_xcvtf_vector_integer(SIGNED, T, Rd, Rn);
}
// Floating-point compare
void float_compare(unsigned op31, unsigned type,
unsigned op, unsigned op2,
@ -2152,21 +2243,6 @@ public:
INSN(frintzd, 0b01, 0b011);
#undef INSN
/* SIMD extensions
*
* We just use FloatRegister in the following. They are exactly the same
* as SIMD registers.
*/
public:
enum SIMD_Arrangement {
T8B, T16B, T4H, T8H, T2S, T4S, T1D, T2D, T1Q
};
enum SIMD_RegVariant {
B, H, S, D, Q
};
private:
static short SIMD_Size_in_bytes[];
@ -2324,6 +2400,11 @@ public:
INSN(smullv, 0, 0b110000, false); // accepted arrangements: T8B, T16B, T4H, T8H, T2S, T4S
INSN(umullv, 1, 0b110000, false); // accepted arrangements: T8B, T16B, T4H, T8H, T2S, T4S
INSN(umlalv, 1, 0b100000, false); // accepted arrangements: T8B, T16B, T4H, T8H, T2S, T4S
INSN(maxv, 0, 0b011001, false); // accepted arrangements: T8B, T16B, T4H, T8H, T2S, T4S
INSN(minv, 0, 0b011011, false); // accepted arrangements: T8B, T16B, T4H, T8H, T2S, T4S
INSN(cmeq, 1, 0b100011, true); // accepted arrangements: T8B, T16B, T4H, T8H, T2S, T4S, T2D
INSN(cmgt, 0, 0b001101, true); // accepted arrangements: T8B, T16B, T4H, T8H, T2S, T4S, T2D
INSN(cmge, 0, 0b001111, true); // accepted arrangements: T8B, T16B, T4H, T8H, T2S, T4S, T2D
#undef INSN
@ -2343,6 +2424,8 @@ public:
INSN(negr, 1, 0b100000101110, 3); // accepted arrangements: T8B, T16B, T4H, T8H, T2S, T4S, T2D
INSN(notr, 1, 0b100000010110, 0); // accepted arrangements: T8B, T16B
INSN(addv, 0, 0b110001101110, 1); // accepted arrangements: T8B, T16B, T4H, T8H, T4S
INSN(smaxv, 0, 0b110000101010, 1); // accepted arrangements: T8B, T16B, T4H, T8H, T4S
INSN(sminv, 0, 0b110001101010, 1); // accepted arrangements: T8B, T16B, T4H, T8H, T4S
INSN(cls, 0, 0b100000010010, 2); // accepted arrangements: T8B, T16B, T4H, T8H, T2S, T4S
INSN(clz, 1, 0b100000010010, 2); // accepted arrangements: T8B, T16B, T4H, T8H, T2S, T4S
INSN(cnt, 0, 0b100000010110, 0); // accepted arrangements: T8B, T16B
@ -2407,6 +2490,9 @@ public:
INSN(fmls, 0, 1, 0b110011);
INSN(fmax, 0, 0, 0b111101);
INSN(fmin, 0, 1, 0b111101);
INSN(fcmeq, 0, 0, 0b111001);
INSN(fcmgt, 1, 1, 0b111001);
INSN(fcmge, 1, 0, 0b111001);
#undef INSN
@ -2506,10 +2592,20 @@ public:
rf(Vn, 5), rf(Vd, 0);
}
// (double) {a, b} -> (a + b)
void faddpd(FloatRegister Vd, FloatRegister Vn) {
// (long) {a, b} -> (a + b)
void addpd(FloatRegister Vd, FloatRegister Vn) {
starti;
f(0b0111111001110000110110, 31, 10);
f(0b0101111011110001101110, 31, 10);
rf(Vn, 5), rf(Vd, 0);
}
// (Floating-point) {a, b} -> (a + b)
void faddp(FloatRegister Vd, FloatRegister Vn, SIMD_RegVariant type) {
assert(type == D || type == S, "Wrong type for faddp");
starti;
f(0b011111100, 31, 23);
f(type == D ? 1 : 0, 22);
f(0b110000110110, 21, 10);
rf(Vn, 5), rf(Vd, 0);
}
@ -2576,29 +2672,48 @@ public:
#undef INSN
private:
void _ushll(FloatRegister Vd, SIMD_Arrangement Ta, FloatRegister Vn, SIMD_Arrangement Tb, int shift) {
void _xshll(sign_kind sign, FloatRegister Vd, SIMD_Arrangement Ta, FloatRegister Vn, SIMD_Arrangement Tb, int shift) {
starti;
/* The encodings for the immh:immb fields (bits 22:16) are
* 0001 xxx 8H, 8B/16b shift = xxx
* 0001 xxx 8H, 8B/16B shift = xxx
* 001x xxx 4S, 4H/8H shift = xxxx
* 01xx xxx 2D, 2S/4S shift = xxxxx
* 1xxx xxx RESERVED
*/
assert((Tb >> 1) + 1 == (Ta >> 1), "Incompatible arrangement");
assert((1 << ((Tb>>1)+3)) > shift, "Invalid shift value");
f(0, 31), f(Tb & 1, 30), f(0b1011110, 29, 23), f((1 << ((Tb>>1)+3))|shift, 22, 16);
f(0, 31), f(Tb & 1, 30), f(sign == SIGNED ? 0 : 1, 29), f(0b011110, 28, 23);
f((1 << ((Tb>>1)+3))|shift, 22, 16);
f(0b101001, 15, 10), rf(Vn, 5), rf(Vd, 0);
}
public:
void ushll(FloatRegister Vd, SIMD_Arrangement Ta, FloatRegister Vn, SIMD_Arrangement Tb, int shift) {
assert(Tb == T8B || Tb == T4H || Tb == T2S, "invalid arrangement");
_ushll(Vd, Ta, Vn, Tb, shift);
_xshll(UNSIGNED, Vd, Ta, Vn, Tb, shift);
}
void ushll2(FloatRegister Vd, SIMD_Arrangement Ta, FloatRegister Vn, SIMD_Arrangement Tb, int shift) {
assert(Tb == T16B || Tb == T8H || Tb == T4S, "invalid arrangement");
_ushll(Vd, Ta, Vn, Tb, shift);
_xshll(UNSIGNED, Vd, Ta, Vn, Tb, shift);
}
void uxtl(FloatRegister Vd, SIMD_Arrangement Ta, FloatRegister Vn, SIMD_Arrangement Tb) {
ushll(Vd, Ta, Vn, Tb, 0);
}
void sshll(FloatRegister Vd, SIMD_Arrangement Ta, FloatRegister Vn, SIMD_Arrangement Tb, int shift) {
assert(Tb == T8B || Tb == T4H || Tb == T2S, "invalid arrangement");
_xshll(SIGNED, Vd, Ta, Vn, Tb, shift);
}
void sshll2(FloatRegister Vd, SIMD_Arrangement Ta, FloatRegister Vn, SIMD_Arrangement Tb, int shift) {
assert(Tb == T16B || Tb == T8H || Tb == T4S, "invalid arrangement");
_xshll(SIGNED, Vd, Ta, Vn, Tb, shift);
}
void sxtl(FloatRegister Vd, SIMD_Arrangement Ta, FloatRegister Vn, SIMD_Arrangement Tb) {
sshll(Vd, Ta, Vn, Tb, 0);
}
// Move from general purpose register
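
The immh:immb comment inside _xshll above packs the element-size marker and the shift amount into a single expression; a hedged worked example in plain Java (the arrangement ordinals T8B=0, T4H=2, T2S=4 follow the SIMD_Arrangement declaration order earlier in this file, everything else is illustrative):

public class XshllImmDemo {
    // (1 << ((Tb >> 1) + 3)) sets the size marker bit; the shift amount fills the bits below it.
    static int immhImmb(int tb, int shift) {
        return (1 << ((tb >> 1) + 3)) | shift;
    }
    // Render as the 7-bit field placed in instruction bits 22:16.
    static String field(int v) {
        return String.format("%7s", Integer.toBinaryString(v)).replace(' ', '0');
    }
    public static void main(String[] args) {
        System.out.println(field(immhImmb(0, 5)));  // T8B source, shift 5 -> 0001101
        System.out.println(field(immhImmb(2, 5)));  // T4H source, shift 5 -> 0010101
        System.out.println(field(immhImmb(4, 5)));  // T2S source, shift 5 -> 0100101
    }
}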
@ -2649,6 +2764,15 @@ public:
f(0b100001010010, 21, 10), rf(Vn, 5), rf(Vd, 0);
}
void xtn(FloatRegister Vd, SIMD_Arrangement Tb, FloatRegister Vn, SIMD_Arrangement Ta) {
starti;
int size_b = (int)Tb >> 1;
int size_a = (int)Ta >> 1;
assert(size_b < 3 && size_b == size_a - 1, "Invalid size specifier");
f(0, 31), f(Tb & 1, 30), f(0b001110, 29, 24), f(size_b, 23, 22);
f(0b100001001010, 21, 10), rf(Vn, 5), rf(Vd, 0);
}
void dup(FloatRegister Vd, SIMD_Arrangement T, Register Xs)
{
starti;


@ -611,6 +611,16 @@ class StubGenerator: public StubCodeGenerator {
void array_overlap_test(Label& L_no_overlap, Address::sxtw sf) { __ b(L_no_overlap); }
// Generate indices for iota vector.
address generate_iota_indices(const char *stub_name) {
__ align(CodeEntryAlignment);
StubCodeMark mark(this, "StubRoutines", stub_name);
address start = __ pc();
__ emit_data64(0x0706050403020100, relocInfo::none);
__ emit_data64(0x0F0E0D0C0B0A0908, relocInfo::none);
return start;
}
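
The two 64-bit literals emitted above lay down the bytes 0,1,2,...,15 in little-endian memory order, i.e. an identity ("iota") index sequence for byte lanes. A quick plain-Java check of that layout (illustrative only, not part of the changeset):

import java.nio.ByteBuffer;
import java.nio.ByteOrder;

public class IotaLayoutDemo {
    public static void main(String[] args) {
        ByteBuffer bb = ByteBuffer.allocate(16).order(ByteOrder.LITTLE_ENDIAN);
        bb.putLong(0x0706050403020100L).putLong(0x0F0E0D0C0B0A0908L);
        for (byte b : bb.array()) {
            System.out.print(b + " ");  // prints 0 1 2 ... 15
        }
    }
}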
// The inner part of zero_words(). This is the bulk operation,
// zeroing words in blocks, possibly using DC ZVA to do it. The
// caller is responsible for zeroing the last few words.
@ -5958,6 +5968,8 @@ class StubGenerator: public StubCodeGenerator {
SharedRuntime::
throw_NullPointerException_at_call));
StubRoutines::aarch64::_vector_iota_indices = generate_iota_indices("iota_indices");
// arraycopy stubs used by compilers
generate_arraycopy_stubs();


@ -40,6 +40,7 @@ address StubRoutines::aarch64::_f2i_fixup = NULL;
address StubRoutines::aarch64::_f2l_fixup = NULL;
address StubRoutines::aarch64::_d2i_fixup = NULL;
address StubRoutines::aarch64::_d2l_fixup = NULL;
address StubRoutines::aarch64::_vector_iota_indices = NULL;
address StubRoutines::aarch64::_float_sign_mask = NULL;
address StubRoutines::aarch64::_float_sign_flip = NULL;
address StubRoutines::aarch64::_double_sign_mask = NULL;


@ -51,6 +51,7 @@ class aarch64 {
static address _d2i_fixup;
static address _d2l_fixup;
static address _vector_iota_indices;
static address _float_sign_mask;
static address _float_sign_flip;
static address _double_sign_mask;
@ -106,6 +107,10 @@ class aarch64 {
return _d2l_fixup;
}
static address vector_iota_indices() {
return _vector_iota_indices;
}
static address float_sign_mask()
{
return _float_sign_mask;


@ -993,6 +993,10 @@ const bool Matcher::has_predicated_vectors(void) {
return false;
}
bool Matcher::supports_vector_variable_shifts(void) {
return VM_Version::has_simd();
}
const int Matcher::float_pressure(int default_pressure_threshold) {
return default_pressure_threshold;
}


@ -2161,6 +2161,10 @@ const bool Matcher::has_predicated_vectors(void) {
return false;
}
bool Matcher::supports_vector_variable_shifts(void) {
return false; // not supported
}
const int Matcher::float_pressure(int default_pressure_threshold) {
return default_pressure_threshold;
}


@ -1573,6 +1573,10 @@ const bool Matcher::has_predicated_vectors(void) {
return false;
}
bool Matcher::supports_vector_variable_shifts(void) {
return false; // not supported
}
const int Matcher::float_pressure(int default_pressure_threshold) {
return default_pressure_threshold;
}

File diff suppressed because it is too large


@ -588,6 +588,7 @@ class Assembler : public AbstractAssembler {
#endif
};
// Comparison predicates for integral types & FP types when using SSE
enum ComparisonPredicate {
eq = 0,
lt = 1,
@ -599,6 +600,51 @@ class Assembler : public AbstractAssembler {
_true = 7
};
// Comparison predicates for FP types when using AVX
// O means ordered. U is unordered. When using ordered, any NaN comparison is false. Otherwise, it is true.
// S means signaling. Q means non-signaling. When signaling is true, instruction signals #IA on NaN.
enum ComparisonPredicateFP {
EQ_OQ = 0,
LT_OS = 1,
LE_OS = 2,
UNORD_Q = 3,
NEQ_UQ = 4,
NLT_US = 5,
NLE_US = 6,
ORD_Q = 7,
EQ_UQ = 8,
NGE_US = 9,
NGT_US = 0xA,
FALSE_OQ = 0XB,
NEQ_OQ = 0xC,
GE_OS = 0xD,
GT_OS = 0xE,
TRUE_UQ = 0xF,
EQ_OS = 0x10,
LT_OQ = 0x11,
LE_OQ = 0x12,
UNORD_S = 0x13,
NEQ_US = 0x14,
NLT_UQ = 0x15,
NLE_UQ = 0x16,
ORD_S = 0x17,
EQ_US = 0x18,
NGE_UQ = 0x19,
NGT_UQ = 0x1A,
FALSE_OS = 0x1B,
NEQ_OS = 0x1C,
GE_OQ = 0x1D,
GT_OQ = 0x1E,
TRUE_US =0x1F
};
enum Width {
B = 0,
W = 1,
D = 2,
Q = 3
};
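
The O/U and S/Q letters in the ComparisonPredicateFP names above are decoded by the comment at the top of that enum; a minimal Java illustration of the ordered-versus-unordered distinction for the quiet forms (signaling on NaN cannot be expressed in pure Java):

public class FpPredicateDemo {
    static boolean eqOq(double a, double b)  { return a == b; }     // ordered: any NaN operand compares false
    static boolean neqUq(double a, double b) { return !(a == b); }  // unordered: any NaN operand compares true
    public static void main(String[] args) {
        System.out.println(eqOq(1.0, Double.NaN));   // false
        System.out.println(neqUq(1.0, Double.NaN));  // true
        System.out.println(eqOq(1.0, 1.0));          // true
        System.out.println(neqUq(1.0, 1.0));         // false
    }
}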
//---< calculate length of instruction >---
// As instruction size can't be found out easily on x86/x64,
// we just use '4' for len and maxlen.
@ -918,6 +964,7 @@ private:
void adcq(Register dst, Register src);
void addb(Address dst, int imm8);
void addw(Register dst, Register src);
void addw(Address dst, int imm16);
void addl(Address dst, int32_t imm32);
@ -968,6 +1015,8 @@ private:
void vaesdec(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
void vaesdeclast(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
void andw(Register dst, Register src);
void andl(Address dst, int32_t imm32);
void andl(Register dst, int32_t imm32);
void andl(Register dst, Address src);
@ -1093,9 +1142,11 @@ private:
// Convert Packed Signed Doubleword Integers to Packed Double-Precision Floating-Point Value
void cvtdq2pd(XMMRegister dst, XMMRegister src);
void vcvtdq2pd(XMMRegister dst, XMMRegister src, int vector_len);
// Convert Packed Signed Doubleword Integers to Packed Single-Precision Floating-Point Value
void cvtdq2ps(XMMRegister dst, XMMRegister src);
void vcvtdq2ps(XMMRegister dst, XMMRegister src, int vector_len);
// Convert Scalar Single-Precision Floating-Point Value to Scalar Double-Precision Floating-Point Value
void cvtss2sd(XMMRegister dst, XMMRegister src);
@ -1111,8 +1162,25 @@ private:
void cvttss2sil(Register dst, XMMRegister src);
void cvttss2siq(Register dst, XMMRegister src);
// Convert vector double to int
void cvttpd2dq(XMMRegister dst, XMMRegister src);
// Convert vector float and double
void vcvtps2pd(XMMRegister dst, XMMRegister src, int vector_len);
void vcvtpd2ps(XMMRegister dst, XMMRegister src, int vector_len);
// Convert vector long to vector FP
void evcvtqq2ps(XMMRegister dst, XMMRegister src, int vector_len);
void evcvtqq2pd(XMMRegister dst, XMMRegister src, int vector_len);
// Evex casts with truncation
void evpmovwb(XMMRegister dst, XMMRegister src, int vector_len);
void evpmovdw(XMMRegister dst, XMMRegister src, int vector_len);
void evpmovdb(XMMRegister dst, XMMRegister src, int vector_len);
void evpmovqd(XMMRegister dst, XMMRegister src, int vector_len);
void evpmovqb(XMMRegister dst, XMMRegister src, int vector_len);
void evpmovqw(XMMRegister dst, XMMRegister src, int vector_len);
//Abs of packed Integer values
void pabsb(XMMRegister dst, XMMRegister src);
void pabsw(XMMRegister dst, XMMRegister src);
@ -1472,20 +1540,26 @@ private:
void vmovdqu(XMMRegister dst, XMMRegister src);
// Move Unaligned 512bit Vector
void evmovdqub(Address dst, XMMRegister src, int vector_len);
void evmovdqub(XMMRegister dst, Address src, int vector_len);
void evmovdqub(XMMRegister dst, XMMRegister src, int vector_len);
void evmovdqub(XMMRegister dst, KRegister mask, Address src, int vector_len);
void evmovdquw(Address dst, XMMRegister src, int vector_len);
void evmovdquw(Address dst, KRegister mask, XMMRegister src, int vector_len);
void evmovdquw(XMMRegister dst, Address src, int vector_len);
void evmovdquw(XMMRegister dst, KRegister mask, Address src, int vector_len);
void evmovdqub(Address dst, XMMRegister src, bool merge, int vector_len);
void evmovdqub(XMMRegister dst, Address src, bool merge, int vector_len);
void evmovdqub(XMMRegister dst, XMMRegister src, bool merge, int vector_len);
void evmovdqub(XMMRegister dst, KRegister mask, Address src, bool merge, int vector_len);
void evmovdquw(Address dst, XMMRegister src, bool merge, int vector_len);
void evmovdquw(Address dst, KRegister mask, XMMRegister src, bool merge, int vector_len);
void evmovdquw(XMMRegister dst, Address src, bool merge, int vector_len);
void evmovdquw(XMMRegister dst, KRegister mask, Address src, bool merge, int vector_len);
void evmovdqul(Address dst, XMMRegister src, int vector_len);
void evmovdqul(XMMRegister dst, Address src, int vector_len);
void evmovdqul(XMMRegister dst, XMMRegister src, int vector_len);
void evmovdqul(Address dst, KRegister mask, XMMRegister src, bool merge, int vector_len);
void evmovdqul(XMMRegister dst, KRegister mask, Address src, bool merge, int vector_len);
void evmovdqul(XMMRegister dst, KRegister mask, XMMRegister src, bool merge, int vector_len);
void evmovdquq(Address dst, XMMRegister src, int vector_len);
void evmovdquq(XMMRegister dst, Address src, int vector_len);
void evmovdquq(XMMRegister dst, XMMRegister src, int vector_len);
void evmovdquq(Address dst, KRegister mask, XMMRegister src, bool merge, int vector_len);
void evmovdquq(XMMRegister dst, KRegister mask, Address src, bool merge, int vector_len);
void evmovdquq(XMMRegister dst, KRegister mask, XMMRegister src, bool merge, int vector_len);
// Generic move instructions.
void evmovdqu(Address dst, KRegister mask, XMMRegister src, int vector_len, int type);
@ -1521,6 +1595,9 @@ private:
// Move Quadword
void movq(Address dst, XMMRegister src);
void movq(XMMRegister dst, Address src);
void movq(XMMRegister dst, XMMRegister src);
void movq(Register dst, XMMRegister src);
void movq(XMMRegister dst, Register src);
void movsbl(Register dst, Address src);
void movsbl(Register dst, Register src);
@ -1601,6 +1678,8 @@ private:
void btrq(Address dst, int imm8);
#endif
void orw(Register dst, Register src);
void orl(Address dst, int32_t imm32);
void orl(Register dst, int32_t imm32);
void orl(Register dst, Address src);
@ -1614,17 +1693,32 @@ private:
void orq(Register dst, Address src);
void orq(Register dst, Register src);
// Pack with signed saturation
void packsswb(XMMRegister dst, XMMRegister src);
void vpacksswb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
void packssdw(XMMRegister dst, XMMRegister src);
void vpackssdw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
// Pack with unsigned saturation
void packuswb(XMMRegister dst, XMMRegister src);
void packuswb(XMMRegister dst, Address src);
void packusdw(XMMRegister dst, XMMRegister src);
void vpackuswb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
void vpackusdw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
// Pemutation of 64bit words
// Permutations
void vpermq(XMMRegister dst, XMMRegister src, int imm8, int vector_len);
void vpermq(XMMRegister dst, XMMRegister src, int imm8);
void vpermq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
void vpermb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
void vpermw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
void vpermd(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
void vpermd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
void vperm2i128(XMMRegister dst, XMMRegister nds, XMMRegister src, int imm8);
void vperm2f128(XMMRegister dst, XMMRegister nds, XMMRegister src, int imm8);
void vpermilps(XMMRegister dst, XMMRegister src, int imm8, int vector_len);
void vpermilpd(XMMRegister dst, XMMRegister src, int imm8, int vector_len);
void vpermpd(XMMRegister dst, XMMRegister src, int imm8, int vector_len);
void evpermi2q(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
void pause();
@ -1637,11 +1731,14 @@ private:
void pcmpestri(XMMRegister xmm1, Address src, int imm8);
void pcmpeqb(XMMRegister dst, XMMRegister src);
void vpcmpCCbwd(XMMRegister dst, XMMRegister nds, XMMRegister src, int cond_encoding, int vector_len);
void vpcmpeqb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
void evpcmpeqb(KRegister kdst, XMMRegister nds, XMMRegister src, int vector_len);
void evpcmpeqb(KRegister kdst, XMMRegister nds, Address src, int vector_len);
void evpcmpeqb(KRegister kdst, KRegister mask, XMMRegister nds, Address src, int vector_len);
void vpcmpgtb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
void evpcmpgtb(KRegister kdst, XMMRegister nds, Address src, int vector_len);
void evpcmpgtb(KRegister kdst, KRegister mask, XMMRegister nds, Address src, int vector_len);
@ -1654,16 +1751,22 @@ private:
void evpcmpeqw(KRegister kdst, XMMRegister nds, XMMRegister src, int vector_len);
void evpcmpeqw(KRegister kdst, XMMRegister nds, Address src, int vector_len);
void vpcmpgtw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
void pcmpeqd(XMMRegister dst, XMMRegister src);
void vpcmpeqd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
void evpcmpeqd(KRegister kdst, XMMRegister nds, XMMRegister src, int vector_len);
void evpcmpeqd(KRegister kdst, XMMRegister nds, Address src, int vector_len);
void evpcmpeqd(KRegister kdst, KRegister mask, XMMRegister nds, XMMRegister src, int vector_len);
void evpcmpeqd(KRegister kdst, KRegister mask, XMMRegister nds, Address src, int vector_len);
void pcmpeqq(XMMRegister dst, XMMRegister src);
void vpcmpCCq(XMMRegister dst, XMMRegister nds, XMMRegister src, int cond_encoding, int vector_len);
void vpcmpeqq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
void evpcmpeqq(KRegister kdst, XMMRegister nds, XMMRegister src, int vector_len);
void evpcmpeqq(KRegister kdst, XMMRegister nds, Address src, int vector_len);
void pcmpgtq(XMMRegister dst, XMMRegister src);
void vpcmpgtq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
void pmovmskb(Register dst, XMMRegister src);
void vpmovmskb(Register dst, XMMRegister src);
@ -1672,6 +1775,7 @@ private:
void pextrq(Register dst, XMMRegister src, int imm8);
void pextrd(Address dst, XMMRegister src, int imm8);
void pextrq(Address dst, XMMRegister src, int imm8);
void pextrb(Register dst, XMMRegister src, int imm8);
void pextrb(Address dst, XMMRegister src, int imm8);
// SSE 2 extract
void pextrw(Register dst, XMMRegister src, int imm8);
@ -1680,21 +1784,46 @@ private:
// SSE 4.1 insert
void pinsrd(XMMRegister dst, Register src, int imm8);
void pinsrq(XMMRegister dst, Register src, int imm8);
void pinsrb(XMMRegister dst, Register src, int imm8);
void pinsrd(XMMRegister dst, Address src, int imm8);
void pinsrq(XMMRegister dst, Address src, int imm8);
void pinsrb(XMMRegister dst, Address src, int imm8);
void insertps(XMMRegister dst, XMMRegister src, int imm8);
// SSE 2 insert
void pinsrw(XMMRegister dst, Register src, int imm8);
void pinsrw(XMMRegister dst, Address src, int imm8);
// SSE4.1 packed move
// AVX insert
void vpinsrd(XMMRegister dst, XMMRegister nds, Register src, int imm8);
void vpinsrb(XMMRegister dst, XMMRegister nds, Register src, int imm8);
void vpinsrq(XMMRegister dst, XMMRegister nds, Register src, int imm8);
void vpinsrw(XMMRegister dst, XMMRegister nds, Register src, int imm8);
void vinsertps(XMMRegister dst, XMMRegister nds, XMMRegister src, int imm8);
// Zero extend moves
void pmovzxbw(XMMRegister dst, XMMRegister src);
void pmovzxbw(XMMRegister dst, Address src);
void pmovzxbd(XMMRegister dst, XMMRegister src);
void vpmovzxbw( XMMRegister dst, Address src, int vector_len);
void pmovzxdq(XMMRegister dst, XMMRegister src);
void vpmovzxbw(XMMRegister dst, XMMRegister src, int vector_len);
void vpmovzxdq(XMMRegister dst, XMMRegister src, int vector_len);
void vpmovzxbd(XMMRegister dst, XMMRegister src, int vector_len);
void vpmovzxbq(XMMRegister dst, XMMRegister src, int vector_len);
void evpmovzxbw(XMMRegister dst, KRegister mask, Address src, int vector_len);
// Sign extend moves
void pmovsxbd(XMMRegister dst, XMMRegister src);
void pmovsxbq(XMMRegister dst, XMMRegister src);
void pmovsxbw(XMMRegister dst, XMMRegister src);
void pmovsxwd(XMMRegister dst, XMMRegister src);
void vpmovsxbd(XMMRegister dst, XMMRegister src, int vector_len);
void vpmovsxbq(XMMRegister dst, XMMRegister src, int vector_len);
void vpmovsxbw(XMMRegister dst, XMMRegister src, int vector_len);
void vpmovsxwd(XMMRegister dst, XMMRegister src, int vector_len);
void vpmovsxwq(XMMRegister dst, XMMRegister src, int vector_len);
void vpmovsxdq(XMMRegister dst, XMMRegister src, int vector_len);
void evpmovwb(Address dst, XMMRegister src, int vector_len);
void evpmovwb(Address dst, KRegister mask, XMMRegister src, int vector_len);
@ -1702,10 +1831,6 @@ private:
void evpmovdb(Address dst, XMMRegister src, int vector_len);
// Sign extend moves
void pmovsxbw(XMMRegister dst, XMMRegister src);
void vpmovsxbw(XMMRegister dst, XMMRegister src, int vector_len);
// Multiply add
void pmaddwd(XMMRegister dst, XMMRegister src);
void vpmaddwd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
@ -1749,10 +1874,17 @@ private:
void pshufd(XMMRegister dst, Address src, int mode);
void vpshufd(XMMRegister dst, XMMRegister src, int mode, int vector_len);
// Shuffle Packed Low Words
// Shuffle Packed High/Low Words
void pshufhw(XMMRegister dst, XMMRegister src, int mode);
void pshuflw(XMMRegister dst, XMMRegister src, int mode);
void pshuflw(XMMRegister dst, Address src, int mode);
// Shuffle floats and doubles
void pshufps(XMMRegister, XMMRegister, int);
void pshufpd(XMMRegister, XMMRegister, int);
void vpshufps(XMMRegister, XMMRegister, XMMRegister, int, int);
void vpshufpd(XMMRegister, XMMRegister, XMMRegister, int, int);
// Shuffle packed values at 128 bit granularity
void evshufi64x2(XMMRegister dst, XMMRegister nds, XMMRegister src, int imm8, int vector_len);
@ -1768,6 +1900,9 @@ private:
void vptest(XMMRegister dst, XMMRegister src);
void vptest(XMMRegister dst, Address src);
// Vector compare
void vptest(XMMRegister dst, XMMRegister src, int vector_len);
// Interleave Low Bytes
void punpcklbw(XMMRegister dst, XMMRegister src);
void punpcklbw(XMMRegister dst, Address src);
@ -1841,6 +1976,7 @@ private:
void evalignq(XMMRegister dst, XMMRegister nds, XMMRegister src, uint8_t imm8);
void pblendw(XMMRegister dst, XMMRegister src, int imm8);
void vblendps(XMMRegister dst, XMMRegister src1, XMMRegister src2, int imm8, int vector_len);
void sha1rnds4(XMMRegister dst, XMMRegister src, int imm8);
void sha1nexte(XMMRegister dst, XMMRegister src);
@ -1959,6 +2095,7 @@ private:
void xorl(Register dst, Register src);
void xorb(Register dst, Address src);
void xorw(Register dst, Register src);
void xorq(Register dst, Address src);
void xorq(Register dst, Register src);
@ -1997,6 +2134,8 @@ private:
//====================VECTOR ARITHMETIC=====================================
void evpmovd2m(KRegister kdst, XMMRegister src, int vector_len);
void evpmovq2m(KRegister kdst, XMMRegister src, int vector_len);
// Add Packed Floating-Point Values
void addpd(XMMRegister dst, XMMRegister src);
@ -2106,13 +2245,41 @@ private:
// Multiply packed integers (only shorts and ints)
void pmullw(XMMRegister dst, XMMRegister src);
void pmulld(XMMRegister dst, XMMRegister src);
void pmuludq(XMMRegister dst, XMMRegister src);
void vpmullw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
void vpmulld(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
void vpmullq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
void vpmuludq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
void vpmullw(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
void vpmulld(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
void vpmullq(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
// Minimum of packed integers
void pminsb(XMMRegister dst, XMMRegister src);
void vpminsb(XMMRegister dst, XMMRegister src1, XMMRegister src2, int vector_len);
void pminsw(XMMRegister dst, XMMRegister src);
void vpminsw(XMMRegister dst, XMMRegister src1, XMMRegister src2, int vector_len);
void pminsd(XMMRegister dst, XMMRegister src);
void vpminsd(XMMRegister dst, XMMRegister src1, XMMRegister src2, int vector_len);
void vpminsq(XMMRegister dst, XMMRegister src1, XMMRegister src2, int vector_len);
void minps(XMMRegister dst, XMMRegister src);
void vminps(XMMRegister dst, XMMRegister src1, XMMRegister src2, int vector_len);
void minpd(XMMRegister dst, XMMRegister src);
void vminpd(XMMRegister dst, XMMRegister src1, XMMRegister src2, int vector_len);
// Maximum of packed integers
void pmaxsb(XMMRegister dst, XMMRegister src);
void vpmaxsb(XMMRegister dst, XMMRegister src1, XMMRegister src2, int vector_len);
void pmaxsw(XMMRegister dst, XMMRegister src);
void vpmaxsw(XMMRegister dst, XMMRegister src1, XMMRegister src2, int vector_len);
void pmaxsd(XMMRegister dst, XMMRegister src);
void vpmaxsd(XMMRegister dst, XMMRegister src1, XMMRegister src2, int vector_len);
void vpmaxsq(XMMRegister dst, XMMRegister src1, XMMRegister src2, int vector_len);
void maxps(XMMRegister dst, XMMRegister src);
void vmaxps(XMMRegister dst, XMMRegister src1, XMMRegister src2, int vector_len);
void maxpd(XMMRegister dst, XMMRegister src);
void vmaxpd(XMMRegister dst, XMMRegister src1, XMMRegister src2, int vector_len);
// Shift left packed integers
void psllw(XMMRegister dst, int shift);
void pslld(XMMRegister dst, int shift);
@ -2154,9 +2321,22 @@ private:
void vpsrad(XMMRegister dst, XMMRegister src, int shift, int vector_len);
void vpsraw(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);
void vpsrad(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);
void evpsravw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
void evpsraq(XMMRegister dst, XMMRegister src, int shift, int vector_len);
void evpsraq(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);
// Variable shift left packed integers
void vpsllvd(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);
void vpsllvq(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);
// Variable shift right packed integers
void vpsrlvd(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);
void vpsrlvq(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);
// Variable shift right arithmetic packed integers
void vpsravd(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);
void evpsravq(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);
void vpshldvd(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);
void vpshrdvd(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);
@ -2164,6 +2344,7 @@ private:
void pand(XMMRegister dst, XMMRegister src);
void vpand(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
void vpand(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
void evpandd(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
void vpandq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
// Andn packed integers
@ -2176,10 +2357,15 @@ private:
void vpor(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
void vporq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
void evpord(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
void evpord(XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len);
// Xor packed integers
void pxor(XMMRegister dst, XMMRegister src);
void vpxor(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
void vpxor(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
void vpxorq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
void evpxord(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
void evpxorq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
void evpxorq(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
@ -2257,7 +2443,21 @@ private:
void evpbroadcastd(XMMRegister dst, Register src, int vector_len);
void evpbroadcastq(XMMRegister dst, Register src, int vector_len);
void evpgatherdd(XMMRegister dst, KRegister k1, Address src, int vector_len);
// Gather AVX2 and AVX3
void vpgatherdd(XMMRegister dst, Address src, XMMRegister mask, int vector_len);
void vpgatherdq(XMMRegister dst, Address src, XMMRegister mask, int vector_len);
void vgatherdpd(XMMRegister dst, Address src, XMMRegister mask, int vector_len);
void vgatherdps(XMMRegister dst, Address src, XMMRegister mask, int vector_len);
void evpgatherdd(XMMRegister dst, KRegister mask, Address src, int vector_len);
void evpgatherdq(XMMRegister dst, KRegister mask, Address src, int vector_len);
void evgatherdpd(XMMRegister dst, KRegister mask, Address src, int vector_len);
void evgatherdps(XMMRegister dst, KRegister mask, Address src, int vector_len);
// Scatter AVX3 only
void evpscatterdd(Address dst, KRegister mask, XMMRegister src, int vector_len);
void evpscatterdq(Address dst, KRegister mask, XMMRegister src, int vector_len);
void evscatterdps(Address dst, KRegister mask, XMMRegister src, int vector_len);
void evscatterdpd(Address dst, KRegister mask, XMMRegister src, int vector_len);
// Carry-Less Multiplication Quadword
void pclmulqdq(XMMRegister dst, XMMRegister src, int mask);
@ -2270,14 +2470,56 @@ private:
// runtime code and native libraries.
void vzeroupper();
// AVX support for vectorized conditional move (float/double). The compare and blend instructions below are used only as coupled pairs.
void blendvpb(XMMRegister dst, XMMRegister nds, XMMRegister src1, XMMRegister src2, int vector_len);
void cmppd(XMMRegister dst, XMMRegister nds, XMMRegister src, int cop, int vector_len);
void blendvpd(XMMRegister dst, XMMRegister nds, XMMRegister src1, XMMRegister src2, int vector_len);
void cmpps(XMMRegister dst, XMMRegister nds, XMMRegister src, int cop, int vector_len);
void blendvps(XMMRegister dst, XMMRegister nds, XMMRegister src1, XMMRegister src2, int vector_len);
void vpblendd(XMMRegister dst, XMMRegister nds, XMMRegister src, int imm8, int vector_len);
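  // Illustrative sketch (not part of this change): the compare/blend pairs above are meant
  // to be used together to form a vectorized conditional move. Assuming the usual VBLENDV
  // operand roles (the last XMM operand supplies the per-element mask, produced here by the
  // compare), a float CMove of the shape "dst = (a COP b) ? a : b" would look roughly like:
  //
  //   cmpps(xmm_mask, a, b, cop, Assembler::AVX_256bit);     // xmm_mask = all-ones where (a COP b)
  //   blendvps(dst, b, a, xmm_mask, Assembler::AVX_256bit);  // dst = xmm_mask ? a : b
  //
  // Register names and the predicate value here are placeholders for illustration only.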
// Vector double compares
void vcmppd(XMMRegister dst, XMMRegister nds, XMMRegister src, int cop, int vector_len);
void evcmppd(KRegister kdst, KRegister mask, XMMRegister nds, XMMRegister src,
ComparisonPredicateFP comparison, int vector_len);
// Vector float compares
void vcmpps(XMMRegister dst, XMMRegister nds, XMMRegister src, int comparison, int vector_len);
void evcmpps(KRegister kdst, KRegister mask, XMMRegister nds, XMMRegister src,
ComparisonPredicateFP comparison, int vector_len);
// Vector integer compares
void vpcmpgtd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
void evpcmpd(KRegister kdst, KRegister mask, XMMRegister nds, XMMRegister src,
int comparison, int vector_len);
void evpcmpd(KRegister kdst, KRegister mask, XMMRegister nds, Address src,
int comparison, int vector_len);
// Vector long compares
void evpcmpq(KRegister kdst, KRegister mask, XMMRegister nds, XMMRegister src,
int comparison, int vector_len);
void evpcmpq(KRegister kdst, KRegister mask, XMMRegister nds, Address src,
int comparison, int vector_len);
// Vector byte compares
void evpcmpb(KRegister kdst, KRegister mask, XMMRegister nds, XMMRegister src,
int comparison, int vector_len);
void evpcmpb(KRegister kdst, KRegister mask, XMMRegister nds, Address src,
int comparison, int vector_len);
// Vector short compares
void evpcmpw(KRegister kdst, KRegister mask, XMMRegister nds, XMMRegister src,
int comparison, int vector_len);
void evpcmpw(KRegister kdst, KRegister mask, XMMRegister nds, Address src,
int comparison, int vector_len);
// Vector blends
void blendvps(XMMRegister dst, XMMRegister src);
void blendvpd(XMMRegister dst, XMMRegister src);
void pblendvb(XMMRegister dst, XMMRegister src);
void blendvpb(XMMRegister dst, XMMRegister nds, XMMRegister src1, XMMRegister src2, int vector_len);
void vblendvps(XMMRegister dst, XMMRegister nds, XMMRegister src, XMMRegister mask, int vector_len);
void vblendvpd(XMMRegister dst, XMMRegister nds, XMMRegister src1, XMMRegister src2, int vector_len);
void vpblendvb(XMMRegister dst, XMMRegister nds, XMMRegister src, XMMRegister mask, int vector_len);
void vpblendd(XMMRegister dst, XMMRegister nds, XMMRegister src, int imm8, int vector_len);
void evblendmpd(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
void evblendmps(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
void evpblendmb(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
void evpblendmw(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
void evpblendmd(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
void evpblendmq(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
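  // Semantics sketch (illustrative, not normative): for the AVX variable blends above the
  // selector is the most significant bit of each element of the XMM mask operand; for the
  // EVEX evblendm*/evpblendm* forms the selector is an opmask register. Per lane, roughly:
  //
  //   dst[i] = selected(i) ? src[i] : nds[i];   // merging behaviour
  //
  // where for the EVEX forms 'merge' == false switches the unselected lanes to zero instead.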
protected:
// Next instructions require address alignment 16 bytes SSE mode.
// They should be called only from corresponding MacroAssembler instructions.
@ -2373,7 +2615,8 @@ public:
// Internal encoding data used in compressed immediate offset programming
void set_evex_encoding(int value) { _evex_encoding = value; }
// Set the Evex.Z field to be used to clear all non directed XMM/YMM/ZMM components
// When the Evex.Z field is set (true), it is used to clear all non directed XMM/YMM/ZMM components.
// This method unsets it so that merge semantics are used instead.
void reset_is_clear_context(void) { _is_clear_context = false; }
// Map back to current assembler so that we can manage object level association

File diff suppressed because it is too large

View file

@ -28,6 +28,8 @@
// C2_MacroAssembler contains high-level macros for C2
public:
Assembler::AvxVectorLen vector_length_encoding(int vlen_in_bytes);
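  // Expected mapping (added commentary, assumption): vector_length_encoding() translates a
  // vector size in bytes into the matching AVX length encoding, roughly:
  //
  //   16 -> Assembler::AVX_128bit, 32 -> Assembler::AVX_256bit, 64 -> Assembler::AVX_512bit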
// special instructions for EVEX
void setvectmask(Register dst, Register src);
void restorevectmask();
@ -71,25 +73,69 @@ public:
void vabsnegd(int opcode, XMMRegister dst, XMMRegister src, int vector_len, Register scr);
void vabsnegf(int opcode, XMMRegister dst, XMMRegister src, Register scr);
void vabsnegf(int opcode, XMMRegister dst, XMMRegister src, int vector_len, Register scr);
void pminmax(int opcode, BasicType elem_bt, XMMRegister dst, XMMRegister src,
XMMRegister tmp = xnoreg);
void vpminmax(int opcode, BasicType elem_bt,
XMMRegister dst, XMMRegister src1, XMMRegister src2,
int vlen_enc);
void vminmax_fp(int opcode, BasicType elem_bt,
XMMRegister dst, XMMRegister a, XMMRegister b,
XMMRegister tmp, XMMRegister atmp, XMMRegister btmp,
int vlen_enc);
void evminmax_fp(int opcode, BasicType elem_bt,
XMMRegister dst, XMMRegister a, XMMRegister b,
KRegister ktmp, XMMRegister atmp, XMMRegister btmp,
int vlen_enc);
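  // Rationale sketch (added commentary): plain minps/maxps do not implement Java's
  // Math.min/Math.max semantics -- x86 returns the second source when either input is NaN
  // and does not distinguish -0.0 from +0.0 -- so the helpers above take extra temporaries
  // (tmp/atmp/btmp, or a ktmp opmask on EVEX) to fix up those cases. A scalar sketch of the
  // intended semantics, assuming a jint_cast-style bit view of the float:
  //
  //   float java_max(float a, float b) {
  //     if (a != a) return a;                        // NaN propagates
  //     if (a == 0.0f && b == 0.0f)                  // -0.0 loses to +0.0 for max
  //       return (jint_cast(a) < 0) ? b : a;
  //     return (a > b) ? a : b;
  //   }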
void vextendbw(bool sign, XMMRegister dst, XMMRegister src, int vector_len);
void vextendbw(bool sign, XMMRegister dst, XMMRegister src);
void vshiftd(int opcode, XMMRegister dst, XMMRegister src);
void vextendbd(bool sign, XMMRegister dst, XMMRegister src, int vector_len);
void vextendwd(bool sign, XMMRegister dst, XMMRegister src, int vector_len);
void vshiftd(int opcode, XMMRegister dst, XMMRegister shift);
void vshiftd_imm(int opcode, XMMRegister dst, int shift);
void vshiftd(int opcode, XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
void vshiftd(int opcode, XMMRegister dst, XMMRegister src, XMMRegister shift, int vlen_enc);
void vshiftd_imm(int opcode, XMMRegister dst, XMMRegister nds, int shift, int vector_len);
void vshiftw(int opcode, XMMRegister dst, XMMRegister src);
void vshiftw(int opcode, XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
void vshiftq(int opcode, XMMRegister dst, XMMRegister src);
void vshiftw(int opcode, XMMRegister dst, XMMRegister shift);
void vshiftw(int opcode, XMMRegister dst, XMMRegister src, XMMRegister shift, int vlen_enc);
void vshiftq(int opcode, XMMRegister dst, XMMRegister shift);
void vshiftq_imm(int opcode, XMMRegister dst, int shift);
void vshiftq(int opcode, XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
void vshiftq(int opcode, XMMRegister dst, XMMRegister src, XMMRegister shift, int vlen_enc);
void vshiftq_imm(int opcode, XMMRegister dst, XMMRegister nds, int shift, int vector_len);
void vprotate_imm(int opcode, BasicType etype, XMMRegister dst, XMMRegister src, int shift, int vector_len);
void vprotate_var(int opcode, BasicType etype, XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);
// Reductions for vectors of ints, longs, floats, and doubles.
void varshiftd(int opcode, XMMRegister dst, XMMRegister src, XMMRegister shift, int vlen_enc);
void varshiftw(int opcode, XMMRegister dst, XMMRegister src, XMMRegister shift, int vlen_enc);
void varshiftq(int opcode, XMMRegister dst, XMMRegister src, XMMRegister shift, int vlen_enc, XMMRegister vtmp = xnoreg);
void varshiftbw(int opcode, XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len, XMMRegister vtmp, Register scratch);
void evarshiftb(int opcode, XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len, XMMRegister vtmp, Register scratch);
// dst = src1 + reduce(op, src2) using vtmp as temps
void insert(BasicType typ, XMMRegister dst, Register val, int idx);
void vinsert(BasicType typ, XMMRegister dst, XMMRegister src, Register val, int idx);
void vgather(BasicType typ, XMMRegister dst, Register base, XMMRegister idx, XMMRegister mask, int vector_len);
void evgather(BasicType typ, XMMRegister dst, KRegister mask, Register base, XMMRegister idx, int vector_len);
void evscatter(BasicType typ, Register base, XMMRegister idx, KRegister mask, XMMRegister src, int vector_len);
// extract
void extract(BasicType typ, Register dst, XMMRegister src, int idx);
XMMRegister get_lane(BasicType typ, XMMRegister dst, XMMRegister src, int elemindex);
void get_elem(BasicType typ, Register dst, XMMRegister src, int elemindex);
void get_elem(BasicType typ, XMMRegister dst, XMMRegister src, int elemindex, Register tmp = noreg, XMMRegister vtmp = xnoreg);
// blend
void evpcmp(BasicType typ, KRegister kdmask, KRegister ksmask, XMMRegister src1, AddressLiteral adr, int comparison, int vector_len, Register scratch = rscratch1);
void evpblend(BasicType typ, XMMRegister dst, KRegister kmask, XMMRegister src1, XMMRegister src2, bool merge, int vector_len);
void load_vector_mask(XMMRegister dst, XMMRegister src, int vlen_in_bytes, BasicType elem_bt);
void load_iota_indices(XMMRegister dst, Register scratch, int vlen_in_bytes);
// Reductions for vectors of bytes, shorts, ints, longs, floats, and doubles.
// dst = src1 reduce(op, src2) using vtmp as temps
void reduceI(int opcode, int vlen, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2);
#ifdef _LP64
void reduceL(int opcode, int vlen, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2);
@ -99,32 +145,62 @@ public:
void reduce_fp(int opcode, int vlen,
XMMRegister dst, XMMRegister src,
XMMRegister vtmp1, XMMRegister vtmp2 = xnoreg);
void reduceB(int opcode, int vlen, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2);
void mulreduceB(int opcode, int vlen, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2);
void reduceS(int opcode, int vlen, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2);
void reduceFloatMinMax(int opcode, int vlen, bool is_dst_valid,
XMMRegister dst, XMMRegister src,
XMMRegister tmp, XMMRegister atmp, XMMRegister btmp, XMMRegister xmm_0, XMMRegister xmm_1 = xnoreg);
void reduceDoubleMinMax(int opcode, int vlen, bool is_dst_valid,
XMMRegister dst, XMMRegister src,
XMMRegister tmp, XMMRegister atmp, XMMRegister btmp, XMMRegister xmm_0, XMMRegister xmm_1 = xnoreg);
private:
void reduceF(int opcode, int vlen, XMMRegister dst, XMMRegister src, XMMRegister vtmp1, XMMRegister vtmp2);
void reduceD(int opcode, int vlen, XMMRegister dst, XMMRegister src, XMMRegister vtmp1, XMMRegister vtmp2);
// Int Reduction
void reduce2I (int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2);
void reduce4I (int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2);
void reduce8I (int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2);
void reduce16I(int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2);
// Byte Reduction
void reduce8B (int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2);
void reduce16B(int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2);
void reduce32B(int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2);
void reduce64B(int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2);
void mulreduce8B (int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2);
void mulreduce16B(int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2);
void mulreduce32B(int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2);
void mulreduce64B(int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2);
// Short Reduction
void reduce4S (int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2);
void reduce8S (int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2);
void reduce16S(int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2);
void reduce32S(int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2);
// Long Reduction
#ifdef _LP64
void reduce2L(int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2);
void reduce4L(int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2);
void reduce8L(int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2);
#endif // _LP64
// Float Reduction
void reduce2F (int opcode, XMMRegister dst, XMMRegister src, XMMRegister vtmp);
void reduce4F (int opcode, XMMRegister dst, XMMRegister src, XMMRegister vtmp);
void reduce8F (int opcode, XMMRegister dst, XMMRegister src, XMMRegister vtmp1, XMMRegister vtmp2);
void reduce16F(int opcode, XMMRegister dst, XMMRegister src, XMMRegister vtmp1, XMMRegister vtmp2);
// Double Reduction
void reduce2D(int opcode, XMMRegister dst, XMMRegister src, XMMRegister vtmp);
void reduce4D(int opcode, XMMRegister dst, XMMRegister src, XMMRegister vtmp1, XMMRegister vtmp2);
void reduce8D(int opcode, XMMRegister dst, XMMRegister src, XMMRegister vtmp1, XMMRegister vtmp2);
void reduce_operation_128(int opcode, XMMRegister dst, XMMRegister src);
void reduce_operation_256(int opcode, XMMRegister dst, XMMRegister src1, XMMRegister src2);
// Base reduction instruction
void reduce_operation_128(BasicType typ, int opcode, XMMRegister dst, XMMRegister src);
void reduce_operation_256(BasicType typ, int opcode, XMMRegister dst, XMMRegister src1, XMMRegister src2);
public:

View file

@ -112,6 +112,7 @@ void MacroAssembler::cmpklass(Address src1, Metadata* obj) {
cmp_literal32(src1, (int32_t)obj, metadata_Relocation::spec_for_immediate());
}
void MacroAssembler::cmpklass(Register src1, Metadata* obj) {
cmp_literal32(src1, (int32_t)obj, metadata_Relocation::spec_for_immediate());
}
@ -2495,6 +2496,7 @@ void MacroAssembler::movdqu(XMMRegister dst, Address src) {
void MacroAssembler::movdqu(XMMRegister dst, XMMRegister src) {
assert(((dst->encoding() < 16 && src->encoding() < 16) || VM_Version::supports_avx512vl()),"XMM register should be 0-15");
if (dst->encoding() == src->encoding()) return;
Assembler::movdqu(dst, src);
}
@ -2519,6 +2521,7 @@ void MacroAssembler::vmovdqu(XMMRegister dst, Address src) {
void MacroAssembler::vmovdqu(XMMRegister dst, XMMRegister src) {
assert(((dst->encoding() < 16 && src->encoding() < 16) || VM_Version::supports_avx512vl()),"XMM register should be 0-15");
if (dst->encoding() == src->encoding()) return;
Assembler::vmovdqu(dst, src);
}
@ -2532,6 +2535,64 @@ void MacroAssembler::vmovdqu(XMMRegister dst, AddressLiteral src, Register scrat
}
}
void MacroAssembler::kmovwl(KRegister dst, AddressLiteral src, Register scratch_reg) {
if (reachable(src)) {
kmovwl(dst, as_Address(src));
} else {
lea(scratch_reg, src);
kmovwl(dst, Address(scratch_reg, 0));
}
}
void MacroAssembler::evmovdqub(XMMRegister dst, KRegister mask, AddressLiteral src, bool merge,
int vector_len, Register scratch_reg) {
if (reachable(src)) {
if (mask == k0) {
Assembler::evmovdqub(dst, as_Address(src), merge, vector_len);
} else {
Assembler::evmovdqub(dst, mask, as_Address(src), merge, vector_len);
}
} else {
lea(scratch_reg, src);
if (mask == k0) {
Assembler::evmovdqub(dst, Address(scratch_reg, 0), merge, vector_len);
} else {
Assembler::evmovdqub(dst, mask, Address(scratch_reg, 0), merge, vector_len);
}
}
}
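// Added commentary (not part of this change): the reachable()/lea() shape above is the
// standard MacroAssembler idiom for AddressLiteral operands -- use the literal directly when
// it is RIP-reachable, otherwise materialize its address into the caller-supplied scratch
// register first. A typical call site would look roughly like:
//
//   evmovdqub(xmm0, k0, ExternalAddress(StubRoutines::x86::vector_all_bits_set()),
//             /*merge*/ false, Assembler::AVX_512bit, rscratch1);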
void MacroAssembler::evmovdquw(XMMRegister dst, KRegister mask, AddressLiteral src, bool merge,
int vector_len, Register scratch_reg) {
if (reachable(src)) {
Assembler::evmovdquw(dst, mask, as_Address(src), merge, vector_len);
} else {
lea(scratch_reg, src);
Assembler::evmovdquw(dst, mask, Address(scratch_reg, 0), merge, vector_len);
}
}
void MacroAssembler::evmovdqul(XMMRegister dst, KRegister mask, AddressLiteral src, bool merge,
int vector_len, Register scratch_reg) {
if (reachable(src)) {
Assembler::evmovdqul(dst, mask, as_Address(src), merge, vector_len);
} else {
lea(scratch_reg, src);
Assembler::evmovdqul(dst, mask, Address(scratch_reg, 0), merge, vector_len);
}
}
void MacroAssembler::evmovdquq(XMMRegister dst, KRegister mask, AddressLiteral src, bool merge,
int vector_len, Register scratch_reg) {
if (reachable(src)) {
Assembler::evmovdquq(dst, mask, as_Address(src), merge, vector_len);
} else {
lea(scratch_reg, src);
Assembler::evmovdquq(dst, mask, Address(scratch_reg, 0), merge, vector_len);
}
}
void MacroAssembler::evmovdquq(XMMRegister dst, AddressLiteral src, int vector_len, Register rscratch) {
if (reachable(src)) {
Assembler::evmovdquq(dst, as_Address(src), vector_len);
@ -3019,6 +3080,98 @@ void MacroAssembler::vpcmpeqw(XMMRegister dst, XMMRegister nds, XMMRegister src,
Assembler::vpcmpeqw(dst, nds, src, vector_len);
}
void MacroAssembler::evpcmpeqd(KRegister kdst, KRegister mask, XMMRegister nds,
AddressLiteral src, int vector_len, Register scratch_reg) {
if (reachable(src)) {
Assembler::evpcmpeqd(kdst, mask, nds, as_Address(src), vector_len);
} else {
lea(scratch_reg, src);
Assembler::evpcmpeqd(kdst, mask, nds, Address(scratch_reg, 0), vector_len);
}
}
void MacroAssembler::evpcmpd(KRegister kdst, KRegister mask, XMMRegister nds, AddressLiteral src,
int comparison, int vector_len, Register scratch_reg) {
if (reachable(src)) {
Assembler::evpcmpd(kdst, mask, nds, as_Address(src), comparison, vector_len);
} else {
lea(scratch_reg, src);
Assembler::evpcmpd(kdst, mask, nds, Address(scratch_reg, 0), comparison, vector_len);
}
}
void MacroAssembler::evpcmpq(KRegister kdst, KRegister mask, XMMRegister nds, AddressLiteral src,
int comparison, int vector_len, Register scratch_reg) {
if (reachable(src)) {
Assembler::evpcmpq(kdst, mask, nds, as_Address(src), comparison, vector_len);
} else {
lea(scratch_reg, src);
Assembler::evpcmpq(kdst, mask, nds, Address(scratch_reg, 0), comparison, vector_len);
}
}
void MacroAssembler::evpcmpb(KRegister kdst, KRegister mask, XMMRegister nds, AddressLiteral src,
int comparison, int vector_len, Register scratch_reg) {
if (reachable(src)) {
Assembler::evpcmpb(kdst, mask, nds, as_Address(src), comparison, vector_len);
} else {
lea(scratch_reg, src);
Assembler::evpcmpb(kdst, mask, nds, Address(scratch_reg, 0), comparison, vector_len);
}
}
void MacroAssembler::evpcmpw(KRegister kdst, KRegister mask, XMMRegister nds, AddressLiteral src,
int comparison, int vector_len, Register scratch_reg) {
if (reachable(src)) {
Assembler::evpcmpw(kdst, mask, nds, as_Address(src), comparison, vector_len);
} else {
lea(scratch_reg, src);
Assembler::evpcmpw(kdst, mask, nds, Address(scratch_reg, 0), comparison, vector_len);
}
}
void MacroAssembler::vpcmpCC(XMMRegister dst, XMMRegister nds, XMMRegister src, int cond_encoding, Width width, int vector_len) {
if (width == Assembler::Q) {
Assembler::vpcmpCCq(dst, nds, src, cond_encoding, vector_len);
} else {
Assembler::vpcmpCCbwd(dst, nds, src, cond_encoding, vector_len);
}
}
void MacroAssembler::vpcmpCCW(XMMRegister dst, XMMRegister nds, XMMRegister src, ComparisonPredicate cond, Width width, int vector_len, Register scratch_reg) {
int eq_cond_enc = 0x29;
int gt_cond_enc = 0x37;
if (width != Assembler::Q) {
eq_cond_enc = 0x74 + width;
gt_cond_enc = 0x64 + width;
}
switch (cond) {
case eq:
vpcmpCC(dst, nds, src, eq_cond_enc, width, vector_len);
break;
case neq:
vpcmpCC(dst, nds, src, eq_cond_enc, width, vector_len);
vpxor(dst, dst, ExternalAddress(StubRoutines::x86::vector_all_bits_set()), vector_len, scratch_reg);
break;
case le:
vpcmpCC(dst, nds, src, gt_cond_enc, width, vector_len);
vpxor(dst, dst, ExternalAddress(StubRoutines::x86::vector_all_bits_set()), vector_len, scratch_reg);
break;
case nlt:
vpcmpCC(dst, src, nds, gt_cond_enc, width, vector_len);
vpxor(dst, dst, ExternalAddress(StubRoutines::x86::vector_all_bits_set()), vector_len, scratch_reg);
break;
case lt:
vpcmpCC(dst, src, nds, gt_cond_enc, width, vector_len);
break;
case nle:
vpcmpCC(dst, nds, src, gt_cond_enc, width, vector_len);
break;
default:
assert(false, "Should not reach here");
}
}
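// Added commentary (not part of this change): AVX/AVX2 integer compares only encode "equal"
// and "greater-than", so vpcmpCCW() synthesizes the remaining predicates above:
//
//   neq = not(eq)          -> compare-eq, then xor with all-ones
//   le  = not(gt)          -> compare-gt, then xor with all-ones
//   nlt = not(src > nds)   -> compare-gt with operands swapped, then xor with all-ones
//   lt  = (src > nds)      -> compare-gt with operands swapped
//
// The all-ones constant comes from StubRoutines::x86::vector_all_bits_set().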
void MacroAssembler::vpmovzxbw(XMMRegister dst, Address src, int vector_len) {
assert(((dst->encoding() < 16) || VM_Version::supports_avx512vlbw()),"XMM register should be 0-15");
Assembler::vpmovzxbw(dst, src, vector_len);
@ -3143,6 +3296,16 @@ void MacroAssembler::vandps(XMMRegister dst, XMMRegister nds, AddressLiteral src
}
}
void MacroAssembler::evpord(XMMRegister dst, KRegister mask, XMMRegister nds, AddressLiteral src,
bool merge, int vector_len, Register scratch_reg) {
if (reachable(src)) {
Assembler::evpord(dst, mask, nds, as_Address(src), merge, vector_len);
} else {
lea(scratch_reg, src);
Assembler::evpord(dst, mask, nds, Address(scratch_reg, 0), merge, vector_len);
}
}
void MacroAssembler::vdivsd(XMMRegister dst, XMMRegister nds, AddressLiteral src) {
if (reachable(src)) {
vdivsd(dst, nds, as_Address(src));
@ -3239,7 +3402,14 @@ void MacroAssembler::vpxor(XMMRegister dst, XMMRegister nds, AddressLiteral src,
}
}
//-------------------------------------------------------------------------------------------
void MacroAssembler::vpermd(XMMRegister dst, XMMRegister nds, AddressLiteral src, int vector_len, Register scratch_reg) {
if (reachable(src)) {
Assembler::vpermd(dst, nds, as_Address(src), vector_len);
} else {
lea(scratch_reg, src);
Assembler::vpermd(dst, nds, Address(scratch_reg, 0), vector_len);
}
}
void MacroAssembler::clear_jweak_tag(Register possibly_jweak) {
const int32_t inverted_jweak_mask = ~static_cast<int32_t>(JNIHandles::weak_tag_mask);
@ -5765,7 +5935,7 @@ void MacroAssembler::vectorized_mismatch(Register obja, Register objb, Register
bind(VECTOR64_LOOP);
// AVX512 code to compare 64 byte vectors.
evmovdqub(rymm0, Address(obja, result), Assembler::AVX_512bit);
evmovdqub(rymm0, Address(obja, result), false, Assembler::AVX_512bit);
evpcmpeqb(k7, rymm0, Address(objb, result), Assembler::AVX_512bit);
kortestql(k7, k7);
jcc(Assembler::aboveEqual, VECTOR64_NOT_EQUAL); // mismatch
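// Added commentary: evpcmpeqb sets a bit in k7 for every byte that matches; kortestql sets
// CF only when the OR of its operands is all ones, i.e. when all 64 bytes matched, so
// aboveEqual (CF == 0) means at least one byte differed and the code branches to the
// mismatch path.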
@ -5784,7 +5954,7 @@ void MacroAssembler::vectorized_mismatch(Register obja, Register objb, Register
notq(tmp2);
kmovql(k3, tmp2);
evmovdqub(rymm0, k3, Address(obja, result), Assembler::AVX_512bit);
evmovdqub(rymm0, k3, Address(obja, result), false, Assembler::AVX_512bit);
evpcmpeqb(k7, k3, rymm0, Address(objb, result), Assembler::AVX_512bit);
ktestql(k7, k3);
@ -7579,7 +7749,7 @@ void MacroAssembler::char_array_compress(Register src, Register dst, Register le
notl(result);
kmovdl(k3, result);
evmovdquw(tmp1Reg, k3, Address(src, 0), Assembler::AVX_512bit);
evmovdquw(tmp1Reg, k3, Address(src, 0), /*merge*/ false, Assembler::AVX_512bit);
evpcmpuw(k2, k3, tmp1Reg, tmp2Reg, Assembler::le, Assembler::AVX_512bit);
ktestd(k2, k3);
jcc(Assembler::carryClear, return_zero);
@ -7604,7 +7774,7 @@ void MacroAssembler::char_array_compress(Register src, Register dst, Register le
negptr(len);
bind(copy_32_loop);
evmovdquw(tmp1Reg, Address(src, len, Address::times_2), Assembler::AVX_512bit);
evmovdquw(tmp1Reg, Address(src, len, Address::times_2), /*merge*/ false, Assembler::AVX_512bit);
evpcmpuw(k2, tmp1Reg, tmp2Reg, Assembler::le, Assembler::AVX_512bit);
kortestdl(k2, k2);
jcc(Assembler::carryClear, return_zero);
@ -7629,7 +7799,7 @@ void MacroAssembler::char_array_compress(Register src, Register dst, Register le
kmovdl(k3, result);
evmovdquw(tmp1Reg, k3, Address(src, 0), Assembler::AVX_512bit);
evmovdquw(tmp1Reg, k3, Address(src, 0), /*merge*/ false, Assembler::AVX_512bit);
evpcmpuw(k2, k3, tmp1Reg, tmp2Reg, Assembler::le, Assembler::AVX_512bit);
ktestd(k2, k3);
jcc(Assembler::carryClear, return_zero);
@ -7774,7 +7944,7 @@ void MacroAssembler::byte_array_inflate(Register src, Register dst, Register len
// inflate 32 chars per iter
bind(copy_32_loop);
vpmovzxbw(tmp1, Address(src, len, Address::times_1), Assembler::AVX_512bit);
evmovdquw(Address(dst, len, Address::times_2), tmp1, Assembler::AVX_512bit);
evmovdquw(Address(dst, len, Address::times_2), tmp1, /*merge*/ false, Assembler::AVX_512bit);
addptr(len, 32);
jcc(Assembler::notZero, copy_32_loop);
@ -7789,7 +7959,7 @@ void MacroAssembler::byte_array_inflate(Register src, Register dst, Register len
notl(tmp3_aliased);
kmovdl(k2, tmp3_aliased);
evpmovzxbw(tmp1, k2, Address(src, 0), Assembler::AVX_512bit);
evmovdquw(Address(dst, 0), k2, tmp1, Assembler::AVX_512bit);
evmovdquw(Address(dst, 0), k2, tmp1, /*merge*/ true, Assembler::AVX_512bit);
jmp(done);
bind(avx3_threshold);

View file

@ -1076,15 +1076,59 @@ public:
void movdqu(XMMRegister dst, Address src);
void movdqu(XMMRegister dst, XMMRegister src);
void movdqu(XMMRegister dst, AddressLiteral src, Register scratchReg = rscratch1);
void kmovwl(KRegister dst, Register src) { Assembler::kmovwl(dst, src); }
void kmovwl(Register dst, KRegister src) { Assembler::kmovwl(dst, src); }
void kmovwl(KRegister dst, Address src) { Assembler::kmovwl(dst, src); }
void kmovwl(KRegister dst, AddressLiteral src, Register scratch_reg = rscratch1);
// AVX Unaligned forms
void vmovdqu(Address dst, XMMRegister src);
void vmovdqu(XMMRegister dst, Address src);
void vmovdqu(XMMRegister dst, XMMRegister src);
void vmovdqu(XMMRegister dst, AddressLiteral src, Register scratch_reg = rscratch1);
// AVX512 Unaligned
void evmovdqub(Address dst, XMMRegister src, bool merge, int vector_len) { Assembler::evmovdqub(dst, src, merge, vector_len); }
void evmovdqub(XMMRegister dst, Address src, bool merge, int vector_len) { Assembler::evmovdqub(dst, src, merge, vector_len); }
void evmovdqub(XMMRegister dst, XMMRegister src, bool merge, int vector_len) { Assembler::evmovdqub(dst, src, merge, vector_len); }
void evmovdqub(XMMRegister dst, KRegister mask, Address src, bool merge, int vector_len) { Assembler::evmovdqub(dst, mask, src, merge, vector_len); }
void evmovdqub(XMMRegister dst, KRegister mask, AddressLiteral src, bool merge, int vector_len, Register scratch_reg);
void evmovdquw(Address dst, XMMRegister src, bool merge, int vector_len) { Assembler::evmovdquw(dst, src, merge, vector_len); }
void evmovdquw(Address dst, KRegister mask, XMMRegister src, bool merge, int vector_len) { Assembler::evmovdquw(dst, mask, src, merge, vector_len); }
void evmovdquw(XMMRegister dst, Address src, bool merge, int vector_len) { Assembler::evmovdquw(dst, src, merge, vector_len); }
void evmovdquw(XMMRegister dst, KRegister mask, Address src, bool merge, int vector_len) { Assembler::evmovdquw(dst, mask, src, merge, vector_len); }
void evmovdquw(XMMRegister dst, KRegister mask, AddressLiteral src, bool merge, int vector_len, Register scratch_reg);
void evmovdqul(Address dst, XMMRegister src, int vector_len) { Assembler::evmovdqul(dst, src, vector_len); }
void evmovdqul(XMMRegister dst, Address src, int vector_len) { Assembler::evmovdqul(dst, src, vector_len); }
void evmovdqul(XMMRegister dst, XMMRegister src, int vector_len) {
if (dst->encoding() == src->encoding()) return;
Assembler::evmovdqul(dst, src, vector_len);
}
void evmovdqul(Address dst, KRegister mask, XMMRegister src, bool merge, int vector_len) { Assembler::evmovdqul(dst, mask, src, merge, vector_len); }
void evmovdqul(XMMRegister dst, KRegister mask, Address src, bool merge, int vector_len) { Assembler::evmovdqul(dst, mask, src, merge, vector_len); }
void evmovdqul(XMMRegister dst, KRegister mask, XMMRegister src, bool merge, int vector_len) {
if (dst->encoding() == src->encoding() && mask == k0) return;
Assembler::evmovdqul(dst, mask, src, merge, vector_len);
}
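  // Added commentary: k0 means "no masking" in EVEX encodings, so a register self-move with
  // mask == k0 has no architectural effect and is elided above; with a real opmask the move
  // must still be emitted because it merges or zeroes individual lanes.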
void evmovdqul(XMMRegister dst, KRegister mask, AddressLiteral src, bool merge, int vector_len, Register scratch_reg);
void evmovdquq(XMMRegister dst, Address src, int vector_len) { Assembler::evmovdquq(dst, src, vector_len); }
void evmovdquq(XMMRegister dst, XMMRegister src, int vector_len) { Assembler::evmovdquq(dst, src, vector_len); }
void evmovdquq(Address dst, XMMRegister src, int vector_len) { Assembler::evmovdquq(dst, src, vector_len); }
void evmovdquq(XMMRegister dst, AddressLiteral src, int vector_len, Register rscratch);
void evmovdquq(XMMRegister dst, XMMRegister src, int vector_len) {
if (dst->encoding() == src->encoding()) return;
Assembler::evmovdquq(dst, src, vector_len);
}
void evmovdquq(Address dst, KRegister mask, XMMRegister src, bool merge, int vector_len) { Assembler::evmovdquq(dst, mask, src, merge, vector_len); }
void evmovdquq(XMMRegister dst, KRegister mask, Address src, bool merge, int vector_len) { Assembler::evmovdquq(dst, mask, src, merge, vector_len); }
void evmovdquq(XMMRegister dst, KRegister mask, XMMRegister src, bool merge, int vector_len) {
if (dst->encoding() == src->encoding() && mask == k0) return;
Assembler::evmovdquq(dst, mask, src, merge, vector_len);
}
void evmovdquq(XMMRegister dst, KRegister mask, AddressLiteral src, bool merge, int vector_len, Register scratch_reg);
// Move Aligned Double Quadword
void movdqa(XMMRegister dst, Address src) { Assembler::movdqa(dst, src); }
@ -1206,6 +1250,30 @@ public:
void vpcmpeqb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
void vpcmpeqw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
void evpcmpeqd(KRegister kdst, KRegister mask, XMMRegister nds, AddressLiteral src, int vector_len, Register scratch_reg);
// Vector compares
void evpcmpd(KRegister kdst, KRegister mask, XMMRegister nds, XMMRegister src,
int comparison, int vector_len) { Assembler::evpcmpd(kdst, mask, nds, src, comparison, vector_len); }
void evpcmpd(KRegister kdst, KRegister mask, XMMRegister nds, AddressLiteral src,
int comparison, int vector_len, Register scratch_reg);
void evpcmpq(KRegister kdst, KRegister mask, XMMRegister nds, XMMRegister src,
int comparison, int vector_len) { Assembler::evpcmpq(kdst, mask, nds, src, comparison, vector_len); }
void evpcmpq(KRegister kdst, KRegister mask, XMMRegister nds, AddressLiteral src,
int comparison, int vector_len, Register scratch_reg);
void evpcmpb(KRegister kdst, KRegister mask, XMMRegister nds, XMMRegister src,
int comparison, int vector_len) { Assembler::evpcmpb(kdst, mask, nds, src, comparison, vector_len); }
void evpcmpb(KRegister kdst, KRegister mask, XMMRegister nds, AddressLiteral src,
int comparison, int vector_len, Register scratch_reg);
void evpcmpw(KRegister kdst, KRegister mask, XMMRegister nds, XMMRegister src,
int comparison, int vector_len) { Assembler::evpcmpw(kdst, mask, nds, src, comparison, vector_len); }
void evpcmpw(KRegister kdst, KRegister mask, XMMRegister nds, AddressLiteral src,
int comparison, int vector_len, Register scratch_reg);
// Emit comparison instruction for the specified comparison predicate.
void vpcmpCCW(XMMRegister dst, XMMRegister nds, XMMRegister src, ComparisonPredicate cond, Width width, int vector_len, Register scratch_reg);
void vpcmpCC(XMMRegister dst, XMMRegister nds, XMMRegister src, int cond_encoding, Width width, int vector_len);
void vpmovzxbw(XMMRegister dst, Address src, int vector_len);
void vpmovzxbw(XMMRegister dst, XMMRegister src, int vector_len) { Assembler::vpmovzxbw(dst, src, vector_len); }
@ -1234,6 +1302,7 @@ public:
void vpsllw(XMMRegister dst, XMMRegister nds, int shift, int vector_len);
void vptest(XMMRegister dst, XMMRegister src);
void vptest(XMMRegister dst, XMMRegister src, int vector_len) { Assembler::vptest(dst, src, vector_len); }
void punpcklbw(XMMRegister dst, XMMRegister src);
void punpcklbw(XMMRegister dst, Address src) { Assembler::punpcklbw(dst, src); }
@ -1252,6 +1321,8 @@ public:
void vandps(XMMRegister dst, XMMRegister nds, Address src, int vector_len) { Assembler::vandps(dst, nds, src, vector_len); }
void vandps(XMMRegister dst, XMMRegister nds, AddressLiteral src, int vector_len, Register scratch_reg = rscratch1);
void evpord(XMMRegister dst, KRegister mask, XMMRegister nds, AddressLiteral src, bool merge, int vector_len, Register scratch_reg);
void vdivsd(XMMRegister dst, XMMRegister nds, XMMRegister src) { Assembler::vdivsd(dst, nds, src); }
void vdivsd(XMMRegister dst, XMMRegister nds, Address src) { Assembler::vdivsd(dst, nds, src); }
void vdivsd(XMMRegister dst, XMMRegister nds, AddressLiteral src);
@ -1307,6 +1378,9 @@ public:
void vpxor(XMMRegister dst, XMMRegister src) { Assembler::vpxor(dst, dst, src, true); }
void vpxor(XMMRegister dst, Address src) { Assembler::vpxor(dst, dst, src, true); }
void vpermd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) { Assembler::vpermd(dst, nds, src, vector_len); }
void vpermd(XMMRegister dst, XMMRegister nds, AddressLiteral src, int vector_len, Register scratch_reg);
void vinserti128(XMMRegister dst, XMMRegister nds, XMMRegister src, uint8_t imm8) {
if (UseAVX > 2 && VM_Version::supports_avx512novl()) {
Assembler::vinserti32x4(dst, dst, src, imm8);

View file

@ -587,6 +587,29 @@ class StubGenerator: public StubCodeGenerator {
return start;
}
address generate_iota_indices(const char *stub_name) {
__ align(CodeEntryAlignment);
StubCodeMark mark(this, "StubRoutines", stub_name);
address start = __ pc();
__ emit_data(0x03020100, relocInfo::none, 0);
__ emit_data(0x07060504, relocInfo::none, 0);
__ emit_data(0x0B0A0908, relocInfo::none, 0);
__ emit_data(0x0F0E0D0C, relocInfo::none, 0);
__ emit_data(0x13121110, relocInfo::none, 0);
__ emit_data(0x17161514, relocInfo::none, 0);
__ emit_data(0x1B1A1918, relocInfo::none, 0);
__ emit_data(0x1F1E1D1C, relocInfo::none, 0);
__ emit_data(0x23222120, relocInfo::none, 0);
__ emit_data(0x27262524, relocInfo::none, 0);
__ emit_data(0x2B2A2928, relocInfo::none, 0);
__ emit_data(0x2F2E2D2C, relocInfo::none, 0);
__ emit_data(0x33323130, relocInfo::none, 0);
__ emit_data(0x37363534, relocInfo::none, 0);
__ emit_data(0x3B3A3938, relocInfo::none, 0);
__ emit_data(0x3F3E3D3C, relocInfo::none, 0);
return start;
}
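  // Added commentary (not part of this change): the table above is a byte "iota" sequence
  // 0x00 .. 0x3F, i.e. each byte holds its own lane index. Loading it (or a prefix of it)
  // yields a vector of per-lane indices, the natural seed for shuffle/permute based
  // operations; for illustration only:
  //
  //   __ movdqu(xmm0, ExternalAddress(StubRoutines::x86::vector_iota_indices()), rscratch);
  //
  // (the scratch register shown is a placeholder).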
address generate_vector_mask_long_double(const char *stub_name, int32_t maskhi, int32_t masklo) {
__ align(CodeEntryAlignment);
StubCodeMark mark(this, "StubRoutines", stub_name);
@ -627,6 +650,40 @@ class StubGenerator: public StubCodeGenerator {
return start;
}
address generate_vector_custom_i32(const char *stub_name, Assembler::AvxVectorLen len,
int32_t val0, int32_t val1, int32_t val2, int32_t val3,
int32_t val4 = 0, int32_t val5 = 0, int32_t val6 = 0, int32_t val7 = 0,
int32_t val8 = 0, int32_t val9 = 0, int32_t val10 = 0, int32_t val11 = 0,
int32_t val12 = 0, int32_t val13 = 0, int32_t val14 = 0, int32_t val15 = 0) {
__ align(CodeEntryAlignment);
StubCodeMark mark(this, "StubRoutines", stub_name);
address start = __ pc();
assert(len != Assembler::AVX_NoVec, "vector len must be specified");
__ emit_data(val0, relocInfo::none, 0);
__ emit_data(val1, relocInfo::none, 0);
__ emit_data(val2, relocInfo::none, 0);
__ emit_data(val3, relocInfo::none, 0);
if (len >= Assembler::AVX_256bit) {
__ emit_data(val4, relocInfo::none, 0);
__ emit_data(val5, relocInfo::none, 0);
__ emit_data(val6, relocInfo::none, 0);
__ emit_data(val7, relocInfo::none, 0);
if (len >= Assembler::AVX_512bit) {
__ emit_data(val8, relocInfo::none, 0);
__ emit_data(val9, relocInfo::none, 0);
__ emit_data(val10, relocInfo::none, 0);
__ emit_data(val11, relocInfo::none, 0);
__ emit_data(val12, relocInfo::none, 0);
__ emit_data(val13, relocInfo::none, 0);
__ emit_data(val14, relocInfo::none, 0);
__ emit_data(val15, relocInfo::none, 0);
}
}
return start;
}
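  // Usage sketch (illustrative): generate_vector_custom_i32() lays down 4, 8 or 16
  // consecutive 32-bit values depending on the requested vector length, so a 512-bit
  // constant with only its lowest 32 bits set can be produced as at the call sites below:
  //
  //   generate_vector_custom_i32("vector_32_bit_mask", Assembler::AVX_512bit,
  //                              0xFFFFFFFF, 0, 0, 0);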
//----------------------------------------------------------------------------------------------------
// Non-destructive plausibility checks for oops
@ -3902,8 +3959,19 @@ class StubGenerator: public StubCodeGenerator {
StubRoutines::x86::_vector_double_sign_mask = generate_vector_mask_long_double("vector_double_sign_mask", 0x7FFFFFFF, 0xFFFFFFFF);
StubRoutines::x86::_vector_double_sign_flip = generate_vector_mask_long_double("vector_double_sign_flip", 0x80000000, 0x00000000);
StubRoutines::x86::_vector_short_to_byte_mask = generate_vector_mask("vector_short_to_byte_mask", 0x00ff00ff);
StubRoutines::x86::_vector_int_to_byte_mask = generate_vector_mask("vector_int_to_byte_mask", 0x000000ff);
StubRoutines::x86::_vector_int_to_short_mask = generate_vector_mask("vector_int_to_short_mask", 0x0000ffff);
StubRoutines::x86::_vector_32_bit_mask = generate_vector_custom_i32("vector_32_bit_mask", Assembler::AVX_512bit,
0xFFFFFFFF, 0, 0, 0);
StubRoutines::x86::_vector_64_bit_mask = generate_vector_custom_i32("vector_64_bit_mask", Assembler::AVX_512bit,
0xFFFFFFFF, 0xFFFFFFFF, 0, 0);
StubRoutines::x86::_vector_int_shuffle_mask = generate_vector_mask("vector_int_shuffle_mask", 0x03020100);
StubRoutines::x86::_vector_short_shuffle_mask = generate_vector_mask("vector_short_shuffle_mask", 0x01000100);
StubRoutines::x86::_vector_long_shuffle_mask = generate_vector_mask_long_double("vector_long_shuffle_mask", 0x00000001, 0x0);
StubRoutines::x86::_vector_byte_perm_mask = generate_vector_byte_perm_mask("vector_byte_perm_mask");
StubRoutines::x86::_vector_long_sign_mask = generate_vector_mask_long_double("vector_long_sign_mask", 0x80000000, 0x00000000);
StubRoutines::x86::_vector_all_bits_set = generate_vector_mask("vector_all_bits_set", 0xFFFFFFFF);
StubRoutines::x86::_vector_iota_indices = generate_iota_indices("iota_indices");
// support for verify_oop (must happen after universe_init)
StubRoutines::_verify_oop_subroutine_entry = generate_verify_oop();

View file

@ -809,6 +809,21 @@ class StubGenerator: public StubCodeGenerator {
return start;
}
address generate_iota_indices(const char *stub_name) {
__ align(CodeEntryAlignment);
StubCodeMark mark(this, "StubRoutines", stub_name);
address start = __ pc();
__ emit_data64(0x0706050403020100, relocInfo::none);
__ emit_data64(0x0F0E0D0C0B0A0908, relocInfo::none);
__ emit_data64(0x1716151413121110, relocInfo::none);
__ emit_data64(0x1F1E1D1C1B1A1918, relocInfo::none);
__ emit_data64(0x2726252423222120, relocInfo::none);
__ emit_data64(0x2F2E2D2C2B2A2928, relocInfo::none);
__ emit_data64(0x3736353433323130, relocInfo::none);
__ emit_data64(0x3F3E3D3C3B3A3938, relocInfo::none);
return start;
}
address generate_fp_mask(const char *stub_name, int64_t mask) {
__ align(CodeEntryAlignment);
StubCodeMark mark(this, "StubRoutines", stub_name);
@ -854,6 +869,57 @@ class StubGenerator: public StubCodeGenerator {
return start;
}
address generate_vector_fp_mask(const char *stub_name, int64_t mask) {
__ align(CodeEntryAlignment);
StubCodeMark mark(this, "StubRoutines", stub_name);
address start = __ pc();
__ emit_data64(mask, relocInfo::none);
__ emit_data64(mask, relocInfo::none);
__ emit_data64(mask, relocInfo::none);
__ emit_data64(mask, relocInfo::none);
__ emit_data64(mask, relocInfo::none);
__ emit_data64(mask, relocInfo::none);
__ emit_data64(mask, relocInfo::none);
__ emit_data64(mask, relocInfo::none);
return start;
}
address generate_vector_custom_i32(const char *stub_name, Assembler::AvxVectorLen len,
int32_t val0, int32_t val1, int32_t val2, int32_t val3,
int32_t val4 = 0, int32_t val5 = 0, int32_t val6 = 0, int32_t val7 = 0,
int32_t val8 = 0, int32_t val9 = 0, int32_t val10 = 0, int32_t val11 = 0,
int32_t val12 = 0, int32_t val13 = 0, int32_t val14 = 0, int32_t val15 = 0) {
__ align(CodeEntryAlignment);
StubCodeMark mark(this, "StubRoutines", stub_name);
address start = __ pc();
assert(len != Assembler::AVX_NoVec, "vector len must be specified");
__ emit_data(val0, relocInfo::none, 0);
__ emit_data(val1, relocInfo::none, 0);
__ emit_data(val2, relocInfo::none, 0);
__ emit_data(val3, relocInfo::none, 0);
if (len >= Assembler::AVX_256bit) {
__ emit_data(val4, relocInfo::none, 0);
__ emit_data(val5, relocInfo::none, 0);
__ emit_data(val6, relocInfo::none, 0);
__ emit_data(val7, relocInfo::none, 0);
if (len >= Assembler::AVX_512bit) {
__ emit_data(val8, relocInfo::none, 0);
__ emit_data(val9, relocInfo::none, 0);
__ emit_data(val10, relocInfo::none, 0);
__ emit_data(val11, relocInfo::none, 0);
__ emit_data(val12, relocInfo::none, 0);
__ emit_data(val13, relocInfo::none, 0);
__ emit_data(val14, relocInfo::none, 0);
__ emit_data(val15, relocInfo::none, 0);
}
}
return start;
}
// Non-destructive plausibility checks for oops
//
// Arguments:
@ -6769,9 +6835,20 @@ address generate_avx_ghash_processBlocks() {
StubRoutines::x86::_vector_float_sign_flip = generate_vector_mask("vector_float_sign_flip", 0x8000000080000000);
StubRoutines::x86::_vector_double_sign_mask = generate_vector_mask("vector_double_sign_mask", 0x7FFFFFFFFFFFFFFF);
StubRoutines::x86::_vector_double_sign_flip = generate_vector_mask("vector_double_sign_flip", 0x8000000000000000);
StubRoutines::x86::_vector_all_bits_set = generate_vector_mask("vector_all_bits_set", 0xFFFFFFFFFFFFFFFF);
StubRoutines::x86::_vector_short_to_byte_mask = generate_vector_mask("vector_short_to_byte_mask", 0x00ff00ff00ff00ff);
StubRoutines::x86::_vector_byte_perm_mask = generate_vector_byte_perm_mask("vector_byte_perm_mask");
StubRoutines::x86::_vector_int_to_byte_mask = generate_vector_mask("vector_int_to_byte_mask", 0x000000ff000000ff);
StubRoutines::x86::_vector_int_to_short_mask = generate_vector_mask("vector_int_to_short_mask", 0x0000ffff0000ffff);
StubRoutines::x86::_vector_32_bit_mask = generate_vector_custom_i32("vector_32_bit_mask", Assembler::AVX_512bit,
0xFFFFFFFF, 0, 0, 0);
StubRoutines::x86::_vector_64_bit_mask = generate_vector_custom_i32("vector_64_bit_mask", Assembler::AVX_512bit,
0xFFFFFFFF, 0xFFFFFFFF, 0, 0);
StubRoutines::x86::_vector_int_shuffle_mask = generate_vector_mask("vector_int_shuffle_mask", 0x0302010003020100);
StubRoutines::x86::_vector_short_shuffle_mask = generate_vector_mask("vector_short_shuffle_mask", 0x0100010001000100);
StubRoutines::x86::_vector_long_shuffle_mask = generate_vector_mask("vector_long_shuffle_mask", 0x0000000100000000);
StubRoutines::x86::_vector_long_sign_mask = generate_vector_mask("vector_long_sign_mask", 0x8000000000000000);
StubRoutines::x86::_vector_iota_indices = generate_iota_indices("iota_indices");
// support for verify_oop (must happen after universe_init)
StubRoutines::_verify_oop_subroutine_entry = generate_verify_oop();
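The stub constants added above (all-bits-set, narrowing masks, shuffle masks, iota indices, 32/64-bit lane masks) are read-only data blocks that C2-compiled Vector API code loads as vector immediates, for example when materializing a shuffle. A minimal, illustrative sketch of the kind of Java loop whose compiled form consumes such constants — the class and method names here are made up for this example, not part of the change:

```java
// Illustrative only: user-level code whose compiled loop relies on constants
// such as iota_indices / vector_int_shuffle_mask emitted by the stubs above.
import jdk.incubator.vector.IntVector;
import jdk.incubator.vector.VectorShuffle;
import jdk.incubator.vector.VectorSpecies;

public class ReverseLanes {
    static final VectorSpecies<Integer> SPECIES = IntVector.SPECIES_256;

    // Reverse the lanes of each 8-int strip of src into dst.
    static void reverse(int[] src, int[] dst) {
        // Lane indices 7,6,...,0 built from an iota-style index sequence.
        VectorShuffle<Integer> rev =
                VectorShuffle.iota(SPECIES, SPECIES.length() - 1, -1, false);
        int i = 0;
        for (; i <= src.length - SPECIES.length(); i += SPECIES.length()) {
            IntVector.fromArray(SPECIES, src, i).rearrange(rev).intoArray(dst, i);
        }
        for (; i < src.length; i++) {   // scalar tail
            dst[i] = src[i];
        }
    }
}
```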


@ -44,12 +44,21 @@ address StubRoutines::x86::_upper_word_mask_addr = NULL;
address StubRoutines::x86::_shuffle_byte_flip_mask_addr = NULL;
address StubRoutines::x86::_k256_adr = NULL;
address StubRoutines::x86::_vector_short_to_byte_mask = NULL;
address StubRoutines::x86::_vector_int_to_byte_mask = NULL;
address StubRoutines::x86::_vector_int_to_short_mask = NULL;
address StubRoutines::x86::_vector_all_bits_set = NULL;
address StubRoutines::x86::_vector_short_shuffle_mask = NULL;
address StubRoutines::x86::_vector_int_shuffle_mask = NULL;
address StubRoutines::x86::_vector_long_shuffle_mask = NULL;
address StubRoutines::x86::_vector_float_sign_mask = NULL;
address StubRoutines::x86::_vector_float_sign_flip = NULL;
address StubRoutines::x86::_vector_double_sign_mask = NULL;
address StubRoutines::x86::_vector_double_sign_flip = NULL;
address StubRoutines::x86::_vector_byte_perm_mask = NULL;
address StubRoutines::x86::_vector_long_sign_mask = NULL;
address StubRoutines::x86::_vector_iota_indices = NULL;
address StubRoutines::x86::_vector_32_bit_mask = NULL;
address StubRoutines::x86::_vector_64_bit_mask = NULL;
#ifdef _LP64
address StubRoutines::x86::_k256_W_adr = NULL;
address StubRoutines::x86::_k512_W_addr = NULL;


@ -146,8 +146,17 @@ class x86 {
static address _vector_float_sign_flip;
static address _vector_double_sign_mask;
static address _vector_double_sign_flip;
static address _vector_byte_perm_mask;
static address _vector_long_sign_mask;
static address _vector_all_bits_set;
static address _vector_byte_perm_mask;
static address _vector_int_to_byte_mask;
static address _vector_int_to_short_mask;
static address _vector_32_bit_mask;
static address _vector_64_bit_mask;
static address _vector_int_shuffle_mask;
static address _vector_short_shuffle_mask;
static address _vector_long_shuffle_mask;
static address _vector_iota_indices;
#ifdef _LP64
static juint _k256_W[];
static address _k256_W_adr;
@ -248,13 +257,50 @@ class x86 {
return _vector_double_sign_flip;
}
static address vector_all_bits_set() {
return _vector_all_bits_set;
}
static address vector_byte_perm_mask() {
return _vector_byte_perm_mask;
}
static address vector_int_to_byte_mask() {
return _vector_int_to_byte_mask;
}
static address vector_int_to_short_mask() {
return _vector_int_to_short_mask;
}
static address vector_32_bit_mask() {
return _vector_32_bit_mask;
}
static address vector_64_bit_mask() {
return _vector_64_bit_mask;
}
static address vector_int_shuffle_mask() {
return _vector_int_shuffle_mask;
}
static address vector_short_shuffle_mask() {
return _vector_short_shuffle_mask;
}
static address vector_long_shuffle_mask() {
return _vector_long_shuffle_mask;
}
static address vector_long_sign_mask() {
return _vector_long_sign_mask;
}
static address vector_iota_indices() {
return _vector_iota_indices;
}
#ifdef _LP64
static address k256_W_addr() { return _k256_W_adr; }
static address k512_W_addr() { return _k512_W_addr; }

File diff suppressed because it is too large


@ -3315,7 +3315,7 @@ operand immI() %{
%}
// Constant for test vs zero
operand immI0() %{
operand immI_0() %{
predicate(n->get_int() == 0);
match(ConI);
@ -3325,7 +3325,7 @@ operand immI0() %{
%}
// Constant for increment
operand immI1() %{
operand immI_1() %{
predicate(n->get_int() == 1);
match(ConI);
@ -3419,15 +3419,6 @@ operand immI_32_63() %{
interface(CONST_INTER);
%}
operand immI_1() %{
predicate( n->get_int() == 1 );
match(ConI);
op_cost(0);
format %{ %}
interface(CONST_INTER);
%}
operand immI_2() %{
predicate( n->get_int() == 2 );
match(ConI);
@ -3446,6 +3437,26 @@ operand immI_3() %{
interface(CONST_INTER);
%}
operand immI_4()
%{
predicate(n->get_int() == 4);
match(ConI);
op_cost(0);
format %{ %}
interface(CONST_INTER);
%}
operand immI_8()
%{
predicate(n->get_int() == 8);
match(ConI);
op_cost(0);
format %{ %}
interface(CONST_INTER);
%}
// Pointer Immediate
operand immP() %{
match(ConP);
@ -3815,6 +3826,18 @@ operand eRegP() %{
interface(REG_INTER);
%}
operand rRegP() %{
constraint(ALLOC_IN_RC(int_reg));
match(RegP);
match(eAXRegP);
match(eBXRegP);
match(eCXRegP);
match(eDIRegP);
format %{ %}
interface(REG_INTER);
%}
// On windows95, EBP is not safe to use for implicit null tests.
operand eRegP_no_EBP() %{
constraint(ALLOC_IN_RC(int_reg_no_ebp));
@ -3947,6 +3970,15 @@ operand eADXRegL_low_only() %{
interface(REG_INTER);
%}
// Flags register, used as output of compare instructions
operand rFlagsReg() %{
constraint(ALLOC_IN_RC(int_flags));
match(RegFlags);
format %{ "EFLAGS" %}
interface(REG_INTER);
%}
// Flags register, used as output of compare instructions
operand eFlagsReg() %{
constraint(ALLOC_IN_RC(int_flags));
@ -4077,6 +4109,14 @@ operand regF() %{
interface(REG_INTER);
%}
operand legRegF() %{
predicate( UseSSE>=1 );
constraint(ALLOC_IN_RC(float_reg_legacy));
match(RegF);
format %{ %}
interface(REG_INTER);
%}
// Float register operands
operand vlRegF() %{
constraint(ALLOC_IN_RC(float_reg_vl));
@ -4096,6 +4136,14 @@ operand regD() %{
%}
// Double register operands
operand legRegD() %{
predicate( UseSSE>=2 );
constraint(ALLOC_IN_RC(double_reg_legacy));
match(RegD);
format %{ %}
interface(REG_INTER);
%}
operand vlRegD() %{
constraint(ALLOC_IN_RC(double_reg_vl));
match(RegD);
@ -5846,6 +5894,46 @@ instruct loadKlass(eRegP dst, memory mem) %{
ins_pipe( ialu_reg_mem );
%}
// Load Float
instruct MoveF2LEG(legRegF dst, regF src) %{
match(Set dst src);
format %{ "movss $dst,$src\t# if src != dst load float (4 bytes)" %}
ins_encode %{
__ movflt($dst$$XMMRegister, $src$$XMMRegister);
%}
ins_pipe( fpu_reg_reg );
%}
// Load Float
instruct MoveLEG2F(regF dst, legRegF src) %{
match(Set dst src);
format %{ "movss $dst,$src\t# if src != dst load float (4 bytes)" %}
ins_encode %{
__ movflt($dst$$XMMRegister, $src$$XMMRegister);
%}
ins_pipe( fpu_reg_reg );
%}
// Load Double
instruct MoveD2LEG(legRegD dst, regD src) %{
match(Set dst src);
format %{ "movsd $dst,$src\t# if src != dst load double (8 bytes)" %}
ins_encode %{
__ movdbl($dst$$XMMRegister, $src$$XMMRegister);
%}
ins_pipe( fpu_reg_reg );
%}
// Load Double
instruct MoveLEG2D(regD dst, legRegD src) %{
match(Set dst src);
format %{ "movsd $dst,$src\t# if src != dst load double (8 bytes)" %}
ins_encode %{
__ movdbl($dst$$XMMRegister, $src$$XMMRegister);
%}
ins_pipe( fpu_reg_reg );
%}
// Load Double
instruct loadDPR(regDPR dst, memory mem) %{
predicate(UseSSE<=1);
@ -5971,7 +6059,7 @@ instruct loadConI(rRegI dst, immI src) %{
%}
// Load Constant zero
instruct loadConI0(rRegI dst, immI0 src, eFlagsReg cr) %{
instruct loadConI0(rRegI dst, immI_0 src, eFlagsReg cr) %{
match(Set dst src);
effect(KILL cr);
@ -7083,7 +7171,7 @@ instruct addI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
ins_pipe( ialu_reg );
%}
instruct incI_eReg(rRegI dst, immI1 src, eFlagsReg cr) %{
instruct incI_eReg(rRegI dst, immI_1 src, eFlagsReg cr) %{
predicate(UseIncDec);
match(Set dst (AddI dst src));
effect(KILL cr);
@ -7183,7 +7271,7 @@ instruct addI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
ins_pipe( ialu_mem_imm );
%}
instruct incI_mem(memory dst, immI1 src, eFlagsReg cr) %{
instruct incI_mem(memory dst, immI_1 src, eFlagsReg cr) %{
match(Set dst (StoreI dst (AddI (LoadI dst) src)));
effect(KILL cr);
@ -7552,7 +7640,7 @@ instruct subI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
%}
// Subtract from a pointer
instruct subP_eReg(eRegP dst, rRegI src, immI0 zero, eFlagsReg cr) %{
instruct subP_eReg(eRegP dst, rRegI src, immI_0 zero, eFlagsReg cr) %{
match(Set dst (AddP dst (SubI zero src)));
effect(KILL cr);
@ -7563,7 +7651,7 @@ instruct subP_eReg(eRegP dst, rRegI src, immI0 zero, eFlagsReg cr) %{
ins_pipe( ialu_reg_reg );
%}
instruct negI_eReg(rRegI dst, immI0 zero, eFlagsReg cr) %{
instruct negI_eReg(rRegI dst, immI_0 zero, eFlagsReg cr) %{
match(Set dst (SubI zero dst));
effect(KILL cr);
@ -8017,7 +8105,7 @@ instruct modL_eReg_imm32( eADXRegL dst, immL32 imm, rRegI tmp, rRegI tmp2, eFlag
// Integer Shift Instructions
// Shift Left by one
instruct shlI_eReg_1(rRegI dst, immI1 shift, eFlagsReg cr) %{
instruct shlI_eReg_1(rRegI dst, immI_1 shift, eFlagsReg cr) %{
match(Set dst (LShiftI dst shift));
effect(KILL cr);
@ -8053,7 +8141,7 @@ instruct salI_eReg_CL(rRegI dst, eCXRegI shift, eFlagsReg cr) %{
%}
// Arithmetic shift right by one
instruct sarI_eReg_1(rRegI dst, immI1 shift, eFlagsReg cr) %{
instruct sarI_eReg_1(rRegI dst, immI_1 shift, eFlagsReg cr) %{
match(Set dst (RShiftI dst shift));
effect(KILL cr);
@ -8065,7 +8153,7 @@ instruct sarI_eReg_1(rRegI dst, immI1 shift, eFlagsReg cr) %{
%}
// Arithmetic shift right by one
instruct sarI_mem_1(memory dst, immI1 shift, eFlagsReg cr) %{
instruct sarI_mem_1(memory dst, immI_1 shift, eFlagsReg cr) %{
match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
effect(KILL cr);
format %{ "SAR $dst,$shift" %}
@ -8110,7 +8198,7 @@ instruct sarI_eReg_CL(rRegI dst, eCXRegI shift, eFlagsReg cr) %{
%}
// Logical shift right by one
instruct shrI_eReg_1(rRegI dst, immI1 shift, eFlagsReg cr) %{
instruct shrI_eReg_1(rRegI dst, immI_1 shift, eFlagsReg cr) %{
match(Set dst (URShiftI dst shift));
effect(KILL cr);
@ -8266,7 +8354,7 @@ instruct andnI_rReg_rReg_mem(rRegI dst, rRegI src1, memory src2, immI_M1 minus_1
ins_pipe(ialu_reg_mem);
%}
instruct blsiI_rReg_rReg(rRegI dst, rRegI src, immI0 imm_zero, eFlagsReg cr) %{
instruct blsiI_rReg_rReg(rRegI dst, rRegI src, immI_0 imm_zero, eFlagsReg cr) %{
match(Set dst (AndI (SubI imm_zero src) src));
predicate(UseBMI1Instructions);
effect(KILL cr);
@ -8279,7 +8367,7 @@ instruct blsiI_rReg_rReg(rRegI dst, rRegI src, immI0 imm_zero, eFlagsReg cr) %{
ins_pipe(ialu_reg);
%}
instruct blsiI_rReg_mem(rRegI dst, memory src, immI0 imm_zero, eFlagsReg cr) %{
instruct blsiI_rReg_mem(rRegI dst, memory src, immI_0 imm_zero, eFlagsReg cr) %{
match(Set dst (AndI (SubI imm_zero (LoadI src) ) (LoadI src) ));
predicate(UseBMI1Instructions);
effect(KILL cr);
@ -8431,7 +8519,7 @@ instruct orI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
// ROL/ROR
// ROL expand
instruct rolI_eReg_imm1(rRegI dst, immI1 shift, eFlagsReg cr) %{
instruct rolI_eReg_imm1(rRegI dst, immI_1 shift, eFlagsReg cr) %{
effect(USE_DEF dst, USE shift, KILL cr);
format %{ "ROL $dst, $shift" %}
@ -8460,7 +8548,7 @@ instruct rolI_eReg_CL(ncxRegI dst, eCXRegI shift, eFlagsReg cr) %{
// end of ROL expand
// ROL 32bit by one once
instruct rolI_eReg_i1(rRegI dst, immI1 lshift, immI_M1 rshift, eFlagsReg cr) %{
instruct rolI_eReg_i1(rRegI dst, immI_1 lshift, immI_M1 rshift, eFlagsReg cr) %{
match(Set dst ( OrI (LShiftI dst lshift) (URShiftI dst rshift)));
expand %{
@ -8479,7 +8567,7 @@ instruct rolI_eReg_i8(rRegI dst, immI8 lshift, immI8 rshift, eFlagsReg cr) %{
%}
// ROL 32bit var by var once
instruct rolI_eReg_Var_C0(ncxRegI dst, eCXRegI shift, immI0 zero, eFlagsReg cr) %{
instruct rolI_eReg_Var_C0(ncxRegI dst, eCXRegI shift, immI_0 zero, eFlagsReg cr) %{
match(Set dst ( OrI (LShiftI dst shift) (URShiftI dst (SubI zero shift))));
expand %{
@ -8497,7 +8585,7 @@ instruct rolI_eReg_Var_C32(ncxRegI dst, eCXRegI shift, immI_32 c32, eFlagsReg cr
%}
// ROR expand
instruct rorI_eReg_imm1(rRegI dst, immI1 shift, eFlagsReg cr) %{
instruct rorI_eReg_imm1(rRegI dst, immI_1 shift, eFlagsReg cr) %{
effect(USE_DEF dst, USE shift, KILL cr);
format %{ "ROR $dst, $shift" %}
@ -8526,7 +8614,7 @@ instruct rorI_eReg_CL(ncxRegI dst, eCXRegI shift, eFlagsReg cr)%{
// end of ROR expand
// ROR right once
instruct rorI_eReg_i1(rRegI dst, immI1 rshift, immI_M1 lshift, eFlagsReg cr) %{
instruct rorI_eReg_i1(rRegI dst, immI_1 rshift, immI_M1 lshift, eFlagsReg cr) %{
match(Set dst ( OrI (URShiftI dst rshift) (LShiftI dst lshift)));
expand %{
@ -8545,7 +8633,7 @@ instruct rorI_eReg_i8(rRegI dst, immI8 rshift, immI8 lshift, eFlagsReg cr) %{
%}
// ROR 32bit var by var once
instruct rorI_eReg_Var_C0(ncxRegI dst, eCXRegI shift, immI0 zero, eFlagsReg cr) %{
instruct rorI_eReg_Var_C0(ncxRegI dst, eCXRegI shift, immI_0 zero, eFlagsReg cr) %{
match(Set dst ( OrI (URShiftI dst shift) (LShiftI dst (SubI zero shift))));
expand %{
@ -8713,7 +8801,7 @@ instruct cmpLTMask(eCXRegI dst, ncxRegI p, ncxRegI q, eFlagsReg cr) %{
ins_pipe(pipe_slow);
%}
instruct cmpLTMask0(rRegI dst, immI0 zero, eFlagsReg cr) %{
instruct cmpLTMask0(rRegI dst, immI_0 zero, eFlagsReg cr) %{
match(Set dst (CmpLTMask dst zero));
effect(DEF dst, KILL cr);
ins_cost(100);
@ -8827,7 +8915,7 @@ instruct overflowSubI_rReg_imm(eFlagsReg cr, rRegI op1, immI op2)
ins_pipe(ialu_reg_reg);
%}
instruct overflowNegI_rReg(eFlagsReg cr, immI0 zero, eAXRegI op2)
instruct overflowNegI_rReg(eFlagsReg cr, immI_0 zero, eAXRegI op2)
%{
match(Set cr (OverflowSubI zero op2));
effect(DEF cr, USE_KILL op2);
@ -11979,7 +12067,7 @@ instruct compI_eReg_mem(eFlagsReg cr, rRegI op1, memory op2) %{
ins_pipe( ialu_cr_reg_mem );
%}
instruct testI_reg( eFlagsReg cr, rRegI src, immI0 zero ) %{
instruct testI_reg( eFlagsReg cr, rRegI src, immI_0 zero ) %{
match(Set cr (CmpI src zero));
effect( DEF cr, USE src );
@ -11989,7 +12077,7 @@ instruct testI_reg( eFlagsReg cr, rRegI src, immI0 zero ) %{
ins_pipe( ialu_cr_reg_imm );
%}
instruct testI_reg_imm( eFlagsReg cr, rRegI src, immI con, immI0 zero ) %{
instruct testI_reg_imm( eFlagsReg cr, rRegI src, immI con, immI_0 zero ) %{
match(Set cr (CmpI (AndI src con) zero));
format %{ "TEST $src,$con" %}
@ -11998,7 +12086,7 @@ instruct testI_reg_imm( eFlagsReg cr, rRegI src, immI con, immI0 zero ) %{
ins_pipe( ialu_cr_reg_imm );
%}
instruct testI_reg_mem( eFlagsReg cr, rRegI src, memory mem, immI0 zero ) %{
instruct testI_reg_mem( eFlagsReg cr, rRegI src, memory mem, immI_0 zero ) %{
match(Set cr (CmpI (AndI src mem) zero));
format %{ "TEST $src,$mem" %}
@ -12048,7 +12136,7 @@ instruct compU_eReg_mem(eFlagsRegU cr, rRegI op1, memory op2) %{
// ins_encode( OpcP, RegMem( op1, op2) );
//%}
instruct testU_reg( eFlagsRegU cr, rRegI src, immI0 zero ) %{
instruct testU_reg( eFlagsRegU cr, rRegI src, immI_0 zero ) %{
match(Set cr (CmpU src zero));
format %{ "TESTu $src,$src" %}
@ -12125,7 +12213,7 @@ instruct testP_reg( eFlagsReg cr, eRegP src, immP0 zero ) %{
// Cisc-spilled version of testP_reg
// This will generate a signed flags result. This should be ok
// since any compare to a zero should be eq/neq.
instruct testP_Reg_mem( eFlagsReg cr, memory op, immI0 zero ) %{
instruct testP_Reg_mem( eFlagsReg cr, memory op, immI_0 zero ) %{
match(Set cr (CmpP (LoadP op) zero));
format %{ "TEST $op,0xFFFFFFFF" %}
@ -13496,7 +13584,7 @@ instruct tlsLoadP(eRegP dst, eFlagsReg cr) %{
// match(Set dst (CopyI src));
// %}
//
// instruct incI_eReg(rRegI dst, immI1 src, eFlagsReg cr) %{
// instruct incI_eReg(rRegI dst, immI_1 src, eFlagsReg cr) %{
// match(Set dst (AddI dst src));
// effect(KILL cr);
// %}


@ -2871,7 +2871,7 @@ operand immI()
%}
// Constant for test vs zero
operand immI0()
operand immI_0()
%{
predicate(n->get_int() == 0);
match(ConI);
@ -2882,7 +2882,7 @@ operand immI0()
%}
// Constant for increment
operand immI1()
operand immI_1()
%{
predicate(n->get_int() == 1);
match(ConI);
@ -2903,6 +2903,36 @@ operand immI_M1()
interface(CONST_INTER);
%}
operand immI_2()
%{
predicate(n->get_int() == 2);
match(ConI);
op_cost(0);
format %{ %}
interface(CONST_INTER);
%}
operand immI_4()
%{
predicate(n->get_int() == 4);
match(ConI);
op_cost(0);
format %{ %}
interface(CONST_INTER);
%}
operand immI_8()
%{
predicate(n->get_int() == 8);
match(ConI);
op_cost(0);
format %{ %}
interface(CONST_INTER);
%}
// Valid scale values for addressing modes
operand immI2()
%{
@ -5217,19 +5247,19 @@ instruct maxF_reg(legRegF dst, legRegF a, legRegF b, legRegF tmp, legRegF atmp,
match(Set dst (MaxF a b));
effect(USE a, USE b, TEMP tmp, TEMP atmp, TEMP btmp);
format %{
"blendvps $btmp,$b,$a,$b \n\t"
"blendvps $atmp,$a,$b,$b \n\t"
"vblendvps $btmp,$b,$a,$b \n\t"
"vblendvps $atmp,$a,$b,$b \n\t"
"vmaxss $tmp,$atmp,$btmp \n\t"
"cmpps.unordered $btmp,$atmp,$atmp \n\t"
"blendvps $dst,$tmp,$atmp,$btmp \n\t"
"vcmpps.unordered $btmp,$atmp,$atmp \n\t"
"vblendvps $dst,$tmp,$atmp,$btmp \n\t"
%}
ins_encode %{
int vector_len = Assembler::AVX_128bit;
__ blendvps($btmp$$XMMRegister, $b$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, vector_len);
__ blendvps($atmp$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $b$$XMMRegister, vector_len);
__ vblendvps($btmp$$XMMRegister, $b$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, vector_len);
__ vblendvps($atmp$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $b$$XMMRegister, vector_len);
__ vmaxss($tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister);
__ cmpps($btmp$$XMMRegister, $atmp$$XMMRegister, $atmp$$XMMRegister, Assembler::_false, vector_len);
__ blendvps($dst$$XMMRegister, $tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister, vector_len);
__ vcmpps($btmp$$XMMRegister, $atmp$$XMMRegister, $atmp$$XMMRegister, Assembler::_false, vector_len);
__ vblendvps($dst$$XMMRegister, $tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister, vector_len);
%}
ins_pipe( pipe_slow );
%}
@ -5253,19 +5283,19 @@ instruct maxD_reg(legRegD dst, legRegD a, legRegD b, legRegD tmp, legRegD atmp,
match(Set dst (MaxD a b));
effect(USE a, USE b, TEMP atmp, TEMP btmp, TEMP tmp);
format %{
"blendvpd $btmp,$b,$a,$b \n\t"
"blendvpd $atmp,$a,$b,$b \n\t"
"vblendvpd $btmp,$b,$a,$b \n\t"
"vblendvpd $atmp,$a,$b,$b \n\t"
"vmaxsd $tmp,$atmp,$btmp \n\t"
"cmppd.unordered $btmp,$atmp,$atmp \n\t"
"blendvpd $dst,$tmp,$atmp,$btmp \n\t"
"vcmppd.unordered $btmp,$atmp,$atmp \n\t"
"vblendvpd $dst,$tmp,$atmp,$btmp \n\t"
%}
ins_encode %{
int vector_len = Assembler::AVX_128bit;
__ blendvpd($btmp$$XMMRegister, $b$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, vector_len);
__ blendvpd($atmp$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $b$$XMMRegister, vector_len);
__ vblendvpd($btmp$$XMMRegister, $b$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, vector_len);
__ vblendvpd($atmp$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $b$$XMMRegister, vector_len);
__ vmaxsd($tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister);
__ cmppd($btmp$$XMMRegister, $atmp$$XMMRegister, $atmp$$XMMRegister, Assembler::_false, vector_len);
__ blendvpd($dst$$XMMRegister, $tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister, vector_len);
__ vcmppd($btmp$$XMMRegister, $atmp$$XMMRegister, $atmp$$XMMRegister, Assembler::_false, vector_len);
__ vblendvpd($dst$$XMMRegister, $tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister, vector_len);
%}
ins_pipe( pipe_slow );
%}
@ -5289,19 +5319,19 @@ instruct minF_reg(legRegF dst, legRegF a, legRegF b, legRegF tmp, legRegF atmp,
match(Set dst (MinF a b));
effect(USE a, USE b, TEMP tmp, TEMP atmp, TEMP btmp);
format %{
"blendvps $atmp,$a,$b,$a \n\t"
"blendvps $btmp,$b,$a,$a \n\t"
"vblendvps $atmp,$a,$b,$a \n\t"
"vblendvps $btmp,$b,$a,$a \n\t"
"vminss $tmp,$atmp,$btmp \n\t"
"cmpps.unordered $btmp,$atmp,$atmp \n\t"
"blendvps $dst,$tmp,$atmp,$btmp \n\t"
"vcmpps.unordered $btmp,$atmp,$atmp \n\t"
"vblendvps $dst,$tmp,$atmp,$btmp \n\t"
%}
ins_encode %{
int vector_len = Assembler::AVX_128bit;
__ blendvps($atmp$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $a$$XMMRegister, vector_len);
__ blendvps($btmp$$XMMRegister, $b$$XMMRegister, $a$$XMMRegister, $a$$XMMRegister, vector_len);
__ vblendvps($atmp$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $a$$XMMRegister, vector_len);
__ vblendvps($btmp$$XMMRegister, $b$$XMMRegister, $a$$XMMRegister, $a$$XMMRegister, vector_len);
__ vminss($tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister);
__ cmpps($btmp$$XMMRegister, $atmp$$XMMRegister, $atmp$$XMMRegister, Assembler::_false, vector_len);
__ blendvps($dst$$XMMRegister, $tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister, vector_len);
__ vcmpps($btmp$$XMMRegister, $atmp$$XMMRegister, $atmp$$XMMRegister, Assembler::_false, vector_len);
__ vblendvps($dst$$XMMRegister, $tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister, vector_len);
%}
ins_pipe( pipe_slow );
%}
@ -5325,19 +5355,19 @@ instruct minD_reg(legRegD dst, legRegD a, legRegD b, legRegD tmp, legRegD atmp,
match(Set dst (MinD a b));
effect(USE a, USE b, TEMP tmp, TEMP atmp, TEMP btmp);
format %{
"blendvpd $atmp,$a,$b,$a \n\t"
"blendvpd $btmp,$b,$a,$a \n\t"
"vblendvpd $atmp,$a,$b,$a \n\t"
"vblendvpd $btmp,$b,$a,$a \n\t"
"vminsd $tmp,$atmp,$btmp \n\t"
"cmppd.unordered $btmp,$atmp,$atmp \n\t"
"blendvpd $dst,$tmp,$atmp,$btmp \n\t"
"vcmppd.unordered $btmp,$atmp,$atmp \n\t"
"vblendvpd $dst,$tmp,$atmp,$btmp \n\t"
%}
ins_encode %{
int vector_len = Assembler::AVX_128bit;
__ blendvpd($atmp$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $a$$XMMRegister, vector_len);
__ blendvpd($btmp$$XMMRegister, $b$$XMMRegister, $a$$XMMRegister, $a$$XMMRegister, vector_len);
__ vblendvpd($atmp$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $a$$XMMRegister, vector_len);
__ vblendvpd($btmp$$XMMRegister, $b$$XMMRegister, $a$$XMMRegister, $a$$XMMRegister, vector_len);
__ vminsd($tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister);
__ cmppd($btmp$$XMMRegister, $atmp$$XMMRegister, $atmp$$XMMRegister, Assembler::_false, vector_len);
__ blendvpd($dst$$XMMRegister, $tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister, vector_len);
__ vcmppd($btmp$$XMMRegister, $atmp$$XMMRegister, $atmp$$XMMRegister, Assembler::_false, vector_len);
__ vblendvpd($dst$$XMMRegister, $tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister, vector_len);
%}
ins_pipe( pipe_slow );
%}
@ -5561,7 +5591,7 @@ instruct loadConI(rRegI dst, immI src)
ins_pipe(ialu_reg_fat); // XXX
%}
instruct loadConI0(rRegI dst, immI0 src, rFlagsReg cr)
instruct loadConI0(rRegI dst, immI_0 src, rFlagsReg cr)
%{
match(Set dst src);
effect(KILL cr);
@ -5997,7 +6027,7 @@ instruct storeImmNKlass(memory mem, immNKlass src)
%}
// Store Integer Immediate
instruct storeImmI0(memory mem, immI0 zero)
instruct storeImmI0(memory mem, immI_0 zero)
%{
predicate(UseCompressedOops && (CompressedOops::base() == NULL));
match(Set mem (StoreI mem zero));
@ -6047,7 +6077,7 @@ instruct storeImmL(memory mem, immL32 src)
%}
// Store Short/Char Immediate
instruct storeImmC0(memory mem, immI0 zero)
instruct storeImmC0(memory mem, immI_0 zero)
%{
predicate(UseCompressedOops && (CompressedOops::base() == NULL));
match(Set mem (StoreC mem zero));
@ -6073,7 +6103,7 @@ instruct storeImmI16(memory mem, immI16 src)
%}
// Store Byte Immediate
instruct storeImmB0(memory mem, immI0 zero)
instruct storeImmB0(memory mem, immI_0 zero)
%{
predicate(UseCompressedOops && (CompressedOops::base() == NULL));
match(Set mem (StoreB mem zero));
@ -6098,7 +6128,7 @@ instruct storeImmB(memory mem, immI8 src)
%}
// Store CMS card-mark Immediate
instruct storeImmCM0_reg(memory mem, immI0 zero)
instruct storeImmCM0_reg(memory mem, immI_0 zero)
%{
predicate(UseCompressedOops && (CompressedOops::base() == NULL));
match(Set mem (StoreCM mem zero));
@ -6111,7 +6141,7 @@ instruct storeImmCM0_reg(memory mem, immI0 zero)
ins_pipe(ialu_mem_reg);
%}
instruct storeImmCM0(memory mem, immI0 src)
instruct storeImmCM0(memory mem, immI_0 src)
%{
match(Set mem (StoreCM mem src));
@ -7196,7 +7226,7 @@ instruct addI_mem_imm(memory dst, immI src, rFlagsReg cr)
ins_pipe(ialu_mem_imm);
%}
instruct incI_rReg(rRegI dst, immI1 src, rFlagsReg cr)
instruct incI_rReg(rRegI dst, immI_1 src, rFlagsReg cr)
%{
predicate(UseIncDec);
match(Set dst (AddI dst src));
@ -7208,7 +7238,7 @@ instruct incI_rReg(rRegI dst, immI1 src, rFlagsReg cr)
ins_pipe(ialu_reg);
%}
instruct incI_mem(memory dst, immI1 src, rFlagsReg cr)
instruct incI_mem(memory dst, immI_1 src, rFlagsReg cr)
%{
predicate(UseIncDec);
match(Set dst (StoreI dst (AddI (LoadI dst) src)));
@ -8091,7 +8121,7 @@ instruct subL_mem_imm(memory dst, immL32 src, rFlagsReg cr)
// Subtract from a pointer
// XXX hmpf???
instruct subP_rReg(rRegP dst, rRegI src, immI0 zero, rFlagsReg cr)
instruct subP_rReg(rRegP dst, rRegI src, immI_0 zero, rFlagsReg cr)
%{
match(Set dst (AddP dst (SubI zero src)));
effect(KILL cr);
@ -8102,7 +8132,7 @@ instruct subP_rReg(rRegP dst, rRegI src, immI0 zero, rFlagsReg cr)
ins_pipe(ialu_reg_reg);
%}
instruct negI_rReg(rRegI dst, immI0 zero, rFlagsReg cr)
instruct negI_rReg(rRegI dst, immI_0 zero, rFlagsReg cr)
%{
match(Set dst (SubI zero dst));
effect(KILL cr);
@ -8113,7 +8143,19 @@ instruct negI_rReg(rRegI dst, immI0 zero, rFlagsReg cr)
ins_pipe(ialu_reg);
%}
instruct negI_mem(memory dst, immI0 zero, rFlagsReg cr)
instruct negI_rReg_2(rRegI dst, rFlagsReg cr)
%{
match(Set dst (NegI dst));
effect(KILL cr);
format %{ "negl $dst\t# int" %}
ins_encode %{
__ negl($dst$$Register);
%}
ins_pipe(ialu_reg);
%}
instruct negI_mem(memory dst, immI_0 zero, rFlagsReg cr)
%{
match(Set dst (StoreI dst (SubI zero (LoadI dst))));
effect(KILL cr);
@ -8135,6 +8177,18 @@ instruct negL_rReg(rRegL dst, immL0 zero, rFlagsReg cr)
ins_pipe(ialu_reg);
%}
instruct negL_rReg_2(rRegL dst, rFlagsReg cr)
%{
match(Set dst (NegL dst));
effect(KILL cr);
format %{ "negq $dst\t# int" %}
ins_encode %{
__ negq($dst$$Register);
%}
ins_pipe(ialu_reg);
%}
instruct negL_mem(memory dst, immL0 zero, rFlagsReg cr)
%{
match(Set dst (StoreL dst (SubL zero (LoadL dst))));
@ -8460,7 +8514,7 @@ instruct modL_rReg(rdx_RegL rdx, rax_RegL rax, no_rax_rdx_RegL div,
// Integer Shift Instructions
// Shift Left by one
instruct salI_rReg_1(rRegI dst, immI1 shift, rFlagsReg cr)
instruct salI_rReg_1(rRegI dst, immI_1 shift, rFlagsReg cr)
%{
match(Set dst (LShiftI dst shift));
effect(KILL cr);
@ -8472,7 +8526,7 @@ instruct salI_rReg_1(rRegI dst, immI1 shift, rFlagsReg cr)
%}
// Shift Left by one
instruct salI_mem_1(memory dst, immI1 shift, rFlagsReg cr)
instruct salI_mem_1(memory dst, immI_1 shift, rFlagsReg cr)
%{
match(Set dst (StoreI dst (LShiftI (LoadI dst) shift)));
effect(KILL cr);
@ -8532,7 +8586,7 @@ instruct salI_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
%}
// Arithmetic shift right by one
instruct sarI_rReg_1(rRegI dst, immI1 shift, rFlagsReg cr)
instruct sarI_rReg_1(rRegI dst, immI_1 shift, rFlagsReg cr)
%{
match(Set dst (RShiftI dst shift));
effect(KILL cr);
@ -8544,7 +8598,7 @@ instruct sarI_rReg_1(rRegI dst, immI1 shift, rFlagsReg cr)
%}
// Arithmetic shift right by one
instruct sarI_mem_1(memory dst, immI1 shift, rFlagsReg cr)
instruct sarI_mem_1(memory dst, immI_1 shift, rFlagsReg cr)
%{
match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
effect(KILL cr);
@ -8604,7 +8658,7 @@ instruct sarI_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
%}
// Logical shift right by one
instruct shrI_rReg_1(rRegI dst, immI1 shift, rFlagsReg cr)
instruct shrI_rReg_1(rRegI dst, immI_1 shift, rFlagsReg cr)
%{
match(Set dst (URShiftI dst shift));
effect(KILL cr);
@ -8616,7 +8670,7 @@ instruct shrI_rReg_1(rRegI dst, immI1 shift, rFlagsReg cr)
%}
// Logical shift right by one
instruct shrI_mem_1(memory dst, immI1 shift, rFlagsReg cr)
instruct shrI_mem_1(memory dst, immI_1 shift, rFlagsReg cr)
%{
match(Set dst (StoreI dst (URShiftI (LoadI dst) shift)));
effect(KILL cr);
@ -8677,7 +8731,7 @@ instruct shrI_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
// Long Shift Instructions
// Shift Left by one
instruct salL_rReg_1(rRegL dst, immI1 shift, rFlagsReg cr)
instruct salL_rReg_1(rRegL dst, immI_1 shift, rFlagsReg cr)
%{
match(Set dst (LShiftL dst shift));
effect(KILL cr);
@ -8689,7 +8743,7 @@ instruct salL_rReg_1(rRegL dst, immI1 shift, rFlagsReg cr)
%}
// Shift Left by one
instruct salL_mem_1(memory dst, immI1 shift, rFlagsReg cr)
instruct salL_mem_1(memory dst, immI_1 shift, rFlagsReg cr)
%{
match(Set dst (StoreL dst (LShiftL (LoadL dst) shift)));
effect(KILL cr);
@ -8750,7 +8804,7 @@ instruct salL_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
%}
// Arithmetic shift right by one
instruct sarL_rReg_1(rRegL dst, immI1 shift, rFlagsReg cr)
instruct sarL_rReg_1(rRegL dst, immI_1 shift, rFlagsReg cr)
%{
match(Set dst (RShiftL dst shift));
effect(KILL cr);
@ -8762,7 +8816,7 @@ instruct sarL_rReg_1(rRegL dst, immI1 shift, rFlagsReg cr)
%}
// Arithmetic shift right by one
instruct sarL_mem_1(memory dst, immI1 shift, rFlagsReg cr)
instruct sarL_mem_1(memory dst, immI_1 shift, rFlagsReg cr)
%{
match(Set dst (StoreL dst (RShiftL (LoadL dst) shift)));
effect(KILL cr);
@ -8823,7 +8877,7 @@ instruct sarL_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
%}
// Logical shift right by one
instruct shrL_rReg_1(rRegL dst, immI1 shift, rFlagsReg cr)
instruct shrL_rReg_1(rRegL dst, immI_1 shift, rFlagsReg cr)
%{
match(Set dst (URShiftL dst shift));
effect(KILL cr);
@ -8835,7 +8889,7 @@ instruct shrL_rReg_1(rRegL dst, immI1 shift, rFlagsReg cr)
%}
// Logical shift right by one
instruct shrL_mem_1(memory dst, immI1 shift, rFlagsReg cr)
instruct shrL_mem_1(memory dst, immI_1 shift, rFlagsReg cr)
%{
match(Set dst (StoreL dst (URShiftL (LoadL dst) shift)));
effect(KILL cr);
@ -9207,7 +9261,7 @@ instruct andnI_rReg_rReg_rReg(rRegI dst, rRegI src1, rRegI src2, immI_M1 minus_1
ins_pipe(ialu_reg);
%}
instruct blsiI_rReg_rReg(rRegI dst, rRegI src, immI0 imm_zero, rFlagsReg cr) %{
instruct blsiI_rReg_rReg(rRegI dst, rRegI src, immI_0 imm_zero, rFlagsReg cr) %{
match(Set dst (AndI (SubI imm_zero src) src));
predicate(UseBMI1Instructions);
effect(KILL cr);
@ -9220,7 +9274,7 @@ instruct blsiI_rReg_rReg(rRegI dst, rRegI src, immI0 imm_zero, rFlagsReg cr) %{
ins_pipe(ialu_reg);
%}
instruct blsiI_rReg_mem(rRegI dst, memory src, immI0 imm_zero, rFlagsReg cr) %{
instruct blsiI_rReg_mem(rRegI dst, memory src, immI_0 imm_zero, rFlagsReg cr) %{
match(Set dst (AndI (SubI imm_zero (LoadI src) ) (LoadI src) ));
predicate(UseBMI1Instructions);
effect(KILL cr);
@ -9903,7 +9957,7 @@ instruct cmpLTMask(rRegI dst, rRegI p, rRegI q, rFlagsReg cr)
ins_pipe(pipe_slow);
%}
instruct cmpLTMask0(rRegI dst, immI0 zero, rFlagsReg cr)
instruct cmpLTMask0(rRegI dst, immI_0 zero, rFlagsReg cr)
%{
match(Set dst (CmpLTMask dst zero));
effect(KILL cr);
@ -11250,7 +11304,7 @@ instruct overflowSubL_rReg_imm(rFlagsReg cr, rRegL op1, immL32 op2)
ins_pipe(ialu_reg_reg);
%}
instruct overflowNegI_rReg(rFlagsReg cr, immI0 zero, rax_RegI op2)
instruct overflowNegI_rReg(rFlagsReg cr, immI_0 zero, rax_RegI op2)
%{
match(Set cr (OverflowSubI zero op2));
effect(DEF cr, USE_KILL op2);
@ -11359,7 +11413,7 @@ instruct compI_rReg_mem(rFlagsReg cr, rRegI op1, memory op2)
ins_pipe(ialu_cr_reg_mem);
%}
instruct testI_reg(rFlagsReg cr, rRegI src, immI0 zero)
instruct testI_reg(rFlagsReg cr, rRegI src, immI_0 zero)
%{
match(Set cr (CmpI src zero));
@ -11369,7 +11423,7 @@ instruct testI_reg(rFlagsReg cr, rRegI src, immI0 zero)
ins_pipe(ialu_cr_reg_imm);
%}
instruct testI_reg_imm(rFlagsReg cr, rRegI src, immI con, immI0 zero)
instruct testI_reg_imm(rFlagsReg cr, rRegI src, immI con, immI_0 zero)
%{
match(Set cr (CmpI (AndI src con) zero));
@ -11379,7 +11433,7 @@ instruct testI_reg_imm(rFlagsReg cr, rRegI src, immI con, immI0 zero)
ins_pipe(ialu_cr_reg_imm);
%}
instruct testI_reg_mem(rFlagsReg cr, rRegI src, memory mem, immI0 zero)
instruct testI_reg_mem(rFlagsReg cr, rRegI src, memory mem, immI_0 zero)
%{
match(Set cr (CmpI (AndI src (LoadI mem)) zero));
@ -11433,7 +11487,7 @@ instruct compU_rReg_mem(rFlagsRegU cr, rRegI op1, memory op2)
// // ins_encode( OpcP, reg_mem( op1, op2) );
// //%}
instruct testU_reg(rFlagsRegU cr, rRegI src, immI0 zero)
instruct testU_reg(rFlagsRegU cr, rRegI src, immI_0 zero)
%{
match(Set cr (CmpU src zero));
@ -11771,7 +11825,7 @@ instruct compB_mem_imm(rFlagsReg cr, memory mem, immI8 imm)
ins_pipe(ialu_cr_reg_mem);
%}
instruct testUB_mem_imm(rFlagsReg cr, memory mem, immU7 imm, immI0 zero)
instruct testUB_mem_imm(rFlagsReg cr, memory mem, immU7 imm, immI_0 zero)
%{
match(Set cr (CmpI (AndI (LoadUB mem) imm) zero));
@ -11781,7 +11835,7 @@ instruct testUB_mem_imm(rFlagsReg cr, memory mem, immU7 imm, immI0 zero)
ins_pipe(ialu_cr_reg_mem);
%}
instruct testB_mem_imm(rFlagsReg cr, memory mem, immI8 imm, immI0 zero)
instruct testB_mem_imm(rFlagsReg cr, memory mem, immI8 imm, immI_0 zero)
%{
match(Set cr (CmpI (AndI (LoadB mem) imm) zero));
@ -12504,7 +12558,7 @@ instruct tlsLoadP(r15_RegP dst) %{
// match(Set dst (CopyI src));
// %}
//
// instruct incI_rReg(rRegI dst, immI1 src, rFlagsReg cr)
// instruct incI_rReg(rRegI dst, immI_1 src, rFlagsReg cr)
// %{
// match(Set dst (AddI dst src));
// effect(KILL cr);


@ -268,6 +268,7 @@ Form::DataType Form::is_load_from_memory(const char *opType) const {
if( strcmp(opType,"LoadRange")==0 ) return Form::idealI;
if( strcmp(opType,"LoadS")==0 ) return Form::idealS;
if( strcmp(opType,"LoadVector")==0 ) return Form::idealV;
if( strcmp(opType,"LoadVectorGather")==0 ) return Form::idealV;
assert( strcmp(opType,"Load") != 0, "Must type Loads" );
return Form::none;
}
@ -284,6 +285,7 @@ Form::DataType Form::is_store_to_memory(const char *opType) const {
if( strcmp(opType,"StoreN")==0) return Form::idealN;
if( strcmp(opType,"StoreNKlass")==0) return Form::idealNKlass;
if( strcmp(opType,"StoreVector")==0 ) return Form::idealV;
if( strcmp(opType,"StoreVectorScatter")==0 ) return Form::idealV;
assert( strcmp(opType,"Store") != 0, "Must type Stores" );
return Form::none;
}
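LoadVectorGather and StoreVectorScatter are the ideal nodes behind the index-mapped load/store overloads of the Vector API; registering them here (and in the memory-edge list below) lets adlc classify them as vector memory operations when matching. A hedged sketch of user code that would map onto them — the class and method names are illustrative, and whether the gather/scatter intrinsics actually apply depends on the target CPU:

```java
import jdk.incubator.vector.FloatVector;
import jdk.incubator.vector.VectorSpecies;

public class GatherScatter {
    static final VectorSpecies<Float> SPECIES = FloatVector.SPECIES_PREFERRED;

    // Doubles src at the positions named by one strip of idx, writing dst at
    // the same positions, via the index-mapped fromArray/intoArray overloads
    // (the loadWithMap/storeWithMap entry points listed further down).
    static void doubleAt(float[] src, float[] dst, int[] idx, int off) {
        FloatVector v = FloatVector.fromArray(SPECIES, src, 0, idx, off);
        v.mul(2.0f).intoArray(dst, 0, idx, off);
    }
}
```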


@ -3484,7 +3484,7 @@ int MatchNode::needs_ideal_memory_edge(FormDict &globals) const {
"StoreB","StoreC","Store" ,"StoreFP",
"LoadI", "LoadL", "LoadP" ,"LoadN", "LoadD" ,"LoadF" ,
"LoadB" , "LoadUB", "LoadUS" ,"LoadS" ,"Load" ,
"StoreVector", "LoadVector",
"StoreVector", "LoadVector", "LoadVectorGather", "StoreVectorScatter",
"LoadRange", "LoadKlass", "LoadNKlass", "LoadL_unaligned", "LoadD_unaligned",
"LoadPLocked",
"StorePConditional", "StoreIConditional", "StoreLConditional",
@ -3801,6 +3801,7 @@ void MatchNode::count_commutative_op(int& count) {
"MaxV", "MinV",
"MulI","MulL","MulF","MulD",
"MulVB","MulVS","MulVI","MulVL","MulVF","MulVD",
"MinV","MaxV",
"OrI","OrL",
"OrV",
"XorI","XorL",
@ -4151,8 +4152,9 @@ bool MatchRule::is_vector() const {
"MulVB","MulVS","MulVI","MulVL","MulVF","MulVD",
"CMoveVD", "CMoveVF",
"DivVF","DivVD",
"MinV","MaxV",
"AbsVB","AbsVS","AbsVI","AbsVL","AbsVF","AbsVD",
"NegVF","NegVD",
"NegVF","NegVD","NegVI",
"SqrtVD","SqrtVF",
"AndV" ,"XorV" ,"OrV",
"MaxV", "MinV",
@ -4169,6 +4171,12 @@ bool MatchRule::is_vector() const {
"URShiftVB","URShiftVS","URShiftVI","URShiftVL",
"ReplicateB","ReplicateS","ReplicateI","ReplicateL","ReplicateF","ReplicateD",
"RoundDoubleModeV","RotateLeftV" , "RotateRightV", "LoadVector","StoreVector",
"LoadVectorGather", "StoreVectorScatter",
"VectorTest", "VectorLoadMask", "VectorStoreMask", "VectorBlend", "VectorInsert",
"VectorRearrange","VectorLoadShuffle", "VectorLoadConst",
"VectorCastB2X", "VectorCastS2X", "VectorCastI2X",
"VectorCastL2X", "VectorCastF2X", "VectorCastD2X",
"VectorMaskWrapper", "VectorMaskCmp", "VectorReinterpret",
"FmaVD", "FmaVF","PopCountVI",
// Next are not supported currently.
"PackB","PackS","PackI","PackL","PackF","PackD","Pack2L","Pack2D",


@ -222,6 +222,7 @@ static bool trust_final_non_static_fields(ciInstanceKlass* holder) {
// Even if general trusting is disabled, trust system-built closures in these packages.
if (holder->is_in_package("java/lang/invoke") || holder->is_in_package("sun/invoke") ||
holder->is_in_package("jdk/internal/foreign") || holder->is_in_package("jdk/incubator/foreign") ||
holder->is_in_package("jdk/internal/vm/vector") || holder->is_in_package("jdk/incubator/vector") ||
holder->is_in_package("java/lang"))
return true;
// Trust hidden classes and VM unsafe anonymous classes. They are created via


@ -1353,6 +1353,11 @@ bool ciMethod::is_unboxing_method() const {
return false;
}
bool ciMethod::is_vector_method() const {
return (holder() == ciEnv::current()->vector_VectorSupport_klass()) &&
(intrinsic_id() != vmIntrinsics::_none);
}
BCEscapeAnalyzer *ciMethod::get_bcea() {
#ifdef COMPILER2
if (_bcea == NULL) {


@ -356,6 +356,7 @@ class ciMethod : public ciMetadata {
bool has_reserved_stack_access() const { return _has_reserved_stack_access; }
bool is_boxing_method() const;
bool is_unboxing_method() const;
bool is_vector_method() const;
bool is_object_initializer() const;
bool can_be_statically_bound(ciInstanceKlass* context) const;


@ -4539,6 +4539,30 @@ void java_util_concurrent_locks_AbstractOwnableSynchronizer::serialize_offsets(S
}
#endif
int vector_VectorPayload::_payload_offset;
#define VECTORPAYLOAD_FIELDS_DO(macro) \
macro(_payload_offset, k, "payload", object_signature, false)
void vector_VectorPayload::compute_offsets() {
InstanceKlass* k = SystemDictionary::vector_VectorPayload_klass();
VECTORPAYLOAD_FIELDS_DO(FIELD_COMPUTE_OFFSET);
}
#if INCLUDE_CDS
void vector_VectorPayload::serialize_offsets(SerializeClosure* f) {
VECTORPAYLOAD_FIELDS_DO(FIELD_SERIALIZE_OFFSET);
}
#endif
void vector_VectorPayload::set_payload(oop o, oop val) {
o->obj_field_put(_payload_offset, val);
}
bool vector_VectorPayload::is_instance(oop obj) {
return obj != NULL && is_subclass(obj->klass());
}
int java_lang_Integer_IntegerCache::_static_cache_offset;
int java_lang_Long_LongCache::_static_cache_offset;
int java_lang_Character_CharacterCache::_static_cache_offset;


@ -76,6 +76,7 @@ class RecordComponent;
f(java_util_concurrent_locks_AbstractOwnableSynchronizer) \
f(jdk_internal_misc_UnsafeConstants) \
f(java_lang_boxing_object) \
f(vector_VectorPayload) \
//end
#define BASIC_JAVA_CLASSES_DO(f) \
@ -1564,6 +1565,24 @@ class jdk_internal_misc_UnsafeConstants : AllStatic {
static void serialize_offsets(SerializeClosure* f) { }
};
// Interface to jdk.internal.vm.vector.VectorSupport.VectorPayload objects
class vector_VectorPayload : AllStatic {
private:
static int _payload_offset;
public:
static void set_payload(oop o, oop val);
static void compute_offsets();
static void serialize_offsets(SerializeClosure* f) NOT_CDS_RETURN;
// Testers
static bool is_subclass(Klass* klass) {
return klass->is_subclass_of(SystemDictionary::vector_VectorPayload_klass());
}
static bool is_instance(oop obj);
};
class java_lang_Integer : AllStatic {
public:
static jint value(oop obj);
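The payload offset computed and serialized above refers to the single Object field in which VectorSupport.VectorPayload subclasses (vectors, masks, shuffles) carry their primitive-array state; the VM needs it to rebox vector values, e.g. during deoptimization. A simplified, illustrative Java shape of that holder — this is a sketch, not the actual JDK source:

```java
// Toy sketch of the Java-side shape behind vector_VectorPayload: every boxed
// vector, mask and shuffle ultimately wraps one primitive array in a single
// "payload" field, whose offset the VM records above.
abstract class ToyVectorPayload {
    private final Object payload; // e.g. float[], int[], byte[], boolean[]

    protected ToyVectorPayload(Object payload) {
        this.payload = payload;
    }

    protected final Object getPayload() {
        return payload;
    }
}
```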


@ -43,6 +43,7 @@
#include "memory/metaspaceShared.hpp"
#include "memory/resourceArea.hpp"
#include "prims/jvmtiExport.hpp"
#include "runtime/globals_extension.hpp"
#include "runtime/handles.inline.hpp"
#include "runtime/javaCalls.hpp"
#include "runtime/jniHandles.inline.hpp"
@ -452,6 +453,24 @@ void Modules::define_module(jobject module, jboolean is_open, jstring version,
if (h_loader.is_null() && !ClassLoader::has_jrt_entry()) {
ClassLoader::add_to_exploded_build_list(module_symbol, CHECK);
}
#ifdef COMPILER2
// Special handling of jdk.incubator.vector
if (strcmp(module_name, "jdk.incubator.vector") == 0) {
if (FLAG_IS_DEFAULT(EnableVectorSupport)) {
FLAG_SET_DEFAULT(EnableVectorSupport, true);
}
if (EnableVectorSupport && FLAG_IS_DEFAULT(EnableVectorReboxing)) {
FLAG_SET_DEFAULT(EnableVectorReboxing, true);
}
if (EnableVectorSupport && EnableVectorReboxing && FLAG_IS_DEFAULT(EnableVectorAggressiveReboxing)) {
FLAG_SET_DEFAULT(EnableVectorAggressiveReboxing, true);
}
log_info(compilation)("EnableVectorSupport=%s", (EnableVectorSupport ? "true" : "false"));
log_info(compilation)("EnableVectorReboxing=%s", (EnableVectorReboxing ? "true" : "false"));
log_info(compilation)("EnableVectorAggressiveReboxing=%s", (EnableVectorAggressiveReboxing ? "true" : "false"));
}
#endif // COMPILER2
}
#if INCLUDE_CDS_JAVA_HEAP
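The hook above only flips the experimental C2 flags (EnableVectorSupport and the reboxing flags) when jdk.incubator.vector is actually resolved into the module graph, so applications that never add the module keep the defaults. A minimal program that resolves the module and exercises the API; assuming a JDK built with this change, it could be launched as shown in the comment:

```java
// Could be run with single-file source launch as:
//   java --add-modules jdk.incubator.vector Saxpy.java
import jdk.incubator.vector.FloatVector;
import jdk.incubator.vector.VectorSpecies;

public class Saxpy {
    static final VectorSpecies<Float> SPECIES = FloatVector.SPECIES_PREFERRED;

    // y[i] = a * x[i] + y[i]
    static void saxpy(float a, float[] x, float[] y) {
        int i = 0;
        for (; i <= x.length - SPECIES.length(); i += SPECIES.length()) {
            FloatVector vx = FloatVector.fromArray(SPECIES, x, i);
            FloatVector vy = FloatVector.fromArray(SPECIES, y, i);
            vx.mul(a).add(vy).intoArray(y, i);
        }
        for (; i < x.length; i++) {     // scalar tail
            y[i] = a * x[i] + y[i];
        }
    }

    public static void main(String[] args) {
        float[] x = new float[1024];
        float[] y = new float[1024];
        java.util.Arrays.fill(x, 1.0f);
        saxpy(2.0f, x, y);
        System.out.println(y[0]); // 2.0
    }
}
```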


@ -226,6 +226,13 @@ class TableStatistics;
/* support for records */ \
do_klass(RecordComponent_klass, java_lang_reflect_RecordComponent ) \
\
/* support for vectors*/ \
do_klass(vector_VectorSupport_klass, jdk_internal_vm_vector_VectorSupport ) \
do_klass(vector_VectorPayload_klass, jdk_internal_vm_vector_VectorPayload ) \
do_klass(vector_Vector_klass, jdk_internal_vm_vector_Vector ) \
do_klass(vector_VectorMask_klass, jdk_internal_vm_vector_VectorMask ) \
do_klass(vector_VectorShuffle_klass, jdk_internal_vm_vector_VectorShuffle ) \
\
/*end*/
class SystemDictionary : AllStatic {


@ -778,6 +778,122 @@
do_intrinsic(_getAndSetReference, jdk_internal_misc_Unsafe, getAndSetReference_name, getAndSetReference_signature, F_R) \
do_name( getAndSetReference_name, "getAndSetReference") \
do_signature(getAndSetReference_signature, "(Ljava/lang/Object;JLjava/lang/Object;)Ljava/lang/Object;" ) \
\
/* Vector API intrinsification support */ \
\
do_intrinsic(_VectorUnaryOp, jdk_internal_vm_vector_VectorSupport, vector_unary_op_name, vector_unary_op_sig, F_S) \
do_signature(vector_unary_op_sig, "(ILjava/lang/Class;Ljava/lang/Class;ILjava/lang/Object;Ljava/util/function/Function;)Ljava/lang/Object;") \
do_name(vector_unary_op_name, "unaryOp") \
\
do_intrinsic(_VectorBinaryOp, jdk_internal_vm_vector_VectorSupport, vector_binary_op_name, vector_binary_op_sig, F_S) \
do_signature(vector_binary_op_sig, "(ILjava/lang/Class;Ljava/lang/Class;ILjava/lang/Object;Ljava/lang/Object;" \
"Ljava/util/function/BiFunction;)Ljava/lang/Object;") \
do_name(vector_binary_op_name, "binaryOp") \
\
do_intrinsic(_VectorTernaryOp, jdk_internal_vm_vector_VectorSupport, vector_ternary_op_name, vector_ternary_op_sig, F_S) \
do_signature(vector_ternary_op_sig, "(ILjava/lang/Class;Ljava/lang/Class;ILjava/lang/Object;Ljava/lang/Object;" \
"Ljava/lang/Object;Ljdk/internal/vm/vector/VectorSupport$TernaryOperation;)Ljava/lang/Object;") \
do_name(vector_ternary_op_name, "ternaryOp") \
\
do_intrinsic(_VectorBroadcastCoerced, jdk_internal_vm_vector_VectorSupport, vector_broadcast_coerced_name, vector_broadcast_coerced_sig, F_S)\
do_signature(vector_broadcast_coerced_sig, "(Ljava/lang/Class;Ljava/lang/Class;IJLjdk/internal/vm/vector/VectorSupport$VectorSpecies;" \
"Ljdk/internal/vm/vector/VectorSupport$BroadcastOperation;)Ljava/lang/Object;") \
do_name(vector_broadcast_coerced_name, "broadcastCoerced") \
\
do_intrinsic(_VectorShuffleIota, jdk_internal_vm_vector_VectorSupport, vector_shuffle_step_iota_name, vector_shuffle_step_iota_sig, F_S) \
do_signature(vector_shuffle_step_iota_sig, "(Ljava/lang/Class;Ljava/lang/Class;Ljdk/internal/vm/vector/VectorSupport$VectorSpecies;" \
"IIIILjdk/internal/vm/vector/VectorSupport$ShuffleIotaOperation;)Ljdk/internal/vm/vector/VectorSupport$VectorShuffle;") \
do_name(vector_shuffle_step_iota_name, "shuffleIota") \
\
do_intrinsic(_VectorShuffleToVector, jdk_internal_vm_vector_VectorSupport, vector_shuffle_to_vector_name, vector_shuffle_to_vector_sig, F_S) \
do_signature(vector_shuffle_to_vector_sig, "(Ljava/lang/Class;Ljava/lang/Class;Ljava/lang/Class;Ljdk/internal/vm/vector/VectorSupport$VectorShuffle;" \
"ILjdk/internal/vm/vector/VectorSupport$ShuffleToVectorOperation;)Ljava/lang/Object;") \
do_name(vector_shuffle_to_vector_name, "shuffleToVector") \
\
do_intrinsic(_VectorLoadOp, jdk_internal_vm_vector_VectorSupport, vector_load_op_name, vector_load_op_sig, F_S) \
do_signature(vector_load_op_sig, "(Ljava/lang/Class;Ljava/lang/Class;ILjava/lang/Object;JLjava/lang/Object;" \
"ILjdk/internal/vm/vector/VectorSupport$VectorSpecies;Ljdk/internal/vm/vector/VectorSupport$LoadOperation;)Ljava/lang/Object;") \
do_name(vector_load_op_name, "load") \
\
do_intrinsic(_VectorStoreOp, jdk_internal_vm_vector_VectorSupport, vector_store_op_name, vector_store_op_sig, F_S) \
do_signature(vector_store_op_sig, "(Ljava/lang/Class;Ljava/lang/Class;ILjava/lang/Object;JLjdk/internal/vm/vector/VectorSupport$Vector;" \
"Ljava/lang/Object;ILjdk/internal/vm/vector/VectorSupport$StoreVectorOperation;)V") \
do_name(vector_store_op_name, "store") \
\
do_intrinsic(_VectorReductionCoerced, jdk_internal_vm_vector_VectorSupport, vector_reduction_coerced_name, vector_reduction_coerced_sig, F_S) \
do_signature(vector_reduction_coerced_sig, "(ILjava/lang/Class;Ljava/lang/Class;ILjdk/internal/vm/vector/VectorSupport$Vector;Ljava/util/function/Function;)J") \
do_name(vector_reduction_coerced_name, "reductionCoerced") \
\
do_intrinsic(_VectorTest, jdk_internal_vm_vector_VectorSupport, vector_test_name, vector_test_sig, F_S) \
do_signature(vector_test_sig, "(ILjava/lang/Class;Ljava/lang/Class;ILjava/lang/Object;Ljava/lang/Object;Ljava/util/function/BiFunction;)Z") \
do_name(vector_test_name, "test") \
\
do_intrinsic(_VectorBlend, jdk_internal_vm_vector_VectorSupport, vector_blend_name, vector_blend_sig, F_S) \
do_signature(vector_blend_sig, "(Ljava/lang/Class;Ljava/lang/Class;Ljava/lang/Class;I" \
"Ljdk/internal/vm/vector/VectorSupport$Vector;Ljdk/internal/vm/vector/VectorSupport$Vector;Ljdk/internal/vm/vector/VectorSupport$VectorMask;" \
"Ljdk/internal/vm/vector/VectorSupport$VectorBlendOp;)Ljdk/internal/vm/vector/VectorSupport$Vector;") \
do_name(vector_blend_name, "blend") \
\
do_intrinsic(_VectorCompare, jdk_internal_vm_vector_VectorSupport, vector_compare_name, vector_compare_sig, F_S) \
do_signature(vector_compare_sig, "(ILjava/lang/Class;Ljava/lang/Class;Ljava/lang/Class;I" \
"Ljdk/internal/vm/vector/VectorSupport$Vector;" "Ljdk/internal/vm/vector/VectorSupport$Vector;" \
"Ljdk/internal/vm/vector/VectorSupport$VectorCompareOp;" ")" "Ljdk/internal/vm/vector/VectorSupport$VectorMask;") \
do_name(vector_compare_name, "compare") \
\
do_intrinsic(_VectorRearrange, jdk_internal_vm_vector_VectorSupport, vector_rearrange_name, vector_rearrange_sig, F_S) \
do_signature(vector_rearrange_sig, "(Ljava/lang/Class;Ljava/lang/Class;Ljava/lang/Class;I" \
"Ljdk/internal/vm/vector/VectorSupport$Vector;Ljdk/internal/vm/vector/VectorSupport$VectorShuffle;" \
"Ljdk/internal/vm/vector/VectorSupport$VectorRearrangeOp;)Ljdk/internal/vm/vector/VectorSupport$Vector;") \
do_name(vector_rearrange_name, "rearrangeOp") \
\
do_intrinsic(_VectorExtract, jdk_internal_vm_vector_VectorSupport, vector_extract_name, vector_extract_sig, F_S) \
do_signature(vector_extract_sig, "(Ljava/lang/Class;Ljava/lang/Class;I" \
"Ljdk/internal/vm/vector/VectorSupport$Vector;I" \
"Ljdk/internal/vm/vector/VectorSupport$VecExtractOp;)J") \
do_name(vector_extract_name, "extract") \
\
do_intrinsic(_VectorInsert, jdk_internal_vm_vector_VectorSupport, vector_insert_name, vector_insert_sig, F_S) \
do_signature(vector_insert_sig, "(Ljava/lang/Class;Ljava/lang/Class;I" \
"Ljdk/internal/vm/vector/VectorSupport$Vector;IJ" \
"Ljdk/internal/vm/vector/VectorSupport$VecInsertOp;)Ljdk/internal/vm/vector/VectorSupport$Vector;") \
do_name(vector_insert_name, "insert") \
\
do_intrinsic(_VectorBroadcastInt, jdk_internal_vm_vector_VectorSupport, vector_broadcast_int_name, vector_broadcast_int_sig, F_S) \
do_signature(vector_broadcast_int_sig, "(ILjava/lang/Class;Ljava/lang/Class;I" \
"Ljdk/internal/vm/vector/VectorSupport$Vector;I" \
"Ljdk/internal/vm/vector/VectorSupport$VectorBroadcastIntOp;)Ljdk/internal/vm/vector/VectorSupport$Vector;") \
do_name(vector_broadcast_int_name, "broadcastInt") \
\
do_intrinsic(_VectorConvert, jdk_internal_vm_vector_VectorSupport, vector_convert_name, vector_convert_sig, F_S) \
do_signature(vector_convert_sig, "(ILjava/lang/Class;Ljava/lang/Class;I" \
"Ljava/lang/Class;Ljava/lang/Class;I" \
"Ljdk/internal/vm/vector/VectorSupport$VectorPayload;" \
"Ljdk/internal/vm/vector/VectorSupport$VectorSpecies;" \
"Ljdk/internal/vm/vector/VectorSupport$VectorConvertOp;)Ljdk/internal/vm/vector/VectorSupport$VectorPayload;") \
do_name(vector_convert_name, "convert") \
\
do_intrinsic(_VectorGatherOp, jdk_internal_vm_vector_VectorSupport, vector_gather_name, vector_gather_sig, F_S) \
do_signature(vector_gather_sig, "(Ljava/lang/Class;Ljava/lang/Class;ILjava/lang/Class;" \
"Ljava/lang/Object;J" \
"Ljdk/internal/vm/vector/VectorSupport$Vector;" \
"Ljava/lang/Object;I[II" \
"Ljdk/internal/vm/vector/VectorSupport$VectorSpecies;" \
"Ljdk/internal/vm/vector/VectorSupport$LoadVectorOperationWithMap;)" \
"Ljdk/internal/vm/vector/VectorSupport$Vector;") \
do_name(vector_gather_name, "loadWithMap") \
\
do_intrinsic(_VectorScatterOp, jdk_internal_vm_vector_VectorSupport, vector_scatter_name, vector_scatter_sig, F_S) \
do_signature(vector_scatter_sig, "(Ljava/lang/Class;Ljava/lang/Class;ILjava/lang/Class;" \
"Ljava/lang/Object;J" \
"Ljdk/internal/vm/vector/VectorSupport$Vector;Ljdk/internal/vm/vector/VectorSupport$Vector;" \
"Ljava/lang/Object;I[II" \
"Ljdk/internal/vm/vector/VectorSupport$StoreVectorOperationWithMap;)V") \
do_name(vector_scatter_name, "storeWithMap") \
\
do_intrinsic(_VectorRebox, jdk_internal_vm_vector_VectorSupport, vector_rebox_name, vector_rebox_sig, F_S) \
do_alias(vector_rebox_sig, object_object_signature) \
do_name(vector_rebox_name, "maybeRebox") \
\
\
/* (2) Bytecode intrinsics */ \
\
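Each do_intrinsic entry above names a static method on jdk.internal.vm.vector.VectorSupport together with its descriptor; the public jdk.incubator.vector classes funnel their operations through these entry points and pass a Java fallback that runs whenever the call is not intrinsified. A simplified toy illustration of that layering — class names and the operation id are invented for this sketch, and it is not the JDK implementation:

```java
import java.util.function.BiFunction;

// Toy illustration only. It mirrors the shape of vector_binary_op_sig above:
// (int, Class, Class, int, Object, Object, BiFunction) -> Object. HotSpot
// matches on the holder class and the method name ("binaryOp") and otherwise
// just runs the supplied fallback.
class ToyVectorSupport {
    static final int VECTOR_OP_ADD = 4; // hypothetical operation id

    static Object binaryOp(int oprId, Class<?> vectorClass, Class<?> elemClass,
                           int length, Object v1, Object v2,
                           BiFunction<Object, Object, Object> defaultImpl) {
        return defaultImpl.apply(v1, v2); // non-intrinsified path
    }
}

class ToyFloatVector {
    final float[] lanes;
    ToyFloatVector(float[] lanes) { this.lanes = lanes; }

    ToyFloatVector add(ToyFloatVector that) {
        return (ToyFloatVector) ToyVectorSupport.binaryOp(
                ToyVectorSupport.VECTOR_OP_ADD, ToyFloatVector.class, float.class,
                lanes.length, this, that,
                (a, b) -> {
                    float[] x = ((ToyFloatVector) a).lanes;
                    float[] y = ((ToyFloatVector) b).lanes;
                    float[] r = new float[x.length];
                    for (int i = 0; i < x.length; i++) r[i] = x[i] + y[i];
                    return new ToyFloatVector(r);
                });
    }
}
```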


@ -81,6 +81,16 @@
template(java_lang_Integer_IntegerCache, "java/lang/Integer$IntegerCache") \
template(java_lang_Long, "java/lang/Long") \
template(java_lang_Long_LongCache, "java/lang/Long$LongCache") \
\
template(jdk_internal_vm_vector_VectorSupport, "jdk/internal/vm/vector/VectorSupport") \
template(jdk_internal_vm_vector_VectorPayload, "jdk/internal/vm/vector/VectorSupport$VectorPayload") \
template(jdk_internal_vm_vector_Vector, "jdk/internal/vm/vector/VectorSupport$Vector") \
template(jdk_internal_vm_vector_VectorMask, "jdk/internal/vm/vector/VectorSupport$VectorMask") \
template(jdk_internal_vm_vector_VectorShuffle, "jdk/internal/vm/vector/VectorSupport$VectorShuffle") \
template(payload_name, "payload") \
template(ETYPE_name, "ETYPE") \
template(VLENGTH_name, "VLENGTH") \
\
template(java_lang_Shutdown, "java/lang/Shutdown") \
template(java_lang_ref_Reference, "java/lang/ref/Reference") \
template(java_lang_ref_SoftReference, "java/lang/ref/SoftReference") \
@ -768,7 +778,7 @@ class vmIntrinsics: AllStatic {
#undef VM_INTRINSIC_ENUM
ID_LIMIT,
LAST_COMPILER_INLINE = _getAndSetReference,
LAST_COMPILER_INLINE = _VectorScatterOp,
FIRST_MH_SIG_POLY = _invokeGeneric,
FIRST_MH_STATIC = _linkToVirtual,
LAST_MH_SIG_POLY = _linkToInterface,


@ -42,6 +42,7 @@
// - ConstantValue describes a constant
class ConstantOopReadValue;
class LocationValue;
class ObjectValue;
class ScopeValue: public ResourceObj {
@ -67,6 +68,11 @@ class ScopeValue: public ResourceObj {
return (ObjectValue*)this;
}
LocationValue* as_LocationValue() {
assert(is_location(), "must be");
return (LocationValue*)this;
}
// Serialization of debugging information
virtual void write_on(DebugInfoWriteStream* stream) = 0;
static ScopeValue* read_from(DebugInfoReadStream* stream);


@ -58,6 +58,7 @@ class Location {
lng, // Long held in one register
float_in_dbl, // Float held in double register
dbl, // Double held in one register
vector, // Vector in one register
addr, // JSR return address
narrowoop // Narrow Oop (please GC me!)
};


@ -281,6 +281,30 @@ public:
virtual Node *Ideal(PhaseGVN *phase, bool can_reshape);
};
//------------------------------MaxLNode---------------------------------------
// MAXimum of 2 longs.
class MaxLNode : public MaxNode {
public:
MaxLNode(Node *in1, Node *in2) : MaxNode(in1, in2) {}
virtual int Opcode() const;
virtual const Type *add_ring(const Type*, const Type*) const { return TypeLong::LONG; }
virtual const Type *add_id() const { return TypeLong::make(min_jlong); }
virtual const Type *bottom_type() const { return TypeLong::LONG; }
virtual uint ideal_reg() const { return Op_RegL; }
};
//------------------------------MinLNode---------------------------------------
// MINimum of 2 longs.
class MinLNode : public MaxNode {
public:
MinLNode(Node *in1, Node *in2) : MaxNode(in1, in2) {}
virtual int Opcode() const;
virtual const Type *add_ring(const Type*, const Type*) const { return TypeLong::LONG; }
virtual const Type *add_id() const { return TypeLong::make(max_jlong); }
virtual const Type *bottom_type() const { return TypeLong::LONG; }
virtual uint ideal_reg() const { return Op_RegL; }
};
//------------------------------MaxFNode---------------------------------------
// Maximum of 2 floats.
class MaxFNode : public MaxNode {
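MaxLNode and MinLNode give C2 ideal nodes for long min/max, complementing the existing int/float/double variants; among other things they support the scalar accumulation around long-vector MIN/MAX reductions. A hedged example of code where such a node could plausibly appear — the exact node shapes depend on what the compiler does with the loop:

```java
import jdk.incubator.vector.LongVector;
import jdk.incubator.vector.VectorOperators;
import jdk.incubator.vector.VectorSpecies;

public class MaxOfArray {
    static final VectorSpecies<Long> SPECIES = LongVector.SPECIES_PREFERRED;

    // Horizontal max of a long[]: a per-strip MAX reduction plus scalar
    // Math.max across strips and for the tail.
    static long max(long[] a) {
        long best = Long.MIN_VALUE;
        int i = 0;
        for (; i <= a.length - SPECIES.length(); i += SPECIES.length()) {
            best = Math.max(best,
                    LongVector.fromArray(SPECIES, a, i)
                              .reduceLanes(VectorOperators.MAX));
        }
        for (; i < a.length; i++) {
            best = Math.max(best, a[i]);
        }
        return best;
    }
}
```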


@ -743,6 +743,15 @@
product(bool, UseMontgomerySquareIntrinsic, false, DIAGNOSTIC, \
"Enables intrinsification of BigInteger.montgomerySquare()") \
\
product(bool, EnableVectorSupport, false, EXPERIMENTAL, \
"Enables VectorSupport intrinsics") \
\
product(bool, EnableVectorReboxing, false, EXPERIMENTAL, \
"Enables reboxing of vectors") \
\
product(bool, EnableVectorAggressiveReboxing, false, EXPERIMENTAL, \
"Enables aggressive reboxing of vectors") \
\
product(bool, UseTypeSpeculation, true, \
"Speculatively propagate types from profiles") \
\


@ -649,6 +649,28 @@ bool C2Compiler::is_intrinsic_supported(const methodHandle& method, bool is_virt
case vmIntrinsics::_isCompileConstant:
case vmIntrinsics::_Preconditions_checkIndex:
break;
case vmIntrinsics::_VectorUnaryOp:
case vmIntrinsics::_VectorBinaryOp:
case vmIntrinsics::_VectorTernaryOp:
case vmIntrinsics::_VectorBroadcastCoerced:
case vmIntrinsics::_VectorShuffleIota:
case vmIntrinsics::_VectorShuffleToVector:
case vmIntrinsics::_VectorLoadOp:
case vmIntrinsics::_VectorStoreOp:
case vmIntrinsics::_VectorGatherOp:
case vmIntrinsics::_VectorScatterOp:
case vmIntrinsics::_VectorReductionCoerced:
case vmIntrinsics::_VectorTest:
case vmIntrinsics::_VectorBlend:
case vmIntrinsics::_VectorRearrange:
case vmIntrinsics::_VectorCompare:
case vmIntrinsics::_VectorBroadcastInt:
case vmIntrinsics::_VectorConvert:
case vmIntrinsics::_VectorInsert:
case vmIntrinsics::_VectorExtract:
return EnableVectorSupport;
default:
return false;
}
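All of the _Vector* intrinsics are gated on the single EnableVectorSupport flag, so they become available as a group. A small example that, when intrinsified, would exercise several of them (roughly: the load, compare, blend/broadcast and store entry points); the mapping of each call to a specific intrinsic is approximate and the class name is illustrative:

```java
import jdk.incubator.vector.FloatVector;
import jdk.incubator.vector.VectorMask;
import jdk.incubator.vector.VectorOperators;
import jdk.incubator.vector.VectorSpecies;

public class ClampNegatives {
    static final VectorSpecies<Float> SPECIES = FloatVector.SPECIES_PREFERRED;

    // Replace negative elements of a with zero.
    static void clamp(float[] a) {
        int i = 0;
        for (; i <= a.length - SPECIES.length(); i += SPECIES.length()) {
            FloatVector v = FloatVector.fromArray(SPECIES, a, i);
            VectorMask<Float> neg = v.compare(VectorOperators.LT, 0.0f);
            v.blend(FloatVector.zero(SPECIES), neg).intoArray(a, i);
        }
        for (; i < a.length; i++) {     // scalar tail
            if (a[i] < 0.0f) a[i] = 0.0f;
        }
    }
}
```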


@ -536,7 +536,7 @@ class LateInlineStringCallGenerator : public LateInlineCallGenerator {
C->add_string_late_inline(this);
JVMState* new_jvms = DirectCallGenerator::generate(jvms);
JVMState* new_jvms = DirectCallGenerator::generate(jvms);
return new_jvms;
}
@ -560,7 +560,7 @@ class LateInlineBoxingCallGenerator : public LateInlineCallGenerator {
C->add_boxing_late_inline(this);
JVMState* new_jvms = DirectCallGenerator::generate(jvms);
JVMState* new_jvms = DirectCallGenerator::generate(jvms);
return new_jvms;
}
};
@ -569,6 +569,28 @@ CallGenerator* CallGenerator::for_boxing_late_inline(ciMethod* method, CallGener
return new LateInlineBoxingCallGenerator(method, inline_cg);
}
class LateInlineVectorReboxingCallGenerator : public LateInlineCallGenerator {
public:
LateInlineVectorReboxingCallGenerator(ciMethod* method, CallGenerator* inline_cg) :
LateInlineCallGenerator(method, inline_cg, /*is_pure=*/true) {}
virtual JVMState* generate(JVMState* jvms) {
Compile *C = Compile::current();
C->log_inline_id(this);
C->add_vector_reboxing_late_inline(this);
JVMState* new_jvms = DirectCallGenerator::generate(jvms);
return new_jvms;
}
};
// static CallGenerator* for_vector_reboxing_late_inline(ciMethod* m, CallGenerator* inline_cg);
CallGenerator* CallGenerator::for_vector_reboxing_late_inline(ciMethod* method, CallGenerator* inline_cg) {
return new LateInlineVectorReboxingCallGenerator(method, inline_cg);
}
//---------------------------WarmCallGenerator--------------------------------
// Internal class which handles initial deferral of inlining decisions.
class WarmCallGenerator : public CallGenerator {


@ -127,6 +127,7 @@ class CallGenerator : public ResourceObj {
static CallGenerator* for_mh_late_inline(ciMethod* caller, ciMethod* callee, bool input_not_const);
static CallGenerator* for_string_late_inline(ciMethod* m, CallGenerator* inline_cg);
static CallGenerator* for_boxing_late_inline(ciMethod* m, CallGenerator* inline_cg);
static CallGenerator* for_vector_reboxing_late_inline(ciMethod* m, CallGenerator* inline_cg);
// How to make a call but defer the decision whether to inline or not.
static CallGenerator* for_warm_call(WarmCallInfo* ci,


@ -290,9 +290,17 @@ Node* CheckCastPPNode::Identity(PhaseGVN* phase) {
if (_carry_dependency) {
return this;
}
// Toned down to rescue meeting at a Phi 3 different oops all implementing
// the same interface.
return (phase->type(in(1)) == phase->type(this)) ? in(1) : this;
const Type* t = phase->type(in(1));
if (EnableVectorReboxing && in(1)->Opcode() == Op_VectorBox) {
if (t->higher_equal_speculative(phase->type(this))) {
return in(1);
}
} else if (t == phase->type(this)) {
// Toned down to rescue meeting at a Phi 3 different oops all implementing
// the same interface.
return in(1);
}
return this;
}
//------------------------------Value------------------------------------------


@ -43,6 +43,7 @@
#include "opto/regmask.hpp"
#include "opto/runtime.hpp"
#include "opto/subnode.hpp"
#include "opto/vectornode.hpp"
#include "utilities/vmError.hpp"
// Portions of code courtesy of Clifford Click
@ -2387,6 +2388,47 @@ Node *PhiNode::Ideal(PhaseGVN *phase, bool can_reshape) {
}
#endif
// Phi (VB ... VB) => VB (Phi ...) (Phi ...)
if (EnableVectorReboxing && can_reshape && progress == NULL) {
PhaseIterGVN* igvn = phase->is_IterGVN();
bool all_inputs_are_equiv_vboxes = true;
for (uint i = 1; i < req(); ++i) {
Node* n = in(i);
if (in(i)->Opcode() != Op_VectorBox) {
all_inputs_are_equiv_vboxes = false;
break;
}
// Check that vector type of vboxes is equivalent
if (i != 1) {
if (Type::cmp(in(i-0)->in(VectorBoxNode::Value)->bottom_type(),
in(i-1)->in(VectorBoxNode::Value)->bottom_type()) != 0) {
all_inputs_are_equiv_vboxes = false;
break;
}
if (Type::cmp(in(i-0)->in(VectorBoxNode::Box)->bottom_type(),
in(i-1)->in(VectorBoxNode::Box)->bottom_type()) != 0) {
all_inputs_are_equiv_vboxes = false;
break;
}
}
}
if (all_inputs_are_equiv_vboxes) {
VectorBoxNode* vbox = static_cast<VectorBoxNode*>(in(1));
PhiNode* new_vbox_phi = new PhiNode(r, vbox->box_type());
PhiNode* new_vect_phi = new PhiNode(r, vbox->vec_type());
for (uint i = 1; i < req(); ++i) {
VectorBoxNode* old_vbox = static_cast<VectorBoxNode*>(in(i));
new_vbox_phi->set_req(i, old_vbox->in(VectorBoxNode::Box));
new_vect_phi->set_req(i, old_vbox->in(VectorBoxNode::Value));
}
igvn->register_new_node_with_optimizer(new_vbox_phi, this);
igvn->register_new_node_with_optimizer(new_vect_phi, this);
progress = new VectorBoxNode(igvn->C, new_vbox_phi, new_vect_phi, vbox->box_type(), vbox->vec_type());
}
}
return progress; // Return any progress
}
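
Note: the transform above rewrites a Phi over equivalent VectorBox nodes into a single VectorBox fed by two Phis (one for the box oop, one for the raw vector value), so reboxing can still be elided when control flow merges. A hedged Java-level sketch of the shape that produces such a merge (names are illustrative, not taken from this patch):

import jdk.incubator.vector.FloatVector;
import jdk.incubator.vector.VectorSpecies;

class PhiMergeExample {
    static final VectorSpecies<Float> SPECIES = FloatVector.SPECIES_256;

    static void scale(float[] a, float[] out, boolean negate) {
        FloatVector v = FloatVector.fromArray(SPECIES, a, 0);
        // Each branch produces a (conceptually boxed) vector; the merge point
        // is where C2 sees Phi(VectorBox, VectorBox) and can now sink the Phi
        // below the box instead of materializing both boxes.
        FloatVector r = negate ? v.neg() : v.mul(2.0f);
        r.intoArray(out, 0);
    }
}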

View file

@ -198,9 +198,10 @@ macro(LoopLimit)
macro(Mach)
macro(MachProj)
macro(MulAddS2I)
macro(MaxI)
macro(MaxL)
macro(MaxD)
macro(MaxF)
macro(MaxI)
macro(MemBarAcquire)
macro(LoadFence)
macro(SetVectMaskI)
@ -212,9 +213,10 @@ macro(MemBarReleaseLock)
macro(MemBarVolatile)
macro(MemBarStoreStore)
macro(MergeMem)
macro(MinD)
macro(MinF)
macro(MinI)
macro(MinL)
macro(MinF)
macro(MinD)
macro(ModD)
macro(ModF)
macro(ModI)
@ -229,6 +231,8 @@ macro(MulHiL)
macro(MulI)
macro(MulL)
macro(Multi)
macro(NegI)
macro(NegL)
macro(NegD)
macro(NegF)
macro(NeverBranch)
@ -324,6 +328,8 @@ macro(TailJump)
macro(MacroLogicV)
macro(ThreadLocal)
macro(Unlock)
macro(URShiftB)
macro(URShiftS)
macro(URShiftI)
macro(URShiftL)
macro(XorI)
@ -366,6 +372,7 @@ macro(AbsVI)
macro(AbsVL)
macro(AbsVF)
macro(AbsVD)
macro(NegVI)
macro(NegVF)
macro(NegVD)
macro(SqrtVD)
@ -395,7 +402,9 @@ macro(MaxV)
macro(MinReductionV)
macro(MaxReductionV)
macro(LoadVector)
macro(LoadVectorGather)
macro(StoreVector)
macro(StoreVectorScatter)
macro(Pack)
macro(PackB)
macro(PackS)
@ -424,3 +433,24 @@ macro(Digit)
macro(LowerCase)
macro(UpperCase)
macro(Whitespace)
macro(VectorBox)
macro(VectorBoxAllocate)
macro(VectorUnbox)
macro(VectorMaskWrapper)
macro(VectorMaskCmp)
macro(VectorTest)
macro(VectorBlend)
macro(VectorRearrange)
macro(VectorLoadMask)
macro(VectorLoadShuffle)
macro(VectorLoadConst)
macro(VectorStoreMask)
macro(VectorReinterpret)
macro(VectorCast)
macro(VectorCastB2X)
macro(VectorCastS2X)
macro(VectorCastI2X)
macro(VectorCastL2X)
macro(VectorCastF2X)
macro(VectorCastD2X)
macro(VectorInsert)

View file

@ -68,6 +68,7 @@
#include "opto/runtime.hpp"
#include "opto/stringopts.hpp"
#include "opto/type.hpp"
#include "opto/vector.hpp"
#include "opto/vectornode.hpp"
#include "runtime/arguments.hpp"
#include "runtime/globals_extension.hpp"
@ -412,6 +413,7 @@ void Compile::remove_useless_nodes(Unique_Node_List &useful) {
remove_useless_late_inlines(&_string_late_inlines, useful);
remove_useless_late_inlines(&_boxing_late_inlines, useful);
remove_useless_late_inlines(&_late_inlines, useful);
remove_useless_late_inlines(&_vector_reboxing_late_inlines, useful);
debug_only(verify_graph_edges(true/*check for no_dead_code*/);)
}
@ -545,6 +547,7 @@ Compile::Compile( ciEnv* ci_env, ciMethod* target, int osr_bci,
_late_inlines(comp_arena(), 2, 0, NULL),
_string_late_inlines(comp_arena(), 2, 0, NULL),
_boxing_late_inlines(comp_arena(), 2, 0, NULL),
_vector_reboxing_late_inlines(comp_arena(), 2, 0, NULL),
_late_inlines_pos(0),
_number_of_mh_late_inlines(0),
_print_inlining_stream(NULL),
@ -1962,6 +1965,8 @@ void Compile::inline_incrementally(PhaseIterGVN& igvn) {
inline_incrementally_cleanup(igvn);
print_method(PHASE_INCREMENTAL_INLINE_STEP, 3);
if (failing()) return;
}
assert( igvn._worklist.size() == 0, "should be done with igvn" );
@ -2096,6 +2101,16 @@ void Compile::Optimize() {
// so keep only the actual candidates for optimizations.
cleanup_expensive_nodes(igvn);
assert(EnableVectorSupport || !has_vbox_nodes(), "sanity");
if (EnableVectorSupport && has_vbox_nodes()) {
TracePhase tp("", &timers[_t_vector]);
PhaseVector pv(igvn);
pv.optimize_vector_boxes();
print_method(PHASE_ITER_GVN_AFTER_VECTOR, 2);
}
assert(!has_vbox_nodes(), "sanity");
if (!failing() && RenumberLiveNodes && live_nodes() + NodeLimitFudgeFactor < unique()) {
Compile::TracePhase tp("", &timers[_t_renumberLive]);
initial_gvn()->replace_with(&igvn);
@ -2272,6 +2287,35 @@ void Compile::Optimize() {
DEBUG_ONLY(set_phase_optimize_finished();)
}
void Compile::inline_vector_reboxing_calls() {
if (C->_vector_reboxing_late_inlines.length() > 0) {
PhaseGVN* gvn = C->initial_gvn();
_late_inlines_pos = C->_late_inlines.length();
while (_vector_reboxing_late_inlines.length() > 0) {
CallGenerator* cg = _vector_reboxing_late_inlines.pop();
cg->do_late_inline();
if (failing()) return;
print_method(PHASE_INLINE_VECTOR_REBOX, cg->call_node());
}
_vector_reboxing_late_inlines.trunc_to(0);
}
}
bool Compile::has_vbox_nodes() {
if (C->_vector_reboxing_late_inlines.length() > 0) {
return true;
}
for (int macro_idx = C->macro_count() - 1; macro_idx >= 0; macro_idx--) {
Node * n = C->macro_node(macro_idx);
assert(n->is_macro(), "only macro nodes expected here");
if (n->Opcode() == Op_VectorUnbox || n->Opcode() == Op_VectorBox || n->Opcode() == Op_VectorBoxAllocate) {
return true;
}
}
return false;
}
//---------------------------- Bitwise operation packing optimization ---------------------------
static bool is_vector_unary_bitwise_op(Node* n) {
@ -2618,8 +2662,8 @@ void Compile::Code_Gen() {
if (failing()) {
return;
}
print_method(PHASE_AFTER_MATCHING, 3);
}
// In debug mode can dump m._nodes.dump() for mapping of ideal to machine
// nodes. Mapping is only valid at the root of each matched subtree.
NOT_PRODUCT( verify_graph_edges(); )
@ -2798,7 +2842,8 @@ void Compile::final_graph_reshaping_impl( Node *n, Final_Reshape_Counts &frc) {
// Check for commutative opcode
switch( nop ) {
case Op_AddI: case Op_AddF: case Op_AddD: case Op_AddL:
case Op_MaxI: case Op_MinI:
case Op_MaxI: case Op_MaxL: case Op_MaxF: case Op_MaxD:
case Op_MinI: case Op_MinL: case Op_MinF: case Op_MinD:
case Op_MulI: case Op_MulF: case Op_MulD: case Op_MulL:
case Op_AndL: case Op_XorL: case Op_OrL:
case Op_AndI: case Op_XorI: case Op_OrI: {
@ -3348,6 +3393,8 @@ void Compile::final_graph_reshaping_main_switch(Node* n, Final_Reshape_Counts& f
case Op_LoadVector:
case Op_StoreVector:
case Op_LoadVectorGather:
case Op_StoreVectorScatter:
break;
case Op_AddReductionVI:
@ -4568,26 +4615,43 @@ void Compile::sort_macro_nodes() {
}
}
void Compile::print_method(CompilerPhaseType cpt, int level, int idx) {
void Compile::print_method(CompilerPhaseType cpt, const char *name, int level, int idx) {
EventCompilerPhase event;
if (event.should_commit()) {
CompilerEvent::PhaseEvent::post(event, C->_latest_stage_start_counter, cpt, C->_compile_id, level);
}
#ifndef PRODUCT
if (should_print(level)) {
char output[1024];
if (idx != 0) {
jio_snprintf(output, sizeof(output), "%s:%d", CompilerPhaseTypeHelper::to_string(cpt), idx);
} else {
jio_snprintf(output, sizeof(output), "%s", CompilerPhaseTypeHelper::to_string(cpt));
}
_printer->print_method(output, level);
_printer->print_method(name, level);
}
#endif
C->_latest_stage_start_counter.stamp();
}
void Compile::print_method(CompilerPhaseType cpt, int level, int idx) {
char output[1024];
#ifndef PRODUCT
if (idx != 0) {
jio_snprintf(output, sizeof(output), "%s:%d", CompilerPhaseTypeHelper::to_string(cpt), idx);
} else {
jio_snprintf(output, sizeof(output), "%s", CompilerPhaseTypeHelper::to_string(cpt));
}
#endif
print_method(cpt, output, level, idx);
}
void Compile::print_method(CompilerPhaseType cpt, Node* n, int level) {
ResourceMark rm;
stringStream ss;
ss.print_raw(CompilerPhaseTypeHelper::to_string(cpt));
if (n != NULL) {
ss.print(": %d %s ", n->_idx, NodeClassNames[n->Opcode()]);
} else {
ss.print_raw(": NULL");
}
C->print_method(cpt, ss.as_string(), level);
}
void Compile::end_method(int level) {
EventCompilerPhase event;
if (event.should_commit()) {

View file

@ -382,6 +382,8 @@ class Compile : public Phase {
GrowableArray<CallGenerator*> _boxing_late_inlines; // same but for boxing operations
GrowableArray<CallGenerator*> _vector_reboxing_late_inlines; // same but for vector reboxing operations
int _late_inlines_pos; // Where in the queue should the next late inlining candidate go (emulate depth first inlining)
uint _number_of_mh_late_inlines; // number of method handle late inlining still pending
@ -644,7 +646,9 @@ class Compile : public Phase {
#endif
}
void print_method(CompilerPhaseType cpt, const char *name, int level = 1, int idx = 0);
void print_method(CompilerPhaseType cpt, int level = 1, int idx = 0);
void print_method(CompilerPhaseType cpt, Node* n, int level = 3);
#ifndef PRODUCT
void igv_print_method_to_file(const char* phase_name = "Debug", bool append = false);
@ -865,10 +869,13 @@ class Compile : public Phase {
bool allow_intrinsics = true);
bool should_delay_inlining(ciMethod* call_method, JVMState* jvms) {
return should_delay_string_inlining(call_method, jvms) ||
should_delay_boxing_inlining(call_method, jvms);
should_delay_boxing_inlining(call_method, jvms) ||
should_delay_vector_inlining(call_method, jvms);
}
bool should_delay_string_inlining(ciMethod* call_method, JVMState* jvms);
bool should_delay_boxing_inlining(ciMethod* call_method, JVMState* jvms);
bool should_delay_vector_inlining(ciMethod* call_method, JVMState* jvms);
bool should_delay_vector_reboxing_inlining(ciMethod* call_method, JVMState* jvms);
// Helper functions to identify inlining potential at call-site
ciMethod* optimize_virtual_call(ciMethod* caller, int bci, ciInstanceKlass* klass,
@ -940,6 +947,10 @@ class Compile : public Phase {
_boxing_late_inlines.push(cg);
}
void add_vector_reboxing_late_inline(CallGenerator* cg) {
_vector_reboxing_late_inlines.push(cg);
}
void remove_useless_late_inlines(GrowableArray<CallGenerator*>* inlines, Unique_Node_List &useful);
void process_print_inlining();
@ -969,6 +980,9 @@ class Compile : public Phase {
bool optimize_loops(PhaseIterGVN& igvn, LoopOptsMode mode);
void remove_root_to_sfpts_edges(PhaseIterGVN& igvn);
void inline_vector_reboxing_calls();
bool has_vbox_nodes();
// Matching, CFG layout, allocation, code generation
PhaseCFG* cfg() { return _cfg; }
bool has_java_calls() const { return _java_calls > 0; }

View file

@ -135,6 +135,8 @@ CallGenerator* Compile::call_generator(ciMethod* callee, int vtable_index, bool
if (cg->does_virtual_dispatch()) {
cg_intrinsic = cg;
cg = NULL;
} else if (should_delay_vector_inlining(callee, jvms)) {
return CallGenerator::for_late_inline(callee, cg);
} else {
return cg;
}
@ -185,6 +187,8 @@ CallGenerator* Compile::call_generator(ciMethod* callee, int vtable_index, bool
return CallGenerator::for_string_late_inline(callee, cg);
} else if (should_delay_boxing_inlining(callee, jvms)) {
return CallGenerator::for_boxing_late_inline(callee, cg);
} else if (should_delay_vector_reboxing_inlining(callee, jvms)) {
return CallGenerator::for_vector_reboxing_late_inline(callee, cg);
} else if ((should_delay || AlwaysIncrementalInline)) {
return CallGenerator::for_late_inline(callee, cg);
}
@ -422,6 +426,14 @@ bool Compile::should_delay_boxing_inlining(ciMethod* call_method, JVMState* jvms
return false;
}
bool Compile::should_delay_vector_inlining(ciMethod* call_method, JVMState* jvms) {
return EnableVectorSupport && call_method->is_vector_method();
}
bool Compile::should_delay_vector_reboxing_inlining(ciMethod* call_method, JVMState* jvms) {
return EnableVectorSupport && (call_method->intrinsic_id() == vmIntrinsics::_VectorRebox);
}
// uncommon-trap call-sites where callee is unloaded, uninitialized or will not link
bool Parse::can_not_compile_call_site(ciMethod *dest_method, ciInstanceKlass* klass) {
// Additional inputs to consider...

View file

@ -686,6 +686,7 @@ void PhaseCFG::adjust_register_pressure(Node* n, Block* block, intptr_t* recalc_
case Op_StoreP:
case Op_StoreN:
case Op_StoreVector:
case Op_StoreVectorScatter:
case Op_StoreNKlass:
for (uint k = 1; k < m->req(); k++) {
Node *in = m->in(k);

View file

@ -37,15 +37,13 @@
#include "opto/addnode.hpp"
#include "opto/arraycopynode.hpp"
#include "opto/c2compiler.hpp"
#include "opto/callGenerator.hpp"
#include "opto/castnode.hpp"
#include "opto/cfgnode.hpp"
#include "opto/convertnode.hpp"
#include "opto/countbitsnode.hpp"
#include "opto/intrinsicnode.hpp"
#include "opto/idealKit.hpp"
#include "opto/library_call.hpp"
#include "opto/mathexactnode.hpp"
#include "opto/movenode.hpp"
#include "opto/mulnode.hpp"
#include "opto/narrowptrnode.hpp"
#include "opto/opaquenode.hpp"
@ -60,291 +58,6 @@
#include "utilities/macros.hpp"
#include "utilities/powerOfTwo.hpp"
class LibraryIntrinsic : public InlineCallGenerator {
// Extend the set of intrinsics known to the runtime:
public:
private:
bool _is_virtual;
bool _does_virtual_dispatch;
int8_t _predicates_count; // Intrinsic is predicated by several conditions
int8_t _last_predicate; // Last generated predicate
vmIntrinsics::ID _intrinsic_id;
public:
LibraryIntrinsic(ciMethod* m, bool is_virtual, int predicates_count, bool does_virtual_dispatch, vmIntrinsics::ID id)
: InlineCallGenerator(m),
_is_virtual(is_virtual),
_does_virtual_dispatch(does_virtual_dispatch),
_predicates_count((int8_t)predicates_count),
_last_predicate((int8_t)-1),
_intrinsic_id(id)
{
}
virtual bool is_intrinsic() const { return true; }
virtual bool is_virtual() const { return _is_virtual; }
virtual bool is_predicated() const { return _predicates_count > 0; }
virtual int predicates_count() const { return _predicates_count; }
virtual bool does_virtual_dispatch() const { return _does_virtual_dispatch; }
virtual JVMState* generate(JVMState* jvms);
virtual Node* generate_predicate(JVMState* jvms, int predicate);
vmIntrinsics::ID intrinsic_id() const { return _intrinsic_id; }
};
// Local helper class for LibraryIntrinsic:
class LibraryCallKit : public GraphKit {
private:
LibraryIntrinsic* _intrinsic; // the library intrinsic being called
Node* _result; // the result node, if any
int _reexecute_sp; // the stack pointer when bytecode needs to be reexecuted
const TypeOopPtr* sharpen_unsafe_type(Compile::AliasType* alias_type, const TypePtr *adr_type);
public:
LibraryCallKit(JVMState* jvms, LibraryIntrinsic* intrinsic)
: GraphKit(jvms),
_intrinsic(intrinsic),
_result(NULL)
{
// Check if this is a root compile. In that case we don't have a caller.
if (!jvms->has_method()) {
_reexecute_sp = sp();
} else {
// Find out how many arguments the interpreter needs when deoptimizing
// and save the stack pointer value so it can used by uncommon_trap.
// We find the argument count by looking at the declared signature.
bool ignored_will_link;
ciSignature* declared_signature = NULL;
ciMethod* ignored_callee = caller()->get_method_at_bci(bci(), ignored_will_link, &declared_signature);
const int nargs = declared_signature->arg_size_for_bc(caller()->java_code_at_bci(bci()));
_reexecute_sp = sp() + nargs; // "push" arguments back on stack
}
}
virtual LibraryCallKit* is_LibraryCallKit() const { return (LibraryCallKit*)this; }
ciMethod* caller() const { return jvms()->method(); }
int bci() const { return jvms()->bci(); }
LibraryIntrinsic* intrinsic() const { return _intrinsic; }
vmIntrinsics::ID intrinsic_id() const { return _intrinsic->intrinsic_id(); }
ciMethod* callee() const { return _intrinsic->method(); }
bool try_to_inline(int predicate);
Node* try_to_predicate(int predicate);
void push_result() {
// Push the result onto the stack.
if (!stopped() && result() != NULL) {
BasicType bt = result()->bottom_type()->basic_type();
push_node(bt, result());
}
}
private:
void fatal_unexpected_iid(vmIntrinsics::ID iid) {
fatal("unexpected intrinsic %d: %s", iid, vmIntrinsics::name_at(iid));
}
void set_result(Node* n) { assert(_result == NULL, "only set once"); _result = n; }
void set_result(RegionNode* region, PhiNode* value);
Node* result() { return _result; }
virtual int reexecute_sp() { return _reexecute_sp; }
// Helper functions to inline natives
Node* generate_guard(Node* test, RegionNode* region, float true_prob);
Node* generate_slow_guard(Node* test, RegionNode* region);
Node* generate_fair_guard(Node* test, RegionNode* region);
Node* generate_negative_guard(Node* index, RegionNode* region,
// resulting CastII of index:
Node* *pos_index = NULL);
Node* generate_limit_guard(Node* offset, Node* subseq_length,
Node* array_length,
RegionNode* region);
void generate_string_range_check(Node* array, Node* offset,
Node* length, bool char_count);
Node* generate_current_thread(Node* &tls_output);
Node* load_mirror_from_klass(Node* klass);
Node* load_klass_from_mirror_common(Node* mirror, bool never_see_null,
RegionNode* region, int null_path,
int offset);
Node* load_klass_from_mirror(Node* mirror, bool never_see_null,
RegionNode* region, int null_path) {
int offset = java_lang_Class::klass_offset();
return load_klass_from_mirror_common(mirror, never_see_null,
region, null_path,
offset);
}
Node* load_array_klass_from_mirror(Node* mirror, bool never_see_null,
RegionNode* region, int null_path) {
int offset = java_lang_Class::array_klass_offset();
return load_klass_from_mirror_common(mirror, never_see_null,
region, null_path,
offset);
}
Node* generate_access_flags_guard(Node* kls,
int modifier_mask, int modifier_bits,
RegionNode* region);
Node* generate_interface_guard(Node* kls, RegionNode* region);
Node* generate_hidden_class_guard(Node* kls, RegionNode* region);
Node* generate_array_guard(Node* kls, RegionNode* region) {
return generate_array_guard_common(kls, region, false, false);
}
Node* generate_non_array_guard(Node* kls, RegionNode* region) {
return generate_array_guard_common(kls, region, false, true);
}
Node* generate_objArray_guard(Node* kls, RegionNode* region) {
return generate_array_guard_common(kls, region, true, false);
}
Node* generate_non_objArray_guard(Node* kls, RegionNode* region) {
return generate_array_guard_common(kls, region, true, true);
}
Node* generate_array_guard_common(Node* kls, RegionNode* region,
bool obj_array, bool not_array);
Node* generate_virtual_guard(Node* obj_klass, RegionNode* slow_region);
CallJavaNode* generate_method_call(vmIntrinsics::ID method_id,
bool is_virtual = false, bool is_static = false);
CallJavaNode* generate_method_call_static(vmIntrinsics::ID method_id) {
return generate_method_call(method_id, false, true);
}
CallJavaNode* generate_method_call_virtual(vmIntrinsics::ID method_id) {
return generate_method_call(method_id, true, false);
}
Node * load_field_from_object(Node * fromObj, const char * fieldName, const char * fieldTypeString, bool is_exact, bool is_static, ciInstanceKlass * fromKls);
Node * field_address_from_object(Node * fromObj, const char * fieldName, const char * fieldTypeString, bool is_exact, bool is_static, ciInstanceKlass * fromKls);
Node* make_string_method_node(int opcode, Node* str1_start, Node* cnt1, Node* str2_start, Node* cnt2, StrIntrinsicNode::ArgEnc ae);
bool inline_string_compareTo(StrIntrinsicNode::ArgEnc ae);
bool inline_string_indexOf(StrIntrinsicNode::ArgEnc ae);
bool inline_string_indexOfI(StrIntrinsicNode::ArgEnc ae);
Node* make_indexOf_node(Node* src_start, Node* src_count, Node* tgt_start, Node* tgt_count,
RegionNode* region, Node* phi, StrIntrinsicNode::ArgEnc ae);
bool inline_string_indexOfChar(StrIntrinsicNode::ArgEnc ae);
bool inline_string_equals(StrIntrinsicNode::ArgEnc ae);
bool inline_string_toBytesU();
bool inline_string_getCharsU();
bool inline_string_copy(bool compress);
bool inline_string_char_access(bool is_store);
Node* round_double_node(Node* n);
bool runtime_math(const TypeFunc* call_type, address funcAddr, const char* funcName);
bool inline_math_native(vmIntrinsics::ID id);
bool inline_math(vmIntrinsics::ID id);
bool inline_double_math(vmIntrinsics::ID id);
template <typename OverflowOp>
bool inline_math_overflow(Node* arg1, Node* arg2);
void inline_math_mathExact(Node* math, Node* test);
bool inline_math_addExactI(bool is_increment);
bool inline_math_addExactL(bool is_increment);
bool inline_math_multiplyExactI();
bool inline_math_multiplyExactL();
bool inline_math_multiplyHigh();
bool inline_math_negateExactI();
bool inline_math_negateExactL();
bool inline_math_subtractExactI(bool is_decrement);
bool inline_math_subtractExactL(bool is_decrement);
bool inline_min_max(vmIntrinsics::ID id);
bool inline_notify(vmIntrinsics::ID id);
Node* generate_min_max(vmIntrinsics::ID id, Node* x, Node* y);
// This returns Type::AnyPtr, RawPtr, or OopPtr.
int classify_unsafe_addr(Node* &base, Node* &offset, BasicType type);
Node* make_unsafe_address(Node*& base, Node* offset, DecoratorSet decorators, BasicType type = T_ILLEGAL, bool can_cast = false);
typedef enum { Relaxed, Opaque, Volatile, Acquire, Release } AccessKind;
DecoratorSet mo_decorator_for_access_kind(AccessKind kind);
bool inline_unsafe_access(bool is_store, BasicType type, AccessKind kind, bool is_unaligned);
static bool klass_needs_init_guard(Node* kls);
bool inline_unsafe_allocate();
bool inline_unsafe_newArray(bool uninitialized);
bool inline_unsafe_writeback0();
bool inline_unsafe_writebackSync0(bool is_pre);
bool inline_unsafe_copyMemory();
bool inline_native_currentThread();
bool inline_native_time_funcs(address method, const char* funcName);
#ifdef JFR_HAVE_INTRINSICS
bool inline_native_classID();
bool inline_native_getEventWriter();
#endif
bool inline_native_Class_query(vmIntrinsics::ID id);
bool inline_native_subtype_check();
bool inline_native_getLength();
bool inline_array_copyOf(bool is_copyOfRange);
bool inline_array_equals(StrIntrinsicNode::ArgEnc ae);
bool inline_preconditions_checkIndex();
void copy_to_clone(Node* obj, Node* alloc_obj, Node* obj_size, bool is_array);
bool inline_native_clone(bool is_virtual);
bool inline_native_Reflection_getCallerClass();
// Helper function for inlining native object hash method
bool inline_native_hashcode(bool is_virtual, bool is_static);
bool inline_native_getClass();
// Helper functions for inlining arraycopy
bool inline_arraycopy();
AllocateArrayNode* tightly_coupled_allocation(Node* ptr,
RegionNode* slow_region);
JVMState* arraycopy_restore_alloc_state(AllocateArrayNode* alloc, int& saved_reexecute_sp);
void arraycopy_move_allocation_here(AllocateArrayNode* alloc, Node* dest, JVMState* saved_jvms, int saved_reexecute_sp,
uint new_idx);
typedef enum { LS_get_add, LS_get_set, LS_cmp_swap, LS_cmp_swap_weak, LS_cmp_exchange } LoadStoreKind;
bool inline_unsafe_load_store(BasicType type, LoadStoreKind kind, AccessKind access_kind);
bool inline_unsafe_fence(vmIntrinsics::ID id);
bool inline_onspinwait();
bool inline_fp_conversions(vmIntrinsics::ID id);
bool inline_number_methods(vmIntrinsics::ID id);
bool inline_reference_get();
bool inline_Class_cast();
bool inline_aescrypt_Block(vmIntrinsics::ID id);
bool inline_cipherBlockChaining_AESCrypt(vmIntrinsics::ID id);
bool inline_electronicCodeBook_AESCrypt(vmIntrinsics::ID id);
bool inline_counterMode_AESCrypt(vmIntrinsics::ID id);
Node* inline_cipherBlockChaining_AESCrypt_predicate(bool decrypting);
Node* inline_electronicCodeBook_AESCrypt_predicate(bool decrypting);
Node* inline_counterMode_AESCrypt_predicate();
Node* get_key_start_from_aescrypt_object(Node* aescrypt_object);
Node* get_original_key_start_from_aescrypt_object(Node* aescrypt_object);
bool inline_ghash_processBlocks();
bool inline_base64_encodeBlock();
bool inline_digestBase_implCompress(vmIntrinsics::ID id);
bool inline_digestBase_implCompressMB(int predicate);
bool inline_digestBase_implCompressMB(Node* digestBaseObj, ciInstanceKlass* instklass,
bool long_state, address stubAddr, const char *stubName,
Node* src_start, Node* ofs, Node* limit);
Node* get_state_from_digest_object(Node *digestBase_object);
Node* get_long_state_from_digest_object(Node *digestBase_object);
Node* inline_digestBase_implCompressMB_predicate(int predicate);
bool inline_encodeISOArray();
bool inline_updateCRC32();
bool inline_updateBytesCRC32();
bool inline_updateByteBufferCRC32();
Node* get_table_from_crc32c_class(ciInstanceKlass *crc32c_class);
bool inline_updateBytesCRC32C();
bool inline_updateDirectByteBufferCRC32C();
bool inline_updateBytesAdler32();
bool inline_updateByteBufferAdler32();
bool inline_multiplyToLen();
bool inline_hasNegatives();
bool inline_squareToLen();
bool inline_mulAdd();
bool inline_montgomeryMultiply();
bool inline_montgomerySquare();
bool inline_bigIntegerShift(bool isRightShift);
bool inline_vectorizedMismatch();
bool inline_fma(vmIntrinsics::ID id);
bool inline_character_compare(vmIntrinsics::ID id);
bool inline_fp_min_max(vmIntrinsics::ID id);
bool inline_profileBoolean();
bool inline_isCompileConstant();
void clear_upper_avx() {
#ifdef X86
if (UseAVX >= 2) {
C->set_clear_upper_avx(true);
}
#endif
}
};
//---------------------------make_vm_intrinsic----------------------------
CallGenerator* Compile::make_vm_intrinsic(ciMethod* m, bool is_virtual) {
vmIntrinsics::ID id = m->intrinsic_id();
@ -453,6 +166,7 @@ JVMState* LibraryIntrinsic::generate(JVMState* jvms) {
}
C->gather_intrinsic_statistics(intrinsic_id(), is_virtual(), Compile::_intrinsic_failed);
C->print_inlining_update(this);
return NULL;
}
@ -530,7 +244,6 @@ bool LibraryCallKit::try_to_inline(int predicate) {
}
assert(merged_memory(), "");
switch (intrinsic_id()) {
case vmIntrinsics::_hashCode: return inline_native_hashcode(intrinsic()->is_virtual(), !is_static);
case vmIntrinsics::_identityHashCode: return inline_native_hashcode(/*!virtual*/ false, is_static);
@ -912,6 +625,45 @@ bool LibraryCallKit::try_to_inline(int predicate) {
case vmIntrinsics::_minD:
return inline_fp_min_max(intrinsic_id());
case vmIntrinsics::_VectorUnaryOp:
return inline_vector_nary_operation(1);
case vmIntrinsics::_VectorBinaryOp:
return inline_vector_nary_operation(2);
case vmIntrinsics::_VectorTernaryOp:
return inline_vector_nary_operation(3);
case vmIntrinsics::_VectorBroadcastCoerced:
return inline_vector_broadcast_coerced();
case vmIntrinsics::_VectorShuffleIota:
return inline_vector_shuffle_iota();
case vmIntrinsics::_VectorShuffleToVector:
return inline_vector_shuffle_to_vector();
case vmIntrinsics::_VectorLoadOp:
return inline_vector_mem_operation(/*is_store=*/false);
case vmIntrinsics::_VectorStoreOp:
return inline_vector_mem_operation(/*is_store=*/true);
case vmIntrinsics::_VectorGatherOp:
return inline_vector_gather_scatter(/*is_scatter*/ false);
case vmIntrinsics::_VectorScatterOp:
return inline_vector_gather_scatter(/*is_scatter*/ true);
case vmIntrinsics::_VectorReductionCoerced:
return inline_vector_reduction();
case vmIntrinsics::_VectorTest:
return inline_vector_test();
case vmIntrinsics::_VectorBlend:
return inline_vector_blend();
case vmIntrinsics::_VectorRearrange:
return inline_vector_rearrange();
case vmIntrinsics::_VectorCompare:
return inline_vector_compare();
case vmIntrinsics::_VectorBroadcastInt:
return inline_vector_broadcast_int();
case vmIntrinsics::_VectorConvert:
return inline_vector_convert();
case vmIntrinsics::_VectorInsert:
return inline_vector_insert();
case vmIntrinsics::_VectorExtract:
return inline_vector_extract();
default:
// If you get here, it may be that someone has added a new intrinsic
// to the list in vmSymbols.hpp without implementing it here.
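As orientation for the dispatch above: each case routes one of the vector intrinsic IDs into its inline_vector_* expander. A hedged sketch of user code whose element-wise add compiles down through _VectorBinaryOp and inline_vector_nary_operation(2); the species constant, loop shape, and array names are assumptions for the example only.

import jdk.incubator.vector.FloatVector;
import jdk.incubator.vector.VectorSpecies;

class BinaryOpExample {
    static final VectorSpecies<Float> SPECIES = FloatVector.SPECIES_PREFERRED;

    static void add(float[] a, float[] b, float[] c) {
        for (int i = 0; i < SPECIES.loopBound(a.length); i += SPECIES.length()) {
            FloatVector va = FloatVector.fromArray(SPECIES, a, i);
            FloatVector vb = FloatVector.fromArray(SPECIES, b, i);
            // add() is an element-wise binary op; C2 reaches it through the
            // _VectorBinaryOp intrinsic handled in the switch above.
            va.add(vb).intoArray(c, i);
        }
    }
}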
@ -2255,7 +2007,7 @@ LibraryCallKit::classify_unsafe_addr(Node* &base, Node* &offset, BasicType type)
}
}
inline Node* LibraryCallKit::make_unsafe_address(Node*& base, Node* offset, DecoratorSet decorators, BasicType type, bool can_cast) {
Node* LibraryCallKit::make_unsafe_address(Node*& base, Node* offset, DecoratorSet decorators, BasicType type, bool can_cast) {
Node* uncasted_base = base;
int kind = classify_unsafe_addr(uncasted_base, offset, type);
if (kind == Type::RawPtr) {

View file

@ -0,0 +1,348 @@
/*
* Copyright (c) 2020, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*
*/
#include "ci/ciMethod.hpp"
#include "classfile/javaClasses.hpp"
#include "opto/callGenerator.hpp"
#include "opto/graphKit.hpp"
#include "opto/castnode.hpp"
#include "opto/convertnode.hpp"
#include "opto/intrinsicnode.hpp"
#include "opto/movenode.hpp"
class LibraryIntrinsic : public InlineCallGenerator {
// Extend the set of intrinsics known to the runtime:
public:
private:
bool _is_virtual;
bool _does_virtual_dispatch;
int8_t _predicates_count; // Intrinsic is predicated by several conditions
int8_t _last_predicate; // Last generated predicate
vmIntrinsics::ID _intrinsic_id;
public:
LibraryIntrinsic(ciMethod* m, bool is_virtual, int predicates_count, bool does_virtual_dispatch, vmIntrinsics::ID id)
: InlineCallGenerator(m),
_is_virtual(is_virtual),
_does_virtual_dispatch(does_virtual_dispatch),
_predicates_count((int8_t)predicates_count),
_last_predicate((int8_t)-1),
_intrinsic_id(id)
{
}
virtual bool is_intrinsic() const { return true; }
virtual bool is_virtual() const { return _is_virtual; }
virtual bool is_predicated() const { return _predicates_count > 0; }
virtual int predicates_count() const { return _predicates_count; }
virtual bool does_virtual_dispatch() const { return _does_virtual_dispatch; }
virtual JVMState* generate(JVMState* jvms);
virtual Node* generate_predicate(JVMState* jvms, int predicate);
vmIntrinsics::ID intrinsic_id() const { return _intrinsic_id; }
};
// Local helper class for LibraryIntrinsic:
class LibraryCallKit : public GraphKit {
private:
LibraryIntrinsic* _intrinsic; // the library intrinsic being called
Node* _result; // the result node, if any
int _reexecute_sp; // the stack pointer when bytecode needs to be reexecuted
const TypeOopPtr* sharpen_unsafe_type(Compile::AliasType* alias_type, const TypePtr *adr_type);
public:
LibraryCallKit(JVMState* jvms, LibraryIntrinsic* intrinsic)
: GraphKit(jvms),
_intrinsic(intrinsic),
_result(NULL)
{
// Check if this is a root compile. In that case we don't have a caller.
if (!jvms->has_method()) {
_reexecute_sp = sp();
} else {
// Find out how many arguments the interpreter needs when deoptimizing
// and save the stack pointer value so it can used by uncommon_trap.
// We find the argument count by looking at the declared signature.
bool ignored_will_link;
ciSignature* declared_signature = NULL;
ciMethod* ignored_callee = caller()->get_method_at_bci(bci(), ignored_will_link, &declared_signature);
const int nargs = declared_signature->arg_size_for_bc(caller()->java_code_at_bci(bci()));
_reexecute_sp = sp() + nargs; // "push" arguments back on stack
}
}
virtual LibraryCallKit* is_LibraryCallKit() const { return (LibraryCallKit*)this; }
ciMethod* caller() const { return jvms()->method(); }
int bci() const { return jvms()->bci(); }
LibraryIntrinsic* intrinsic() const { return _intrinsic; }
vmIntrinsics::ID intrinsic_id() const { return _intrinsic->intrinsic_id(); }
ciMethod* callee() const { return _intrinsic->method(); }
bool try_to_inline(int predicate);
Node* try_to_predicate(int predicate);
void push_result() {
// Push the result onto the stack.
if (!stopped() && result() != NULL) {
BasicType bt = result()->bottom_type()->basic_type();
push_node(bt, result());
}
}
private:
void fatal_unexpected_iid(vmIntrinsics::ID iid) {
fatal("unexpected intrinsic %d: %s", iid, vmIntrinsics::name_at(iid));
}
void set_result(Node* n) { assert(_result == NULL, "only set once"); _result = n; }
void set_result(RegionNode* region, PhiNode* value);
Node* result() { return _result; }
virtual int reexecute_sp() { return _reexecute_sp; }
// Helper functions to inline natives
Node* generate_guard(Node* test, RegionNode* region, float true_prob);
Node* generate_slow_guard(Node* test, RegionNode* region);
Node* generate_fair_guard(Node* test, RegionNode* region);
Node* generate_negative_guard(Node* index, RegionNode* region,
// resulting CastII of index:
Node* *pos_index = NULL);
Node* generate_limit_guard(Node* offset, Node* subseq_length,
Node* array_length,
RegionNode* region);
void generate_string_range_check(Node* array, Node* offset,
Node* length, bool char_count);
Node* generate_current_thread(Node* &tls_output);
Node* load_mirror_from_klass(Node* klass);
Node* load_klass_from_mirror_common(Node* mirror, bool never_see_null,
RegionNode* region, int null_path,
int offset);
Node* load_klass_from_mirror(Node* mirror, bool never_see_null,
RegionNode* region, int null_path) {
int offset = java_lang_Class::klass_offset();
return load_klass_from_mirror_common(mirror, never_see_null,
region, null_path,
offset);
}
Node* load_array_klass_from_mirror(Node* mirror, bool never_see_null,
RegionNode* region, int null_path) {
int offset = java_lang_Class::array_klass_offset();
return load_klass_from_mirror_common(mirror, never_see_null,
region, null_path,
offset);
}
Node* generate_access_flags_guard(Node* kls,
int modifier_mask, int modifier_bits,
RegionNode* region);
Node* generate_interface_guard(Node* kls, RegionNode* region);
Node* generate_hidden_class_guard(Node* kls, RegionNode* region);
Node* generate_array_guard(Node* kls, RegionNode* region) {
return generate_array_guard_common(kls, region, false, false);
}
Node* generate_non_array_guard(Node* kls, RegionNode* region) {
return generate_array_guard_common(kls, region, false, true);
}
Node* generate_objArray_guard(Node* kls, RegionNode* region) {
return generate_array_guard_common(kls, region, true, false);
}
Node* generate_non_objArray_guard(Node* kls, RegionNode* region) {
return generate_array_guard_common(kls, region, true, true);
}
Node* generate_array_guard_common(Node* kls, RegionNode* region,
bool obj_array, bool not_array);
Node* generate_virtual_guard(Node* obj_klass, RegionNode* slow_region);
CallJavaNode* generate_method_call(vmIntrinsics::ID method_id,
bool is_virtual = false, bool is_static = false);
CallJavaNode* generate_method_call_static(vmIntrinsics::ID method_id) {
return generate_method_call(method_id, false, true);
}
CallJavaNode* generate_method_call_virtual(vmIntrinsics::ID method_id) {
return generate_method_call(method_id, true, false);
}
Node * load_field_from_object(Node * fromObj, const char * fieldName, const char * fieldTypeString, bool is_exact, bool is_static, ciInstanceKlass * fromKls);
Node * field_address_from_object(Node * fromObj, const char * fieldName, const char * fieldTypeString, bool is_exact, bool is_static, ciInstanceKlass * fromKls);
Node* make_string_method_node(int opcode, Node* str1_start, Node* cnt1, Node* str2_start, Node* cnt2, StrIntrinsicNode::ArgEnc ae);
bool inline_string_compareTo(StrIntrinsicNode::ArgEnc ae);
bool inline_string_indexOf(StrIntrinsicNode::ArgEnc ae);
bool inline_string_indexOfI(StrIntrinsicNode::ArgEnc ae);
Node* make_indexOf_node(Node* src_start, Node* src_count, Node* tgt_start, Node* tgt_count,
RegionNode* region, Node* phi, StrIntrinsicNode::ArgEnc ae);
bool inline_string_indexOfChar(StrIntrinsicNode::ArgEnc ae);
bool inline_string_equals(StrIntrinsicNode::ArgEnc ae);
bool inline_string_toBytesU();
bool inline_string_getCharsU();
bool inline_string_copy(bool compress);
bool inline_string_char_access(bool is_store);
Node* round_double_node(Node* n);
bool runtime_math(const TypeFunc* call_type, address funcAddr, const char* funcName);
bool inline_math_native(vmIntrinsics::ID id);
bool inline_math(vmIntrinsics::ID id);
bool inline_double_math(vmIntrinsics::ID id);
template <typename OverflowOp>
bool inline_math_overflow(Node* arg1, Node* arg2);
void inline_math_mathExact(Node* math, Node* test);
bool inline_math_addExactI(bool is_increment);
bool inline_math_addExactL(bool is_increment);
bool inline_math_multiplyExactI();
bool inline_math_multiplyExactL();
bool inline_math_multiplyHigh();
bool inline_math_negateExactI();
bool inline_math_negateExactL();
bool inline_math_subtractExactI(bool is_decrement);
bool inline_math_subtractExactL(bool is_decrement);
bool inline_min_max(vmIntrinsics::ID id);
bool inline_notify(vmIntrinsics::ID id);
Node* generate_min_max(vmIntrinsics::ID id, Node* x, Node* y);
// This returns Type::AnyPtr, RawPtr, or OopPtr.
int classify_unsafe_addr(Node* &base, Node* &offset, BasicType type);
Node* make_unsafe_address(Node*& base, Node* offset, DecoratorSet decorators, BasicType type = T_ILLEGAL, bool can_cast = false);
typedef enum { Relaxed, Opaque, Volatile, Acquire, Release } AccessKind;
DecoratorSet mo_decorator_for_access_kind(AccessKind kind);
bool inline_unsafe_access(bool is_store, BasicType type, AccessKind kind, bool is_unaligned);
static bool klass_needs_init_guard(Node* kls);
bool inline_unsafe_allocate();
bool inline_unsafe_newArray(bool uninitialized);
bool inline_unsafe_writeback0();
bool inline_unsafe_writebackSync0(bool is_pre);
bool inline_unsafe_copyMemory();
bool inline_native_currentThread();
bool inline_native_time_funcs(address method, const char* funcName);
#ifdef JFR_HAVE_INTRINSICS
bool inline_native_classID();
bool inline_native_getEventWriter();
#endif
bool inline_native_Class_query(vmIntrinsics::ID id);
bool inline_native_subtype_check();
bool inline_native_getLength();
bool inline_array_copyOf(bool is_copyOfRange);
bool inline_array_equals(StrIntrinsicNode::ArgEnc ae);
bool inline_preconditions_checkIndex();
void copy_to_clone(Node* obj, Node* alloc_obj, Node* obj_size, bool is_array);
bool inline_native_clone(bool is_virtual);
bool inline_native_Reflection_getCallerClass();
// Helper function for inlining native object hash method
bool inline_native_hashcode(bool is_virtual, bool is_static);
bool inline_native_getClass();
// Helper functions for inlining arraycopy
bool inline_arraycopy();
AllocateArrayNode* tightly_coupled_allocation(Node* ptr,
RegionNode* slow_region);
JVMState* arraycopy_restore_alloc_state(AllocateArrayNode* alloc, int& saved_reexecute_sp);
void arraycopy_move_allocation_here(AllocateArrayNode* alloc, Node* dest, JVMState* saved_jvms, int saved_reexecute_sp,
uint new_idx);
typedef enum { LS_get_add, LS_get_set, LS_cmp_swap, LS_cmp_swap_weak, LS_cmp_exchange } LoadStoreKind;
bool inline_unsafe_load_store(BasicType type, LoadStoreKind kind, AccessKind access_kind);
bool inline_unsafe_fence(vmIntrinsics::ID id);
bool inline_onspinwait();
bool inline_fp_conversions(vmIntrinsics::ID id);
bool inline_number_methods(vmIntrinsics::ID id);
bool inline_reference_get();
bool inline_Class_cast();
bool inline_aescrypt_Block(vmIntrinsics::ID id);
bool inline_cipherBlockChaining_AESCrypt(vmIntrinsics::ID id);
bool inline_electronicCodeBook_AESCrypt(vmIntrinsics::ID id);
bool inline_counterMode_AESCrypt(vmIntrinsics::ID id);
Node* inline_cipherBlockChaining_AESCrypt_predicate(bool decrypting);
Node* inline_electronicCodeBook_AESCrypt_predicate(bool decrypting);
Node* inline_counterMode_AESCrypt_predicate();
Node* get_key_start_from_aescrypt_object(Node* aescrypt_object);
Node* get_original_key_start_from_aescrypt_object(Node* aescrypt_object);
bool inline_ghash_processBlocks();
bool inline_base64_encodeBlock();
bool inline_digestBase_implCompress(vmIntrinsics::ID id);
bool inline_digestBase_implCompressMB(int predicate);
bool inline_digestBase_implCompressMB(Node* digestBaseObj, ciInstanceKlass* instklass,
bool long_state, address stubAddr, const char *stubName,
Node* src_start, Node* ofs, Node* limit);
Node* get_state_from_digest_object(Node *digestBase_object);
Node* get_long_state_from_digest_object(Node *digestBase_object);
Node* inline_digestBase_implCompressMB_predicate(int predicate);
bool inline_encodeISOArray();
bool inline_updateCRC32();
bool inline_updateBytesCRC32();
bool inline_updateByteBufferCRC32();
Node* get_table_from_crc32c_class(ciInstanceKlass *crc32c_class);
bool inline_updateBytesCRC32C();
bool inline_updateDirectByteBufferCRC32C();
bool inline_updateBytesAdler32();
bool inline_updateByteBufferAdler32();
bool inline_multiplyToLen();
bool inline_hasNegatives();
bool inline_squareToLen();
bool inline_mulAdd();
bool inline_montgomeryMultiply();
bool inline_montgomerySquare();
bool inline_bigIntegerShift(bool isRightShift);
bool inline_vectorizedMismatch();
bool inline_fma(vmIntrinsics::ID id);
bool inline_character_compare(vmIntrinsics::ID id);
bool inline_fp_min_max(vmIntrinsics::ID id);
bool inline_profileBoolean();
bool inline_isCompileConstant();
// Vector API support
bool inline_vector_nary_operation(int n);
bool inline_vector_broadcast_coerced();
bool inline_vector_shuffle_to_vector();
bool inline_vector_shuffle_iota();
bool inline_vector_mem_operation(bool is_store);
bool inline_vector_gather_scatter(bool is_scatter);
bool inline_vector_reduction();
bool inline_vector_test();
bool inline_vector_blend();
bool inline_vector_rearrange();
bool inline_vector_compare();
bool inline_vector_broadcast_int();
bool inline_vector_convert();
bool inline_vector_extract();
bool inline_vector_insert();
Node* box_vector(Node* in, const TypeInstPtr* vbox_type, BasicType bt, int num_elem);
Node* unbox_vector(Node* in, const TypeInstPtr* vbox_type, BasicType bt, int num_elem, bool shuffle_to_vector = false);
Node* shift_count(Node* cnt, int shift_op, BasicType bt, int num_elem);
enum VectorMaskUseType {
VecMaskUseLoad,
VecMaskUseStore,
VecMaskUseAll,
VecMaskNotUsed
};
bool arch_supports_vector(int op, int num_elem, BasicType type, VectorMaskUseType mask_use_type, bool has_scalar_args = false);
void clear_upper_avx() {
#ifdef X86
if (UseAVX >= 2) {
C->set_clear_upper_avx(true);
}
#endif
}
};
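
Note: the new header keeps the Vector API expanders (inline_vector_gather_scatter and friends) alongside the existing LibraryCallKit helpers. A hedged sketch of the Java-level gather that would feed inline_vector_gather_scatter(/*is_scatter*/ false); the index map and array names are invented for the example.

import jdk.incubator.vector.IntVector;
import jdk.incubator.vector.VectorSpecies;

class GatherExample {
    static final VectorSpecies<Integer> SPECIES = IntVector.SPECIES_256;

    static IntVector gather(int[] data, int[] indexMap) {
        // Indexed load: lane i reads data[indexMap[i]]. This form maps to the
        // _VectorGatherOp intrinsic; the scatter counterpart stores through an
        // index map and maps to _VectorScatterOp.
        return IntVector.fromArray(SPECIES, data, 0, indexMap, 0);
    }
}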

View file

@ -430,7 +430,7 @@ static RegMask *init_input_masks( uint size, RegMask &ret_adr, RegMask &fp ) {
return rms;
}
#define NOF_STACK_MASKS (3*6+6)
#define NOF_STACK_MASKS (3*12)
// Create the initial stack mask used by values spilling to the stack.
// Disallow any debug info in outgoing argument areas by setting the
@ -473,6 +473,20 @@ void Matcher::init_first_stack_mask() {
idealreg2spillmask [Op_VecY] = &rms[22];
idealreg2spillmask [Op_VecZ] = &rms[23];
idealreg2debugmask [Op_VecA] = &rms[24];
idealreg2debugmask [Op_VecS] = &rms[25];
idealreg2debugmask [Op_VecD] = &rms[26];
idealreg2debugmask [Op_VecX] = &rms[27];
idealreg2debugmask [Op_VecY] = &rms[28];
idealreg2debugmask [Op_VecZ] = &rms[29];
idealreg2mhdebugmask[Op_VecA] = &rms[30];
idealreg2mhdebugmask[Op_VecS] = &rms[31];
idealreg2mhdebugmask[Op_VecD] = &rms[32];
idealreg2mhdebugmask[Op_VecX] = &rms[33];
idealreg2mhdebugmask[Op_VecY] = &rms[34];
idealreg2mhdebugmask[Op_VecZ] = &rms[35];
OptoReg::Name i;
// At first, start with the empty mask
@ -520,13 +534,19 @@ void Matcher::init_first_stack_mask() {
if (Matcher::vector_size_supported(T_BYTE,4)) {
*idealreg2spillmask[Op_VecS] = *idealreg2regmask[Op_VecS];
idealreg2spillmask[Op_VecS]->OR(C->FIRST_STACK_mask());
} else {
*idealreg2spillmask[Op_VecS] = RegMask::Empty;
}
if (Matcher::vector_size_supported(T_FLOAT,2)) {
// For VecD we need dual alignment and 8 bytes (2 slots) for spills.
// RA guarantees such alignment since it is needed for Double and Long values.
*idealreg2spillmask[Op_VecD] = *idealreg2regmask[Op_VecD];
idealreg2spillmask[Op_VecD]->OR(aligned_stack_mask);
} else {
*idealreg2spillmask[Op_VecD] = RegMask::Empty;
}
if (Matcher::vector_size_supported(T_FLOAT,4)) {
// For VecX we need quadro alignment and 16 bytes (4 slots) for spills.
//
@ -544,7 +564,10 @@ void Matcher::init_first_stack_mask() {
assert(aligned_stack_mask.is_AllStack(), "should be infinite stack");
*idealreg2spillmask[Op_VecX] = *idealreg2regmask[Op_VecX];
idealreg2spillmask[Op_VecX]->OR(aligned_stack_mask);
} else {
*idealreg2spillmask[Op_VecX] = RegMask::Empty;
}
if (Matcher::vector_size_supported(T_FLOAT,8)) {
// For VecY we need octo alignment and 32 bytes (8 slots) for spills.
OptoReg::Name in = OptoReg::add(_in_arg_limit, -1);
@ -556,7 +579,10 @@ void Matcher::init_first_stack_mask() {
assert(aligned_stack_mask.is_AllStack(), "should be infinite stack");
*idealreg2spillmask[Op_VecY] = *idealreg2regmask[Op_VecY];
idealreg2spillmask[Op_VecY]->OR(aligned_stack_mask);
} else {
*idealreg2spillmask[Op_VecY] = RegMask::Empty;
}
if (Matcher::vector_size_supported(T_FLOAT,16)) {
// For VecZ we need enough alignment and 64 bytes (16 slots) for spills.
OptoReg::Name in = OptoReg::add(_in_arg_limit, -1);
@ -568,6 +594,8 @@ void Matcher::init_first_stack_mask() {
assert(aligned_stack_mask.is_AllStack(), "should be infinite stack");
*idealreg2spillmask[Op_VecZ] = *idealreg2regmask[Op_VecZ];
idealreg2spillmask[Op_VecZ]->OR(aligned_stack_mask);
} else {
*idealreg2spillmask[Op_VecZ] = RegMask::Empty;
}
if (Matcher::supports_scalable_vector()) {
@ -622,6 +650,13 @@ void Matcher::init_first_stack_mask() {
*idealreg2debugmask [Op_RegD] = *idealreg2spillmask[Op_RegD];
*idealreg2debugmask [Op_RegP] = *idealreg2spillmask[Op_RegP];
*idealreg2debugmask [Op_VecA] = *idealreg2spillmask[Op_VecA];
*idealreg2debugmask [Op_VecS] = *idealreg2spillmask[Op_VecS];
*idealreg2debugmask [Op_VecD] = *idealreg2spillmask[Op_VecD];
*idealreg2debugmask [Op_VecX] = *idealreg2spillmask[Op_VecX];
*idealreg2debugmask [Op_VecY] = *idealreg2spillmask[Op_VecY];
*idealreg2debugmask [Op_VecZ] = *idealreg2spillmask[Op_VecZ];
*idealreg2mhdebugmask[Op_RegN] = *idealreg2spillmask[Op_RegN];
*idealreg2mhdebugmask[Op_RegI] = *idealreg2spillmask[Op_RegI];
*idealreg2mhdebugmask[Op_RegL] = *idealreg2spillmask[Op_RegL];
@ -629,6 +664,13 @@ void Matcher::init_first_stack_mask() {
*idealreg2mhdebugmask[Op_RegD] = *idealreg2spillmask[Op_RegD];
*idealreg2mhdebugmask[Op_RegP] = *idealreg2spillmask[Op_RegP];
*idealreg2mhdebugmask[Op_VecA] = *idealreg2spillmask[Op_VecA];
*idealreg2mhdebugmask[Op_VecS] = *idealreg2spillmask[Op_VecS];
*idealreg2mhdebugmask[Op_VecD] = *idealreg2spillmask[Op_VecD];
*idealreg2mhdebugmask[Op_VecX] = *idealreg2spillmask[Op_VecX];
*idealreg2mhdebugmask[Op_VecY] = *idealreg2spillmask[Op_VecY];
*idealreg2mhdebugmask[Op_VecZ] = *idealreg2spillmask[Op_VecZ];
// Prevent stub compilations from attempting to reference
// callee-saved (SOE) registers from debug info
bool exclude_soe = !Compile::current()->is_method_compilation();
@ -642,12 +684,26 @@ void Matcher::init_first_stack_mask() {
idealreg2debugmask[Op_RegD]->SUBTRACT(*caller_save_mask);
idealreg2debugmask[Op_RegP]->SUBTRACT(*caller_save_mask);
idealreg2debugmask[Op_VecA]->SUBTRACT(*caller_save_mask);
idealreg2debugmask[Op_VecS]->SUBTRACT(*caller_save_mask);
idealreg2debugmask[Op_VecD]->SUBTRACT(*caller_save_mask);
idealreg2debugmask[Op_VecX]->SUBTRACT(*caller_save_mask);
idealreg2debugmask[Op_VecY]->SUBTRACT(*caller_save_mask);
idealreg2debugmask[Op_VecZ]->SUBTRACT(*caller_save_mask);
idealreg2mhdebugmask[Op_RegN]->SUBTRACT(*mh_caller_save_mask);
idealreg2mhdebugmask[Op_RegI]->SUBTRACT(*mh_caller_save_mask);
idealreg2mhdebugmask[Op_RegL]->SUBTRACT(*mh_caller_save_mask);
idealreg2mhdebugmask[Op_RegF]->SUBTRACT(*mh_caller_save_mask);
idealreg2mhdebugmask[Op_RegD]->SUBTRACT(*mh_caller_save_mask);
idealreg2mhdebugmask[Op_RegP]->SUBTRACT(*mh_caller_save_mask);
idealreg2mhdebugmask[Op_VecA]->SUBTRACT(*mh_caller_save_mask);
idealreg2mhdebugmask[Op_VecS]->SUBTRACT(*mh_caller_save_mask);
idealreg2mhdebugmask[Op_VecD]->SUBTRACT(*mh_caller_save_mask);
idealreg2mhdebugmask[Op_VecX]->SUBTRACT(*mh_caller_save_mask);
idealreg2mhdebugmask[Op_VecY]->SUBTRACT(*mh_caller_save_mask);
idealreg2mhdebugmask[Op_VecZ]->SUBTRACT(*mh_caller_save_mask);
}
//---------------------------is_save_on_entry----------------------------------
@ -1953,7 +2009,6 @@ bool Matcher::is_vshift_con_pattern(Node *n, Node *m) {
return false;
}
bool Matcher::clone_node(Node* n, Node* m, Matcher::MStack& mstack) {
// Must clone all producers of flags, or we will not match correctly.
// Suppose a compare setting int-flags is shared (e.g., a switch-tree)
@ -2308,8 +2363,28 @@ void Matcher::find_shared_post_visit(Node* n, uint opcode) {
n->del_req(3);
break;
}
case Op_VectorBlend:
case Op_VectorInsert: {
Node* pair = new BinaryNode(n->in(1), n->in(2));
n->set_req(1, pair);
n->set_req(2, n->in(3));
n->del_req(3);
break;
}
case Op_StoreVectorScatter: {
Node* pair = new BinaryNode(n->in(MemNode::ValueIn), n->in(MemNode::ValueIn+1));
n->set_req(MemNode::ValueIn, pair);
n->del_req(MemNode::ValueIn+1);
break;
}
case Op_VectorMaskCmp: {
n->set_req(1, new BinaryNode(n->in(1), n->in(2)));
n->set_req(2, n->in(3));
n->del_req(3);
break;
}
default:
break;
}
}
}
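
Note: the nodes added above carry three or more data inputs, so the matcher folds two of them into a BinaryNode to fit its two-operand matching machinery. A hedged Java example of a blend, the kind of operation that becomes a three-input VectorBlend (the mask and array names are illustrative):

import jdk.incubator.vector.FloatVector;
import jdk.incubator.vector.VectorMask;
import jdk.incubator.vector.VectorSpecies;

class BlendExample {
    static final VectorSpecies<Float> SPECIES = FloatVector.SPECIES_256;

    static FloatVector clampNegativesToZero(float[] a) {
        FloatVector v = FloatVector.fromArray(SPECIES, a, 0);
        FloatVector zero = FloatVector.zero(SPECIES);
        // lt() yields the mask (a VectorMaskCmp in the IR); blend() takes the
        // two vectors plus the mask, i.e. the three-input VectorBlend that the
        // matcher pairs up with a BinaryNode.
        VectorMask<Float> neg = v.lt(0.0f);
        return v.blend(zero, neg);
    }
}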

View file

@ -345,6 +345,9 @@ public:
// Vector ideal reg
static const uint vector_ideal_reg(int len);
// Does the CPU support vector variable shift instructions?
static bool supports_vector_variable_shifts(void);
// CPU supports misaligned vectors store/load.
static const bool misaligned_vectors_ok();

View file

@ -641,7 +641,8 @@ Node* MemNode::find_previous_store(PhaseTransform* phase) {
}
if (st_offset != offset && st_offset != Type::OffsetBot) {
const int MAX_STORE = BytesPerLong;
const int MAX_STORE = MAX2(BytesPerLong, (int)MaxVectorSize);
assert(mem->as_Store()->memory_size() <= MAX_STORE, "");
if (st_offset >= offset + size_in_bytes ||
st_offset <= offset - MAX_STORE ||
st_offset <= offset - mem->as_Store()->memory_size()) {
@ -1111,11 +1112,16 @@ Node* MemNode::can_see_stored_value(Node* st, PhaseTransform* phase) const {
// (This is one of the few places where a generic PhaseTransform
// can create new nodes. Think of it as lazily manifesting
// virtually pre-existing constants.)
if (ReduceBulkZeroing || find_array_copy_clone(phase, ld_alloc, in(MemNode::Memory)) == NULL) {
// If ReduceBulkZeroing is disabled, we need to check if the allocation does not belong to an
// ArrayCopyNode clone. If it does, then we cannot assume zero since the initialization is done
// by the ArrayCopyNode.
return phase->zerocon(memory_type());
if (memory_type() != T_VOID) {
if (ReduceBulkZeroing || find_array_copy_clone(phase, ld_alloc, in(MemNode::Memory)) == NULL) {
// If ReduceBulkZeroing is disabled, we need to check if the allocation does not belong to an
// ArrayCopyNode clone. If it does, then we cannot assume zero since the initialization is done
// by the ArrayCopyNode.
return phase->zerocon(memory_type());
}
} else {
// TODO: materialize all-zero vector constant
assert(!isa_Load() || as_Load()->type()->isa_vect(), "");
}
}
@ -2561,6 +2567,8 @@ Node *StoreNode::Ideal(PhaseGVN *phase, bool can_reshape) {
assert(Opcode() == st->Opcode() ||
st->Opcode() == Op_StoreVector ||
Opcode() == Op_StoreVector ||
st->Opcode() == Op_StoreVectorScatter ||
Opcode() == Op_StoreVectorScatter ||
phase->C->get_alias_index(adr_type()) == Compile::AliasIdxRaw ||
(Opcode() == Op_StoreL && st->Opcode() == Op_StoreI) || // expanded ClearArrayNode
(Opcode() == Op_StoreI && st->Opcode() == Op_StoreL) || // initialization by arraycopy
@ -3744,7 +3752,7 @@ intptr_t InitializeNode::can_capture_store(StoreNode* st, PhaseGVN* phase, bool
int InitializeNode::captured_store_insertion_point(intptr_t start,
int size_in_bytes,
PhaseTransform* phase) {
const int FAIL = 0, MAX_STORE = BytesPerLong;
const int FAIL = 0, MAX_STORE = MAX2(BytesPerLong, (int)MaxVectorSize);
if (is_complete())
return FAIL; // arraycopy got here first; punt
@ -3774,6 +3782,7 @@ int InitializeNode::captured_store_insertion_point(intptr_t start,
}
return -(int)i; // not found; here is where to put it
} else if (st_off < start) {
assert(st->as_Store()->memory_size() <= MAX_STORE, "");
if (size_in_bytes != 0 &&
start < st_off + MAX_STORE &&
start < st_off + st->as_Store()->memory_size()) {

View file

@ -363,6 +363,14 @@ const Type* MoveL2DNode::Value(PhaseGVN* phase) const {
return TypeD::make( v.get_jdouble() );
}
//------------------------------Identity----------------------------------------
Node* MoveL2DNode::Identity(PhaseGVN* phase) {
if (in(1)->Opcode() == Op_MoveD2L) {
return in(1)->in(1);
}
return this;
}
//------------------------------Value------------------------------------------
const Type* MoveI2FNode::Value(PhaseGVN* phase) const {
const Type *t = phase->type( in(1) );
@ -374,6 +382,14 @@ const Type* MoveI2FNode::Value(PhaseGVN* phase) const {
return TypeF::make( v.get_jfloat() );
}
//------------------------------Identity----------------------------------------
Node* MoveI2FNode::Identity(PhaseGVN* phase) {
if (in(1)->Opcode() == Op_MoveF2I) {
return in(1)->in(1);
}
return this;
}
//------------------------------Value------------------------------------------
const Type* MoveF2INode::Value(PhaseGVN* phase) const {
const Type *t = phase->type( in(1) );
@ -385,6 +401,14 @@ const Type* MoveF2INode::Value(PhaseGVN* phase) const {
return TypeInt::make( v.get_jint() );
}
//------------------------------Identity----------------------------------------
Node* MoveF2INode::Identity(PhaseGVN* phase) {
if (in(1)->Opcode() == Op_MoveI2F) {
return in(1)->in(1);
}
return this;
}
//------------------------------Value------------------------------------------
const Type* MoveD2LNode::Value(PhaseGVN* phase) const {
const Type *t = phase->type( in(1) );
@ -396,6 +420,14 @@ const Type* MoveD2LNode::Value(PhaseGVN* phase) const {
return TypeLong::make( v.get_jlong() );
}
//------------------------------Identity----------------------------------------
Node* MoveD2LNode::Identity(PhaseGVN* phase) {
if (in(1)->Opcode() == Op_MoveL2D) {
return in(1)->in(1);
}
return this;
}
#ifndef PRODUCT
//----------------------------BinaryNode---------------------------------------
// The set of related nodes for a BinaryNode is all data inputs and all outputs
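
Note: the new Identity methods above collapse a bit-preserving round trip (MoveD2L followed by MoveL2D, and the float/int pair) back to the original value. The Java-level equivalent, shown as a hedged example:

class MoveIdentityExample {
    static double roundTrip(double d) {
        // doubleToRawLongBits / longBitsToDouble is the Java surface of
        // MoveD2L / MoveL2D; with the new Identity() the composed round trip
        // folds back to d in the IR.
        long bits = Double.doubleToRawLongBits(d);
        return Double.longBitsToDouble(bits);
    }

    static float roundTripF(float f) {
        // The same folding applies to the MoveF2I / MoveI2F pair.
        return Float.intBitsToFloat(Float.floatToRawIntBits(f));
    }
}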

View file

@ -105,6 +105,7 @@ class MoveI2FNode : public Node {
virtual const Type *bottom_type() const { return Type::FLOAT; }
virtual uint ideal_reg() const { return Op_RegF; }
virtual const Type* Value(PhaseGVN* phase) const;
virtual Node* Identity(PhaseGVN* phase);
};
class MoveL2DNode : public Node {
@ -114,6 +115,7 @@ class MoveL2DNode : public Node {
virtual const Type *bottom_type() const { return Type::DOUBLE; }
virtual uint ideal_reg() const { return Op_RegD; }
virtual const Type* Value(PhaseGVN* phase) const;
virtual Node* Identity(PhaseGVN* phase);
};
class MoveF2INode : public Node {
@ -123,6 +125,7 @@ class MoveF2INode : public Node {
virtual const Type *bottom_type() const { return TypeInt::INT; }
virtual uint ideal_reg() const { return Op_RegI; }
virtual const Type* Value(PhaseGVN* phase) const;
virtual Node* Identity(PhaseGVN* phase);
};
class MoveD2LNode : public Node {
@ -132,6 +135,7 @@ class MoveD2LNode : public Node {
virtual const Type *bottom_type() const { return TypeLong::LONG; }
virtual uint ideal_reg() const { return Op_RegL; }
virtual const Type* Value(PhaseGVN* phase) const;
virtual Node* Identity(PhaseGVN* phase);
};
//------------------------------BinaryNode-------------------------------------

View file

@ -259,6 +259,25 @@ public:
virtual uint ideal_reg() const { return Op_RegL; }
};
//------------------------------URShiftBNode-----------------------------------
// Logical shift right
class URShiftBNode : public Node {
public:
URShiftBNode( Node *in1, Node *in2 ) : Node(0,in1,in2) {
ShouldNotReachHere(); // only vector variant is used
}
virtual int Opcode() const;
};
//------------------------------URShiftSNode-----------------------------------
// Logical shift right
class URShiftSNode : public Node {
public:
URShiftSNode( Node *in1, Node *in2 ) : Node(0,in1,in2) {
ShouldNotReachHere(); // only vector variant is used
}
virtual int Opcode() const;
};
//------------------------------URShiftINode-----------------------------------
// Logical shift right

View file

@ -152,7 +152,10 @@ class TypeNode;
class UnlockNode;
class VectorNode;
class LoadVectorNode;
class LoadVectorGatherNode;
class StoreVectorNode;
class StoreVectorScatterNode;
class VectorMaskCmpNode;
class VectorSet;
typedef void (*NFunc)(Node&,void*);
extern "C" {
@ -688,8 +691,10 @@ public:
DEFINE_CLASS_ID(Mem, Node, 4)
DEFINE_CLASS_ID(Load, Mem, 0)
DEFINE_CLASS_ID(LoadVector, Load, 0)
DEFINE_CLASS_ID(LoadVectorGather, LoadVector, 0)
DEFINE_CLASS_ID(Store, Mem, 1)
DEFINE_CLASS_ID(StoreVector, Store, 0)
DEFINE_CLASS_ID(StoreVectorScatter, StoreVector, 0)
DEFINE_CLASS_ID(LoadStore, Mem, 2)
DEFINE_CLASS_ID(LoadStoreConditional, LoadStore, 0)
DEFINE_CLASS_ID(CompareAndSwap, LoadStoreConditional, 0)
@ -714,6 +719,7 @@ public:
DEFINE_CLASS_ID(Add, Node, 11)
DEFINE_CLASS_ID(Mul, Node, 12)
DEFINE_CLASS_ID(Vector, Node, 13)
DEFINE_CLASS_ID(VectorMaskCmp, Vector, 0)
DEFINE_CLASS_ID(ClearArray, Node, 14)
DEFINE_CLASS_ID(Halt, Node, 15)
DEFINE_CLASS_ID(Opaque1, Node, 16)
@ -884,7 +890,10 @@ public:
DEFINE_CLASS_QUERY(Type)
DEFINE_CLASS_QUERY(Vector)
DEFINE_CLASS_QUERY(LoadVector)
DEFINE_CLASS_QUERY(LoadVectorGather)
DEFINE_CLASS_QUERY(StoreVector)
DEFINE_CLASS_QUERY(StoreVectorScatter)
DEFINE_CLASS_QUERY(VectorMaskCmp)
DEFINE_CLASS_QUERY(Unlock)
#undef DEFINE_CLASS_QUERY


@ -892,6 +892,10 @@ void PhaseOutput::FillLocArray( int idx, MachSafePointNode* sfpt, Node *local,
? Location::int_in_long : Location::normal ));
} else if( t->base() == Type::NarrowOop ) {
array->append(new_loc_value( C->regalloc(), regnum, Location::narrowoop ));
} else if ( t->base() == Type::VectorS || t->base() == Type::VectorD ||
t->base() == Type::VectorX || t->base() == Type::VectorY ||
t->base() == Type::VectorZ) {
array->append(new_loc_value( C->regalloc(), regnum, Location::vector ));
} else {
array->append(new_loc_value( C->regalloc(), regnum, C->regalloc()->is_oop(local) ? Location::oop : Location::normal ));
}


@ -78,6 +78,10 @@ void Phase::print_timers() {
}
}
tty->print_cr (" Renumber Live: %7.3f s", timers[_t_renumberLive].seconds());
tty->print_cr (" Vector: %7.3f s", timers[_t_vector].seconds());
tty->print_cr (" Box elimination: %7.3f s", timers[_t_vector_elimination].seconds());
tty->print_cr (" IGVN: %7.3f s", timers[_t_vector_igvn].seconds());
tty->print_cr (" Prune Useless: %7.3f s", timers[_t_vector_pru].seconds());
tty->print_cr (" IdealLoop: %7.3f s", timers[_t_idealLoop].seconds());
tty->print_cr (" IdealLoop Verify: %7.3f s", timers[_t_idealLoopVerify].seconds());
tty->print_cr (" Cond Const Prop: %7.3f s", timers[_t_ccp].seconds());


@ -59,6 +59,7 @@ public:
Ideal_Loop, // Find idealized trip-counted loops
Macro_Expand, // Expand macro nodes
Peephole, // Apply peephole optimizations
Vector,
Output,
last_phase
};
@ -75,6 +76,10 @@ public:
_t_incrInline_igvn,
_t_incrInline_pru,
_t_incrInline_inline,
_t_vector,
_t_vector_elimination,
_t_vector_igvn,
_t_vector_pru,
_t_renumberLive,
_t_idealLoop,
_t_idealLoopVerify,


@ -31,7 +31,14 @@ enum CompilerPhaseType {
PHASE_BEFORE_REMOVEUSELESS,
PHASE_AFTER_PARSING,
PHASE_ITER_GVN1,
PHASE_EXPAND_VUNBOX,
PHASE_SCALARIZE_VBOX,
PHASE_INLINE_VECTOR_REBOX,
PHASE_EXPAND_VBOX,
PHASE_ELIMINATE_VBOX_ALLOC,
PHASE_PHASEIDEAL_BEFORE_EA,
PHASE_ITER_GVN_AFTER_VECTOR,
PHASE_ITER_GVN_BEFORE_EA,
PHASE_ITER_GVN_AFTER_EA,
PHASE_ITER_GVN_AFTER_ELIMINATION,
PHASE_PHASEIDEALLOOP1,
@ -41,6 +48,7 @@ enum CompilerPhaseType {
PHASE_ITER_GVN2,
PHASE_PHASEIDEALLOOP_ITERATIONS,
PHASE_OPTIMIZE_FINISHED,
PHASE_AFTER_MATCHING,
PHASE_GLOBAL_CODE_MOTION,
PHASE_FINAL_CODE,
PHASE_AFTER_EA,
@ -51,6 +59,7 @@ enum CompilerPhaseType {
PHASE_BEFORE_MATCHING,
PHASE_MATCHING,
PHASE_INCREMENTAL_INLINE,
PHASE_INCREMENTAL_INLINE_STEP,
PHASE_INCREMENTAL_BOXING_INLINE,
PHASE_CALL_CATCH_CLEANUP,
PHASE_INSERT_BARRIER,
@ -73,7 +82,14 @@ class CompilerPhaseTypeHelper {
case PHASE_BEFORE_REMOVEUSELESS: return "Before RemoveUseless";
case PHASE_AFTER_PARSING: return "After Parsing";
case PHASE_ITER_GVN1: return "Iter GVN 1";
case PHASE_EXPAND_VUNBOX: return "Expand VectorUnbox";
case PHASE_SCALARIZE_VBOX: return "Scalarize VectorBox";
case PHASE_INLINE_VECTOR_REBOX: return "Inline Vector Rebox Calls";
case PHASE_EXPAND_VBOX: return "Expand VectorBox";
case PHASE_ELIMINATE_VBOX_ALLOC: return "Eliminate VectorBoxAllocate";
case PHASE_PHASEIDEAL_BEFORE_EA: return "PhaseIdealLoop before EA";
case PHASE_ITER_GVN_AFTER_VECTOR: return "Iter GVN after vector box elimination";
case PHASE_ITER_GVN_BEFORE_EA: return "Iter GVN before EA";
case PHASE_ITER_GVN_AFTER_EA: return "Iter GVN after EA";
case PHASE_ITER_GVN_AFTER_ELIMINATION: return "Iter GVN after eliminating allocations and locks";
case PHASE_PHASEIDEALLOOP1: return "PhaseIdealLoop 1";
@ -83,6 +99,7 @@ class CompilerPhaseTypeHelper {
case PHASE_ITER_GVN2: return "Iter GVN 2";
case PHASE_PHASEIDEALLOOP_ITERATIONS: return "PhaseIdealLoop iterations";
case PHASE_OPTIMIZE_FINISHED: return "Optimize finished";
case PHASE_AFTER_MATCHING: return "After Matching";
case PHASE_GLOBAL_CODE_MOTION: return "Global code motion";
case PHASE_FINAL_CODE: return "Final Code";
case PHASE_AFTER_EA: return "After Escape Analysis";
@ -93,6 +110,7 @@ class CompilerPhaseTypeHelper {
case PHASE_BEFORE_MATCHING: return "Before matching";
case PHASE_MATCHING: return "After matching";
case PHASE_INCREMENTAL_INLINE: return "Incremental Inline";
case PHASE_INCREMENTAL_INLINE_STEP: return "Incremental Inline Step";
case PHASE_INCREMENTAL_BOXING_INLINE: return "Incremental Boxing Inline";
case PHASE_CALL_CATCH_CLEANUP: return "Call catch cleanup";
case PHASE_INSERT_BARRIER: return "Insert barrier";


@ -404,6 +404,28 @@ public:
NegNode( Node *in1 ) : Node(0,in1) {}
};
//------------------------------NegINode---------------------------------------
// Negate an int value. For int values, negation is the same as subtraction
// from zero
class NegINode : public NegNode {
public:
NegINode(Node *in1) : NegNode(in1) {}
virtual int Opcode() const;
const Type *bottom_type() const { return TypeInt::INT; }
virtual uint ideal_reg() const { return Op_RegI; }
};
//------------------------------NegLNode---------------------------------------
// Negate a long value. For long values, negation is the same as subtraction
// from zero
class NegLNode : public NegNode {
public:
NegLNode(Node *in1) : NegNode(in1) {}
virtual int Opcode() const;
const Type *bottom_type() const { return TypeLong::LONG; }
virtual uint ideal_reg() const { return Op_RegL; }
};
//------------------------------NegFNode---------------------------------------
// Negate value a float. Negating 0.0 returns -0.0, but subtracting from
// zero returns +0.0 (per JVM spec on 'fneg' bytecode). As subtraction


@ -2767,7 +2767,7 @@ Node* SuperWord::vector_opd(Node_List* p, int opd_idx) {
}
}
// Move shift count into vector register.
cnt = VectorNode::shift_count(p0, cnt, vlen, velt_basic_type(p0));
cnt = VectorNode::shift_count(p0->Opcode(), cnt, vlen, velt_basic_type(p0));
_igvn.register_new_node_with_optimizer(cnt);
_phase->set_ctrl(cnt, _phase->get_ctrl(opd));
return cnt;


@ -439,16 +439,22 @@ void Type::Initialize_shared(Compile* current) {
BOTTOM = make(Bottom); // Everything
HALF = make(Half); // Placeholder half of doublewide type
TypeF::MAX = TypeF::make(max_jfloat); // Float MAX
TypeF::MIN = TypeF::make(min_jfloat); // Float MIN
TypeF::ZERO = TypeF::make(0.0); // Float 0 (positive zero)
TypeF::ONE = TypeF::make(1.0); // Float 1
TypeF::POS_INF = TypeF::make(jfloat_cast(POSITIVE_INFINITE_F));
TypeF::NEG_INF = TypeF::make(-jfloat_cast(POSITIVE_INFINITE_F));
TypeD::MAX = TypeD::make(max_jdouble); // Double MAX
TypeD::MIN = TypeD::make(min_jdouble); // Double MIN
TypeD::ZERO = TypeD::make(0.0); // Double 0 (positive zero)
TypeD::ONE = TypeD::make(1.0); // Double 1
TypeD::POS_INF = TypeD::make(jdouble_cast(POSITIVE_INFINITE_D));
TypeD::NEG_INF = TypeD::make(-jdouble_cast(POSITIVE_INFINITE_D));
TypeInt::MAX = TypeInt::make(max_jint); // Int MAX
TypeInt::MIN = TypeInt::make(min_jint); // Int MIN
TypeInt::MINUS_1 = TypeInt::make(-1); // -1
TypeInt::ZERO = TypeInt::make( 0); // 0
TypeInt::ONE = TypeInt::make( 1); // 1
@ -477,6 +483,8 @@ void Type::Initialize_shared(Compile* current) {
assert( TypeInt::CC_GE == TypeInt::BOOL, "types must match for CmpL to work" );
assert( (juint)(TypeInt::CC->_hi - TypeInt::CC->_lo) <= SMALLINT, "CC is truly small");
TypeLong::MAX = TypeLong::make(max_jlong); // Long MAX
TypeLong::MIN = TypeLong::make(min_jlong); // Long MIN
TypeLong::MINUS_1 = TypeLong::make(-1); // -1
TypeLong::ZERO = TypeLong::make( 0); // 0
TypeLong::ONE = TypeLong::make( 1); // 1
@ -1119,6 +1127,8 @@ void Type::typerr( const Type *t ) const {
//=============================================================================
// Convenience common pre-built types.
const TypeF *TypeF::MAX; // Floating point max
const TypeF *TypeF::MIN; // Floating point min
const TypeF *TypeF::ZERO; // Floating point zero
const TypeF *TypeF::ONE; // Floating point one
const TypeF *TypeF::POS_INF; // Floating point positive infinity
@ -1229,6 +1239,8 @@ bool TypeF::empty(void) const {
//=============================================================================
// Convenience common pre-built types.
const TypeD *TypeD::MAX; // Floating point max
const TypeD *TypeD::MIN; // Floating point min
const TypeD *TypeD::ZERO; // Floating point zero
const TypeD *TypeD::ONE; // Floating point one
const TypeD *TypeD::POS_INF; // Floating point positive infinity
@ -1335,6 +1347,8 @@ bool TypeD::empty(void) const {
//=============================================================================
// Convenience common pre-built types.
const TypeInt *TypeInt::MAX; // INT_MAX
const TypeInt *TypeInt::MIN; // INT_MIN
const TypeInt *TypeInt::MINUS_1;// -1
const TypeInt *TypeInt::ZERO; // 0
const TypeInt *TypeInt::ONE; // 1
@ -1604,6 +1618,8 @@ bool TypeInt::empty(void) const {
//=============================================================================
// Convenience common pre-built types.
const TypeLong *TypeLong::MAX;
const TypeLong *TypeLong::MIN;
const TypeLong *TypeLong::MINUS_1;// -1
const TypeLong *TypeLong::ZERO; // 0
const TypeLong *TypeLong::ONE; // 1


@ -483,6 +483,8 @@ public:
virtual const Type *xmeet( const Type *t ) const;
virtual const Type *xdual() const; // Compute dual right now.
// Convenience common pre-built types.
static const TypeF *MAX;
static const TypeF *MIN;
static const TypeF *ZERO; // positive zero only
static const TypeF *ONE;
static const TypeF *POS_INF;
@ -512,6 +514,8 @@ public:
virtual const Type *xmeet( const Type *t ) const;
virtual const Type *xdual() const; // Compute dual right now.
// Convenience common pre-built types.
static const TypeD *MAX;
static const TypeD *MIN;
static const TypeD *ZERO; // positive zero only
static const TypeD *ONE;
static const TypeD *POS_INF;
@ -555,6 +559,8 @@ public:
virtual const Type *narrow( const Type *t ) const;
// Do not kill _widen bits.
// Convenience common pre-built types.
static const TypeInt *MAX;
static const TypeInt *MIN;
static const TypeInt *MINUS_1;
static const TypeInt *ZERO;
static const TypeInt *ONE;
@ -620,6 +626,8 @@ public:
virtual const Type *widen( const Type *t, const Type* limit_type ) const;
virtual const Type *narrow( const Type *t ) const;
// Convenience common pre-built types.
static const TypeLong *MAX;
static const TypeLong *MIN;
static const TypeLong *MINUS_1;
static const TypeLong *ZERO;
static const TypeLong *ONE;


@ -0,0 +1,466 @@
/*
* Copyright (c) 2020, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*
*/
#include "precompiled.hpp"
#include "opto/castnode.hpp"
#include "opto/graphKit.hpp"
#include "opto/phaseX.hpp"
#include "opto/rootnode.hpp"
#include "opto/vector.hpp"
#include "utilities/macros.hpp"
void PhaseVector::optimize_vector_boxes() {
Compile::TracePhase tp("vector_elimination", &timers[_t_vector_elimination]);
// Signal GraphKit it's post-parse phase.
assert(C->inlining_incrementally() == false, "sanity");
C->set_inlining_incrementally(true);
C->for_igvn()->clear();
C->initial_gvn()->replace_with(&_igvn);
expand_vunbox_nodes();
scalarize_vbox_nodes();
C->inline_vector_reboxing_calls();
expand_vbox_nodes();
eliminate_vbox_alloc_nodes();
C->set_inlining_incrementally(false);
do_cleanup();
}
void PhaseVector::do_cleanup() {
if (C->failing()) return;
{
Compile::TracePhase tp("vector_pru", &timers[_t_vector_pru]);
ResourceMark rm;
PhaseRemoveUseless pru(C->initial_gvn(), C->for_igvn());
if (C->failing()) return;
}
{
Compile::TracePhase tp("incrementalInline_igvn", &timers[_t_vector_igvn]);
_igvn = PhaseIterGVN(C->initial_gvn());
_igvn.optimize();
if (C->failing()) return;
}
C->print_method(PHASE_ITER_GVN_BEFORE_EA, 3);
}
void PhaseVector::scalarize_vbox_nodes() {
if (C->failing()) return;
if (!EnableVectorReboxing) {
return; // don't scalarize vector boxes
}
int macro_idx = C->macro_count() - 1;
while (macro_idx >= 0) {
Node * n = C->macro_node(macro_idx);
assert(n->is_macro(), "only macro nodes expected here");
if (n->Opcode() == Op_VectorBox) {
VectorBoxNode* vbox = static_cast<VectorBoxNode*>(n);
scalarize_vbox_node(vbox);
if (C->failing()) return;
C->print_method(PHASE_SCALARIZE_VBOX, vbox, 3);
}
if (C->failing()) return;
macro_idx = MIN2(macro_idx - 1, C->macro_count() - 1);
}
}
void PhaseVector::expand_vbox_nodes() {
if (C->failing()) return;
int macro_idx = C->macro_count() - 1;
while (macro_idx >= 0) {
Node * n = C->macro_node(macro_idx);
assert(n->is_macro(), "only macro nodes expected here");
if (n->Opcode() == Op_VectorBox) {
VectorBoxNode* vbox = static_cast<VectorBoxNode*>(n);
expand_vbox_node(vbox);
if (C->failing()) return;
}
if (C->failing()) return;
macro_idx = MIN2(macro_idx - 1, C->macro_count() - 1);
}
}
void PhaseVector::expand_vunbox_nodes() {
if (C->failing()) return;
int macro_idx = C->macro_count() - 1;
while (macro_idx >= 0) {
Node * n = C->macro_node(macro_idx);
assert(n->is_macro(), "only macro nodes expected here");
if (n->Opcode() == Op_VectorUnbox) {
VectorUnboxNode* vec_unbox = static_cast<VectorUnboxNode*>(n);
expand_vunbox_node(vec_unbox);
if (C->failing()) return;
C->print_method(PHASE_EXPAND_VUNBOX, vec_unbox, 3);
}
if (C->failing()) return;
macro_idx = MIN2(macro_idx - 1, C->macro_count() - 1);
}
}
void PhaseVector::eliminate_vbox_alloc_nodes() {
if (C->failing()) return;
int macro_idx = C->macro_count() - 1;
while (macro_idx >= 0) {
Node * n = C->macro_node(macro_idx);
assert(n->is_macro(), "only macro nodes expected here");
if (n->Opcode() == Op_VectorBoxAllocate) {
VectorBoxAllocateNode* vbox_alloc = static_cast<VectorBoxAllocateNode*>(n);
eliminate_vbox_alloc_node(vbox_alloc);
if (C->failing()) return;
C->print_method(PHASE_ELIMINATE_VBOX_ALLOC, vbox_alloc, 3);
}
if (C->failing()) return;
macro_idx = MIN2(macro_idx - 1, C->macro_count() - 1);
}
}
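// Make a shallow copy of a safepoint's JVM state together with a copy of its
// SafePointNode map, so a GraphKit can be constructed at that call site.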
static JVMState* clone_jvms(Compile* C, SafePointNode* sfpt) {
JVMState* new_jvms = sfpt->jvms()->clone_shallow(C);
uint size = sfpt->req();
SafePointNode* map = new SafePointNode(size, new_jvms);
for (uint i = 0; i < size; i++) {
map->init_req(i, sfpt->in(i));
}
new_jvms->set_map(map);
return new_jvms;
}
void PhaseVector::scalarize_vbox_node(VectorBoxNode* vec_box) {
Node* vec_value = vec_box->in(VectorBoxNode::Value);
PhaseGVN& gvn = *C->initial_gvn();
// Process merged VBAs
if (EnableVectorAggressiveReboxing) {
Unique_Node_List calls(C->comp_arena());
for (DUIterator_Fast imax, i = vec_box->fast_outs(imax); i < imax; i++) {
Node* use = vec_box->fast_out(i);
if (use->is_CallJava()) {
CallJavaNode* call = use->as_CallJava();
if (call->has_non_debug_use(vec_box) && vec_box->in(VectorBoxNode::Box)->is_Phi()) {
calls.push(call);
}
}
}
while (calls.size() > 0) {
CallJavaNode* call = calls.pop()->as_CallJava();
// Attach new VBA to the call and use it instead of Phi (VBA ... VBA).
JVMState* jvms = clone_jvms(C, call);
GraphKit kit(jvms);
PhaseGVN& gvn = kit.gvn();
// Adjust JVMS from post-call to pre-call state: put args on stack
uint nargs = call->method()->arg_size();
kit.ensure_stack(kit.sp() + nargs);
for (uint i = TypeFunc::Parms; i < call->tf()->domain()->cnt(); i++) {
kit.push(call->in(i));
}
jvms = kit.sync_jvms();
Node* new_vbox = NULL;
{
PreserveReexecuteState prs(&kit);
kit.jvms()->set_should_reexecute(true);
const TypeInstPtr* vbox_type = vec_box->box_type();
const TypeVect* vect_type = vec_box->vec_type();
Node* vect = vec_box->in(VectorBoxNode::Value);
VectorBoxAllocateNode* alloc = new VectorBoxAllocateNode(C, vbox_type);
kit.set_edges_for_java_call(alloc, /*must_throw=*/false, /*separate_io_proj=*/true);
kit.make_slow_call_ex(alloc, C->env()->Throwable_klass(), /*separate_io_proj=*/true, /*deoptimize=*/true);
kit.set_i_o(gvn.transform( new ProjNode(alloc, TypeFunc::I_O) ));
kit.set_all_memory(gvn.transform( new ProjNode(alloc, TypeFunc::Memory) ));
Node* ret = gvn.transform(new ProjNode(alloc, TypeFunc::Parms));
new_vbox = gvn.transform(new VectorBoxNode(C, ret, vect, vbox_type, vect_type));
kit.replace_in_map(vec_box, new_vbox);
}
kit.dec_sp(nargs);
jvms = kit.sync_jvms();
call->set_req(TypeFunc::Control , kit.control());
call->set_req(TypeFunc::I_O , kit.i_o());
call->set_req(TypeFunc::Memory , kit.reset_memory());
call->set_req(TypeFunc::FramePtr, kit.frameptr());
call->replace_edge(vec_box, new_vbox);
C->record_for_igvn(call);
}
}
// Process debug uses at safepoints
Unique_Node_List safepoints(C->comp_arena());
for (DUIterator_Fast imax, i = vec_box->fast_outs(imax); i < imax; i++) {
Node* use = vec_box->fast_out(i);
if (use->is_SafePoint()) {
SafePointNode* sfpt = use->as_SafePoint();
if (!sfpt->is_Call() || !sfpt->as_Call()->has_non_debug_use(vec_box)) {
safepoints.push(sfpt);
}
}
}
while (safepoints.size() > 0) {
SafePointNode* sfpt = safepoints.pop()->as_SafePoint();
uint first_ind = (sfpt->req() - sfpt->jvms()->scloff());
Node* sobj = new SafePointScalarObjectNode(vec_box->box_type(),
#ifdef ASSERT
NULL,
#endif // ASSERT
first_ind, /*n_fields=*/1);
sobj->init_req(0, C->root());
sfpt->add_req(vec_value);
sobj = gvn.transform(sobj);
JVMState *jvms = sfpt->jvms();
jvms->set_endoff(sfpt->req());
// Now make a pass over the debug information replacing any references
// to the allocated object with "sobj"
int start = jvms->debug_start();
int end = jvms->debug_end();
sfpt->replace_edges_in_range(vec_box, sobj, start, end);
C->record_for_igvn(sfpt);
}
}
void PhaseVector::expand_vbox_node(VectorBoxNode* vec_box) {
if (vec_box->outcnt() > 0) {
Node* vbox = vec_box->in(VectorBoxNode::Box);
Node* vect = vec_box->in(VectorBoxNode::Value);
Node* result = expand_vbox_node_helper(vbox, vect, vec_box->box_type(), vec_box->vec_type());
C->gvn_replace_by(vec_box, result);
C->print_method(PHASE_EXPAND_VBOX, vec_box, 3);
}
C->remove_macro_node(vec_box);
}
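// Expand a (possibly Phi-merged) box/value pair: recurse through matching Phi
// inputs and turn each VectorBoxAllocate projection into a real allocation.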
Node* PhaseVector::expand_vbox_node_helper(Node* vbox,
Node* vect,
const TypeInstPtr* box_type,
const TypeVect* vect_type) {
if (vbox->is_Phi() && vect->is_Phi()) {
assert(vbox->as_Phi()->region() == vect->as_Phi()->region(), "");
Node* new_phi = new PhiNode(vbox->as_Phi()->region(), box_type);
for (uint i = 1; i < vbox->req(); i++) {
Node* new_box = expand_vbox_node_helper(vbox->in(i), vect->in(i), box_type, vect_type);
new_phi->set_req(i, new_box);
}
new_phi = C->initial_gvn()->transform(new_phi);
return new_phi;
} else if (vbox->is_Proj() && vbox->in(0)->Opcode() == Op_VectorBoxAllocate) {
VectorBoxAllocateNode* vbox_alloc = static_cast<VectorBoxAllocateNode*>(vbox->in(0));
return expand_vbox_alloc_node(vbox_alloc, vect, box_type, vect_type);
} else {
assert(!vbox->is_Phi(), "");
// TODO: assert that expanded vbox is initialized with the same value (vect).
return vbox; // already expanded
}
}
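// Box classes that need special payload handling: masks are stored as boolean
// arrays and shuffles as byte arrays.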
static bool is_vector_mask(ciKlass* klass) {
return klass->is_subclass_of(ciEnv::current()->vector_VectorMask_klass());
}
static bool is_vector_shuffle(ciKlass* klass) {
return klass->is_subclass_of(ciEnv::current()->vector_VectorShuffle_klass());
}
Node* PhaseVector::expand_vbox_alloc_node(VectorBoxAllocateNode* vbox_alloc,
Node* value,
const TypeInstPtr* box_type,
const TypeVect* vect_type) {
JVMState* jvms = clone_jvms(C, vbox_alloc);
GraphKit kit(jvms);
PhaseGVN& gvn = kit.gvn();
ciInstanceKlass* box_klass = box_type->klass()->as_instance_klass();
BasicType bt = vect_type->element_basic_type();
int num_elem = vect_type->length();
bool is_mask = is_vector_mask(box_klass);
if (is_mask && bt != T_BOOLEAN) {
value = gvn.transform(VectorStoreMaskNode::make(gvn, value, bt, num_elem));
// Although the type of a mask depends on its definition, in terms of storage everything is stored in a boolean array.
bt = T_BOOLEAN;
assert(value->as_Vector()->bottom_type()->is_vect()->element_basic_type() == bt,
"must be consistent with mask representation");
}
// Generate array allocation for the field which holds the values.
const TypeKlassPtr* array_klass = TypeKlassPtr::make(ciTypeArrayKlass::make(bt));
Node* arr = kit.new_array(kit.makecon(array_klass), kit.intcon(num_elem), 1);
// Store the vector value into the array.
// (The store should be captured by InitializeNode and turned into initialized store later.)
Node* arr_adr = kit.array_element_address(arr, kit.intcon(0), bt);
const TypePtr* arr_adr_type = arr_adr->bottom_type()->is_ptr();
Node* arr_mem = kit.memory(arr_adr);
Node* vstore = gvn.transform(StoreVectorNode::make(0,
kit.control(),
arr_mem,
arr_adr,
arr_adr_type,
value,
num_elem));
kit.set_memory(vstore, arr_adr_type);
C->set_max_vector_size(MAX2(C->max_vector_size(), vect_type->length_in_bytes()));
// Generate the allocate for the Vector object.
const TypeKlassPtr* klass_type = box_type->as_klass_type();
Node* klass_node = kit.makecon(klass_type);
Node* vec_obj = kit.new_instance(klass_node);
// Store the allocated array into object.
ciField* field = ciEnv::current()->vector_VectorPayload_klass()->get_field_by_name(ciSymbol::payload_name(),
ciSymbol::object_signature(),
false);
assert(field != NULL, "");
Node* vec_field = kit.basic_plus_adr(vec_obj, field->offset_in_bytes());
const TypePtr* vec_adr_type = vec_field->bottom_type()->is_ptr();
// The store should be captured by InitializeNode and turned into initialized store later.
Node* field_store = gvn.transform(kit.access_store_at(vec_obj,
vec_field,
vec_adr_type,
arr,
TypeOopPtr::make_from_klass(field->type()->as_klass()),
T_OBJECT,
IN_HEAP));
kit.set_memory(field_store, vec_adr_type);
kit.replace_call(vbox_alloc, vec_obj, true);
C->remove_macro_node(vbox_alloc);
return vec_obj;
}
void PhaseVector::expand_vunbox_node(VectorUnboxNode* vec_unbox) {
if (vec_unbox->outcnt() > 0) {
GraphKit kit;
PhaseGVN& gvn = kit.gvn();
Node* obj = vec_unbox->obj();
const TypeInstPtr* tinst = gvn.type(obj)->isa_instptr();
ciInstanceKlass* from_kls = tinst->klass()->as_instance_klass();
BasicType bt = vec_unbox->vect_type()->element_basic_type();
BasicType masktype = bt;
BasicType elem_bt;
if (is_vector_mask(from_kls)) {
bt = T_BOOLEAN;
} else if (is_vector_shuffle(from_kls)) {
if (vec_unbox->is_shuffle_to_vector() == true) {
elem_bt = bt;
}
bt = T_BYTE;
}
ciField* field = ciEnv::current()->vector_VectorPayload_klass()->get_field_by_name(ciSymbol::payload_name(),
ciSymbol::object_signature(),
false);
assert(field != NULL, "");
int offset = field->offset_in_bytes();
Node* vec_adr = kit.basic_plus_adr(obj, offset);
Node* mem = vec_unbox->mem();
Node* ctrl = vec_unbox->in(0);
Node* vec_field_ld = LoadNode::make(gvn,
ctrl,
mem,
vec_adr,
vec_adr->bottom_type()->is_ptr(),
TypeOopPtr::make_from_klass(field->type()->as_klass()),
T_OBJECT,
MemNode::unordered);
vec_field_ld = gvn.transform(vec_field_ld);
// For proper aliasing, attach concrete payload type.
ciKlass* payload_klass = ciTypeArrayKlass::make(bt);
const Type* payload_type = TypeAryPtr::make_from_klass(payload_klass)->cast_to_ptr_type(TypePtr::NotNull);
vec_field_ld = gvn.transform(new CastPPNode(vec_field_ld, payload_type));
Node* adr = kit.array_element_address(vec_field_ld, gvn.intcon(0), bt);
const TypePtr* adr_type = adr->bottom_type()->is_ptr();
const TypeVect* vt = vec_unbox->bottom_type()->is_vect();
int num_elem = vt->length();
Node* vec_val_load = LoadVectorNode::make(0,
ctrl,
mem,
adr,
adr_type,
num_elem,
bt);
vec_val_load = gvn.transform(vec_val_load);
C->set_max_vector_size(MAX2(C->max_vector_size(), vt->length_in_bytes()));
if (is_vector_mask(from_kls) && masktype != T_BOOLEAN) {
assert(vec_unbox->bottom_type()->is_vect()->element_basic_type() == masktype, "expect mask type consistency");
vec_val_load = gvn.transform(new VectorLoadMaskNode(vec_val_load, TypeVect::make(masktype, num_elem)));
} else if (is_vector_shuffle(from_kls)) {
if (vec_unbox->is_shuffle_to_vector() == false) {
assert(vec_unbox->bottom_type()->is_vect()->element_basic_type() == masktype, "expect shuffle type consistency");
vec_val_load = gvn.transform(new VectorLoadShuffleNode(vec_val_load, TypeVect::make(masktype, num_elem)));
} else if (elem_bt != T_BYTE) {
vec_val_load = gvn.transform(VectorCastNode::make(Op_VectorCastB2X, vec_val_load, elem_bt, num_elem));
}
}
gvn.hash_delete(vec_unbox);
vec_unbox->disconnect_inputs(C);
C->gvn_replace_by(vec_unbox, vec_val_load);
}
C->remove_macro_node(vec_unbox);
}
void PhaseVector::eliminate_vbox_alloc_node(VectorBoxAllocateNode* vbox_alloc) {
JVMState* jvms = clone_jvms(C, vbox_alloc);
GraphKit kit(jvms);
// Remove VBA, but leave a safepoint behind.
// Otherwise, it may end up with a loop without any safepoint polls.
kit.replace_call(vbox_alloc, kit.map(), true);
C->remove_macro_node(vbox_alloc);
}


@ -0,0 +1,62 @@
/*
* Copyright (c) 2020, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*
*/
#ifndef SHARE_OPTO_VECTOR_HPP
#define SHARE_OPTO_VECTOR_HPP
#include "opto/node.hpp"
#include "opto/phaseX.hpp"
#include "opto/type.hpp"
#include "opto/vectornode.hpp"
class PhaseVector : public Phase {
private:
PhaseIterGVN& _igvn;
void expand_vbox_nodes();
void expand_vbox_node(VectorBoxNode* vec_box);
Node* expand_vbox_node_helper(Node* vbox,
Node* vect,
const TypeInstPtr* box_type,
const TypeVect* vect_type);
Node* expand_vbox_alloc_node(VectorBoxAllocateNode* vbox_alloc,
Node* value,
const TypeInstPtr* box_type,
const TypeVect* vect_type);
void scalarize_vbox_nodes();
void scalarize_vbox_node(VectorBoxNode* vec_box);
void expand_vunbox_nodes();
void expand_vunbox_node(VectorUnboxNode* vec_box);
void eliminate_vbox_alloc_nodes();
void eliminate_vbox_alloc_node(VectorBoxAllocateNode* vbox_alloc);
void do_cleanup();
void scalarize_vector_boxes();
void expand_vector_boxes();
public:
PhaseVector(PhaseIterGVN& igvn) : Phase(Vector), _igvn(igvn) {}
void optimize_vector_boxes();
};
#endif // SHARE_OPTO_VECTOR_HPP

File diff suppressed because it is too large


@ -120,12 +120,51 @@ int VectorNode::opcode(int sopc, BasicType bt) {
case Op_AbsL:
assert(bt == T_LONG, "must be");
return Op_AbsVL;
case Op_MinI:
switch (bt) {
case T_BOOLEAN:
case T_CHAR: return 0;
case T_BYTE:
case T_SHORT:
case T_INT: return Op_MinV;
default: ShouldNotReachHere(); return 0;
}
case Op_MinL:
assert(bt == T_LONG, "must be");
return Op_MinV;
case Op_MinF:
assert(bt == T_FLOAT, "must be");
return Op_MinV;
case Op_MinD:
assert(bt == T_DOUBLE, "must be");
return Op_MinV;
case Op_MaxI:
switch (bt) {
case T_BOOLEAN:
case T_CHAR: return 0;
case T_BYTE:
case T_SHORT:
case T_INT: return Op_MaxV;
default: ShouldNotReachHere(); return 0;
}
case Op_MaxL:
assert(bt == T_LONG, "must be");
return Op_MaxV;
case Op_MaxF:
assert(bt == T_FLOAT, "must be");
return Op_MaxV;
case Op_MaxD:
assert(bt == T_DOUBLE, "must be");
return Op_MaxV;
case Op_AbsF:
assert(bt == T_FLOAT, "must be");
return Op_AbsVF;
case Op_AbsD:
assert(bt == T_DOUBLE, "must be");
return Op_AbsVD;
case Op_NegI:
assert(bt == T_INT, "must be");
return Op_NegVI;
case Op_NegF:
assert(bt == T_FLOAT, "must be");
return Op_NegVF;
@ -178,6 +217,12 @@ int VectorNode::opcode(int sopc, BasicType bt) {
case Op_RShiftL:
assert(bt == T_LONG, "must be");
return Op_RShiftVL;
case Op_URShiftB:
assert(bt == T_BYTE, "must be");
return Op_URShiftVB;
case Op_URShiftS:
assert(bt == T_SHORT, "must be");
return Op_URShiftVS;
case Op_URShiftI:
switch (bt) {
case T_BOOLEAN:return Op_URShiftVB;
@ -203,18 +248,6 @@ int VectorNode::opcode(int sopc, BasicType bt) {
case Op_XorI:
case Op_XorL:
return Op_XorV;
case Op_MinF:
assert(bt == T_FLOAT, "must be");
return Op_MinV;
case Op_MinD:
assert(bt == T_DOUBLE, "must be");
return Op_MinV;
case Op_MaxF:
assert(bt == T_FLOAT, "must be");
return Op_MaxV;
case Op_MaxD:
assert(bt == T_DOUBLE, "must be");
return Op_MaxV;
case Op_LoadB:
case Op_LoadUB:
@ -241,6 +274,28 @@ int VectorNode::opcode(int sopc, BasicType bt) {
}
}
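// Map an element basic type to the corresponding Replicate (broadcast) opcode.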
int VectorNode::replicate_opcode(BasicType bt) {
switch(bt) {
case T_BOOLEAN:
case T_BYTE:
return Op_ReplicateB;
case T_SHORT:
case T_CHAR:
return Op_ReplicateS;
case T_INT:
return Op_ReplicateI;
case T_LONG:
return Op_ReplicateL;
case T_FLOAT:
return Op_ReplicateF;
case T_DOUBLE:
return Op_ReplicateD;
default:
assert(false, "wrong type: %s", type2name(bt));
return 0;
}
}
// Also used to check if the code generator
// supports the vector operation.
bool VectorNode::implemented(int opc, uint vlen, BasicType bt) {
@ -331,6 +386,16 @@ bool VectorNode::is_shift(Node* n) {
}
}
bool VectorNode::is_vshift_cnt(Node* n) {
switch (n->Opcode()) {
case Op_LShiftCntV:
case Op_RShiftCntV:
return true;
default:
return false;
}
}
// Check if input is loop invariant vector.
bool VectorNode::is_invariant_vector(Node* n) {
// Only Replicate vector nodes are loop invariant for now.
@ -397,12 +462,10 @@ void VectorNode::vector_operands(Node* n, uint* start, uint* end) {
}
}
// Return the vector version of a scalar operation node.
VectorNode* VectorNode::make(int opc, Node* n1, Node* n2, uint vlen, BasicType bt) {
const TypeVect* vt = TypeVect::make(bt, vlen);
int vopc = VectorNode::opcode(opc, bt);
// Make a vector node for binary operation
VectorNode* VectorNode::make(int vopc, Node* n1, Node* n2, const TypeVect* vt) {
// This method should not be called for unimplemented vectors.
guarantee(vopc > 0, "Vector for '%s' is not implemented", NodeClassNames[opc]);
guarantee(vopc > 0, "vopc must be > 0");
switch (vopc) {
case Op_AddVB: return new AddVBNode(n1, n2, vt);
case Op_AddVS: return new AddVSNode(n1, n2, vt);
@ -428,13 +491,17 @@ VectorNode* VectorNode::make(int opc, Node* n1, Node* n2, uint vlen, BasicType b
case Op_DivVF: return new DivVFNode(n1, n2, vt);
case Op_DivVD: return new DivVDNode(n1, n2, vt);
case Op_MinV: return new MinVNode(n1, n2, vt);
case Op_MaxV: return new MaxVNode(n1, n2, vt);
case Op_AbsVF: return new AbsVFNode(n1, vt);
case Op_AbsVD: return new AbsVDNode(n1, vt);
case Op_AbsVB: return new AbsVBNode(n1, vt);
case Op_AbsVS: return new AbsVSNode(n1, vt);
case Op_AbsVI: return new AbsVINode(n1, vt);
case Op_AbsVL: return new AbsVLNode(n1, vt);
case Op_AbsVF: return new AbsVFNode(n1, vt);
case Op_AbsVD: return new AbsVDNode(n1, vt);
case Op_NegVI: return new NegVINode(n1, vt);
case Op_NegVF: return new NegVFNode(n1, vt);
case Op_NegVD: return new NegVDNode(n1, vt);
@ -464,9 +531,6 @@ VectorNode* VectorNode::make(int opc, Node* n1, Node* n2, uint vlen, BasicType b
case Op_OrV: return new OrVNode (n1, n2, vt);
case Op_XorV: return new XorVNode(n1, n2, vt);
case Op_MinV: return new MinVNode(n1, n2, vt);
case Op_MaxV: return new MaxVNode(n1, n2, vt);
case Op_RoundDoubleModeV: return new RoundDoubleModeVNode(n1, n2, vt);
case Op_MulAddVS2VI: return new MulAddVS2VINode(n1, n2, vt);
@ -476,11 +540,19 @@ VectorNode* VectorNode::make(int opc, Node* n1, Node* n2, uint vlen, BasicType b
}
}
VectorNode* VectorNode::make(int opc, Node* n1, Node* n2, Node* n3, uint vlen, BasicType bt) {
// Return the vector version of a scalar binary operation node.
VectorNode* VectorNode::make(int opc, Node* n1, Node* n2, uint vlen, BasicType bt) {
const TypeVect* vt = TypeVect::make(bt, vlen);
int vopc = VectorNode::opcode(opc, bt);
// This method should not be called for unimplemented vectors.
guarantee(vopc > 0, "Vector for '%s' is not implemented", NodeClassNames[opc]);
return make(vopc, n1, n2, vt);
}
// Make a vector node for ternary operation
VectorNode* VectorNode::make(int vopc, Node* n1, Node* n2, Node* n3, const TypeVect* vt) {
// This method should not be called for unimplemented vectors.
guarantee(vopc > 0, "vopc must be > 0");
switch (vopc) {
case Op_FmaVD: return new FmaVDNode(n1, n2, n3, vt);
case Op_FmaVF: return new FmaVFNode(n1, n2, n3, vt);
@ -490,6 +562,15 @@ VectorNode* VectorNode::make(int opc, Node* n1, Node* n2, Node* n3, uint vlen, B
}
}
// Return the vector version of a scalar ternary operation node.
VectorNode* VectorNode::make(int opc, Node* n1, Node* n2, Node* n3, uint vlen, BasicType bt) {
const TypeVect* vt = TypeVect::make(bt, vlen);
int vopc = VectorNode::opcode(opc, bt);
// This method should not be called for unimplemented vectors.
guarantee(vopc > 0, "Vector for '%s' is not implemented", NodeClassNames[opc]);
return make(vopc, n1, n2, n3, vt);
}
// Scalar promotion
VectorNode* VectorNode::scalar2vector(Node* s, uint vlen, const Type* opd_t) {
BasicType bt = opd_t->array_element_basic_type();
@ -516,21 +597,22 @@ VectorNode* VectorNode::scalar2vector(Node* s, uint vlen, const Type* opd_t) {
}
}
VectorNode* VectorNode::shift_count(Node* shift, Node* cnt, uint vlen, BasicType bt) {
assert(VectorNode::is_shift(shift), "sanity");
VectorNode* VectorNode::shift_count(int opc, Node* cnt, uint vlen, BasicType bt) {
// Match shift count type with shift vector type.
const TypeVect* vt = TypeVect::make(bt, vlen);
switch (shift->Opcode()) {
switch (opc) {
case Op_LShiftI:
case Op_LShiftL:
return new LShiftCntVNode(cnt, vt);
case Op_RShiftI:
case Op_RShiftL:
case Op_URShiftB:
case Op_URShiftS:
case Op_URShiftI:
case Op_URShiftL:
return new RShiftCntVNode(cnt, vt);
default:
fatal("Missed vector creation for '%s'", NodeClassNames[shift->Opcode()]);
fatal("Missed vector creation for '%s'", NodeClassNames[opc]);
return NULL;
}
}
@ -677,29 +759,37 @@ StoreVectorNode* StoreVectorNode::make(int opc, Node* ctl, Node* mem,
return new StoreVectorNode(ctl, mem, adr, atyp, val);
}
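// Map an element basic type to the corresponding Extract opcode.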
int ExtractNode::opcode(BasicType bt) {
switch (bt) {
case T_BOOLEAN: return Op_ExtractUB;
case T_BYTE: return Op_ExtractB;
case T_CHAR: return Op_ExtractC;
case T_SHORT: return Op_ExtractS;
case T_INT: return Op_ExtractI;
case T_LONG: return Op_ExtractL;
case T_FLOAT: return Op_ExtractF;
case T_DOUBLE: return Op_ExtractD;
default:
assert(false, "wrong type: %s", type2name(bt));
return 0;
}
}
// Extract a scalar element of a vector.
Node* ExtractNode::make(Node* v, uint position, BasicType bt) {
assert((int)position < Matcher::max_vector_size(bt), "pos in range");
ConINode* pos = ConINode::make((int)position);
switch (bt) {
case T_BOOLEAN:
return new ExtractUBNode(v, pos);
case T_BYTE:
return new ExtractBNode(v, pos);
case T_CHAR:
return new ExtractCNode(v, pos);
case T_SHORT:
return new ExtractSNode(v, pos);
case T_INT:
return new ExtractINode(v, pos);
case T_LONG:
return new ExtractLNode(v, pos);
case T_FLOAT:
return new ExtractFNode(v, pos);
case T_DOUBLE:
return new ExtractDNode(v, pos);
case T_BOOLEAN: return new ExtractUBNode(v, pos);
case T_BYTE: return new ExtractBNode(v, pos);
case T_CHAR: return new ExtractCNode(v, pos);
case T_SHORT: return new ExtractSNode(v, pos);
case T_INT: return new ExtractINode(v, pos);
case T_LONG: return new ExtractLNode(v, pos);
case T_FLOAT: return new ExtractFNode(v, pos);
case T_DOUBLE: return new ExtractDNode(v, pos);
default:
fatal("Type '%s' is not supported for vectors", type2name(bt));
assert(false, "wrong type: %s", type2name(bt));
return NULL;
}
}
@ -708,8 +798,16 @@ int ReductionNode::opcode(int opc, BasicType bt) {
int vopc = opc;
switch (opc) {
case Op_AddI:
assert(bt == T_INT, "must be");
vopc = Op_AddReductionVI;
switch (bt) {
case T_BOOLEAN:
case T_CHAR: return 0;
case T_BYTE:
case T_SHORT:
case T_INT:
vopc = Op_AddReductionVI;
break;
default: ShouldNotReachHere(); return 0;
}
break;
case Op_AddL:
assert(bt == T_LONG, "must be");
@ -724,8 +822,16 @@ int ReductionNode::opcode(int opc, BasicType bt) {
vopc = Op_AddReductionVD;
break;
case Op_MulI:
assert(bt == T_INT, "must be");
vopc = Op_MulReductionVI;
switch (bt) {
case T_BOOLEAN:
case T_CHAR: return 0;
case T_BYTE:
case T_SHORT:
case T_INT:
vopc = Op_MulReductionVI;
break;
default: ShouldNotReachHere(); return 0;
}
break;
case Op_MulL:
assert(bt == T_LONG, "must be");
@ -739,6 +845,22 @@ int ReductionNode::opcode(int opc, BasicType bt) {
assert(bt == T_DOUBLE, "must be");
vopc = Op_MulReductionVD;
break;
case Op_MinI:
switch (bt) {
case T_BOOLEAN:
case T_CHAR: return 0;
case T_BYTE:
case T_SHORT:
case T_INT:
vopc = Op_MinReductionV;
break;
default: ShouldNotReachHere(); return 0;
}
break;
case Op_MinL:
assert(bt == T_LONG, "must be");
vopc = Op_MinReductionV;
break;
case Op_MinF:
assert(bt == T_FLOAT, "must be");
vopc = Op_MinReductionV;
@ -747,6 +869,22 @@ int ReductionNode::opcode(int opc, BasicType bt) {
assert(bt == T_DOUBLE, "must be");
vopc = Op_MinReductionV;
break;
case Op_MaxI:
switch (bt) {
case T_BOOLEAN:
case T_CHAR: return 0;
case T_BYTE:
case T_SHORT:
case T_INT:
vopc = Op_MaxReductionV;
break;
default: ShouldNotReachHere(); return 0;
}
break;
case Op_MaxL:
assert(bt == T_LONG, "must be");
vopc = Op_MaxReductionV;
break;
case Op_MaxF:
assert(bt == T_FLOAT, "must be");
vopc = Op_MaxReductionV;
@ -756,24 +894,48 @@ int ReductionNode::opcode(int opc, BasicType bt) {
vopc = Op_MaxReductionV;
break;
case Op_AndI:
assert(bt == T_INT, "must be");
vopc = Op_AndReductionV;
switch (bt) {
case T_BOOLEAN:
case T_CHAR: return 0;
case T_BYTE:
case T_SHORT:
case T_INT:
vopc = Op_AndReductionV;
break;
default: ShouldNotReachHere(); return 0;
}
break;
case Op_AndL:
assert(bt == T_LONG, "must be");
vopc = Op_AndReductionV;
break;
case Op_OrI:
assert(bt == T_INT, "must be");
vopc = Op_OrReductionV;
switch(bt) {
case T_BOOLEAN:
case T_CHAR: return 0;
case T_BYTE:
case T_SHORT:
case T_INT:
vopc = Op_OrReductionV;
break;
default: ShouldNotReachHere(); return 0;
}
break;
case Op_OrL:
assert(bt == T_LONG, "must be");
vopc = Op_OrReductionV;
break;
case Op_XorI:
assert(bt == T_INT, "must be");
vopc = Op_XorReductionV;
switch(bt) {
case T_BOOLEAN:
case T_CHAR: return 0;
case T_BYTE:
case T_SHORT:
case T_INT:
vopc = Op_XorReductionV;
break;
default: ShouldNotReachHere(); return 0;
}
break;
case Op_XorL:
assert(bt == T_LONG, "must be");
@ -808,11 +970,116 @@ ReductionNode* ReductionNode::make(int opc, Node *ctrl, Node* n1, Node* n2, Basi
case Op_OrReductionV: return new OrReductionVNode(ctrl, n1, n2);
case Op_XorReductionV: return new XorReductionVNode(ctrl, n1, n2);
default:
fatal("Missed vector creation for '%s'", NodeClassNames[vopc]);
assert(false, "unknown node: %s", NodeClassNames[vopc]);
return NULL;
}
}
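// Build a VectorStoreMask node that converts a mask vector of the given element
// type into its boolean-array storage representation.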
VectorStoreMaskNode* VectorStoreMaskNode::make(PhaseGVN& gvn, Node* in, BasicType in_type, uint num_elem) {
assert(in->bottom_type()->isa_vect(), "sanity");
const TypeVect* vt = TypeVect::make(T_BOOLEAN, num_elem);
int elem_size = type2aelembytes(in_type);
return new VectorStoreMaskNode(in, gvn.intcon(elem_size), vt);
}
VectorCastNode* VectorCastNode::make(int vopc, Node* n1, BasicType bt, uint vlen) {
const TypeVect* vt = TypeVect::make(bt, vlen);
switch (vopc) {
case Op_VectorCastB2X: return new VectorCastB2XNode(n1, vt);
case Op_VectorCastS2X: return new VectorCastS2XNode(n1, vt);
case Op_VectorCastI2X: return new VectorCastI2XNode(n1, vt);
case Op_VectorCastL2X: return new VectorCastL2XNode(n1, vt);
case Op_VectorCastF2X: return new VectorCastF2XNode(n1, vt);
case Op_VectorCastD2X: return new VectorCastD2XNode(n1, vt);
default:
assert(false, "unknown node: %s", NodeClassNames[vopc]);
return NULL;
}
}
int VectorCastNode::opcode(BasicType bt) {
switch (bt) {
case T_BYTE: return Op_VectorCastB2X;
case T_SHORT: return Op_VectorCastS2X;
case T_INT: return Op_VectorCastI2X;
case T_LONG: return Op_VectorCastL2X;
case T_FLOAT: return Op_VectorCastF2X;
case T_DOUBLE: return Op_VectorCastD2X;
default:
assert(false, "unknown type: %s", type2name(bt));
return 0;
}
}
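// Identity element used to seed a reduction: 0 for add/or/xor, 1 for mul,
// all-ones for and, type MAX or +inf for min, type MIN or -inf for max.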
Node* ReductionNode::make_reduction_input(PhaseGVN& gvn, int opc, BasicType bt) {
int vopc = opcode(opc, bt);
guarantee(vopc != opc, "Vector reduction for '%s' is not implemented", NodeClassNames[opc]);
switch (vopc) {
case Op_AndReductionV:
switch (bt) {
case T_BYTE:
case T_SHORT:
case T_INT:
return gvn.makecon(TypeInt::MINUS_1);
case T_LONG:
return gvn.makecon(TypeLong::MINUS_1);
default:
fatal("Missed vector creation for '%s' as the basic type is not correct.", NodeClassNames[vopc]);
return NULL;
}
break;
case Op_AddReductionVI: // fallthrough
case Op_AddReductionVL: // fallthrough
case Op_AddReductionVF: // fallthrough
case Op_AddReductionVD:
case Op_OrReductionV:
case Op_XorReductionV:
return gvn.zerocon(bt);
case Op_MulReductionVI:
return gvn.makecon(TypeInt::ONE);
case Op_MulReductionVL:
return gvn.makecon(TypeLong::ONE);
case Op_MulReductionVF:
return gvn.makecon(TypeF::ONE);
case Op_MulReductionVD:
return gvn.makecon(TypeD::ONE);
case Op_MinReductionV:
switch (bt) {
case T_BYTE:
case T_SHORT:
case T_INT:
return gvn.makecon(TypeInt::MAX);
case T_LONG:
return gvn.makecon(TypeLong::MAX);
case T_FLOAT:
return gvn.makecon(TypeF::POS_INF);
case T_DOUBLE:
return gvn.makecon(TypeD::POS_INF);
default: Unimplemented(); return NULL;
}
break;
case Op_MaxReductionV:
switch (bt) {
case T_BYTE:
case T_SHORT:
case T_INT:
return gvn.makecon(TypeInt::MIN);
case T_LONG:
return gvn.makecon(TypeLong::MIN);
case T_FLOAT:
return gvn.makecon(TypeF::NEG_INF);
case T_DOUBLE:
return gvn.makecon(TypeD::NEG_INF);
default: Unimplemented(); return NULL;
}
break;
default:
fatal("Missed vector creation for '%s'", NodeClassNames[vopc]);
return NULL;
}
}
bool ReductionNode::implemented(int opc, uint vlen, BasicType bt) {
if (is_java_primitive(bt) &&
(vlen > 1) && is_power_of_2(vlen) &&
@ -824,7 +1091,7 @@ bool ReductionNode::implemented(int opc, uint vlen, BasicType bt) {
}
MacroLogicVNode* MacroLogicVNode::make(PhaseGVN& gvn, Node* in1, Node* in2, Node* in3,
uint truth_table, const TypeVect* vt) {
uint truth_table, const TypeVect* vt) {
assert(truth_table <= 0xFF, "invalid");
assert(in1->bottom_type()->is_vect()->length_in_bytes() == vt->length_in_bytes(), "mismatch");
assert(in2->bottom_type()->is_vect()->length_in_bytes() == vt->length_in_bytes(), "mismatch");
@ -895,3 +1162,51 @@ Node* RotateRightVNode::Ideal(PhaseGVN* phase, bool can_reshape) {
return NULL;
}
#ifndef PRODUCT
void VectorMaskCmpNode::dump_spec(outputStream *st) const {
st->print(" %d #", _predicate); _type->dump_on(st);
}
#endif // PRODUCT
Node* VectorReinterpretNode::Identity(PhaseGVN *phase) {
Node* n = in(1);
if (n->Opcode() == Op_VectorReinterpret) {
if (Type::cmp(bottom_type(), n->in(1)->bottom_type()) == 0) {
return n->in(1);
}
}
return this;
}
Node* VectorInsertNode::make(Node* vec, Node* new_val, int position) {
assert(position < (int)vec->bottom_type()->is_vect()->length(), "pos in range");
ConINode* pos = ConINode::make(position);
return new VectorInsertNode(vec, new_val, pos, vec->bottom_type()->is_vect());
}
Node* VectorUnboxNode::Identity(PhaseGVN *phase) {
Node* n = obj()->uncast();
if (EnableVectorReboxing && n->Opcode() == Op_VectorBox) {
if (Type::cmp(bottom_type(), n->in(VectorBoxNode::Value)->bottom_type()) == 0) {
return n->in(VectorBoxNode::Value);
}
}
return this;
}
const TypeFunc* VectorBoxNode::vec_box_type(const TypeInstPtr* box_type) {
const Type** fields = TypeTuple::fields(0);
const TypeTuple *domain = TypeTuple::make(TypeFunc::Parms, fields);
fields = TypeTuple::fields(1);
fields[TypeFunc::Parms+0] = box_type;
const TypeTuple *range = TypeTuple::make(TypeFunc::Parms+1, fields);
return TypeFunc::make(domain, range);
}
#ifndef PRODUCT
void VectorBoxAllocateNode::dump_spec(outputStream *st) const {
CallStaticJavaNode::dump_spec(st);
}
#endif // !PRODUCT


@ -24,6 +24,7 @@
#ifndef SHARE_OPTO_VECTORNODE_HPP
#define SHARE_OPTO_VECTORNODE_HPP
#include "opto/callnode.hpp"
#include "opto/matcher.hpp"
#include "opto/memnode.hpp"
#include "opto/node.hpp"
@ -68,13 +69,17 @@ class VectorNode : public TypeNode {
virtual uint ideal_reg() const { return Matcher::vector_ideal_reg(vect_type()->length_in_bytes()); }
static VectorNode* scalar2vector(Node* s, uint vlen, const Type* opd_t);
static VectorNode* shift_count(Node* shift, Node* cnt, uint vlen, BasicType bt);
static VectorNode* shift_count(int opc, Node* cnt, uint vlen, BasicType bt);
static VectorNode* make(int opc, Node* n1, Node* n2, uint vlen, BasicType bt);
static VectorNode* make(int vopc, Node* n1, Node* n2, const TypeVect* vt);
static VectorNode* make(int opc, Node* n1, Node* n2, Node* n3, uint vlen, BasicType bt);
static VectorNode* make(int vopc, Node* n1, Node* n2, Node* n3, const TypeVect* vt);
static int opcode(int opc, BasicType bt);
static int replicate_opcode(BasicType bt);
static bool implemented(int opc, uint vlen, BasicType bt);
static bool is_shift(Node* n);
static bool is_vshift_cnt(Node* n);
static bool is_type_transition_short_to_int(Node* n);
static bool is_type_transition_to_int(Node* n);
static bool is_muladds2i(Node* n);
@ -160,9 +165,10 @@ class ReductionNode : public Node {
static ReductionNode* make(int opc, Node *ctrl, Node* in1, Node* in2, BasicType bt);
static int opcode(int opc, BasicType bt);
static bool implemented(int opc, uint vlen, BasicType bt);
static Node* make_reduction_input(PhaseGVN& gvn, int opc, BasicType bt);
virtual const Type* bottom_type() const {
BasicType vbt = in(2)->bottom_type()->is_vect()->element_basic_type();
BasicType vbt = in(1)->bottom_type()->basic_type();
return Type::get_const_basic_type(vbt);
}
@ -172,13 +178,11 @@ class ReductionNode : public Node {
};
//------------------------------AddReductionVINode--------------------------------------
// Vector add int as a reduction
// Vector add byte, short and int as a reduction
class AddReductionVINode : public ReductionNode {
public:
AddReductionVINode(Node * ctrl, Node* in1, Node* in2) : ReductionNode(ctrl, in1, in2) {}
virtual int Opcode() const;
virtual const Type* bottom_type() const { return TypeInt::INT; }
virtual uint ideal_reg() const { return Op_RegI; }
};
//------------------------------AddReductionVLNode--------------------------------------
@ -187,8 +191,6 @@ class AddReductionVLNode : public ReductionNode {
public:
AddReductionVLNode(Node *ctrl, Node* in1, Node* in2) : ReductionNode(ctrl, in1, in2) {}
virtual int Opcode() const;
virtual const Type* bottom_type() const { return TypeLong::LONG; }
virtual uint ideal_reg() const { return Op_RegL; }
};
//------------------------------AddReductionVFNode--------------------------------------
@ -197,8 +199,6 @@ class AddReductionVFNode : public ReductionNode {
public:
AddReductionVFNode(Node *ctrl, Node* in1, Node* in2) : ReductionNode(ctrl, in1, in2) {}
virtual int Opcode() const;
virtual const Type* bottom_type() const { return Type::FLOAT; }
virtual uint ideal_reg() const { return Op_RegF; }
};
//------------------------------AddReductionVDNode--------------------------------------
@ -207,8 +207,6 @@ class AddReductionVDNode : public ReductionNode {
public:
AddReductionVDNode(Node *ctrl, Node* in1, Node* in2) : ReductionNode(ctrl, in1, in2) {}
virtual int Opcode() const;
virtual const Type* bottom_type() const { return Type::DOUBLE; }
virtual uint ideal_reg() const { return Op_RegD; }
};
//------------------------------SubVBNode--------------------------------------
@ -348,13 +346,11 @@ public:
};
//------------------------------MulReductionVINode--------------------------------------
// Vector multiply int as a reduction
// Vector multiply byte, short and int as a reduction
class MulReductionVINode : public ReductionNode {
public:
MulReductionVINode(Node *ctrl, Node* in1, Node* in2) : ReductionNode(ctrl, in1, in2) {}
virtual int Opcode() const;
virtual const Type* bottom_type() const { return TypeInt::INT; }
virtual uint ideal_reg() const { return Op_RegI; }
};
//------------------------------MulReductionVLNode--------------------------------------
@ -363,8 +359,6 @@ class MulReductionVLNode : public ReductionNode {
public:
MulReductionVLNode(Node *ctrl, Node* in1, Node* in2) : ReductionNode(ctrl, in1, in2) {}
virtual int Opcode() const;
virtual const Type* bottom_type() const { return TypeLong::LONG; }
virtual uint ideal_reg() const { return Op_RegI; }
};
//------------------------------MulReductionVFNode--------------------------------------
@ -373,8 +367,6 @@ class MulReductionVFNode : public ReductionNode {
public:
MulReductionVFNode(Node *ctrl, Node* in1, Node* in2) : ReductionNode(ctrl, in1, in2) {}
virtual int Opcode() const;
virtual const Type* bottom_type() const { return Type::FLOAT; }
virtual uint ideal_reg() const { return Op_RegF; }
};
//------------------------------MulReductionVDNode--------------------------------------
@ -383,8 +375,6 @@ class MulReductionVDNode : public ReductionNode {
public:
MulReductionVDNode(Node *ctrl, Node* in1, Node* in2) : ReductionNode(ctrl, in1, in2) {}
virtual int Opcode() const;
virtual const Type* bottom_type() const { return Type::DOUBLE; }
virtual uint ideal_reg() const { return Op_RegD; }
};
//------------------------------DivVFNode--------------------------------------
@ -419,10 +409,26 @@ public:
virtual int Opcode() const;
};
//------------------------------MinVNode--------------------------------------
// Vector Min
class MinVNode : public VectorNode {
public:
MinVNode(Node* in1, Node* in2, const TypeVect* vt) : VectorNode(in1, in2, vt) {}
virtual int Opcode() const;
};
//------------------------------MaxVNode--------------------------------------
// Vector Max
class MaxVNode : public VectorNode {
public:
MaxVNode(Node* in1, Node* in2, const TypeVect* vt) : VectorNode(in1, in2, vt) {}
virtual int Opcode() const;
};
//------------------------------AbsVINode--------------------------------------
// Vector Abs int
class AbsVINode : public VectorNode {
public:
public:
AbsVINode(Node* in, const TypeVect* vt) : VectorNode(in, vt) {}
virtual int Opcode() const;
};
@ -451,6 +457,14 @@ class AbsVDNode : public VectorNode {
virtual int Opcode() const;
};
//------------------------------NegVINode--------------------------------------
// Vector Neg int
class NegVINode : public VectorNode {
public:
NegVINode(Node* in, const TypeVect* vt) : VectorNode(in, vt) {}
virtual int Opcode() const;
};
//------------------------------NegVFNode--------------------------------------
// Vector Neg float
class NegVFNode : public VectorNode {
@ -618,14 +632,38 @@ class AndVNode : public VectorNode {
virtual int Opcode() const;
};
//------------------------------AndReductionVNode--------------------------------------
// Vector and byte, short, int, long as a reduction
class AndReductionVNode : public ReductionNode {
public:
AndReductionVNode(Node *ctrl, Node* in1, Node* in2) : ReductionNode(ctrl, in1, in2) {}
virtual int Opcode() const;
};
//------------------------------OrVNode---------------------------------------
// Vector or integer
// Vector or byte, short, int, long
class OrVNode : public VectorNode {
public:
OrVNode(Node* in1, Node* in2, const TypeVect* vt) : VectorNode(in1,in2,vt) {}
virtual int Opcode() const;
};
//------------------------------OrReductionVNode--------------------------------------
// Vector or byte, short, int, long as a reduction
class OrReductionVNode : public ReductionNode {
public:
OrReductionVNode(Node *ctrl, Node* in1, Node* in2) : ReductionNode(ctrl, in1, in2) {}
virtual int Opcode() const;
};
//------------------------------XorReductionVNode--------------------------------------
// Vector xor byte, short, int, long as a reduction
class XorReductionVNode : public ReductionNode {
public:
XorReductionVNode(Node *ctrl, Node* in1, Node* in2) : ReductionNode(ctrl, in1, in2) {}
virtual int Opcode() const;
};
//------------------------------XorVNode---------------------------------------
// Vector xor integer
class XorVNode : public VectorNode {
@ -634,48 +672,8 @@ class XorVNode : public VectorNode {
virtual int Opcode() const;
};
//------------------------------AndReductionVNode--------------------------------------
// Vector and int, long as a reduction
class AndReductionVNode : public ReductionNode {
public:
AndReductionVNode(Node *ctrl, Node* in1, Node* in2) : ReductionNode(ctrl, in1, in2) {}
virtual int Opcode() const;
};
//------------------------------OrReductionVNode--------------------------------------
// Vector or int, long as a reduction
class OrReductionVNode : public ReductionNode {
public:
OrReductionVNode(Node *ctrl, Node* in1, Node* in2) : ReductionNode(ctrl, in1, in2) {}
virtual int Opcode() const;
};
//------------------------------XorReductionVNode--------------------------------------
// Vector xor int, long as a reduction
class XorReductionVNode : public ReductionNode {
public:
XorReductionVNode(Node *ctrl, Node* in1, Node* in2) : ReductionNode(ctrl, in1, in2) {}
virtual int Opcode() const;
};
//------------------------------MinVNode--------------------------------------
// Vector min
class MinVNode : public VectorNode {
public:
MinVNode(Node* in1, Node* in2, const TypeVect* vt) : VectorNode(in1, in2, vt) {}
virtual int Opcode() const;
};
//------------------------------MaxVNode--------------------------------------
// Vector max
class MaxVNode : public VectorNode {
public:
MaxVNode(Node* in1, Node* in2, const TypeVect* vt) : VectorNode(in1, in2, vt) {}
virtual int Opcode() const;
};
//------------------------------MinReductionVNode--------------------------------------
// Vector min as a reduction
// Vector min byte, short, int, long, float, double as a reduction
class MinReductionVNode : public ReductionNode {
public:
MinReductionVNode(Node *ctrl, Node* in1, Node* in2) : ReductionNode(ctrl, in1, in2) {}
@ -683,7 +681,7 @@ public:
};
//------------------------------MaxReductionVNode--------------------------------------
// Vector max as a reduction
// Vector max byte, short, int, long, float, double as a reduction
class MaxReductionVNode : public ReductionNode {
public:
MaxReductionVNode(Node *ctrl, Node* in1, Node* in2) : ReductionNode(ctrl, in1, in2) {}
@ -720,13 +718,28 @@ class LoadVectorNode : public LoadNode {
uint element_size(void) { return type2aelembytes(vect_type()->element_basic_type()); }
};
//------------------------------LoadVectorGatherNode------------------------------
// Load Vector from memory via index map
class LoadVectorGatherNode : public LoadVectorNode {
public:
LoadVectorGatherNode(Node* c, Node* mem, Node* adr, const TypePtr* at, const TypeVect* vt, Node* indices)
: LoadVectorNode(c, mem, adr, at, vt) {
init_class_id(Class_LoadVectorGather);
assert(indices->bottom_type()->is_vect(), "indices must be in vector");
add_req(indices);
assert(req() == MemNode::ValueIn + 1, "match_edge expects that last input is in MemNode::ValueIn");
}
virtual int Opcode() const;
virtual uint match_edge(uint idx) const { return idx == MemNode::Address || idx == MemNode::ValueIn; }
};
//------------------------------StoreVectorNode--------------------------------
// Store Vector to memory
class StoreVectorNode : public StoreNode {
public:
StoreVectorNode(Node* c, Node* mem, Node* adr, const TypePtr* at, Node* val)
: StoreNode(c, mem, adr, at, val, MemNode::unordered) {
assert(val->is_Vector() || val->is_LoadVector(), "sanity");
init_class_id(Class_StoreVector);
set_mismatched_access();
}
@ -747,6 +760,23 @@ class StoreVectorNode : public StoreNode {
uint element_size(void) { return type2aelembytes(vect_type()->element_basic_type()); }
};
//------------------------------StoreVectorScatterNode------------------------------
// Store Vector into memory via index map
class StoreVectorScatterNode : public StoreVectorNode {
public:
StoreVectorScatterNode(Node* c, Node* mem, Node* adr, const TypePtr* at, Node* val, Node* indices)
: StoreVectorNode(c, mem, adr, at, val) {
init_class_id(Class_StoreVectorScatter);
assert(indices->bottom_type()->is_vect(), "indices must be in vector");
add_req(indices);
assert(req() == MemNode::ValueIn + 2, "match_edge expects that last input is in MemNode::ValueIn+1");
}
virtual int Opcode() const;
virtual uint match_edge(uint idx) const { return idx == MemNode::Address ||
idx == MemNode::ValueIn ||
idx == MemNode::ValueIn + 1; }
};
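For orientation, a minimal sketch of the Java-level operations these two nodes intrinsify, assuming the incubator API's index-map overloads of fromArray/intoArray (IntVector, SPECIES_PREFERRED and the method names below come from the public jdk.incubator.vector surface added elsewhere in this change, not from this hunk):

import jdk.incubator.vector.IntVector;
import jdk.incubator.vector.VectorSpecies;

class GatherScatterSketch {
    static final VectorSpecies<Integer> SPECIES = IntVector.SPECIES_PREFERRED;

    // Gathers src[indexMap[i]] into lanes (LoadVectorGather), then scatters the
    // lanes back to dst[indexMap[i]] (StoreVectorScatter). Bounds checks omitted.
    static void permuteCopy(int[] src, int[] dst, int[] indexMap) {
        IntVector v = IntVector.fromArray(SPECIES, src, 0, indexMap, 0);
        v.intoArray(dst, 0, indexMap, 0);
    }
}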
//=========================Promote_Scalar_to_Vector============================
@ -888,6 +918,12 @@ class Pack2DNode : public PackNode {
};
class VectorLoadConstNode : public VectorNode {
public:
VectorLoadConstNode(Node* in1, const TypeVect* vt) : VectorNode(in1, vt) {}
virtual int Opcode() const;
};
//========================Extract_Scalar_from_Vector===========================
//------------------------------ExtractNode------------------------------------
@ -901,6 +937,7 @@ class ExtractNode : public Node {
uint pos() const { return in(2)->get_int(); }
static Node* make(Node* v, uint position, BasicType bt);
static int opcode(BasicType bt);
};
//------------------------------ExtractBNode-----------------------------------
@ -929,7 +966,7 @@ class ExtractCNode : public ExtractNode {
public:
ExtractCNode(Node* src, ConINode* pos) : ExtractNode(src, pos) {}
virtual int Opcode() const;
virtual const Type *bottom_type() const { return TypeInt::INT; }
virtual const Type *bottom_type() const { return TypeInt::CHAR; }
virtual uint ideal_reg() const { return Op_RegI; }
};
@ -939,7 +976,7 @@ class ExtractSNode : public ExtractNode {
public:
ExtractSNode(Node* src, ConINode* pos) : ExtractNode(src, pos) {}
virtual int Opcode() const;
virtual const Type *bottom_type() const { return TypeInt::INT; }
virtual const Type *bottom_type() const { return TypeInt::SHORT; }
virtual uint ideal_reg() const { return Op_RegI; }
};
@ -1007,6 +1044,286 @@ public:
static MacroLogicVNode* make(PhaseGVN& igvn, Node* in1, Node* in2, Node* in3, uint truth_table, const TypeVect* vt);
};
class VectorMaskCmpNode : public VectorNode {
private:
BoolTest::mask _predicate;
protected:
uint size_of() const { return sizeof(*this); }
public:
VectorMaskCmpNode(BoolTest::mask predicate, Node* in1, Node* in2, ConINode* predicate_node, const TypeVect* vt) :
VectorNode(in1, in2, predicate_node, vt),
_predicate(predicate) {
assert(in1->bottom_type()->is_vect()->element_basic_type() == in2->bottom_type()->is_vect()->element_basic_type(),
"VectorMaskCmp inputs must have same type for elements");
assert(in1->bottom_type()->is_vect()->length() == in2->bottom_type()->is_vect()->length(),
"VectorMaskCmp inputs must have same number of elements");
init_class_id(Class_VectorMaskCmp);
}
virtual int Opcode() const;
virtual uint hash() const { return VectorNode::hash() + _predicate; }
virtual bool cmp( const Node &n ) const {
return VectorNode::cmp(n) && _predicate == ((VectorMaskCmpNode&)n)._predicate;
}
BoolTest::mask get_predicate() { return _predicate; }
#ifndef PRODUCT
virtual void dump_spec(outputStream *st) const;
#endif // !PRODUCT
};
// Used to wrap other vector nodes in order to add masking functionality.
class VectorMaskWrapperNode : public VectorNode {
public:
VectorMaskWrapperNode(Node* vector, Node* mask)
: VectorNode(vector, mask, vector->bottom_type()->is_vect()) {
assert(mask->is_VectorMaskCmp(), "VectorMaskWrapper requires that second argument be a mask");
}
virtual int Opcode() const;
Node* vector_val() const { return in(1); }
Node* vector_mask() const { return in(2); }
};
class VectorTestNode : public Node {
private:
BoolTest::mask _predicate;
protected:
uint size_of() const { return sizeof(*this); }
public:
VectorTestNode( Node *in1, Node *in2, BoolTest::mask predicate) : Node(NULL, in1, in2), _predicate(predicate) {
assert(in1->is_Vector() || in1->is_LoadVector(), "must be vector");
assert(in2->is_Vector() || in2->is_LoadVector(), "must be vector");
assert(in1->bottom_type()->is_vect()->element_basic_type() == in2->bottom_type()->is_vect()->element_basic_type(),
"same type elements are needed");
assert(in1->bottom_type()->is_vect()->length() == in2->bottom_type()->is_vect()->length(),
"same number of elements is needed");
}
virtual int Opcode() const;
virtual uint hash() const { return Node::hash() + _predicate; }
virtual bool cmp( const Node &n ) const {
return Node::cmp(n) && _predicate == ((VectorTestNode&)n)._predicate;
}
virtual const Type *bottom_type() const { return TypeInt::BOOL; }
virtual uint ideal_reg() const { return Op_RegI; } // TODO: should be RegFlags, but the middle end lacks comparison flags for BoolTest,
// so we produce a boolean result directly.
BoolTest::mask get_predicate() const { return _predicate; }
};
class VectorBlendNode : public VectorNode {
public:
VectorBlendNode(Node* vec1, Node* vec2, Node* mask)
: VectorNode(vec1, vec2, mask, vec1->bottom_type()->is_vect()) {
// assert(mask->is_VectorMask(), "VectorBlendNode requires that third argument be a mask");
}
virtual int Opcode() const;
Node* vec1() const { return in(1); }
Node* vec2() const { return in(2); }
Node* vec_mask() const { return in(3); }
};
class VectorRearrangeNode : public VectorNode {
public:
VectorRearrangeNode(Node* vec1, Node* shuffle)
: VectorNode(vec1, shuffle, vec1->bottom_type()->is_vect()) {
// assert(mask->is_VectorMask(), "VectorBlendNode requires that third argument be a mask");
}
virtual int Opcode() const;
Node* vec1() const { return in(1); }
Node* vec_shuffle() const { return in(2); }
};
class VectorLoadMaskNode : public VectorNode {
public:
VectorLoadMaskNode(Node* in, const TypeVect* vt)
: VectorNode(in, vt) {
assert(in->is_LoadVector(), "expected load vector");
assert(in->as_LoadVector()->vect_type()->element_basic_type() == T_BOOLEAN, "must be boolean");
}
virtual int Opcode() const;
};
class VectorLoadShuffleNode : public VectorNode {
public:
VectorLoadShuffleNode(Node* in, const TypeVect* vt)
: VectorNode(in, vt) {
assert(in->is_LoadVector(), "expected load vector");
assert(in->as_LoadVector()->vect_type()->element_basic_type() == T_BYTE, "must be BYTE");
}
int GetOutShuffleSize() const { return type2aelembytes(vect_type()->element_basic_type()); }
virtual int Opcode() const;
};
class VectorStoreMaskNode : public VectorNode {
protected:
VectorStoreMaskNode(Node* in1, ConINode* in2, const TypeVect* vt)
: VectorNode(in1, in2, vt) { }
public:
virtual int Opcode() const;
static VectorStoreMaskNode* make(PhaseGVN& gvn, Node* in, BasicType in_type, uint num_elem);
};
// This is intended for use as a simple reinterpret node that has no cast.
class VectorReinterpretNode : public VectorNode {
private:
const TypeVect* _src_vt;
protected:
uint size_of() const { return sizeof(*this); }
public:
VectorReinterpretNode(Node* in, const TypeVect* src_vt, const TypeVect* dst_vt)
: VectorNode(in, dst_vt), _src_vt(src_vt) { }
virtual uint hash() const { return VectorNode::hash() + _src_vt->hash(); }
virtual bool cmp( const Node &n ) const {
return VectorNode::cmp(n) && !Type::cmp(_src_vt,((VectorReinterpretNode&)n)._src_vt);
}
virtual Node *Identity(PhaseGVN *phase);
virtual int Opcode() const;
};
class VectorCastNode : public VectorNode {
public:
VectorCastNode(Node* in, const TypeVect* vt) : VectorNode(in, vt) {}
virtual int Opcode() const;
static VectorCastNode* make(int vopc, Node* n1, BasicType bt, uint vlen);
static int opcode(BasicType bt);
static bool implemented(BasicType bt, uint vlen);
};
class VectorCastB2XNode : public VectorCastNode {
public:
VectorCastB2XNode(Node* in, const TypeVect* vt) : VectorCastNode(in, vt) {
assert(in->bottom_type()->is_vect()->element_basic_type() == T_BYTE, "must be byte");
}
virtual int Opcode() const;
};
class VectorCastS2XNode : public VectorCastNode {
public:
VectorCastS2XNode(Node* in, const TypeVect* vt) : VectorCastNode(in, vt) {
assert(in->bottom_type()->is_vect()->element_basic_type() == T_SHORT, "must be short");
}
virtual int Opcode() const;
};
class VectorCastI2XNode : public VectorCastNode {
public:
VectorCastI2XNode(Node* in, const TypeVect* vt) : VectorCastNode(in, vt) {
assert(in->bottom_type()->is_vect()->element_basic_type() == T_INT, "must be int");
}
virtual int Opcode() const;
};
class VectorCastL2XNode : public VectorCastNode {
public:
VectorCastL2XNode(Node* in, const TypeVect* vt) : VectorCastNode(in, vt) {
assert(in->bottom_type()->is_vect()->element_basic_type() == T_LONG, "must be long");
}
virtual int Opcode() const;
};
class VectorCastF2XNode : public VectorCastNode {
public:
VectorCastF2XNode(Node* in, const TypeVect* vt) : VectorCastNode(in, vt) {
assert(in->bottom_type()->is_vect()->element_basic_type() == T_FLOAT, "must be float");
}
virtual int Opcode() const;
};
class VectorCastD2XNode : public VectorCastNode {
public:
VectorCastD2XNode(Node* in, const TypeVect* vt) : VectorCastNode(in, vt) {
assert(in->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE, "must be double");
}
virtual int Opcode() const;
};
class VectorInsertNode : public VectorNode {
public:
VectorInsertNode(Node* vsrc, Node* new_val, ConINode* pos, const TypeVect* vt) : VectorNode(vsrc, new_val, (Node*)pos, vt) {
assert(pos->get_int() >= 0, "positive constants");
assert(pos->get_int() < (int)vt->length(), "index must be less than vector length");
assert(Type::cmp(vt, vsrc->bottom_type()) == 0, "input and output must be same type");
}
virtual int Opcode() const;
uint pos() const { return in(3)->get_int(); }
static Node* make(Node* vec, Node* new_val, int position);
};
class VectorBoxNode : public Node {
private:
const TypeInstPtr* const _box_type;
const TypeVect* const _vec_type;
public:
enum {
Box = 1,
Value = 2
};
VectorBoxNode(Compile* C, Node* box, Node* val,
const TypeInstPtr* box_type, const TypeVect* vt)
: Node(NULL, box, val), _box_type(box_type), _vec_type(vt) {
init_flags(Flag_is_macro);
C->add_macro_node(this);
}
const TypeInstPtr* box_type() const { assert(_box_type != NULL, ""); return _box_type; };
const TypeVect* vec_type() const { assert(_vec_type != NULL, ""); return _vec_type; };
virtual int Opcode() const;
virtual const Type* bottom_type() const { return _box_type; }
virtual uint ideal_reg() const { return box_type()->ideal_reg(); }
virtual uint size_of() const { return sizeof(*this); }
static const TypeFunc* vec_box_type(const TypeInstPtr* box_type);
};
class VectorBoxAllocateNode : public CallStaticJavaNode {
public:
VectorBoxAllocateNode(Compile* C, const TypeInstPtr* vbox_type)
: CallStaticJavaNode(C, VectorBoxNode::vec_box_type(vbox_type), NULL, NULL, -1) {
init_flags(Flag_is_macro);
C->add_macro_node(this);
}
virtual int Opcode() const;
#ifndef PRODUCT
virtual void dump_spec(outputStream *st) const;
#endif // !PRODUCT
};
class VectorUnboxNode : public VectorNode {
private:
bool _shuffle_to_vector;
protected:
uint size_of() const { return sizeof(*this); }
public:
VectorUnboxNode(Compile* C, const TypeVect* vec_type, Node* obj, Node* mem, bool shuffle_to_vector)
: VectorNode(mem, obj, vec_type) {
_shuffle_to_vector = shuffle_to_vector;
init_flags(Flag_is_macro);
C->add_macro_node(this);
}
virtual int Opcode() const;
Node* obj() const { return in(2); }
Node* mem() const { return in(1); }
virtual Node *Identity(PhaseGVN *phase);
bool is_shuffle_to_vector() { return _shuffle_to_vector; }
};
class RotateRightVNode : public VectorNode {
public:
RotateRightVNode(Node* in1, Node* in2, const TypeVect* vt)

View file

@ -118,6 +118,7 @@ extern "C" {
void JNICALL JVM_RegisterMethodHandleMethods(JNIEnv *env, jclass unsafecls);
void JNICALL JVM_RegisterPerfMethods(JNIEnv *env, jclass perfclass);
void JNICALL JVM_RegisterWhiteBoxMethods(JNIEnv *env, jclass wbclass);
void JNICALL JVM_RegisterVectorSupportMethods(JNIEnv *env, jclass vsclass);
#if INCLUDE_JVMCI
jobject JNICALL JVM_GetJVMCIRuntime(JNIEnv *env, jclass c);
void JNICALL JVM_RegisterJVMCINatives(JNIEnv *env, jclass compilerToVMClass);
@ -132,6 +133,7 @@ static JNINativeMethod lookup_special_native_methods[] = {
{ CC"Java_java_lang_invoke_MethodHandleNatives_registerNatives", NULL, FN_PTR(JVM_RegisterMethodHandleMethods) },
{ CC"Java_jdk_internal_perf_Perf_registerNatives", NULL, FN_PTR(JVM_RegisterPerfMethods) },
{ CC"Java_sun_hotspot_WhiteBox_registerNatives", NULL, FN_PTR(JVM_RegisterWhiteBoxMethods) },
{ CC"Java_jdk_internal_vm_vector_VectorSupport_registerNatives", NULL, FN_PTR(JVM_RegisterVectorSupportMethods)},
#if INCLUDE_JVMCI
{ CC"Java_jdk_vm_ci_runtime_JVMCI_initializeRuntime", NULL, FN_PTR(JVM_GetJVMCIRuntime) },
{ CC"Java_jdk_vm_ci_hotspot_CompilerToVM_registerNatives", NULL, FN_PTR(JVM_RegisterJVMCINatives) },

View file

@ -0,0 +1,429 @@
/*
* Copyright (c) 2020, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*
*/
#include "precompiled.hpp"
#include "jni.h"
#include "jvm.h"
#include "classfile/javaClasses.inline.hpp"
#include "code/location.hpp"
#include "prims/vectorSupport.hpp"
#include "runtime/fieldDescriptor.inline.hpp"
#include "runtime/handles.inline.hpp"
#include "runtime/interfaceSupport.inline.hpp"
#include "runtime/jniHandles.inline.hpp"
#include "runtime/stackValue.hpp"
#ifdef COMPILER2
#include "opto/matcher.hpp" // Matcher::max_vector_size(BasicType)
#endif // COMPILER2
bool VectorSupport::is_vector(Klass* klass) {
return klass->is_subclass_of(SystemDictionary::vector_VectorPayload_klass());
}
bool VectorSupport::is_vector_mask(Klass* klass) {
return klass->is_subclass_of(SystemDictionary::vector_VectorMask_klass());
}
bool VectorSupport::is_vector_shuffle(Klass* klass) {
return klass->is_subclass_of(SystemDictionary::vector_VectorShuffle_klass());
}
BasicType VectorSupport::klass2bt(InstanceKlass* ik) {
assert(ik->is_subclass_of(SystemDictionary::vector_VectorPayload_klass()), "%s not a VectorPayload", ik->name()->as_C_string());
fieldDescriptor fd; // find_field initializes fd if found
// static final Class<?> ETYPE;
Klass* holder = ik->find_field(vmSymbols::ETYPE_name(), vmSymbols::class_signature(), &fd);
assert(holder != NULL, "sanity");
assert(fd.is_static(), "");
assert(fd.offset() > 0, "");
if (is_vector_shuffle(ik)) {
return T_BYTE;
} else { // vector and mask
oop value = ik->java_mirror()->obj_field(fd.offset());
BasicType elem_bt = java_lang_Class::as_BasicType(value);
return elem_bt;
}
}
jint VectorSupport::klass2length(InstanceKlass* ik) {
fieldDescriptor fd; // find_field initializes fd if found
// static final int VLENGTH;
Klass* holder = ik->find_field(vmSymbols::VLENGTH_name(), vmSymbols::int_signature(), &fd);
assert(holder != NULL, "sanity");
assert(fd.is_static(), "");
assert(fd.offset() > 0, "");
jint vlen = ik->java_mirror()->int_field(fd.offset());
assert(vlen > 0, "");
return vlen;
}
void VectorSupport::init_vector_array(typeArrayOop arr, BasicType elem_bt, int num_elem, address value_addr) {
int elem_size = type2aelembytes(elem_bt);
for (int i = 0; i < num_elem; i++) {
switch (elem_bt) {
case T_BYTE: {
jbyte elem_value = *(jbyte*) (value_addr + i * elem_size);
arr->byte_at_put(i, elem_value);
break;
}
case T_SHORT: {
jshort elem_value = *(jshort*) (value_addr + i * elem_size);
arr->short_at_put(i, elem_value);
break;
}
case T_INT: {
jint elem_value = *(jint*) (value_addr + i * elem_size);
arr->int_at_put(i, elem_value);
break;
}
case T_LONG: {
jlong elem_value = *(jlong*) (value_addr + i * elem_size);
arr->long_at_put(i, elem_value);
break;
}
case T_FLOAT: {
jfloat elem_value = *(jfloat*) (value_addr + i * elem_size);
arr->float_at_put(i, elem_value);
break;
}
case T_DOUBLE: {
jdouble elem_value = *(jdouble*) (value_addr + i * elem_size);
arr->double_at_put(i, elem_value);
break;
}
default:
fatal("unsupported: %s", type2name(elem_bt));
}
}
}
void VectorSupport::init_mask_array(typeArrayOop arr, BasicType elem_bt, int num_elem, address value_addr) {
int elem_size = type2aelembytes(elem_bt);
for (int i = 0; i < num_elem; i++) {
switch (elem_bt) {
case T_BYTE: {
jbyte elem_value = *(jbyte*) (value_addr + i * elem_size);
arr->bool_at_put(i, elem_value != 0);
break;
}
case T_SHORT: {
jshort elem_value = *(jshort*) (value_addr + i * elem_size);
arr->bool_at_put(i, elem_value != 0);
break;
}
case T_INT: // fall-through
case T_FLOAT: {
jint elem_value = *(jint*) (value_addr + i * elem_size);
arr->bool_at_put(i, elem_value != 0);
break;
}
case T_LONG: // fall-through
case T_DOUBLE: {
jlong elem_value = *(jlong*) (value_addr + i * elem_size);
arr->bool_at_put(i, elem_value != 0);
break;
}
default:
fatal("unsupported: %s", type2name(elem_bt));
}
}
}
oop VectorSupport::allocate_vector_payload_helper(InstanceKlass* ik, BasicType elem_bt, int num_elem, address value_addr, TRAPS) {
bool is_mask = is_vector_mask(ik);
// On-heap vector values are represented as primitive arrays.
TypeArrayKlass* tak = TypeArrayKlass::cast(Universe::typeArrayKlassObj(is_mask ? T_BOOLEAN : elem_bt));
typeArrayOop arr = tak->allocate(num_elem, CHECK_NULL); // safepoint
if (is_mask) {
init_mask_array(arr, elem_bt, num_elem, value_addr);
} else {
init_vector_array(arr, elem_bt, num_elem, value_addr);
}
return arr;
}
oop VectorSupport::allocate_vector(InstanceKlass* ik, frame* fr, RegisterMap* reg_map, ObjectValue* ov, TRAPS) {
assert(is_vector(ik), "%s not a vector", ik->name()->as_C_string());
assert(ov->field_size() == 1, "%s not a vector", ik->name()->as_C_string());
// Vector value in an aligned adjacent tuple (1, 2, 4, 8, or 16 slots).
LocationValue* loc_value = ov->field_at(0)->as_LocationValue();
BasicType elem_bt = klass2bt(ik);
int num_elem = klass2length(ik);
Handle vbox = ik->allocate_instance_handle(CHECK_NULL);
Location loc = loc_value->location();
oop payload = NULL;
if (loc.type() == Location::vector) {
address value_addr = loc.is_register()
// Value was in a callee-save register
? reg_map->location(VMRegImpl::as_VMReg(loc.register_number()))
// Else value was directly saved on the stack. The frame's original stack pointer,
// before any extension by its callee (due to Compiler1 linkage on SPARC), must be used.
: ((address)fr->unextended_sp()) + loc.stack_offset();
payload = allocate_vector_payload_helper(ik, elem_bt, num_elem, value_addr, CHECK_NULL); // safepoint
} else {
// assert(false, "interesting");
StackValue* value = StackValue::create_stack_value(fr, reg_map, loc_value);
payload = value->get_obj()();
}
vector_VectorPayload::set_payload(vbox(), payload);
return vbox();
}
#ifdef COMPILER2
int VectorSupport::vop2ideal(jint id, BasicType bt) {
VectorOperation vop = (VectorOperation)id;
switch (vop) {
case VECTOR_OP_ADD: {
switch (bt) {
case T_BYTE: // fall-through
case T_SHORT: // fall-through
case T_INT: return Op_AddI;
case T_LONG: return Op_AddL;
case T_FLOAT: return Op_AddF;
case T_DOUBLE: return Op_AddD;
default: fatal("ADD: %s", type2name(bt));
}
break;
}
case VECTOR_OP_SUB: {
switch (bt) {
case T_BYTE: // fall-through
case T_SHORT: // fall-through
case T_INT: return Op_SubI;
case T_LONG: return Op_SubL;
case T_FLOAT: return Op_SubF;
case T_DOUBLE: return Op_SubD;
default: fatal("SUB: %s", type2name(bt));
}
break;
}
case VECTOR_OP_MUL: {
switch (bt) {
case T_BYTE: // fall-through
case T_SHORT: // fall-through
case T_INT: return Op_MulI;
case T_LONG: return Op_MulL;
case T_FLOAT: return Op_MulF;
case T_DOUBLE: return Op_MulD;
default: fatal("MUL: %s", type2name(bt));
}
break;
}
case VECTOR_OP_DIV: {
switch (bt) {
case T_BYTE: // fall-through
case T_SHORT: // fall-through
case T_INT: return Op_DivI;
case T_LONG: return Op_DivL;
case T_FLOAT: return Op_DivF;
case T_DOUBLE: return Op_DivD;
default: fatal("DIV: %s", type2name(bt));
}
break;
}
case VECTOR_OP_MIN: {
switch (bt) {
case T_BYTE:
case T_SHORT:
case T_INT: return Op_MinI;
case T_LONG: return Op_MinL;
case T_FLOAT: return Op_MinF;
case T_DOUBLE: return Op_MinD;
default: fatal("MIN: %s", type2name(bt));
}
break;
}
case VECTOR_OP_MAX: {
switch (bt) {
case T_BYTE:
case T_SHORT:
case T_INT: return Op_MaxI;
case T_LONG: return Op_MaxL;
case T_FLOAT: return Op_MaxF;
case T_DOUBLE: return Op_MaxD;
default: fatal("MAX: %s", type2name(bt));
}
break;
}
case VECTOR_OP_ABS: {
switch (bt) {
case T_BYTE: // fall-through
case T_SHORT: // fall-through
case T_INT: return Op_AbsI;
case T_LONG: return Op_AbsL;
case T_FLOAT: return Op_AbsF;
case T_DOUBLE: return Op_AbsD;
default: fatal("ABS: %s", type2name(bt));
}
break;
}
case VECTOR_OP_NEG: {
switch (bt) {
case T_BYTE: // fall-through
case T_SHORT: // fall-through
case T_INT: return Op_NegI;
case T_FLOAT: return Op_NegF;
case T_DOUBLE: return Op_NegD;
default: fatal("NEG: %s", type2name(bt));
}
break;
}
case VECTOR_OP_AND: {
switch (bt) {
case T_BYTE: // fall-through
case T_SHORT: // fall-through
case T_INT: return Op_AndI;
case T_LONG: return Op_AndL;
default: fatal("AND: %s", type2name(bt));
}
break;
}
case VECTOR_OP_OR: {
switch (bt) {
case T_BYTE: // fall-through
case T_SHORT: // fall-through
case T_INT: return Op_OrI;
case T_LONG: return Op_OrL;
default: fatal("OR: %s", type2name(bt));
}
break;
}
case VECTOR_OP_XOR: {
switch (bt) {
case T_BYTE: // fall-through
case T_SHORT: // fall-through
case T_INT: return Op_XorI;
case T_LONG: return Op_XorL;
default: fatal("XOR: %s", type2name(bt));
}
break;
}
case VECTOR_OP_SQRT: {
switch (bt) {
case T_FLOAT: return Op_SqrtF;
case T_DOUBLE: return Op_SqrtD;
default: fatal("SQRT: %s", type2name(bt));
}
break;
}
case VECTOR_OP_FMA: {
switch (bt) {
case T_FLOAT: return Op_FmaF;
case T_DOUBLE: return Op_FmaD;
default: fatal("FMA: %s", type2name(bt));
}
break;
}
case VECTOR_OP_LSHIFT: {
switch (bt) {
case T_BYTE: // fall-through
case T_SHORT: // fall-through
case T_INT: return Op_LShiftI;
case T_LONG: return Op_LShiftL;
default: fatal("LSHIFT: %s", type2name(bt));
}
break;
}
case VECTOR_OP_RSHIFT: {
switch (bt) {
case T_BYTE: // fall-through
case T_SHORT: // fall-through
case T_INT: return Op_RShiftI;
case T_LONG: return Op_RShiftL;
default: fatal("RSHIFT: %s", type2name(bt));
}
break;
}
case VECTOR_OP_URSHIFT: {
switch (bt) {
case T_BYTE: return Op_URShiftB;
case T_SHORT: return Op_URShiftS;
case T_INT: return Op_URShiftI;
case T_LONG: return Op_URShiftL;
default: fatal("URSHIFT: %s", type2name(bt));
}
break;
}
default: fatal("unknown op: %d", vop);
}
return 0; // Unimplemented
}
#endif // COMPILER2
/**
* Implementation of the jdk.internal.vm.vector.VectorSupport class
*/
JVM_ENTRY(jint, VectorSupport_GetMaxLaneCount(JNIEnv *env, jclass vsclazz, jobject clazz)) {
#ifdef COMPILER2
oop mirror = JNIHandles::resolve_non_null(clazz);
if (java_lang_Class::is_primitive(mirror)) {
BasicType bt = java_lang_Class::primitive_type(mirror);
return Matcher::max_vector_size(bt);
}
#endif // COMPILER2
return -1;
} JVM_END
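A hedged illustration of the Java side of this native entry point: with C2 available it reports the maximum lane count for a primitive element type, and -1 otherwise, leaving the caller to fall back to a default. The snippet ignores the fact that jdk.internal.vm.vector is only exported to jdk.incubator.vector, so it is a sketch rather than code that compiles in an arbitrary module:

class MaxLaneProbe {
    static int probe() {
        // getMaxLaneCount is the native method registered below; a negative result
        // means the JIT cannot report a vector size (for example, C2 is absent).
        return jdk.internal.vm.vector.VectorSupport.getMaxLaneCount(int.class);
    }
}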
// JVM_RegisterVectorSupportMethods
#define LANG "Ljava/lang/"
#define CLS LANG "Class;"
#define CC (char*) /*cast a literal from (const char*)*/
#define FN_PTR(f) CAST_FROM_FN_PTR(void*, &f)
static JNINativeMethod jdk_internal_vm_vector_VectorSupport_methods[] = {
{CC "getMaxLaneCount", CC "(" CLS ")I", FN_PTR(VectorSupport_GetMaxLaneCount)}
};
#undef CC
#undef FN_PTR
#undef LANG
#undef CLS
// This function is exported, used by NativeLookup.
JVM_ENTRY(void, JVM_RegisterVectorSupportMethods(JNIEnv* env, jclass vsclass)) {
ThreadToNativeFromVM ttnfv(thread);
int ok = env->RegisterNatives(vsclass, jdk_internal_vm_vector_VectorSupport_methods, sizeof(jdk_internal_vm_vector_VectorSupport_methods)/sizeof(JNINativeMethod));
guarantee(ok == 0, "register jdk.internal.vm.vector.VectorSupport natives");
} JVM_END

View file

@ -0,0 +1,90 @@
/*
* Copyright (c) 2020, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*
*/
#ifndef SHARE_PRIMS_VECTORSUPPORT_HPP
#define SHARE_PRIMS_VECTORSUPPORT_HPP
#include "jni.h"
#include "code/debugInfo.hpp"
#include "memory/allocation.hpp"
#include "oops/typeArrayOop.inline.hpp"
#include "runtime/frame.inline.hpp"
#include "runtime/registerMap.hpp"
#include "utilities/exceptions.hpp"
extern "C" {
void JNICALL JVM_RegisterVectorSupportMethods(JNIEnv* env, jclass vsclass);
}
class VectorSupport : AllStatic {
private:
static void init_mask_array(typeArrayOop arr, BasicType elem_bt, int num_elem, address value_addr);
static void init_vector_array(typeArrayOop arr, BasicType elem_bt, int num_elem, address value_addr);
static oop allocate_vector_payload_helper(InstanceKlass* ik, BasicType elem_bt, int num_elem, address value_addr, TRAPS);
static BasicType klass2bt(InstanceKlass* ik);
static jint klass2length(InstanceKlass* ik);
public:
// Should be aligned with constants in jdk.internal.vm.vector.VectorSupport
enum VectorOperation {
// Unary
VECTOR_OP_ABS = 0,
VECTOR_OP_NEG = 1,
VECTOR_OP_SQRT = 2,
// Binary
VECTOR_OP_ADD = 4,
VECTOR_OP_SUB = 5,
VECTOR_OP_MUL = 6,
VECTOR_OP_DIV = 7,
VECTOR_OP_MIN = 8,
VECTOR_OP_MAX = 9,
VECTOR_OP_AND = 10,
VECTOR_OP_OR = 11,
VECTOR_OP_XOR = 12,
// Ternary
VECTOR_OP_FMA = 13,
// Broadcast int
VECTOR_OP_LSHIFT = 14,
VECTOR_OP_RSHIFT = 15,
VECTOR_OP_URSHIFT = 16,
// Convert
VECTOR_OP_CAST = 17,
VECTOR_OP_REINTERPRET = 18
};
static int vop2ideal(jint vop, BasicType bt);
static oop allocate_vector(InstanceKlass* holder, frame* fr, RegisterMap* reg_map, ObjectValue* sv, TRAPS);
static bool is_vector(Klass* klass);
static bool is_vector_mask(Klass* klass);
static bool is_vector_shuffle(Klass* klass);
};
#endif // SHARE_PRIMS_VECTORSUPPORT_HPP

View file

@ -4195,7 +4195,23 @@ jint Arguments::apply_ergo() {
if (!UseBiasedLocking) {
UseOptoBiasInlining = false;
}
#endif
if (!EnableVectorSupport) {
if (!FLAG_IS_DEFAULT(EnableVectorReboxing) && EnableVectorReboxing) {
warning("Disabling EnableVectorReboxing since EnableVectorSupport is turned off.");
}
FLAG_SET_DEFAULT(EnableVectorReboxing, false);
if (!FLAG_IS_DEFAULT(EnableVectorAggressiveReboxing) && EnableVectorAggressiveReboxing) {
if (!EnableVectorReboxing) {
warning("Disabling EnableVectorAggressiveReboxing since EnableVectorReboxing is turned off.");
} else {
warning("Disabling EnableVectorAggressiveReboxing since EnableVectorSupport is turned off.");
}
}
FLAG_SET_DEFAULT(EnableVectorAggressiveReboxing, false);
}
#endif // COMPILER2
if (FLAG_IS_CMDLINE(DiagnoseSyncOnPrimitiveWrappers)) {
if (DiagnoseSyncOnPrimitiveWrappers == ObjectSynchronizer::LOG_WARNING && !log_is_enabled(Info, primitivewrappers)) {

View file

@ -49,6 +49,7 @@
#include "oops/typeArrayOop.inline.hpp"
#include "oops/verifyOopClosure.hpp"
#include "prims/jvmtiThreadState.hpp"
#include "prims/vectorSupport.hpp"
#include "prims/methodHandles.hpp"
#include "runtime/atomic.hpp"
#include "runtime/biasedLocking.hpp"
@ -1015,7 +1016,15 @@ bool Deoptimization::realloc_objects(JavaThread* thread, frame* fr, RegisterMap*
#endif // INCLUDE_JVMCI || INCLUDE_AOT
InstanceKlass* ik = InstanceKlass::cast(k);
if (obj == NULL) {
#ifdef COMPILER2
if (EnableVectorSupport && VectorSupport::is_vector(ik)) {
obj = VectorSupport::allocate_vector(ik, fr, reg_map, sv, THREAD);
} else {
obj = ik->allocate_instance(THREAD);
}
#else
obj = ik->allocate_instance(THREAD);
#endif // COMPILER2
}
} else if (k->is_typeArray_klass()) {
TypeArrayKlass* ak = TypeArrayKlass::cast(k);
@ -1352,6 +1361,11 @@ void Deoptimization::reassign_fields(frame* fr, RegisterMap* reg_map, GrowableAr
continue;
}
#endif // INCLUDE_JVMCI || INCLUDE_AOT
#ifdef COMPILER2
if (EnableVectorSupport && VectorSupport::is_vector(k)) {
continue; // skip field reassignment for vectors
}
#endif
if (k->is_instance_klass()) {
InstanceKlass* ik = InstanceKlass::cast(k);
reassign_fields_by_klass(ik, fr, reg_map, sv, 0, obj(), skip_internal);

View file

@ -150,8 +150,12 @@ StackValue* StackValue::create_stack_value(const frame* fr, const RegisterMap* r
value.ji = *(jint*)value_addr;
return new StackValue(value.p);
}
case Location::invalid:
case Location::invalid: {
return new StackValue();
}
case Location::vector: {
ShouldNotReachHere(); // should be handled by Deoptimization::realloc_objects()
}
default:
ShouldNotReachHere();
}
@ -222,7 +226,7 @@ void StackValue::print_on(outputStream* st) const {
st->print("NULL");
}
st->print(" <" INTPTR_FORMAT ">", p2i(_handle_value()));
break;
break;
case T_CONFLICT:
st->print("conflict");

View file

@ -1502,6 +1502,8 @@ typedef HashtableEntry<InstanceKlass*, mtClass> KlassHashtableEntry;
declare_c2_type(MaxNode, AddNode) \
declare_c2_type(MaxINode, MaxNode) \
declare_c2_type(MinINode, MaxNode) \
declare_c2_type(MaxLNode, MaxNode) \
declare_c2_type(MinLNode, MaxNode) \
declare_c2_type(MaxFNode, MaxNode) \
declare_c2_type(MinFNode, MaxNode) \
declare_c2_type(MaxDNode, MaxNode) \
@ -1736,6 +1738,8 @@ typedef HashtableEntry<InstanceKlass*, mtClass> KlassHashtableEntry;
declare_c2_type(AbsDNode, AbsNode) \
declare_c2_type(CmpLTMaskNode, Node) \
declare_c2_type(NegNode, Node) \
declare_c2_type(NegINode, NegNode) \
declare_c2_type(NegLNode, NegNode) \
declare_c2_type(NegFNode, NegNode) \
declare_c2_type(NegDNode, NegNode) \
declare_c2_type(AtanDNode, Node) \
@ -1745,10 +1749,12 @@ typedef HashtableEntry<InstanceKlass*, mtClass> KlassHashtableEntry;
declare_c2_type(ReverseBytesLNode, Node) \
declare_c2_type(ReductionNode, Node) \
declare_c2_type(VectorNode, Node) \
declare_c2_type(AbsVBNode, VectorNode) \
declare_c2_type(AbsVSNode, VectorNode) \
declare_c2_type(AbsVINode, VectorNode) \
declare_c2_type(AbsVLNode, VectorNode) \
declare_c2_type(AbsVFNode, VectorNode) \
declare_c2_type(AbsVDNode, VectorNode) \
declare_c2_type(AbsVBNode, VectorNode) \
declare_c2_type(AbsVSNode, VectorNode) \
declare_c2_type(AbsVINode, VectorNode) \
declare_c2_type(AbsVLNode, VectorNode) \
declare_c2_type(AddVBNode, VectorNode) \
declare_c2_type(AddVSNode, VectorNode) \
declare_c2_type(AddVINode, VectorNode) \
@ -1774,6 +1780,7 @@ typedef HashtableEntry<InstanceKlass*, mtClass> KlassHashtableEntry;
declare_c2_type(MulVFNode, VectorNode) \
declare_c2_type(MulReductionVFNode, ReductionNode) \
declare_c2_type(MulVDNode, VectorNode) \
declare_c2_type(NegVINode, VectorNode) \
declare_c2_type(NegVFNode, VectorNode) \
declare_c2_type(NegVDNode, VectorNode) \
declare_c2_type(FmaVDNode, VectorNode) \
@ -1796,6 +1803,8 @@ typedef HashtableEntry<InstanceKlass*, mtClass> KlassHashtableEntry;
declare_c2_type(URShiftVSNode, VectorNode) \
declare_c2_type(URShiftVINode, VectorNode) \
declare_c2_type(URShiftVLNode, VectorNode) \
declare_c2_type(MinReductionVNode, ReductionNode) \
declare_c2_type(MaxReductionVNode, ReductionNode) \
declare_c2_type(AndVNode, VectorNode) \
declare_c2_type(AndReductionVNode, ReductionNode) \
declare_c2_type(OrVNode, VectorNode) \
@ -1804,8 +1813,6 @@ typedef HashtableEntry<InstanceKlass*, mtClass> KlassHashtableEntry;
declare_c2_type(XorReductionVNode, ReductionNode) \
declare_c2_type(MaxVNode, VectorNode) \
declare_c2_type(MinVNode, VectorNode) \
declare_c2_type(MaxReductionVNode, ReductionNode) \
declare_c2_type(MinReductionVNode, ReductionNode) \
declare_c2_type(LoadVectorNode, LoadNode) \
declare_c2_type(StoreVectorNode, StoreNode) \
declare_c2_type(ReplicateBNode, VectorNode) \
@ -1847,6 +1854,27 @@ typedef HashtableEntry<InstanceKlass*, mtClass> KlassHashtableEntry;
declare_c2_type(CopySignFNode, Node) \
declare_c2_type(SignumDNode, Node) \
declare_c2_type(SignumFNode, Node) \
declare_c2_type(LoadVectorGatherNode, LoadVectorNode) \
declare_c2_type(StoreVectorScatterNode, StoreVectorNode) \
declare_c2_type(VectorLoadMaskNode, VectorNode) \
declare_c2_type(VectorLoadShuffleNode, VectorNode) \
declare_c2_type(VectorStoreMaskNode, VectorNode) \
declare_c2_type(VectorBlendNode, VectorNode) \
declare_c2_type(VectorRearrangeNode, VectorNode) \
declare_c2_type(VectorMaskWrapperNode, VectorNode) \
declare_c2_type(VectorMaskCmpNode, VectorNode) \
declare_c2_type(VectorCastB2XNode, VectorNode) \
declare_c2_type(VectorCastS2XNode, VectorNode) \
declare_c2_type(VectorCastI2XNode, VectorNode) \
declare_c2_type(VectorCastL2XNode, VectorNode) \
declare_c2_type(VectorCastF2XNode, VectorNode) \
declare_c2_type(VectorCastD2XNode, VectorNode) \
declare_c2_type(VectorInsertNode, VectorNode) \
declare_c2_type(VectorUnboxNode, VectorNode) \
declare_c2_type(VectorReinterpretNode, VectorNode) \
declare_c2_type(VectorBoxNode, Node) \
declare_c2_type(VectorBoxAllocateNode, CallStaticJavaNode) \
declare_c2_type(VectorTestNode, Node) \
\
/*********************/ \
/* Adapter Blob Entries */ \

View file

@ -237,6 +237,9 @@ inline size_t heap_word_size(size_t byte_size) {
return (byte_size + (HeapWordSize-1)) >> LogHeapWordSize;
}
inline jfloat jfloat_cast(jint x);
inline jdouble jdouble_cast(jlong x);
//-------------------------------------------
// Constant for jlong (standardized by C++11)
@ -247,6 +250,13 @@ inline size_t heap_word_size(size_t byte_size) {
const jlong min_jlong = CONST64(0x8000000000000000);
const jlong max_jlong = CONST64(0x7fffffffffffffff);
//-------------------------------------------
// Constant for jdouble
const jlong min_jlongDouble = CONST64(0x0000000000000001);
const jdouble min_jdouble = jdouble_cast(min_jlongDouble);
const jlong max_jlongDouble = CONST64(0x7fefffffffffffff);
const jdouble max_jdouble = jdouble_cast(max_jlongDouble);
const size_t K = 1024;
const size_t M = K*K;
const size_t G = M*K;
@ -469,6 +479,11 @@ const jshort max_jshort = (1 << 15) - 1; // largest jshort
const jint min_jint = (jint)1 << (sizeof(jint)*BitsPerByte-1); // 0x80000000 == smallest jint
const jint max_jint = (juint)min_jint - 1; // 0x7FFFFFFF == largest jint
const jint min_jintFloat = (jint)(0x00000001);
const jfloat min_jfloat = jfloat_cast(min_jintFloat);
const jint max_jintFloat = (jint)(0x7f7fffff);
const jfloat max_jfloat = jfloat_cast(max_jintFloat);
//----------------------------------------------------------------------------------------------------
// JVM spec restrictions
@ -673,6 +688,14 @@ inline bool is_reference_type(BasicType t) {
return (t == T_OBJECT || t == T_ARRAY);
}
inline bool is_integral_type(BasicType t) {
return is_subword_type(t) || t == T_INT || t == T_LONG;
}
inline bool is_floating_point_type(BasicType t) {
return (t == T_FLOAT || t == T_DOUBLE);
}
extern char type2char_tab[T_CONFLICT+1]; // Map a BasicType to a jchar
inline char type2char(BasicType t) { return (uint)t < T_CONFLICT+1 ? type2char_tab[t] : 0; }
extern int type2size[T_CONFLICT+1]; // Map BasicType to result stack elements

View file

@ -0,0 +1,468 @@
/*
* Copyright (c) 2020, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*/
package jdk.internal.vm.vector;
import jdk.internal.vm.annotation.IntrinsicCandidate;
import jdk.internal.misc.Unsafe;
import jdk.internal.vm.annotation.ForceInline;
import java.nio.Buffer;
import java.nio.ByteBuffer;
import java.util.Objects;
import java.util.function.*;
public class VectorSupport {
static {
registerNatives();
}
private static final Unsafe U = Unsafe.getUnsafe();
// Unary
public static final int VECTOR_OP_ABS = 0;
public static final int VECTOR_OP_NEG = 1;
public static final int VECTOR_OP_SQRT = 2;
// Binary
public static final int VECTOR_OP_ADD = 4;
public static final int VECTOR_OP_SUB = 5;
public static final int VECTOR_OP_MUL = 6;
public static final int VECTOR_OP_DIV = 7;
public static final int VECTOR_OP_MIN = 8;
public static final int VECTOR_OP_MAX = 9;
public static final int VECTOR_OP_AND = 10;
public static final int VECTOR_OP_OR = 11;
public static final int VECTOR_OP_XOR = 12;
// Ternary
public static final int VECTOR_OP_FMA = 13;
// Broadcast int
public static final int VECTOR_OP_LSHIFT = 14;
public static final int VECTOR_OP_RSHIFT = 15;
public static final int VECTOR_OP_URSHIFT = 16;
public static final int VECTOR_OP_CAST = 17;
public static final int VECTOR_OP_REINTERPRET = 18;
// enum BoolTest
public static final int BT_eq = 0;
public static final int BT_ne = 4;
public static final int BT_le = 5;
public static final int BT_ge = 7;
public static final int BT_lt = 3;
public static final int BT_gt = 1;
public static final int BT_overflow = 2;
public static final int BT_no_overflow = 6;
// BasicType codes, for primitives only:
public static final int
T_FLOAT = 6,
T_DOUBLE = 7,
T_BYTE = 8,
T_SHORT = 9,
T_INT = 10,
T_LONG = 11;
/* ============================================================================ */
public static class VectorSpecies<E> {}
public static class VectorPayload {
private final Object payload; // array of primitives
public VectorPayload(Object payload) {
this.payload = payload;
}
protected final Object getPayload() {
return VectorSupport.maybeRebox(this).payload;
}
}
public static class Vector<E> extends VectorPayload {
public Vector(Object payload) {
super(payload);
}
}
public static class VectorShuffle<E> extends VectorPayload {
public VectorShuffle(Object payload) {
super(payload);
}
}
public static class VectorMask<E> extends VectorPayload {
public VectorMask(Object payload) {
super(payload);
}
}
/* ============================================================================ */
public interface BroadcastOperation<VM, E, S extends VectorSpecies<E>> {
VM broadcast(long l, S s);
}
@IntrinsicCandidate
public static
<VM, E, S extends VectorSpecies<E>>
VM broadcastCoerced(Class<? extends VM> vmClass, Class<E> E, int length,
long bits, S s,
BroadcastOperation<VM, E, S> defaultImpl) {
assert isNonCapturingLambda(defaultImpl) : defaultImpl;
return defaultImpl.broadcast(bits, s);
}
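A hedged sketch of what reaches this intrinsic from the public API: a scalar broadcast carries the value as a long bit pattern in bits, and the defaultImpl lambda rebuilds the vector lane by lane when the intrinsic is not applied. The IntVector/SPECIES_PREFERRED names are assumed from the incubator API:

import jdk.incubator.vector.IntVector;
import jdk.incubator.vector.VectorSpecies;

class BroadcastSketch {
    static final VectorSpecies<Integer> SPECIES = IntVector.SPECIES_PREFERRED;

    static IntVector fortyTwos() {
        // When intrinsified, this ends up in broadcastCoerced with bits == 42L.
        return IntVector.broadcast(SPECIES, 42);
    }
}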
/* ============================================================================ */
public interface ShuffleIotaOperation<E, S extends VectorSpecies<E>> {
VectorShuffle<E> apply(int length, int start, int step, S s);
}
@IntrinsicCandidate
public static
<E, S extends VectorSpecies<E>>
VectorShuffle<E> shuffleIota(Class<?> E, Class<?> ShuffleClass, S s, int length,
int start, int step, int wrap, ShuffleIotaOperation<E, S> defaultImpl) {
assert isNonCapturingLambda(defaultImpl) : defaultImpl;
return defaultImpl.apply(length, start, step, s);
}
public interface ShuffleToVectorOperation<VM, Sh, E> {
VM apply(Sh s);
}
@IntrinsicCandidate
public static
<VM, Sh extends VectorShuffle<E>, E>
VM shuffleToVector(Class<?> VM, Class<?> E, Class<?> ShuffleClass, Sh s, int length,
ShuffleToVectorOperation<VM,Sh,E> defaultImpl) {
assert isNonCapturingLambda(defaultImpl) : defaultImpl;
return defaultImpl.apply(s);
}
/* ============================================================================ */
public interface IndexOperation<V extends Vector<E>, E, S extends VectorSpecies<E>> {
V index(V v, int step, S s);
}
//FIXME @IntrinsicCandidate
public static
<V extends Vector<E>, E, S extends VectorSpecies<E>>
V indexVector(Class<? extends V> vClass, Class<E> E, int length,
V v, int step, S s,
IndexOperation<V, E, S> defaultImpl) {
assert isNonCapturingLambda(defaultImpl) : defaultImpl;
return defaultImpl.index(v, step, s);
}
/* ============================================================================ */
@IntrinsicCandidate
public static
<V extends Vector<?>>
long reductionCoerced(int oprId, Class<?> vectorClass, Class<?> elementType, int length,
V v,
Function<V,Long> defaultImpl) {
assert isNonCapturingLambda(defaultImpl) : defaultImpl;
return defaultImpl.apply(v);
}
/* ============================================================================ */
public interface VecExtractOp<V> {
long apply(V v1, int idx);
}
@IntrinsicCandidate
public static
<V extends Vector<?>>
long extract(Class<?> vectorClass, Class<?> elementType, int vlen,
V vec, int ix,
VecExtractOp<V> defaultImpl) {
assert isNonCapturingLambda(defaultImpl) : defaultImpl;
return defaultImpl.apply(vec, ix);
}
/* ============================================================================ */
public interface VecInsertOp<V> {
V apply(V v1, int idx, long val);
}
@IntrinsicCandidate
public static
<V extends Vector<?>>
V insert(Class<? extends V> vectorClass, Class<?> elementType, int vlen,
V vec, int ix, long val,
VecInsertOp<V> defaultImpl) {
assert isNonCapturingLambda(defaultImpl) : defaultImpl;
return defaultImpl.apply(vec, ix, val);
}
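Both entry points back the scalar lane accessors: extract reads one lane as a long, insert produces a new vector with one lane replaced. A minimal sketch, assuming the incubator API's lane/withLane methods:

import jdk.incubator.vector.IntVector;
import jdk.incubator.vector.VectorSpecies;

class LaneAccessSketch {
    static final VectorSpecies<Integer> SPECIES = IntVector.SPECIES_PREFERRED;

    static int bumpLane0(int[] a) {
        IntVector v = IntVector.fromArray(SPECIES, a, 0);
        int lane0 = v.lane(0);           // expected to route through extract(...)
        v = v.withLane(0, lane0 + 1);    // expected to route through insert(...)
        v.intoArray(a, 0);
        return lane0;
    }
}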
/* ============================================================================ */
@IntrinsicCandidate
public static
<VM>
VM unaryOp(int oprId, Class<? extends VM> vmClass, Class<?> elementType, int length,
VM vm,
Function<VM, VM> defaultImpl) {
assert isNonCapturingLambda(defaultImpl) : defaultImpl;
return defaultImpl.apply(vm);
}
/* ============================================================================ */
@IntrinsicCandidate
public static
<VM>
VM binaryOp(int oprId, Class<? extends VM> vmClass, Class<?> elementType, int length,
VM vm1, VM vm2,
BiFunction<VM, VM, VM> defaultImpl) {
assert isNonCapturingLambda(defaultImpl) : defaultImpl;
return defaultImpl.apply(vm1, vm2);
}
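A hedged example of a call that funnels into binaryOp: an element-wise add of two int vectors passes VECTOR_OP_ADD as oprId, with the lambda as the scalar fallback. Loop-tail handling and length checks are omitted; the public API names are assumptions from the incubator package:

import jdk.incubator.vector.IntVector;
import jdk.incubator.vector.VectorSpecies;

class BinaryOpSketch {
    static final VectorSpecies<Integer> SPECIES = IntVector.SPECIES_PREFERRED;

    static void addInPlace(int[] a, int[] b) {
        // Full vectors only; remaining tail elements are not processed here.
        for (int i = 0; i < SPECIES.loopBound(a.length); i += SPECIES.length()) {
            IntVector va = IntVector.fromArray(SPECIES, a, i);
            IntVector vb = IntVector.fromArray(SPECIES, b, i);
            va.add(vb).intoArray(a, i);  // binaryOp(VECTOR_OP_ADD, ...)
        }
    }
}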
/* ============================================================================ */
public interface TernaryOperation<V> {
V apply(V v1, V v2, V v3);
}
@IntrinsicCandidate
public static
<VM>
VM ternaryOp(int oprId, Class<? extends VM> vmClass, Class<?> elementType, int length,
VM vm1, VM vm2, VM vm3,
TernaryOperation<VM> defaultImpl) {
assert isNonCapturingLambda(defaultImpl) : defaultImpl;
return defaultImpl.apply(vm1, vm2, vm3);
}
/* ============================================================================ */
// Memory operations
public interface LoadOperation<C, V, E, S extends VectorSpecies<E>> {
V load(C container, int index, S s);
}
@IntrinsicCandidate
public static
<C, VM, E, S extends VectorSpecies<E>>
VM load(Class<? extends VM> vmClass, Class<E> E, int length,
Object base, long offset, // Unsafe addressing
C container, int index, S s, // Arguments for default implementation
LoadOperation<C, VM, E, S> defaultImpl) {
assert isNonCapturingLambda(defaultImpl) : defaultImpl;
return defaultImpl.load(container, index, s);
}
/* ============================================================================ */
public interface LoadVectorOperationWithMap<C, V extends Vector<?>, E, S extends VectorSpecies<E>> {
V loadWithMap(C container, int index, int[] indexMap, int indexM, S s);
}
@IntrinsicCandidate
public static
<C, V extends Vector<?>, W extends Vector<Integer>, E, S extends VectorSpecies<E>>
V loadWithMap(Class<?> vectorClass, Class<E> E, int length, Class<?> vectorIndexClass,
Object base, long offset, // Unsafe addressing
W index_vector,
C container, int index, int[] indexMap, int indexM, S s, // Arguments for default implementation
LoadVectorOperationWithMap<C, V, E, S> defaultImpl) {
assert isNonCapturingLambda(defaultImpl) : defaultImpl;
return defaultImpl.loadWithMap(container, index, indexMap, indexM, s);
}
/* ============================================================================ */
public interface StoreVectorOperation<C, V extends Vector<?>> {
void store(C container, int index, V v);
}
@IntrinsicCandidate
public static
<C, V extends Vector<?>>
void store(Class<?> vectorClass, Class<?> elementType, int length,
Object base, long offset, // Unsafe addressing
V v,
C container, int index, // Arguments for default implementation
StoreVectorOperation<C, V> defaultImpl) {
assert isNonCapturingLambda(defaultImpl) : defaultImpl;
defaultImpl.store(container, index, v);
}
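The (base, offset) pair is the Unsafe view of the same element that (container, index) names, so the intrinsic can address memory directly while the fallback stays on the array path. A hedged sketch of the public calls expected to reach load and store:

import jdk.incubator.vector.FloatVector;
import jdk.incubator.vector.VectorSpecies;

class LoadStoreSketch {
    static final VectorSpecies<Float> SPECIES = FloatVector.SPECIES_PREFERRED;

    static void copyChunk(float[] src, float[] dst, int i) {
        // fromArray funnels into load(...) with offset roughly arrayBaseOffset + i * 4;
        // intoArray funnels into store(...) the same way. Bounds checks omitted.
        FloatVector v = FloatVector.fromArray(SPECIES, src, i);
        v.intoArray(dst, i);
    }
}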
/* ============================================================================ */
public interface StoreVectorOperationWithMap<C, V extends Vector<?>> {
void storeWithMap(C container, int index, V v, int[] indexMap, int indexM);
}
@IntrinsicCandidate
public static
<C, V extends Vector<?>, W extends Vector<Integer>>
void storeWithMap(Class<?> vectorClass, Class<?> elementType, int length, Class<?> vectorIndexClass,
Object base, long offset, // Unsafe addressing
W index_vector, V v,
C container, int index, int[] indexMap, int indexM, // Arguments for default implementation
StoreVectorOperationWithMap<C, V> defaultImpl) {
assert isNonCapturingLambda(defaultImpl) : defaultImpl;
defaultImpl.storeWithMap(container, index, v, indexMap, indexM);
}
/* ============================================================================ */
@IntrinsicCandidate
public static
<VM>
boolean test(int cond, Class<?> vmClass, Class<?> elementType, int length,
VM vm1, VM vm2,
BiFunction<VM, VM, Boolean> defaultImpl) {
assert isNonCapturingLambda(defaultImpl) : defaultImpl;
return defaultImpl.apply(vm1, vm2);
}
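test carries a BoolTest condition and two mask payloads; a hedged sketch of the mask queries expected to land here, using anyTrue on the public mask type (the comparison itself goes through compare, defined just below):

import jdk.incubator.vector.IntVector;
import jdk.incubator.vector.VectorMask;
import jdk.incubator.vector.VectorOperators;
import jdk.incubator.vector.VectorSpecies;

class MaskTestSketch {
    static final VectorSpecies<Integer> SPECIES = IntVector.SPECIES_PREFERRED;

    static boolean anyNegative(int[] a, int i) {
        IntVector v = IntVector.fromArray(SPECIES, a, i);
        VectorMask<Integer> m = v.compare(VectorOperators.LT, 0);
        return m.anyTrue();   // expected to intrinsify via test(...)
    }
}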
/* ============================================================================ */
public interface VectorCompareOp<V,M> {
M apply(int cond, V v1, V v2);
}
@IntrinsicCandidate
public static <V extends Vector<E>,
M extends VectorMask<E>,
E>
M compare(int cond, Class<? extends V> vectorClass, Class<M> maskClass, Class<?> elementType, int length,
V v1, V v2,
VectorCompareOp<V,M> defaultImpl) {
assert isNonCapturingLambda(defaultImpl) : defaultImpl;
return defaultImpl.apply(cond, v1, v2);
}
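cond is one of the BT_* codes listed earlier; a hedged sketch of a lanewise comparison at the API level, assuming the VectorOperators condition constants from the incubator package:

import jdk.incubator.vector.DoubleVector;
import jdk.incubator.vector.VectorMask;
import jdk.incubator.vector.VectorOperators;
import jdk.incubator.vector.VectorSpecies;

class CompareSketch {
    static final VectorSpecies<Double> SPECIES = DoubleVector.SPECIES_PREFERRED;

    static VectorMask<Double> below(double[] a, int i, double limit) {
        DoubleVector v = DoubleVector.fromArray(SPECIES, a, i);
        // LT corresponds to BT_lt when the comparison is intrinsified via compare(...).
        return v.compare(VectorOperators.LT, limit);
    }
}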
/* ============================================================================ */
public interface VectorRearrangeOp<V extends Vector<E>,
Sh extends VectorShuffle<E>,
E> {
V apply(V v1, Sh shuffle);
}
@IntrinsicCandidate
public static
<V extends Vector<E>,
Sh extends VectorShuffle<E>,
E>
V rearrangeOp(Class<? extends V> vectorClass, Class<Sh> shuffleClass, Class<?> elementType, int vlen,
V v1, Sh sh,
VectorRearrangeOp<V,Sh, E> defaultImpl) {
assert isNonCapturingLambda(defaultImpl) : defaultImpl;
return defaultImpl.apply(v1, sh);
}
/* ============================================================================ */
public interface VectorBlendOp<V extends Vector<E>,
M extends VectorMask<E>,
E> {
V apply(V v1, V v2, M mask);
}
@IntrinsicCandidate
public static
<V extends Vector<E>,
M extends VectorMask<E>,
E>
V blend(Class<? extends V> vectorClass, Class<M> maskClass, Class<?> elementType, int length,
V v1, V v2, M m,
VectorBlendOp<V,M, E> defaultImpl) {
assert isNonCapturingLambda(defaultImpl) : defaultImpl;
return defaultImpl.apply(v1, v2, m);
}
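A hedged sketch of a blend at the API level: lanes are taken from the second operand where the mask is set, which is the select this intrinsic implements. The snippet assumes the input vector uses SPECIES:

import jdk.incubator.vector.IntVector;
import jdk.incubator.vector.VectorMask;
import jdk.incubator.vector.VectorOperators;
import jdk.incubator.vector.VectorSpecies;

class BlendSketch {
    static final VectorSpecies<Integer> SPECIES = IntVector.SPECIES_PREFERRED;

    // Replace negative lanes with zero: where the mask is set, take the zero vector.
    static IntVector clampToZero(IntVector v) {
        VectorMask<Integer> negative = v.compare(VectorOperators.LT, 0);
        return v.blend(IntVector.zero(SPECIES), negative);   // blend(...) above
    }
}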
/* ============================================================================ */
public interface VectorBroadcastIntOp<V extends Vector<?>> {
V apply(V v, int n);
}
@IntrinsicCandidate
public static
<V extends Vector<?>>
V broadcastInt(int opr, Class<? extends V> vectorClass, Class<?> elementType, int length,
V v, int n,
VectorBroadcastIntOp<V> defaultImpl) {
assert isNonCapturingLambda(defaultImpl) : defaultImpl;
return defaultImpl.apply(v, n);
}
/* ============================================================================ */
public interface VectorConvertOp<VOUT, VIN, S> {
VOUT apply(VIN v, S species);
}
// Users of this intrinsic assume that it respects
// REGISTER_ENDIAN, which is currently ByteOrder.LITTLE_ENDIAN.
// See javadoc for REGISTER_ENDIAN.
@IntrinsicCandidate
public static <VOUT extends VectorPayload,
VIN extends VectorPayload,
S extends VectorSpecies<?>>
VOUT convert(int oprId,
Class<?> fromVectorClass, Class<?> fromElementType, int fromVLen,
Class<?> toVectorClass, Class<?> toElementType, int toVLen,
VIN v, S s,
VectorConvertOp<VOUT, VIN, S> defaultImpl) {
assert isNonCapturingLambda(defaultImpl) : defaultImpl;
return defaultImpl.apply(v, s);
}
/* ============================================================================ */
@IntrinsicCandidate
public static <V> V maybeRebox(V v) {
// The fence is added here to avoid memory aliasing problems in C2 between scalar & vector accesses.
// TODO: move the fence generation into C2. Generate only when reboxing is taking place.
U.loadFence();
return v;
}
/* ============================================================================ */
// query the JVM's supported vector sizes and types
public static native int getMaxLaneCount(Class<?> etype);
/* ============================================================================ */
public static boolean isNonCapturingLambda(Object o) {
return o.getClass().getDeclaredFields().length == 0;
}
/* ============================================================================ */
private static native int registerNatives();
}

View file

@ -138,9 +138,10 @@ module java.base {
jdk.incubator.foreign;
exports com.sun.security.ntlm to
java.security.sasl;
exports jdk.internal to
exports jdk.internal to // for @HotSpotIntrinsicCandidate
java.compiler,
jdk.compiler,
jdk.incubator.vector,
jdk.jshell;
exports jdk.internal.access to
java.desktop,
@ -195,6 +196,7 @@ module java.base {
jdk.attach,
jdk.charsets,
jdk.compiler,
jdk.incubator.vector,
jdk.jfr,
jdk.jshell,
jdk.nio.mapmode,
@ -228,9 +230,12 @@ module java.base {
jdk.management.agent;
exports jdk.internal.vm.annotation to
jdk.internal.vm.ci,
jdk.incubator.vector,
jdk.incubator.foreign,
jdk.jfr,
jdk.unsupported;
exports jdk.internal.vm.vector to
jdk.incubator.vector;
exports jdk.internal.util to
jdk.incubator.foreign;
exports jdk.internal.util.jar to

View file

@ -0,0 +1,290 @@
/*
* Copyright (c) 2017, 2020, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation. Oracle designates this
* particular file as subject to the "Classpath" exception as provided
* by Oracle in the LICENSE file that accompanied this code.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*/
package jdk.incubator.vector;
import jdk.internal.vm.annotation.ForceInline;
import static jdk.incubator.vector.VectorOperators.*;
abstract class AbstractMask<E> extends VectorMask<E> {
AbstractMask(boolean[] bits) {
super(bits);
}
/*package-private*/
abstract boolean[] getBits();
// Unary operator
interface MUnOp {
boolean apply(int i, boolean a);
}
abstract AbstractMask<E> uOp(MUnOp f);
// Binary operator
interface MBinOp {
boolean apply(int i, boolean a, boolean b);
}
abstract AbstractMask<E> bOp(VectorMask<E> o, MBinOp f);
/*package-private*/
abstract AbstractSpecies<E> vspecies();
@Override
@ForceInline
public final VectorSpecies<E> vectorSpecies() {
return vspecies();
}
@Override
public boolean laneIsSet(int i) {
return getBits()[i];
}
@Override
public long toLong() {
// FIXME: This should be an intrinsic.
if (length() > Long.SIZE) {
throw new UnsupportedOperationException("too many lanes for one long");
}
long res = 0;
long set = 1;
boolean[] bits = getBits();
for (int i = 0; i < bits.length; i++) {
res = bits[i] ? res | set : res;
set = set << 1;
}
return res;
}
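A worked example of the packing order used here (lane 0 lands in bit 0). The species and factory method are assumptions from the public incubator API; the bit pattern follows directly from the loop above:

import jdk.incubator.vector.IntVector;
import jdk.incubator.vector.VectorMask;
import jdk.incubator.vector.VectorSpecies;

class MaskToLongSketch {
    static final VectorSpecies<Integer> SPECIES = IntVector.SPECIES_128; // 4 int lanes

    static void demo() {
        boolean[] bits = {true, false, true, true};
        VectorMask<Integer> m = VectorMask.fromArray(SPECIES, bits, 0);
        long packed = m.toLong();   // lane 0 -> bit 0, so packed == 0b1101 == 13
        assert packed == 0b1101;
    }
}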
@Override
public void intoArray(boolean[] bits, int i) {
System.arraycopy(getBits(), 0, bits, i, length());
}
@Override
public boolean[] toArray() {
return getBits().clone();
}
@Override
@ForceInline
@SuppressWarnings("unchecked")
public
<F> VectorMask<F> check(Class<F> elementType) {
if (vectorSpecies().elementType() != elementType) {
throw AbstractSpecies.checkFailed(this, elementType);
}
return (VectorMask<F>) this;
}
@Override
@ForceInline
@SuppressWarnings("unchecked")
public
<F> VectorMask<F> check(VectorSpecies<F> species) {
if (species != vectorSpecies()) {
throw AbstractSpecies.checkFailed(this, species);
}
return (VectorMask<F>) this;
}
@Override
public int trueCount() {
//FIXME: use a population count intrinsic here
int c = 0;
for (boolean i : getBits()) {
if (i) c++;
}
return c;
}
@Override
public int firstTrue() {
//FIXME: use a count trailing zeros intrinsic here
boolean[] bits = getBits();
for (int i = 0; i < bits.length; i++) {
if (bits[i]) return i;
}
return bits.length;
}
@Override
public int lastTrue() {
//FIXME: use a count leading zeros intrinsic here
boolean[] bits = getBits();
for (int i = bits.length-1; i >= 0; i--) {
if (bits[i]) return i;
}
return -1;
}
@Override
public VectorMask<E> eq(VectorMask<E> m) {
// FIXME: Generate good code here.
return bOp(m, (i, a, b) -> a == b);
}
@Override
public VectorMask<E> andNot(VectorMask<E> m) {
// FIXME: Generate good code here.
return bOp(m, (i, a, b) -> a && !b);
}
/*package-private*/
static boolean anyTrueHelper(boolean[] bits) {
// FIXME: Maybe use toLong() != 0 here.
for (boolean i : bits) {
if (i) return true;
}
return false;
}
/*package-private*/
static boolean allTrueHelper(boolean[] bits) {
// FIXME: Maybe use not().toLong() == 0 here.
for (boolean i : bits) {
if (!i) return false;
}
return true;
}
@Override
@ForceInline
public VectorMask<E> indexInRange(int offset, int limit) {
int vlength = length();
Vector<E> iota = vectorSpecies().zero().addIndex(1);
VectorMask<E> badMask = checkIndex0(offset, limit, iota, vlength);
return this.andNot(badMask);
}
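indexInRange is the building block for masked loop tails; a hedged sketch of the usual idiom, assuming the species-level indexInRange helper and masked fromArray/intoArray overloads in the public API:

import jdk.incubator.vector.IntVector;
import jdk.incubator.vector.VectorMask;
import jdk.incubator.vector.VectorSpecies;

class LoopTailSketch {
    static final VectorSpecies<Integer> SPECIES = IntVector.SPECIES_PREFERRED;

    static void negate(int[] a) {
        for (int i = 0; i < a.length; i += SPECIES.length()) {
            // In the final, partial iteration the mask switches off out-of-range lanes.
            VectorMask<Integer> m = SPECIES.indexInRange(i, a.length);
            IntVector v = IntVector.fromArray(SPECIES, a, i, m);
            v.neg().intoArray(a, i, m);
        }
    }
}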
/*package-private*/
@ForceInline
AbstractVector<E>
toVectorTemplate() {
AbstractSpecies<E> vsp = vspecies();
Vector<E> zero = vsp.broadcast(0);
Vector<E> mone = vsp.broadcast(-1);
// -1 will result in the most significant bit being set in
// addition to some or all other lane bits.
// For integral types, *all* lane bits will be set.
// The bits for -1.0 are like {0b10111*0000*}.
// FIXME: Use a conversion intrinsic for this operation.
// https://bugs.openjdk.java.net/browse/JDK-8225740
return (AbstractVector<E>) zero.blend(mone, this);
}
/**
* Test if a masked memory access at a given offset into an array
* of the given length will stay within the array.
* The per-lane offsets are iota*esize.
*/
/*package-private*/
@ForceInline
void checkIndexByLane(int offset, int alength,
Vector<E> iota,
int esize) {
if (VectorIntrinsics.VECTOR_ACCESS_OOB_CHECK == 0) {
return;
}
// Although the specification is simple, the implementation is
// tricky, because the value iota*esize might possibly
// overflow. So we calculate our test values as scalars,
// clipping to the range [-1..VLENGTH], and test them against
// the unscaled iota vector, whose values are in [0..VLENGTH-1].
int vlength = length();
VectorMask<E> badMask;
if (esize == 1) {
badMask = checkIndex0(offset, alength, iota, vlength);
} else if (offset >= 0) {
// Masked access to multi-byte lanes in byte array.
// It could be aligned anywhere.
int elemCount = Math.min(vlength, (alength - offset) / esize);
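// e.g. int lanes (esize=4) over a byte[] with alength=30, offset=3, vlength=8:
// elemCount = min(8, 27/4) = 6, so lanes 6..7 will be marked bad.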
badMask = checkIndex0(0, elemCount, iota, vlength);
} else {
// This requires a split test.
int clipOffset = Math.max(offset, -(vlength * esize));
int elemCount = Math.min(vlength, (alength - clipOffset) / esize);
badMask = checkIndex0(0, elemCount, iota, vlength);
clipOffset &= (esize - 1); // power of two, so OK
VectorMask<E> badMask2 = checkIndex0(clipOffset / esize, vlength,
iota, vlength);
badMask = badMask.or(badMask2);
}
badMask = badMask.and(this);
if (badMask.anyTrue()) {
int badLane = badMask.firstTrue();
throw ((AbstractMask<E>)badMask)
.checkIndexFailed(offset, badLane, alength, esize);
}
}
private
@ForceInline
VectorMask<E> checkIndex0(int offset, int alength,
Vector<E> iota, int vlength) {
// An active lane is bad if its number is at least
// alength-offset, since when added to offset it will step off
// of the end of the array. To avoid overflow when
// converting, clip the comparison value to [0..vlength]
// inclusive.
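// e.g. offset=5, alength=7, vlength=8: indexLimit = min(7-5, 8) = 2,
// so lanes 2..7 are flagged below.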
int indexLimit = Math.max(0, Math.min(alength - offset, vlength));
VectorMask<E> badMask =
iota.compare(GE, iota.broadcast(indexLimit));
if (offset < 0) {
// An active lane is bad if its number is less than
// -offset, because when added to offset it will then
// address an array element at a negative index. To avoid
// overflow when converting, clip the comparison value at
// vlength. This specific expression works correctly even
// when offset is Integer.MIN_VALUE.
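// e.g. offset=-3, vlength=8: firstGoodIndex = 3, so lanes 0..2 are also flagged.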
int firstGoodIndex = -Math.max(offset, -vlength);
VectorMask<E> badMask2 =
iota.compare(LT, iota.broadcast(firstGoodIndex));
if (indexLimit >= vlength) {
badMask = badMask2; // 1st badMask is all true
} else {
badMask = badMask.or(badMask2);
}
}
return badMask;
}
private IndexOutOfBoundsException checkIndexFailed(int offset, int lane,
int alength, int esize) {
String msg = String.format("Masked range check failed: "+
"vector mask %s out of bounds at "+
"index %d+%d in array of length %d",
this, offset, lane * esize, alength);
if (esize != 1) {
msg += String.format(" (each lane spans %d array elements)", esize);
}
throw new IndexOutOfBoundsException(msg);
}
}

@ -0,0 +1,246 @@
/*
* Copyright (c) 2018, 2020, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation. Oracle designates this
* particular file as subject to the "Classpath" exception as provided
* by Oracle in the LICENSE file that accompanied this code.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*/
package jdk.incubator.vector;
import java.util.function.IntUnaryOperator;
import jdk.internal.vm.annotation.ForceInline;
abstract class AbstractShuffle<E> extends VectorShuffle<E> {
static final IntUnaryOperator IDENTITY = i -> i;
// Internal representation allows for a maximum index of 256
// Values are clipped to [-VLENGTH..VLENGTH-1].
AbstractShuffle(int length, byte[] reorder) {
super(reorder);
assert(length == reorder.length);
assert(indexesInRange(reorder));
}
AbstractShuffle(int length, int[] reorder) {
this(length, reorder, 0);
}
AbstractShuffle(int length, int[] reorder, int offset) {
super(prepare(length, reorder, offset));
}
AbstractShuffle(int length, IntUnaryOperator f) {
super(prepare(length, f));
}
private static byte[] prepare(int length, int[] reorder, int offset) {
byte[] a = new byte[length];
for (int i = 0; i < length; i++) {
int si = reorder[offset + i];
si = partiallyWrapIndex(si, length);
a[i] = (byte) si;
}
return a;
}
private static byte[] prepare(int length, IntUnaryOperator f) {
byte[] a = new byte[length];
for (int i = 0; i < a.length; i++) {
int si = f.applyAsInt(i);
si = partiallyWrapIndex(si, length);
a[i] = (byte) si;
}
return a;
}
byte[] reorder() {
return (byte[])getPayload();
}
/*package-private*/
abstract AbstractSpecies<E> vspecies();
@Override
@ForceInline
public final VectorSpecies<E> vectorSpecies() {
return vspecies();
}
@Override
@ForceInline
public void intoArray(int[] a, int offset) {
byte[] reorder = reorder();
int vlen = reorder.length;
for (int i = 0; i < vlen; i++) {
int sourceIndex = reorder[i];
assert(sourceIndex >= -vlen && sourceIndex < vlen);
a[offset + i] = sourceIndex;
}
}
@Override
@ForceInline
public int[] toArray() {
byte[] reorder = reorder();
int[] a = new int[reorder.length];
intoArray(a, 0);
return a;
}
/*package-private*/
@ForceInline
final
AbstractVector<E>
toVectorTemplate() {
// Note that the values produced by laneSource
// are already clipped. At this point we convert
// them from internal ints (or bytes) into the ETYPE.
// FIXME: Use a conversion intrinsic for this operation.
// https://bugs.openjdk.java.net/browse/JDK-8225740
return (AbstractVector<E>) vspecies().fromIntValues(toArray());
}
@ForceInline
public final VectorShuffle<E> checkIndexes() {
// FIXME: vectorize this
for (int index : reorder()) {
if (index < 0) {
throw checkIndexFailed(index, length());
}
}
return this;
}
@ForceInline
public final VectorShuffle<E> wrapIndexes() {
// FIXME: vectorize this
byte[] reorder = reorder();
for (int index : reorder) {
if (index < 0) {
return wrapAndRebuild(reorder);
}
}
return this;
}
@ForceInline
public final VectorShuffle<E> wrapAndRebuild(byte[] oldReorder) {
int length = oldReorder.length;
byte[] reorder = new byte[length];
for (int i = 0; i < length; i++) {
int si = oldReorder[i];
// FIXME: This does not work unless it's a power of 2.
if ((length & (length - 1)) == 0) {
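// For negative si the 'length' bit is set in two's complement, so si & length == length;
// e.g. length=8, si=-3 becomes 5, while in-range non-negative si gain 0.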
si += si & length; // power-of-two optimization
} else if (si < 0) {
// non-POT code requires a conditional add
si += length;
}
assert(si >= 0 && si < length);
reorder[i] = (byte) si;
}
return vspecies().dummyVector().shuffleFromBytes(reorder);
}
@ForceInline
public final VectorMask<E> laneIsValid() {
// FIXME: vectorize this
byte[] reorder = reorder();
int length = reorder.length;
boolean[] bits = new boolean[length];
for (int i = 0; i < length; i++) {
if (reorder[i] >= 0) {
bits[i] = true;
}
}
return vspecies().dummyVector().maskFromArray(bits);
}
@Override
@ForceInline
@SuppressWarnings("unchecked")
public final
<F> VectorShuffle<F> check(VectorSpecies<F> species) {
if (species != vectorSpecies()) {
throw AbstractSpecies.checkFailed(this, species);
}
return (VectorShuffle<F>) this;
}
@Override
@ForceInline
public final int checkIndex(int index) {
return checkIndex0(index, length(), (byte)1);
}
@Override
@ForceInline
public final int wrapIndex(int index) {
return checkIndex0(index, length(), (byte)0);
}
/** Return invalid indexes partially wrapped
* mod VLENGTH to negative values.
*/
/*package-private*/
@ForceInline
static
int partiallyWrapIndex(int index, int laneCount) {
return checkIndex0(index, laneCount, (byte)-1);
}
/*package-private*/
@ForceInline
static int checkIndex0(int index, int laneCount, byte mode) {
int wrapped = VectorIntrinsics.wrapToRange(index, laneCount);
if (mode == 0 || wrapped == index) {
return wrapped;
}
if (mode < 0) {
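// e.g. index=9, laneCount=8: wrapped=1, returned as 1-8 = -7
// (out-of-range indexes become negative, as stored internally).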
return wrapped - laneCount; // special mode for internal storage
}
throw checkIndexFailed(index, laneCount);
}
private static IndexOutOfBoundsException checkIndexFailed(int index, int laneCount) {
int max = laneCount - 1;
String msg = "required an index in [0.."+max+"] but found "+index;
return new IndexOutOfBoundsException(msg);
}
static boolean indexesInRange(byte[] reorder) {
int length = reorder.length;
for (byte si : reorder) {
if (si >= length || si < -length) {
boolean assertsEnabled = false;
assert(assertsEnabled = true);
if (assertsEnabled) {
String msg = ("index "+si+" out of range ["+length+"] in "+
java.util.Arrays.toString(reorder));
throw new AssertionError(msg);
}
return false;
}
}
return true;
}
}

@ -0,0 +1,658 @@
/*
* Copyright (c) 2017, 2020, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation. Oracle designates this
* particular file as subject to the "Classpath" exception as provided
* by Oracle in the LICENSE file that accompanied this code.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*/
package jdk.incubator.vector;
import jdk.internal.vm.annotation.ForceInline;
import jdk.internal.vm.annotation.Stable;
import java.nio.ByteOrder;
import java.lang.reflect.Array;
import java.util.Arrays;
import java.util.function.Function;
import java.util.function.IntUnaryOperator;
abstract class AbstractSpecies<E> extends jdk.internal.vm.vector.VectorSupport.VectorSpecies<E>
implements VectorSpecies<E> {
@Stable
final VectorShape vectorShape;
@Stable
final LaneType laneType;
@Stable
final int laneCount;
@Stable
final int laneCountLog2P1;
@Stable
final Class<? extends AbstractVector<E>> vectorType;
@Stable
final Class<? extends AbstractMask<E>> maskType;
@Stable
final Function<Object, ? extends AbstractVector<E>> vectorFactory;
@Stable
final VectorShape indexShape;
@Stable
final int maxScale, minScale;
@Stable
final int vectorBitSize, vectorByteSize;
AbstractSpecies(VectorShape vectorShape,
LaneType laneType,
Class<? extends AbstractVector<E>> vectorType,
Class<? extends AbstractMask<E>> maskType,
Function<Object, ? extends AbstractVector<E>> vectorFactory) {
this.vectorShape = vectorShape;
this.laneType = laneType;
this.vectorType = vectorType;
this.maskType = maskType;
this.vectorFactory = vectorFactory;
// derived values:
int bitSize = vectorShape.vectorBitSize();
int byteSize = bitSize / Byte.SIZE;
assert(byteSize * 8 == bitSize);
this.vectorBitSize = bitSize;
this.vectorByteSize = byteSize;
int elementSize = laneType.elementSize;
this.laneCount = bitSize / elementSize;
assert(laneCount > 0); // could be 1 for mono-vector (double in v64)
this.laneCountLog2P1 = Integer.numberOfTrailingZeros(laneCount) + 1;
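// Stored as log2 + 1: laneCountLog2 itself can legitimately be 0 (one lane),
// and @Stable fields are only constant-folded once they hold a non-default value.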
// Note: The shape might be the max-shape,
// if there is no vector this large.
int indexBitSize = Integer.SIZE * laneCount;
this.indexShape = VectorShape.forIndexBitSize(indexBitSize, elementSize);
// What are the largest and smallest scale factors that,
// when multiplied times the elements in [0..VLENGTH],
// inclusive, do not overflow the ETYPE?
int precision = laneType.elementPrecision;
if (precision >= Integer.SIZE) {
// No overflow possible from int*int.
this.maxScale = Integer.MAX_VALUE;
this.minScale = Integer.MIN_VALUE;
} else {
boolean isfp = (laneType.elementKind == 'F');
long x = laneCount;
long maxScale = ((1L << precision)-(isfp?0:1)) / x;
long minScale = (-1L << precision) / x;
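// e.g. byte lanes (elementPrecision 7) with laneCount 16:
// maxScale = 127/16 = 7 and minScale = -128/16 = -8.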
this.maxScale = (int) maxScale;
this.minScale = (int) minScale;
}
}
@Stable //lazy JIT constant
AbstractSpecies<Integer> indexSpecies;
@Stable //lazy JIT constant
AbstractShuffle<Byte> swapBytesShuffle;
@Stable //lazy JIT constant
AbstractVector<E> dummyVector;
@Override
@ForceInline
public final int length() {
return laneCount;
}
// Inside the implementation we use the more descriptive
// term laneCount:
/*package-private*/
@ForceInline
final int laneCount() {
return laneCount;
}
/*package-private*/
@ForceInline
final int laneCountLog2() {
return laneCountLog2P1 - 1; // subtract one from stable value
}
@Override
@ForceInline
@SuppressWarnings("unchecked")
//NOT FINAL: SPECIALIZED
public Class<E> elementType() {
return (Class<E>) laneType.elementType;
}
// FIXME: appeal to general method (see https://bugs.openjdk.java.net/browse/JDK-6176992)
// replace usages of this method and remove
@ForceInline
@SuppressWarnings("unchecked")
//NOT FINAL: SPECIALIZED
Class<E> genericElementType() {
return (Class<E>) laneType.genericElementType;
}
@Override
@ForceInline
//NOT FINAL: SPECIALIZED
public Class<? extends AbstractVector<E>> vectorType() {
return vectorType;
}
@Override
@ForceInline
public final Class<? extends AbstractMask<E>> maskType() {
return maskType;
}
@Override
@ForceInline
public final int elementSize() {
return laneType.elementSize;
}
/*package-private*/
@ForceInline
final int elementByteSize() {
return laneType.elementSize / Byte.SIZE;
}
@Override
@ForceInline
public final VectorShape vectorShape() {
return vectorShape;
}
@ForceInline
/*package-private*/
final VectorShape indexShape() {
return indexShape;
}
@Override
@ForceInline
public final int vectorBitSize() {
return vectorBitSize;
}
@Override
@ForceInline
public final int vectorByteSize() {
return vectorByteSize;
}
@Override
@ForceInline
public final int loopBound(int length) {
return VectorIntrinsics.roundDown(length, laneCount);
}
@Override
@ForceInline
public final VectorMask<E> indexInRange(int offset, int limit) {
return maskAll(true).indexInRange(offset, limit);
}
@Override
@ForceInline
public final <F> VectorSpecies<F> withLanes(Class<F> newType) {
return withLanes(LaneType.of(newType)).check(newType);
}
@ForceInline
/*package-private*/
final
AbstractSpecies<?> withLanes(LaneType newType) {
if (newType == laneType) return this;
return findSpecies(newType, vectorShape);
}
@ForceInline
/*package-private*/
AbstractSpecies<?> asIntegral() {
return withLanes(laneType.asIntegral());
}
@ForceInline
/*package-private*/
AbstractSpecies<?> asFloating() {
return withLanes(laneType.asFloating());
}
@Override
@ForceInline
@SuppressWarnings("unchecked")
public final VectorSpecies<E> withShape(VectorShape newShape) {
if (newShape == vectorShape) return this;
return (VectorSpecies<E>) findSpecies(laneType, newShape);
}
@ForceInline
/*package-private*/
AbstractSpecies<Integer> indexSpecies() {
// This JITs to a constant value:
AbstractSpecies<Integer> sp = indexSpecies;
if (sp != null) return sp;
return indexSpecies = findSpecies(LaneType.INT, indexShape).check0(int.class);
}
@ForceInline
/*package-private*/
@SuppressWarnings("unchecked")
AbstractSpecies<Byte> byteSpecies() {
// This JITs to a constant value:
return (AbstractSpecies<Byte>) withLanes(LaneType.BYTE);
}
@ForceInline
/*package-private*/
AbstractShuffle<Byte> swapBytesShuffle() {
// This JITs to a constant value:
AbstractShuffle<Byte> sh = swapBytesShuffle;
if (sh != null) return sh;
return swapBytesShuffle = makeSwapBytesShuffle();
}
private AbstractShuffle<Byte> makeSwapBytesShuffle() {
int vbytes = vectorByteSize();
int lbytes = elementByteSize();
int[] sourceIndexes = new int[vbytes];
for (int i = 0; i < vbytes; i++) {
sourceIndexes[i] = i ^ (lbytes-1);
}
return (AbstractShuffle<Byte>)
VectorShuffle.fromValues(byteSpecies(), sourceIndexes);
}
/*package-private*/
abstract Vector<E> fromIntValues(int[] values);
/**
* Do not use a dummy except to call methods on it when you don't
* care about the lane values. The main benefit of it is to
* populate the type profile, which then allows the JIT to derive
* constant values for dummy.species(), the current species, and
* then for all of its attributes: ETYPE, VLENGTH, VSHAPE, etc.
*/
@ForceInline
/*package-private*/
AbstractVector<E> dummyVector() {
// This JITs to a constant value:
AbstractVector<E> dummy = dummyVector;
if (dummy != null) return dummy;
// The rest of this computation is probably not JIT-ted.
return makeDummyVector();
}
private AbstractVector<E> makeDummyVector() {
Object za = Array.newInstance(elementType(), laneCount);
return dummyVector = vectorFactory.apply(za);
// This is the only use of vectorFactory.
// All other factory requests are routed
// through the dummy vector.
}
/**
* Build a mask by directly calling its constructor.
* It is an error if the array is aliased elsewhere.
*/
@ForceInline
/*package-private*/
AbstractMask<E> maskFactory(boolean[] bits) {
return dummyVector().maskFromArray(bits);
}
public final
@Override
@ForceInline
VectorShuffle<E> shuffleFromArray(int[] sourceIndexes, int offset) {
return dummyVector().shuffleFromArray(sourceIndexes, offset);
}
public final
@Override
@ForceInline
VectorShuffle<E> shuffleFromValues(int... sourceIndexes) {
return dummyVector().shuffleFromArray(sourceIndexes, 0);
}
public final
@Override
@ForceInline
VectorShuffle<E> shuffleFromOp(IntUnaryOperator fn) {
return dummyVector().shuffleFromOp(fn);
}
public final
@Override
@ForceInline
VectorShuffle<E> iotaShuffle(int start, int step, boolean wrap) {
if (start == 0 && step == 1)
return dummyVector().iotaShuffle();
else
return dummyVector().iotaShuffle(start, step, wrap);
}
@ForceInline
@Override
public final Vector<E> fromByteArray(byte[] a, int offset, ByteOrder bo) {
return dummyVector()
.fromByteArray0(a, offset)
.maybeSwap(bo);
}
@Override
public VectorMask<E> loadMask(boolean[] bits, int offset) {
return VectorMask.fromArray(this, bits, offset);
}
// Define zero and iota when we know the ETYPE and VSHAPE.
public abstract AbstractVector<E> zero();
/*package-private*/ abstract AbstractVector<E> iota();
// Constructing vectors from raw bits.
/*package-private*/
abstract long longToElementBits(long e);
/*package-private*/
abstract AbstractVector<E> broadcastBits(long bits);
/*package-private*/
final IllegalArgumentException badElementBits(long iv, Object cv) {
String msg = String.format("Vector creation failed: "+
"value %s cannot be represented in ETYPE %s"+
"; result of cast is %s",
iv,
elementType(),
cv);
return new IllegalArgumentException(msg);
}
/*package-private*/
static
final IllegalArgumentException badArrayBits(Object iv,
boolean isInt,
long cv) {
String msg = String.format("Array creation failed: "+
"lane value %s cannot be represented in %s"+
"; result of cast is %s",
iv,
(isInt ? "int" : "long"),
cv);
return new IllegalArgumentException(msg);
}
/*package-private*/
Object iotaArray() {
// Create an iota array. It's OK if this is really slow,
// because it happens only once per species.
Object ia = Array.newInstance(laneType.elementType,
laneCount);
assert(ia.getClass() == laneType.arrayType);
checkValue(laneCount-1); // worst case
for (int i = 0; i < laneCount; i++) {
if ((byte)i == i)
Array.setByte(ia, i, (byte)i);
else if ((short)i == i)
Array.setShort(ia, i, (short)i);
else
Array.setInt(ia, i, i);
assert(Array.getDouble(ia, i) == i);
}
return ia;
}
@ForceInline
/*package-private*/
void checkScale(int scale) {
if (scale > 0) {
if (scale <= maxScale) return;
} else { // scale <= 0
if (scale >= minScale) return;
}
throw checkScaleFailed(scale);
}
private IllegalArgumentException checkScaleFailed(int scale) {
String msg = String.format("%s: cannot represent VLENGTH*%d",
this, scale);
return new IllegalArgumentException(msg);
}
/*package-private*/
interface RVOp {
long apply(int i); // supply raw element bits
}
/*package-private*/
abstract AbstractVector<E> rvOp(RVOp f);
/*package-private*/
interface FOpm {
boolean apply(int i);
}
AbstractMask<E> opm(FOpm f) {
boolean[] res = new boolean[laneCount];
for (int i = 0; i < res.length; i++) {
res[i] = f.apply(i);
}
return dummyVector().maskFromArray(res);
}
@Override
@ForceInline
public final
<F> VectorSpecies<F> check(Class<F> elementType) {
return check0(elementType);
}
@ForceInline
@SuppressWarnings("unchecked")
/*package-private*/ final
<F> AbstractSpecies<F> check0(Class<F> elementType) {
if (elementType != this.elementType()) {
throw AbstractSpecies.checkFailed(this, elementType);
}
return (AbstractSpecies<F>) this;
}
@ForceInline
/*package-private*/
AbstractSpecies<E> check(LaneType laneType) {
if (laneType != this.laneType) {
throw AbstractSpecies.checkFailed(this, laneType);
}
return this;
}
@Override
@ForceInline
public int partLimit(VectorSpecies<?> toSpecies, boolean lanewise) {
AbstractSpecies<?> rsp = (AbstractSpecies<?>) toSpecies;
int inSizeLog2 = this.vectorShape.vectorBitSizeLog2;
int outSizeLog2 = rsp.vectorShape.vectorBitSizeLog2;
if (lanewise) {
inSizeLog2 += (rsp.laneType.elementSizeLog2 -
this.laneType.elementSizeLog2);
}
int diff = (inSizeLog2 - outSizeLog2);
// Let's try a branch-free version of this.
int sign = (diff >> -1);
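// Note: diff >> -1 is diff >> 31 (int shift distances are masked to 5 bits),
// i.e. all ones when diff is negative and zero otherwise.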
//d = Math.abs(diff);
//d = (sign == 0 ? diff : sign == -1 ? 1 + ~diff);
int d = (diff ^ sign) - sign;
// Compute sgn(diff) << abs(diff), but replace 1 by 0.
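// e.g. a 256-bit species to a 64-bit species (not lanewise): diff = 8-6 = 2,
// giving (1<<2) & ~1 = 4; the reverse direction gives -4.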
return ((sign | 1) << d) & ~1;
}
/**
* Helper for throwing CheckCastExceptions,
* used by the various Vector*.check(*) methods.
*/
/*package-private*/
static ClassCastException checkFailed(Object what, Object required) {
// Find a species for the thing that's failing.
AbstractSpecies<?> whatSpecies = null;
String where;
if (what instanceof VectorSpecies) {
whatSpecies = (AbstractSpecies<?>) what;
where = whatSpecies.toString();
} else if (what instanceof Vector) {
whatSpecies = (AbstractSpecies<?>) ((Vector<?>) what).species();
where = "a Vector<"+whatSpecies.genericElementType()+">";
} else if (what instanceof VectorMask) {
whatSpecies = (AbstractSpecies<?>) ((VectorMask<?>) what).vectorSpecies();
where = "a VectorMask<"+whatSpecies.genericElementType()+">";
} else if (what instanceof VectorShuffle) {
whatSpecies = (AbstractSpecies<?>) ((VectorShuffle<?>) what).vectorSpecies();
where = "a VectorShuffle<"+whatSpecies.genericElementType()+">";
} else {
where = what.toString();
}
Object found = null;
if (whatSpecies != null) {
if (required instanceof VectorSpecies) {
// required is a VectorSpecies; found the wrong species
found = whatSpecies;
} else if (required instanceof Vector) {
// same VectorSpecies required; found the wrong species
found = whatSpecies;
required = ((Vector<?>)required).species();
} else if (required instanceof Class) {
// required is a Class; found the wrong ETYPE
Class<?> requiredClass = (Class<?>) required;
LaneType requiredType = LaneType.forClassOrNull(requiredClass);
found = whatSpecies.elementType();
if (requiredType == null) {
required = required + " (not a valid lane type)";
} else if (!requiredClass.isPrimitive()) {
required = required + " (should be " + requiredType + ")";
}
} else if (required instanceof LaneType) {
// required is a LaneType; found the wrong ETYPE
required = ((LaneType) required).elementType;
found = whatSpecies.elementType();
} else if (required instanceof Integer) {
// required is a length; species has wrong VLENGTH
required = required + " lanes";
found = whatSpecies.length();
}
}
if (found == null) found = "bad value";
String msg = where+": required "+required+" but found "+found;
return new ClassCastException(msg);
}
private static final @Stable AbstractSpecies<?>[][] CACHES
= new AbstractSpecies<?>[LaneType.SK_LIMIT][VectorShape.SK_LIMIT];
// Helper functions for finding species:
/*package-private*/
@ForceInline
static <E>
AbstractSpecies<E> findSpecies(Class<E> elementType,
LaneType laneType,
VectorShape shape) {
assert(elementType == laneType.elementType);
return findSpecies(laneType, shape).check0(elementType);
}
/*package-private*/
@ForceInline
static
AbstractSpecies<?> findSpecies(LaneType laneType,
VectorShape shape) {
// The JIT can see into this cache.
// Therefore it is useful to arrange for constant
// arguments to this method. If the cache
// is full when the JIT runs, the cache item becomes
// a compile-time constant. And then all the @Stable
// fields of the AbstractSpecies are also constants.
AbstractSpecies<?> s = CACHES[laneType.switchKey][shape.switchKey];
if (s != null) return s;
return computeSpecies(laneType, shape);
}
private static
AbstractSpecies<?> computeSpecies(LaneType laneType,
VectorShape shape) {
AbstractSpecies<?> s = null;
// enum-switches don't optimize properly JDK-8161245
switch (laneType.switchKey) {
case LaneType.SK_FLOAT:
s = FloatVector.species(shape); break;
case LaneType.SK_DOUBLE:
s = DoubleVector.species(shape); break;
case LaneType.SK_BYTE:
s = ByteVector.species(shape); break;
case LaneType.SK_SHORT:
s = ShortVector.species(shape); break;
case LaneType.SK_INT:
s = IntVector.species(shape); break;
case LaneType.SK_LONG:
s = LongVector.species(shape); break;
}
if (s == null) {
// NOTE: The result of this method is guaranteed to be
// non-null. Later calls to ".check" also ensure this.
// If this method hits a NPE, it is because a helper
// method EVector.species() has returned a null value, and
// that is because a SPECIES_X static constant has not yet
// been initialized. And that, in turn, is because
// somebody is calling this method way too early during
// bootstrapping.
throw new AssertionError("bootstrap problem");
}
assert(s.laneType == laneType) : s + "!=" + laneType;
assert(s.vectorShape == shape) : s + "!=" + shape;
CACHES[laneType.switchKey][shape.switchKey] = s;
return s;
}
@Override
public final String toString() {
return "Species["+laneType+", "+laneCount+", "+vectorShape+"]";
}
@Override
public final boolean equals(Object obj) {
if (obj instanceof AbstractSpecies) {
AbstractSpecies<?> that = (AbstractSpecies<?>) obj;
return (this.laneType == that.laneType &&
this.laneCount == that.laneCount &&
this.vectorShape == that.vectorShape);
}
return this == obj;
}
/**
* Returns a hash code value for the species,
* based on the lane type, lane count, and vector shape.
*
* @return a hash code value for this species
*/
@Override
public final int hashCode() {
int[] a = { laneType.ordinal(), laneCount, vectorShape.ordinal() };
return Arrays.hashCode(a);
}
}

@ -0,0 +1,726 @@
/*
* Copyright (c) 2019, 2020, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation. Oracle designates this
* particular file as subject to the "Classpath" exception as provided
* by Oracle in the LICENSE file that accompanied this code.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*/
package jdk.incubator.vector;
import jdk.internal.vm.annotation.ForceInline;
import jdk.internal.vm.vector.VectorSupport;
import java.nio.ByteBuffer;
import java.nio.ByteOrder;
import java.util.function.IntUnaryOperator;
import static jdk.incubator.vector.VectorOperators.*;
@SuppressWarnings("cast")
abstract class AbstractVector<E> extends Vector<E> {
/**
* The order of vector bytes when stored in natural
* array elements of the same lane type.
* This is also the behavior of the
* VectorSupport load/store instructions.
* If these instructions gain the capability to do
* byte swapping on the fly, add a bit to those
* instructions, but let this polarity be the
* "neutral" or "default" setting of the bit.
*/
/*package-private*/
static final ByteOrder NATIVE_ENDIAN = ByteOrder.nativeOrder();
/**
* The order of vector bytes as stored in the register
* file. This becomes visible with the asRaw[Type]Vector
* operations, which convert between the internal byte-wise
* representation and the typed lane-wise representation.
* It is very possible for a platform to have big-endian
* memory layout and little-endian register layout,
* so this is a different setting from NATIVE_ENDIAN.
* In fact, both Intel and ARM use LE conventions here.
* Future work may be needed for resolutely BE platforms.
*/
/*package-private*/
static final ByteOrder REGISTER_ENDIAN = ByteOrder.LITTLE_ENDIAN;
/*package-private*/
AbstractVector(Object bits) {
super(bits);
}
// Extractors
/*package-private*/
abstract AbstractSpecies<E> vspecies();
@Override
@ForceInline
public final VectorSpecies<E> species() {
return vspecies();
}
// Something to make types match up better:
@Override
@ForceInline
public final
<F> Vector<F> check(VectorSpecies<F> species) {
return check0(species);
}
@ForceInline
@SuppressWarnings("unchecked")
/*package-private*/ final
<F> AbstractVector<F> check0(VectorSpecies<F> species) {
if (!sameSpecies(species)) {
throw AbstractSpecies.checkFailed(this, species);
}
return (AbstractVector<F>) this;
}
/**
* {@inheritDoc} <!--workaround-->
*/
@Override
@ForceInline
public final
<F> Vector<F> check(Class<F> elementType) {
return check0(elementType);
}
@ForceInline
@SuppressWarnings("unchecked")
/*package-private*/ final
<F> AbstractVector<F> check0(Class<F> elementType) {
if (this.elementType() != elementType) {
throw AbstractSpecies.checkFailed(this, elementType);
}
return (AbstractVector<F>) this;
}
@ForceInline
@SuppressWarnings("unchecked")
/*package-private*/ final
<F> AbstractVector<F> check(Vector<F> other) {
if (!sameSpecies(other)) {
throw AbstractSpecies.checkFailed(this, other);
}
return (AbstractVector<F>) this;
}
@ForceInline
private boolean sameSpecies(Vector<?> other) {
// It's simpler and faster to do a class check.
boolean same = (this.getClass() == other.getClass());
// Make sure it works, too!
assert(same == (this.species() == other.species())) : same;
return same;
}
@ForceInline
private boolean sameSpecies(VectorSpecies<?> species) {
// It's simpler and faster to do a class check,
// even if you have to load a dummy vector.
AbstractVector<?> other = ((AbstractSpecies<?>)species).dummyVector();
boolean same = (this.getClass() == other.getClass());
// Make sure it works, too!
assert(same == (this.species() == species)) : same;
return same;
}
/**
* {@inheritDoc} <!--workaround-->
*/
@Override
@ForceInline
public final VectorMask<E> maskAll(boolean bit) {
return species().maskAll(bit);
}
// Make myself into a vector of the same shape
// and same information content but different lane type
/*package-private*/
abstract AbstractVector<?> asVectorRaw(LaneType laneType);
// Make myself into a byte vector of the same shape
/*package-private*/
abstract ByteVector asByteVectorRaw();
/*package-private*/
@ForceInline
final AbstractVector<?> asVectorRawTemplate(LaneType laneType) {
// NOTE: This assumes that convert0('X')
// respects REGISTER_ENDIAN order.
return convert0('X', vspecies().withLanes(laneType));
}
/*package-private*/
@ForceInline
ByteVector asByteVectorRawTemplate() {
return (ByteVector) asVectorRawTemplate(LaneType.BYTE);
}
abstract AbstractMask<E> maskFromArray(boolean[] bits);
abstract AbstractShuffle<E> iotaShuffle();
abstract AbstractShuffle<E> iotaShuffle(int start, int step, boolean wrap);
/*do not alias this byte array*/
abstract AbstractShuffle<E> shuffleFromBytes(byte[] reorder);
abstract AbstractShuffle<E> shuffleFromArray(int[] indexes, int i);
abstract AbstractShuffle<E> shuffleFromOp(IntUnaryOperator fn);
/*package-private*/
abstract AbstractVector<E> fromByteArray0(byte[] a, int offset);
/*package-private*/
abstract AbstractVector<E> maybeSwap(ByteOrder bo);
/*package-private*/
@ForceInline
VectorShuffle<Byte> swapBytesShuffle() {
return vspecies().swapBytesShuffle();
}
/**
* {@inheritDoc} <!--workaround-->
*/
@Override
@ForceInline
public ShortVector reinterpretAsShorts() {
return (ShortVector) asVectorRaw(LaneType.SHORT);
}
/**
* {@inheritDoc} <!--workaround-->
*/
@Override
@ForceInline
public IntVector reinterpretAsInts() {
return (IntVector) asVectorRaw(LaneType.INT);
}
/**
* {@inheritDoc} <!--workaround-->
*/
@Override
@ForceInline
public LongVector reinterpretAsLongs() {
return (LongVector) asVectorRaw(LaneType.LONG);
}
/**
* {@inheritDoc} <!--workaround-->
*/
@Override
@ForceInline
public FloatVector reinterpretAsFloats() {
return (FloatVector) asVectorRaw(LaneType.FLOAT);
}
/**
* {@inheritDoc} <!--workaround-->
*/
@Override
@ForceInline
public DoubleVector reinterpretAsDoubles() {
return (DoubleVector) asVectorRaw(LaneType.DOUBLE);
}
/**
* {@inheritDoc} <!--workaround-->
*/
@Override
@ForceInline
public final <F>
Vector<F> convert(Conversion<E,F> conv, int part) {
// Shape invariance is simple to implement.
// It's part of the API because shape invariance
// is the default mode of operation, and shape
// shifting operations must advertise themselves.
ConversionImpl<E,F> c = (ConversionImpl<E,F>) conv;
@SuppressWarnings("unchecked")
VectorSpecies<F> rsp = (VectorSpecies<F>)
vspecies().withLanes(c.range());
return convertShape(conv, rsp, part);
}
/**
* {@inheritDoc} <!--workaround-->
*/
@Override
@ForceInline
public final <F>
Vector<F> castShape(VectorSpecies<F> toSpecies, int part) {
// This is an odd mix of shape conversion plus
// lanewise conversions. It seems to be useful
// sometimes as a shorthand, though maybe we
// can drop it.
AbstractSpecies<E> vsp = vspecies();
AbstractSpecies<F> rsp = (AbstractSpecies<F>) toSpecies;
@SuppressWarnings("unchecked")
ConversionImpl<E,F> c = (ConversionImpl<E,F>)
ConversionImpl.ofCast(vsp.laneType, rsp.laneType);
return convertShape(c, rsp, part);
}
/**
* {@inheritDoc} <!--workaround-->
*/
@Override
@ForceInline
public abstract <F>
Vector<F> convertShape(Conversion<E,F> conv, VectorSpecies<F> rsp, int part);
/**
* This is the template for Vector::reinterpretShape, to be
* specialized by each distinct vector class.
*/
/*package-private*/
@ForceInline
final <F>
AbstractVector<F> reinterpretShapeTemplate(VectorSpecies<F> toSpecies, int part) {
AbstractSpecies<F> rsp = (AbstractSpecies<F>) toSpecies;
AbstractSpecies<E> vsp = vspecies();
if (part == 0) {
// Works the same for in-place, expand, or contract.
return convert0('X', rsp);
} else {
int origin = shapeChangeOrigin(vsp, rsp, false, part);
//System.out.println("*** origin = "+origin+", part = "+part+", reinterpret");
if (part > 0) { // Expansion: slice first then cast.
return slice(origin, vsp.zero()).convert0('X', rsp);
} else { // Contraction: cast first then unslice.
return rsp.zero().slice(rsp.laneCount() - origin,
convert0('X', rsp));
}
}
}
@Override
public abstract AbstractVector<E> slice(int origin, Vector<E> v1);
/**
* This is the template for Vector::convertShape, to be
* specialized by each distinct vector class.
*/
/*package-private*/
@ForceInline
final <F>
AbstractVector<F> convertShapeTemplate(Conversion<E,F> conv, VectorSpecies<F> toSpecies, int part) {
ConversionImpl<E,F> c = (ConversionImpl<E,F>) conv;
AbstractSpecies<F> rsp = (AbstractSpecies<F>) toSpecies;
AbstractSpecies<E> vsp = vspecies();
char kind = c.kind();
switch (kind) {
case 'C': // Regular cast conversion, known to the JIT.
break;
case 'I': // Identity conversion => reinterpret.
assert(c.sizeChangeLog2() == 0);
kind = 'X';
break;
case 'Z': // Lane-wise expansion with zero padding.
assert(c.sizeChangeLog2() > 0);
assert(c.range().elementKind == 'I');
break;
case 'R': // Lane-wise reinterpret conversion.
if (c.sizeChangeLog2() != 0) {
kind = 'Z'; // some goofy stuff here
break;
}
kind = 'X'; // No size change => reinterpret whole vector
break;
default:
throw new AssertionError(c);
}
vsp.check(c.domain()); // apply dynamic check to conv
rsp.check(c.range()); // apply dynamic check to conv
if (part == 0) {
// Works the same for in-place, expand, or contract.
return convert0(kind, rsp);
} else {
int origin = shapeChangeOrigin(vsp, rsp, true, part);
//System.out.println("*** origin = "+origin+", part = "+part+", lanewise");
if (part > 0) { // Expansion: slice first then cast.
return slice(origin, vsp.zero()).convert0(kind, rsp);
} else { // Contraction: cast first then unslice.
return rsp.zero().slice(rsp.laneCount() - origin,
convert0(kind, rsp));
}
}
}
/**
* Check a part number and return it multiplied by the appropriate
* block factor to yield the origin of the operand block, as a
* lane number. For expansions the origin is reckoned in the
* domain vector, since the domain vector has too much information
* and must be sliced. For contractions the origin is reckoned in
* the range vector, since the range vector has too many lanes and
* the result must be unsliced at the same position as the inverse
* expansion. If the conversion is lanewise, then lane sizes may
* be changing as well. This affects the logical size of the
* result, and so the domain size is multiplied or divided by the
* lane size change.
*/
/*package-private*/
@ForceInline
static
int shapeChangeOrigin(AbstractSpecies<?> dsp,
AbstractSpecies<?> rsp,
boolean lanewise,
int part) {
int domSizeLog2 = dsp.vectorShape.vectorBitSizeLog2;
int phySizeLog2 = rsp.vectorShape.vectorBitSizeLog2;
int laneChangeLog2 = 0;
if (lanewise) {
laneChangeLog2 = (rsp.laneType.elementSizeLog2 -
dsp.laneType.elementSizeLog2);
}
int resSizeLog2 = domSizeLog2 + laneChangeLog2;
// resSizeLog2 = 0 => 1-lane vector shrinking to 1-byte lane-size
// resSizeLog2 < 0 => small vector shrinking by more than a lane-size
assert(resSizeLog2 >= 0);
// Expansion ratio: expansionLog2 = resSizeLog2 - phySizeLog2;
if (!partInRange(resSizeLog2, phySizeLog2, part)) {
// fall through...
} else if (resSizeLog2 > phySizeLog2) {
// Expansion by M means we must slice a block from the domain.
// What is that block size? It is 1/M of the domain.
// Let's compute the log2 of that block size, as 's'.
//s = (dsp.laneCountLog2() - expansionLog2);
//s = ((domSizeLog2 - dsp.laneType.elementSizeLog2) - expansionLog2);
//s = (domSizeLog2 - expansionLog2 - dsp.laneType.elementSizeLog2);
int s = phySizeLog2 - laneChangeLog2 - dsp.laneType.elementSizeLog2;
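// e.g. reinterpreting a 512-bit byte vector into 128-bit parts:
// s = 7 - 0 - 3 = 4, so part p starts at input lane p*16.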
// Scale the part number by the input block size, in input lanes.
if ((s & 31) == s) // sanity check
return part << s;
} else {
// Contraction by M means we must drop a block into the range.
// What is that block size? It is 1/M of the range.
// Let's compute the log2 of that block size, as 's'.
//s = (rsp.laneCountLog2() + expansionLog2);
//s = ((phySizeLog2 - rsp.laneType.elementSizeLog2) + expansionLog2);
//s = (phySizeLog2 + expansionLog2 - rsp.laneType.elementSizeLog2);
int s = resSizeLog2 - rsp.laneType.elementSizeLog2;
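// e.g. reinterpreting a 128-bit byte vector into a 512-bit species:
// s = 7 - 3 = 4, so part p (p in -3..0) lands at output lane (-p)*16.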
// Scale the part number by the output block size, in output lanes.
if ((s & 31) == s) // sanity check
return -part << s;
}
throw wrongPart(dsp, rsp, lanewise, part);
}
@ForceInline
private static boolean partInRange(int resSizeLog2, int phySizeLog2, int part) {
// Let's try a branch-free version of this.
int diff = (resSizeLog2 - phySizeLog2);
int sign = (diff >> -1);
//d = Math.abs(diff);
//d = (sign == 0 ? diff : sign == -1 ? 1 + ~diff);
int d = (diff ^ sign) - sign;
assert(d == Math.abs(diff) && d <= 16); // let's not go crazy here
//p = part * sign;
int p = (part ^ sign) - sign;
// z = sign == 0 ? 0<=part<(1<<d), == (part & (-1 << d)) == 0
// z = sign == -1 ? 0<=-part<(1<<d), == (-part & (-1 << d)) == 0
boolean z = (p & (-1 << d)) == 0;
assert(z == partInRangeSlow(resSizeLog2, phySizeLog2, part)) : z;
return z;
}
private static boolean partInRangeSlow(int resSizeLog2, int phySizeLog2, int part) {
if (resSizeLog2 > phySizeLog2) { // expansion
int limit = 1 << (resSizeLog2 - phySizeLog2);
return part >= 0 && part < limit;
} else if (resSizeLog2 < phySizeLog2) { // contraction
int limit = 1 << (phySizeLog2 - resSizeLog2);
return part > -limit && part <= 0;
} else {
return (part == 0);
}
}
private static
ArrayIndexOutOfBoundsException
wrongPart(AbstractSpecies<?> dsp,
AbstractSpecies<?> rsp,
boolean lanewise,
int part) {
String laneChange = "";
String converting = "converting";
int dsize = dsp.elementSize(), rsize = rsp.elementSize();
if (!lanewise) {
converting = "reinterpreting";
} else if (dsize < rsize) {
laneChange = String.format(" (lanes are expanding by %d)",
rsize / dsize);
} else if (dsize > rsize) {
laneChange = String.format(" (lanes are contracting by %d)",
dsize / rsize);
}
String msg = String.format("bad part number %d %s %s -> %s%s",
part, converting, dsp, rsp, laneChange);
return new ArrayIndexOutOfBoundsException(msg);
}
/*package-private*/
ArithmeticException divZeroException() {
throw new ArithmeticException("zero vector lane in dividend "+this);
}
/**
* Helper function for all sorts of byte-wise reinterpretation casts.
* This function kicks in after intrinsic failure.
*/
/*package-private*/
@ForceInline
final <F>
AbstractVector<F> defaultReinterpret(AbstractSpecies<F> rsp) {
int blen = Math.max(this.bitSize(), rsp.vectorBitSize()) / Byte.SIZE;
ByteOrder bo = ByteOrder.LITTLE_ENDIAN;
ByteBuffer bb = ByteBuffer.allocate(blen);
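// allocate() zero-fills, so a shape-expanding reinterpret pads the
// extra result bytes with zeros.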
this.intoByteBuffer(bb, 0, bo);
VectorMask<F> m = rsp.maskAll(true);
// enum-switches don't optimize properly JDK-8161245
switch (rsp.laneType.switchKey) {
case LaneType.SK_BYTE:
return ByteVector.fromByteBuffer(rsp.check(byte.class), bb, 0, bo, m.check(byte.class)).check0(rsp);
case LaneType.SK_SHORT:
return ShortVector.fromByteBuffer(rsp.check(short.class), bb, 0, bo, m.check(short.class)).check0(rsp);
case LaneType.SK_INT:
return IntVector.fromByteBuffer(rsp.check(int.class), bb, 0, bo, m.check(int.class)).check0(rsp);
case LaneType.SK_LONG:
return LongVector.fromByteBuffer(rsp.check(long.class), bb, 0, bo, m.check(long.class)).check0(rsp);
case LaneType.SK_FLOAT:
return FloatVector.fromByteBuffer(rsp.check(float.class), bb, 0, bo, m.check(float.class)).check0(rsp);
case LaneType.SK_DOUBLE:
return DoubleVector.fromByteBuffer(rsp.check(double.class), bb, 0, bo, m.check(double.class)).check0(rsp);
default:
throw new AssertionError(rsp.toString());
}
}
/**
* Helper function for all sorts of lane-wise conversions.
* This function kicks in after intrinsic failure.
*/
/*package-private*/
@ForceInline
final <F>
AbstractVector<F> defaultCast(AbstractSpecies<F> dsp) {
int rlength = dsp.laneCount;
if (vspecies().laneType.elementKind == 'F') {
// Buffer input values in a double array.
double[] lanes = toDoubleArray();
int limit = Math.min(lanes.length, rlength);
// enum-switches don't optimize properly JDK-8161245
switch (dsp.laneType.switchKey) {
case LaneType.SK_BYTE: {
byte[] a = new byte[rlength];
for (int i = 0; i < limit; i++) {
a[i] = (byte) lanes[i];
}
return ByteVector.fromArray(dsp.check(byte.class), a, 0).check0(dsp);
}
case LaneType.SK_SHORT: {
short[] a = new short[rlength];
for (int i = 0; i < limit; i++) {
a[i] = (short) lanes[i];
}
return ShortVector.fromArray(dsp.check(short.class), a, 0).check0(dsp);
}
case LaneType.SK_INT: {
int[] a = new int[rlength];
for (int i = 0; i < limit; i++) {
a[i] = (int) lanes[i];
}
return IntVector.fromArray(dsp.check(int.class), a, 0).check0(dsp);
}
case LaneType.SK_LONG: {
long[] a = new long[rlength];
for (int i = 0; i < limit; i++) {
a[i] = (long) lanes[i];
}
return LongVector.fromArray(dsp.check(long.class), a, 0).check0(dsp);
}
case LaneType.SK_FLOAT: {
float[] a = new float[rlength];
for (int i = 0; i < limit; i++) {
a[i] = (float) lanes[i];
}
return FloatVector.fromArray(dsp.check(float.class), a, 0).check0(dsp);
}
case LaneType.SK_DOUBLE: {
double[] a = new double[rlength];
for (int i = 0; i < limit; i++) {
a[i] = (double) lanes[i];
}
return DoubleVector.fromArray(dsp.check(double.class), a, 0).check0(dsp);
}
default: break;
}
} else {
// Buffer input values in a long array.
long[] lanes = toLongArray();
int limit = Math.min(lanes.length, rlength);
// enum-switches don't optimize properly JDK-8161245
switch (dsp.laneType.switchKey) {
case LaneType.SK_BYTE: {
byte[] a = new byte[rlength];
for (int i = 0; i < limit; i++) {
a[i] = (byte) lanes[i];
}
return ByteVector.fromArray(dsp.check(byte.class), a, 0).check0(dsp);
}
case LaneType.SK_SHORT: {
short[] a = new short[rlength];
for (int i = 0; i < limit; i++) {
a[i] = (short) lanes[i];
}
return ShortVector.fromArray(dsp.check(short.class), a, 0).check0(dsp);
}
case LaneType.SK_INT: {
int[] a = new int[rlength];
for (int i = 0; i < limit; i++) {
a[i] = (int) lanes[i];
}
return IntVector.fromArray(dsp.check(int.class), a, 0).check0(dsp);
}
case LaneType.SK_LONG: {
long[] a = new long[rlength];
for (int i = 0; i < limit; i++) {
a[i] = (long) lanes[i];
}
return LongVector.fromArray(dsp.check(long.class), a, 0).check0(dsp);
}
case LaneType.SK_FLOAT: {
float[] a = new float[rlength];
for (int i = 0; i < limit; i++) {
a[i] = (float) lanes[i];
}
return FloatVector.fromArray(dsp.check(float.class), a, 0).check0(dsp);
}
case LaneType.SK_DOUBLE: {
double[] a = new double[rlength];
for (int i = 0; i < limit; i++) {
a[i] = (double) lanes[i];
}
return DoubleVector.fromArray(dsp.check(double.class), a, 0).check0(dsp);
}
default: break;
}
}
throw new AssertionError();
}
// Constant-folded access to conversion intrinsics:
/**
* Dispatch on conversion kind and target species.
* The code of this is arranged to fold up if the
* vector class is constant and the target species
* is also constant. This is often the case.
* Residual non-folded code may also perform acceptably
* in some cases due to type profiling, especially
* of rvtype. If only one shape is being used,
* the profiling of rvtype should help speculatively
* fold the code even when the target species is
* not a constant.
*/
/*package-private*/
@ForceInline
final <F>
AbstractVector<F> convert0(char kind, AbstractSpecies<F> rsp) {
// Derive some JIT-time constants:
Class<?> etype; // fill in after switch (constant)
int vlength; // fill in after switch (mark type profile?)
Class<?> rvtype; // fill in after switch (mark type profile)
Class<?> rtype;
int rlength;
switch (kind) {
case 'Z': // lane-wise size change, maybe with sign clip
// Maybe this should be an intrinsic also.
AbstractSpecies<?> rspi = rsp.asIntegral();
AbstractVector<?> bitv = resizeLanes0(this, rspi);
return (rspi == rsp ? bitv.check0(rsp) : bitv.convert0('X', rsp));
case 'C': // lane-wise cast (but not identity)
rtype = rsp.elementType();
rlength = rsp.laneCount();
etype = this.elementType(); // (profile)
vlength = this.length(); // (profile)
rvtype = rsp.dummyVector().getClass(); // (profile)
return VectorSupport.convert(VectorSupport.VECTOR_OP_CAST,
this.getClass(), etype, vlength,
rvtype, rtype, rlength,
this, rsp,
AbstractVector::defaultCast);
case 'X': // reinterpret cast, not lane-wise if lane sizes differ
rtype = rsp.elementType();
rlength = rsp.laneCount();
etype = this.elementType(); // (profile)
vlength = this.length(); // (profile)
rvtype = rsp.dummyVector().getClass(); // (profile)
return VectorSupport.convert(VectorSupport.VECTOR_OP_REINTERPRET,
this.getClass(), etype, vlength,
rvtype, rtype, rlength,
this, rsp,
AbstractVector::defaultReinterpret);
}
throw new AssertionError();
}
@ForceInline
private static <F>
AbstractVector<F>
resizeLanes0(AbstractVector<?> v, AbstractSpecies<F> rspi) {
AbstractSpecies<?> dsp = v.vspecies();
int sizeChange = rspi.elementSize() - dsp.elementSize();
AbstractSpecies<?> dspi = dsp.asIntegral();
if (dspi != dsp) v = v.convert0('R', dspi);
if (sizeChange <= 0) { // clip in place
return v.convert0('C', rspi);
}
// extend in place, but remove unwanted sign extension
long mask = -1L >>> sizeChange;
return (AbstractVector<F>)
v.convert0('C', rspi)
.lanewise(AND, rspi.broadcast(mask));
}
// Byte buffer wrappers.
static ByteBuffer wrapper(ByteBuffer bb, ByteOrder bo) {
return bb.duplicate().order(bo);
}
static ByteBuffer wrapper(byte[] a, ByteOrder bo) {
return ByteBuffer.wrap(a).order(bo);
}
static {
// Recode uses of VectorSupport.reinterpret if this assertion fails:
assert(REGISTER_ENDIAN == ByteOrder.LITTLE_ENDIAN);
}
}

@ -0,0 +1,840 @@
/*
* Copyright (c) 2017, 2020, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation. Oracle designates this
* particular file as subject to the "Classpath" exception as provided
* by Oracle in the LICENSE file that accompanied this code.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*/
package jdk.incubator.vector;
import java.nio.ByteBuffer;
import java.util.Arrays;
import java.util.Objects;
import java.util.function.IntUnaryOperator;
import jdk.internal.vm.annotation.ForceInline;
import jdk.internal.vm.vector.VectorSupport;
import static jdk.internal.vm.vector.VectorSupport.*;
import static jdk.incubator.vector.VectorOperators.*;
// -- This file was mechanically generated: Do not edit! -- //
@SuppressWarnings("cast") // warning: redundant cast
final class Byte128Vector extends ByteVector {
static final ByteSpecies VSPECIES =
(ByteSpecies) ByteVector.SPECIES_128;
static final VectorShape VSHAPE =
VSPECIES.vectorShape();
static final Class<Byte128Vector> VCLASS = Byte128Vector.class;
static final int VSIZE = VSPECIES.vectorBitSize();
static final int VLENGTH = VSPECIES.laneCount(); // used by the JVM
static final Class<Byte> ETYPE = byte.class; // used by the JVM
Byte128Vector(byte[] v) {
super(v);
}
// For compatibility as Byte128Vector::new,
// stored into species.vectorFactory.
Byte128Vector(Object v) {
this((byte[]) v);
}
static final Byte128Vector ZERO = new Byte128Vector(new byte[VLENGTH]);
static final Byte128Vector IOTA = new Byte128Vector(VSPECIES.iotaArray());
static {
// Warm up a few species caches.
// If we do this too much we will
// get NPEs from bootstrap circularity.
VSPECIES.dummyVector();
VSPECIES.withLanes(LaneType.BYTE);
}
// Specialized extractors
@ForceInline
final @Override
public ByteSpecies vspecies() {
// ISSUE: This should probably be a @Stable
// field inside AbstractVector, rather than
// a megamorphic method.
return VSPECIES;
}
@ForceInline
@Override
public final Class<Byte> elementType() { return byte.class; }
@ForceInline
@Override
public final int elementSize() { return Byte.SIZE; }
@ForceInline
@Override
public final VectorShape shape() { return VSHAPE; }
@ForceInline
@Override
public final int length() { return VLENGTH; }
@ForceInline
@Override
public final int bitSize() { return VSIZE; }
@ForceInline
@Override
public final int byteSize() { return VSIZE / Byte.SIZE; }
/*package-private*/
@ForceInline
final @Override
byte[] vec() {
return (byte[])getPayload();
}
// Virtualized constructors
@Override
@ForceInline
public final Byte128Vector broadcast(byte e) {
return (Byte128Vector) super.broadcastTemplate(e); // specialize
}
@Override
@ForceInline
public final Byte128Vector broadcast(long e) {
return (Byte128Vector) super.broadcastTemplate(e); // specialize
}
@Override
@ForceInline
Byte128Mask maskFromArray(boolean[] bits) {
return new Byte128Mask(bits);
}
@Override
@ForceInline
Byte128Shuffle iotaShuffle() { return Byte128Shuffle.IOTA; }
@ForceInline
Byte128Shuffle iotaShuffle(int start, int step, boolean wrap) {
if (wrap) {
return (Byte128Shuffle)VectorSupport.shuffleIota(ETYPE, Byte128Shuffle.class, VSPECIES, VLENGTH, start, step, 1,
(l, lstart, lstep, s) -> s.shuffleFromOp(i -> (VectorIntrinsics.wrapToRange(i*lstep + lstart, l))));
} else {
return (Byte128Shuffle)VectorSupport.shuffleIota(ETYPE, Byte128Shuffle.class, VSPECIES, VLENGTH, start, step, 0,
(l, lstart, lstep, s) -> s.shuffleFromOp(i -> (i*lstep + lstart)));
}
}
@Override
@ForceInline
Byte128Shuffle shuffleFromBytes(byte[] reorder) { return new Byte128Shuffle(reorder); }
@Override
@ForceInline
Byte128Shuffle shuffleFromArray(int[] indexes, int i) { return new Byte128Shuffle(indexes, i); }
@Override
@ForceInline
Byte128Shuffle shuffleFromOp(IntUnaryOperator fn) { return new Byte128Shuffle(fn); }
// Make a vector of the same species but the given elements:
@ForceInline
final @Override
Byte128Vector vectorFactory(byte[] vec) {
return new Byte128Vector(vec);
}
@ForceInline
final @Override
Byte128Vector asByteVectorRaw() {
return (Byte128Vector) super.asByteVectorRawTemplate(); // specialize
}
@ForceInline
final @Override
AbstractVector<?> asVectorRaw(LaneType laneType) {
return super.asVectorRawTemplate(laneType); // specialize
}
// Unary operator
@ForceInline
final @Override
Byte128Vector uOp(FUnOp f) {
return (Byte128Vector) super.uOpTemplate(f); // specialize
}
@ForceInline
final @Override
Byte128Vector uOp(VectorMask<Byte> m, FUnOp f) {
return (Byte128Vector)
super.uOpTemplate((Byte128Mask)m, f); // specialize
}
// Binary operator
@ForceInline
final @Override
Byte128Vector bOp(Vector<Byte> v, FBinOp f) {
return (Byte128Vector) super.bOpTemplate((Byte128Vector)v, f); // specialize
}
@ForceInline
final @Override
Byte128Vector bOp(Vector<Byte> v,
VectorMask<Byte> m, FBinOp f) {
return (Byte128Vector)
super.bOpTemplate((Byte128Vector)v, (Byte128Mask)m,
f); // specialize
}
// Ternary operator
@ForceInline
final @Override
Byte128Vector tOp(Vector<Byte> v1, Vector<Byte> v2, FTriOp f) {
return (Byte128Vector)
super.tOpTemplate((Byte128Vector)v1, (Byte128Vector)v2,
f); // specialize
}
@ForceInline
final @Override
Byte128Vector tOp(Vector<Byte> v1, Vector<Byte> v2,
VectorMask<Byte> m, FTriOp f) {
return (Byte128Vector)
super.tOpTemplate((Byte128Vector)v1, (Byte128Vector)v2,
(Byte128Mask)m, f); // specialize
}
@ForceInline
final @Override
byte rOp(byte v, FBinOp f) {
return super.rOpTemplate(v, f); // specialize
}
@Override
@ForceInline
public final <F>
Vector<F> convertShape(VectorOperators.Conversion<Byte,F> conv,
VectorSpecies<F> rsp, int part) {
return super.convertShapeTemplate(conv, rsp, part); // specialize
}
@Override
@ForceInline
public final <F>
Vector<F> reinterpretShape(VectorSpecies<F> toSpecies, int part) {
return super.reinterpretShapeTemplate(toSpecies, part); // specialize
}
// Specialized algebraic operations:
// The following definition forces a specialized version of this
// crucial method into the v-table of this class. A call to add()
// will inline to a call to lanewise(ADD,), at which point the JIT
// intrinsic will have the opcode of ADD, plus all the metadata
// for this particular class, enabling it to generate precise
// code.
//
// There is probably no benefit to the JIT to specialize the
// masked or broadcast versions of the lanewise method.
@Override
@ForceInline
public Byte128Vector lanewise(Unary op) {
return (Byte128Vector) super.lanewiseTemplate(op); // specialize
}
@Override
@ForceInline
public Byte128Vector lanewise(Binary op, Vector<Byte> v) {
return (Byte128Vector) super.lanewiseTemplate(op, v); // specialize
}
/*package-private*/
@Override
@ForceInline Byte128Vector
lanewiseShift(VectorOperators.Binary op, int e) {
return (Byte128Vector) super.lanewiseShiftTemplate(op, e); // specialize
}
/*package-private*/
@Override
@ForceInline
public final
Byte128Vector
lanewise(VectorOperators.Ternary op, Vector<Byte> v1, Vector<Byte> v2) {
return (Byte128Vector) super.lanewiseTemplate(op, v1, v2); // specialize
}
@Override
@ForceInline
public final
Byte128Vector addIndex(int scale) {
return (Byte128Vector) super.addIndexTemplate(scale); // specialize
}
// Type specific horizontal reductions
@Override
@ForceInline
public final byte reduceLanes(VectorOperators.Associative op) {
return super.reduceLanesTemplate(op); // specialized
}
@Override
@ForceInline
public final byte reduceLanes(VectorOperators.Associative op,
VectorMask<Byte> m) {
return super.reduceLanesTemplate(op, m); // specialized
}
@Override
@ForceInline
public final long reduceLanesToLong(VectorOperators.Associative op) {
return (long) super.reduceLanesTemplate(op); // specialized
}
@Override
@ForceInline
public final long reduceLanesToLong(VectorOperators.Associative op,
VectorMask<Byte> m) {
return (long) super.reduceLanesTemplate(op, m); // specialized
}
@Override
@ForceInline
public VectorShuffle<Byte> toShuffle() {
byte[] a = toArray();
int[] sa = new int[a.length];
for (int i = 0; i < a.length; i++) {
sa[i] = (int) a[i];
}
return VectorShuffle.fromArray(VSPECIES, sa, 0);
}
// Specialized unary testing
@Override
@ForceInline
public final Byte128Mask test(Test op) {
return super.testTemplate(Byte128Mask.class, op); // specialize
}
// Specialized comparisons
@Override
@ForceInline
public final Byte128Mask compare(Comparison op, Vector<Byte> v) {
return super.compareTemplate(Byte128Mask.class, op, v); // specialize
}
@Override
@ForceInline
public final Byte128Mask compare(Comparison op, byte s) {
return super.compareTemplate(Byte128Mask.class, op, s); // specialize
}
@Override
@ForceInline
public final Byte128Mask compare(Comparison op, long s) {
return super.compareTemplate(Byte128Mask.class, op, s); // specialize
}
@Override
@ForceInline
public Byte128Vector blend(Vector<Byte> v, VectorMask<Byte> m) {
return (Byte128Vector)
super.blendTemplate(Byte128Mask.class,
(Byte128Vector) v,
(Byte128Mask) m); // specialize
}
@Override
@ForceInline
public Byte128Vector slice(int origin, Vector<Byte> v) {
return (Byte128Vector) super.sliceTemplate(origin, v); // specialize
}
@Override
@ForceInline
public Byte128Vector slice(int origin) {
if ((origin < 0) || (origin >= VLENGTH)) {
throw new ArrayIndexOutOfBoundsException("Index " + origin + " out of bounds for vector length " + VLENGTH);
} else {
Byte128Shuffle Iota = iotaShuffle();
VectorMask<Byte> BlendMask = Iota.toVector().compare(VectorOperators.LT, (broadcast((byte)(VLENGTH-origin))));
Iota = iotaShuffle(origin, 1, true);
return ZERO.blend(this.rearrange(Iota), BlendMask);
}
}
@Override
@ForceInline
public Byte128Vector unslice(int origin, Vector<Byte> w, int part) {
return (Byte128Vector) super.unsliceTemplate(origin, w, part); // specialize
}
@Override
@ForceInline
public Byte128Vector unslice(int origin, Vector<Byte> w, int part, VectorMask<Byte> m) {
return (Byte128Vector)
super.unsliceTemplate(Byte128Mask.class,
origin, w, part,
(Byte128Mask) m); // specialize
}
@Override
@ForceInline
public Byte128Vector unslice(int origin) {
if ((origin < 0) || (origin >= VLENGTH)) {
throw new ArrayIndexOutOfBoundsException("Index " + origin + " out of bounds for vector length " + VLENGTH);
} else {
Byte128Shuffle Iota = iotaShuffle();
VectorMask<Byte> BlendMask = Iota.toVector().compare(VectorOperators.GE, (broadcast((byte)(origin))));
Iota = iotaShuffle(-origin, 1, true);
return ZERO.blend(this.rearrange(Iota), BlendMask);
}
}
@Override
@ForceInline
public Byte128Vector rearrange(VectorShuffle<Byte> s) {
return (Byte128Vector)
super.rearrangeTemplate(Byte128Shuffle.class,
(Byte128Shuffle) s); // specialize
}
@Override
@ForceInline
public Byte128Vector rearrange(VectorShuffle<Byte> shuffle,
VectorMask<Byte> m) {
return (Byte128Vector)
super.rearrangeTemplate(Byte128Shuffle.class,
(Byte128Shuffle) shuffle,
(Byte128Mask) m); // specialize
}
@Override
@ForceInline
public Byte128Vector rearrange(VectorShuffle<Byte> s,
Vector<Byte> v) {
return (Byte128Vector)
super.rearrangeTemplate(Byte128Shuffle.class,
(Byte128Shuffle) s,
(Byte128Vector) v); // specialize
}
@Override
@ForceInline
public Byte128Vector selectFrom(Vector<Byte> v) {
return (Byte128Vector)
super.selectFromTemplate((Byte128Vector) v); // specialize
}
@Override
@ForceInline
public Byte128Vector selectFrom(Vector<Byte> v,
VectorMask<Byte> m) {
return (Byte128Vector)
super.selectFromTemplate((Byte128Vector) v,
(Byte128Mask) m); // specialize
}
@ForceInline
@Override
public byte lane(int i) {
switch(i) {
case 0: return laneHelper(0);
case 1: return laneHelper(1);
case 2: return laneHelper(2);
case 3: return laneHelper(3);
case 4: return laneHelper(4);
case 5: return laneHelper(5);
case 6: return laneHelper(6);
case 7: return laneHelper(7);
case 8: return laneHelper(8);
case 9: return laneHelper(9);
case 10: return laneHelper(10);
case 11: return laneHelper(11);
case 12: return laneHelper(12);
case 13: return laneHelper(13);
case 14: return laneHelper(14);
case 15: return laneHelper(15);
default: throw new IllegalArgumentException("Index " + i + " must be zero or positive, and less than " + VLENGTH);
}
}
public byte laneHelper(int i) {
return (byte) VectorSupport.extract(
VCLASS, ETYPE, VLENGTH,
this, i,
(vec, ix) -> {
byte[] vecarr = vec.vec();
return (long)vecarr[ix];
});
}
@ForceInline
@Override
public Byte128Vector withLane(int i, byte e) {
switch (i) {
case 0: return withLaneHelper(0, e);
case 1: return withLaneHelper(1, e);
case 2: return withLaneHelper(2, e);
case 3: return withLaneHelper(3, e);
case 4: return withLaneHelper(4, e);
case 5: return withLaneHelper(5, e);
case 6: return withLaneHelper(6, e);
case 7: return withLaneHelper(7, e);
case 8: return withLaneHelper(8, e);
case 9: return withLaneHelper(9, e);
case 10: return withLaneHelper(10, e);
case 11: return withLaneHelper(11, e);
case 12: return withLaneHelper(12, e);
case 13: return withLaneHelper(13, e);
case 14: return withLaneHelper(14, e);
case 15: return withLaneHelper(15, e);
default: throw new IllegalArgumentException("Index " + i + " must be zero or positive, and less than " + VLENGTH);
}
}
public Byte128Vector withLaneHelper(int i, byte e) {
return VectorSupport.insert(
VCLASS, ETYPE, VLENGTH,
this, i, (long)e,
(v, ix, bits) -> {
byte[] res = v.vec().clone();
res[ix] = (byte)bits;
return v.vectorFactory(res);
});
}
// Mask
static final class Byte128Mask extends AbstractMask<Byte> {
static final int VLENGTH = VSPECIES.laneCount(); // used by the JVM
static final Class<Byte> ETYPE = byte.class; // used by the JVM
Byte128Mask(boolean[] bits) {
this(bits, 0);
}
Byte128Mask(boolean[] bits, int offset) {
super(prepare(bits, offset));
}
Byte128Mask(boolean val) {
super(prepare(val));
}
private static boolean[] prepare(boolean[] bits, int offset) {
boolean[] newBits = new boolean[VSPECIES.laneCount()];
for (int i = 0; i < newBits.length; i++) {
newBits[i] = bits[offset + i];
}
return newBits;
}
private static boolean[] prepare(boolean val) {
boolean[] bits = new boolean[VSPECIES.laneCount()];
Arrays.fill(bits, val);
return bits;
}
@ForceInline
final @Override
public ByteSpecies vspecies() {
// ISSUE: This should probably be a @Stable
// field inside AbstractMask, rather than
// a megamorphic method.
return VSPECIES;
}
@ForceInline
boolean[] getBits() {
return (boolean[])getPayload();
}
@Override
Byte128Mask uOp(MUnOp f) {
boolean[] res = new boolean[vspecies().laneCount()];
boolean[] bits = getBits();
for (int i = 0; i < res.length; i++) {
res[i] = f.apply(i, bits[i]);
}
return new Byte128Mask(res);
}
@Override
Byte128Mask bOp(VectorMask<Byte> m, MBinOp f) {
boolean[] res = new boolean[vspecies().laneCount()];
boolean[] bits = getBits();
boolean[] mbits = ((Byte128Mask)m).getBits();
for (int i = 0; i < res.length; i++) {
res[i] = f.apply(i, bits[i], mbits[i]);
}
return new Byte128Mask(res);
}
@ForceInline
@Override
public final
Byte128Vector toVector() {
return (Byte128Vector) super.toVectorTemplate(); // specialize
}
@Override
@ForceInline
public <E> VectorMask<E> cast(VectorSpecies<E> s) {
AbstractSpecies<E> species = (AbstractSpecies<E>) s;
if (length() != species.laneCount())
throw new IllegalArgumentException("VectorMask length and species length differ");
boolean[] maskArray = toArray();
// enum-switches don't optimize properly JDK-8161245
switch (species.laneType.switchKey) {
case LaneType.SK_BYTE:
return new Byte128Vector.Byte128Mask(maskArray).check(species);
case LaneType.SK_SHORT:
return new Short128Vector.Short128Mask(maskArray).check(species);
case LaneType.SK_INT:
return new Int128Vector.Int128Mask(maskArray).check(species);
case LaneType.SK_LONG:
return new Long128Vector.Long128Mask(maskArray).check(species);
case LaneType.SK_FLOAT:
return new Float128Vector.Float128Mask(maskArray).check(species);
case LaneType.SK_DOUBLE:
return new Double128Vector.Double128Mask(maskArray).check(species);
}
// Should not reach here.
throw new AssertionError(species);
}
// Unary operations
@Override
@ForceInline
public Byte128Mask not() {
return xor(maskAll(true));
}
// Binary operations
@Override
@ForceInline
public Byte128Mask and(VectorMask<Byte> mask) {
Objects.requireNonNull(mask);
Byte128Mask m = (Byte128Mask)mask;
return VectorSupport.binaryOp(VECTOR_OP_AND, Byte128Mask.class, byte.class, VLENGTH,
this, m,
(m1, m2) -> m1.bOp(m2, (i, a, b) -> a & b));
}
@Override
@ForceInline
public Byte128Mask or(VectorMask<Byte> mask) {
Objects.requireNonNull(mask);
Byte128Mask m = (Byte128Mask)mask;
return VectorSupport.binaryOp(VECTOR_OP_OR, Byte128Mask.class, byte.class, VLENGTH,
this, m,
(m1, m2) -> m1.bOp(m2, (i, a, b) -> a | b));
}
@ForceInline
/* package-private */
Byte128Mask xor(VectorMask<Byte> mask) {
Objects.requireNonNull(mask);
Byte128Mask m = (Byte128Mask)mask;
return VectorSupport.binaryOp(VECTOR_OP_XOR, Byte128Mask.class, byte.class, VLENGTH,
this, m,
(m1, m2) -> m1.bOp(m2, (i, a, b) -> a ^ b));
}
// Reductions
@Override
@ForceInline
public boolean anyTrue() {
return VectorSupport.test(BT_ne, Byte128Mask.class, byte.class, VLENGTH,
this, vspecies().maskAll(true),
(m, __) -> anyTrueHelper(((Byte128Mask)m).getBits()));
}
@Override
@ForceInline
public boolean allTrue() {
return VectorSupport.test(BT_overflow, Byte128Mask.class, byte.class, VLENGTH,
this, vspecies().maskAll(true),
(m, __) -> allTrueHelper(((Byte128Mask)m).getBits()));
}
@ForceInline
/*package-private*/
static Byte128Mask maskAll(boolean bit) {
return VectorSupport.broadcastCoerced(Byte128Mask.class, byte.class, VLENGTH,
(bit ? -1 : 0), null,
(v, __) -> (v != 0 ? TRUE_MASK : FALSE_MASK));
}
private static final Byte128Mask TRUE_MASK = new Byte128Mask(true);
private static final Byte128Mask FALSE_MASK = new Byte128Mask(false);
}
// Shuffle
static final class Byte128Shuffle extends AbstractShuffle<Byte> {
static final int VLENGTH = VSPECIES.laneCount(); // used by the JVM
static final Class<Byte> ETYPE = byte.class; // used by the JVM
Byte128Shuffle(byte[] reorder) {
super(VLENGTH, reorder);
}
public Byte128Shuffle(int[] reorder) {
super(VLENGTH, reorder);
}
public Byte128Shuffle(int[] reorder, int i) {
super(VLENGTH, reorder, i);
}
public Byte128Shuffle(IntUnaryOperator fn) {
super(VLENGTH, fn);
}
@Override
public ByteSpecies vspecies() {
return VSPECIES;
}
static {
// There must be enough bits in the shuffle lanes to encode
// VLENGTH valid indexes and VLENGTH exceptional ones.
assert(VLENGTH < Byte.MAX_VALUE);
assert(Byte.MIN_VALUE <= -VLENGTH);
}
static final Byte128Shuffle IOTA = new Byte128Shuffle(IDENTITY);
@Override
@ForceInline
public Byte128Vector toVector() {
return VectorSupport.shuffleToVector(VCLASS, ETYPE, Byte128Shuffle.class, this, VLENGTH,
(s) -> ((Byte128Vector)(((AbstractShuffle<Byte>)(s)).toVectorTemplate())));
}
@Override
@ForceInline
public <F> VectorShuffle<F> cast(VectorSpecies<F> s) {
AbstractSpecies<F> species = (AbstractSpecies<F>) s;
if (length() != species.laneCount())
throw new IllegalArgumentException("VectorShuffle length and species length differ");
int[] shuffleArray = toArray();
// enum-switches don't optimize properly JDK-8161245
switch (species.laneType.switchKey) {
case LaneType.SK_BYTE:
return new Byte128Vector.Byte128Shuffle(shuffleArray).check(species);
case LaneType.SK_SHORT:
return new Short128Vector.Short128Shuffle(shuffleArray).check(species);
case LaneType.SK_INT:
return new Int128Vector.Int128Shuffle(shuffleArray).check(species);
case LaneType.SK_LONG:
return new Long128Vector.Long128Shuffle(shuffleArray).check(species);
case LaneType.SK_FLOAT:
return new Float128Vector.Float128Shuffle(shuffleArray).check(species);
case LaneType.SK_DOUBLE:
return new Double128Vector.Double128Shuffle(shuffleArray).check(species);
}
// Should not reach here.
throw new AssertionError(species);
}
@ForceInline
@Override
public Byte128Shuffle rearrange(VectorShuffle<Byte> shuffle) {
Byte128Shuffle s = (Byte128Shuffle) shuffle;
byte[] reorder1 = reorder();
byte[] reorder2 = s.reorder();
byte[] r = new byte[reorder1.length];
for (int i = 0; i < reorder1.length; i++) {
int ssi = reorder2[i];
r[i] = reorder1[ssi]; // throws on exceptional index
}
return new Byte128Shuffle(r);
}
}
// ================================================
// Specialized low-level memory operations.
@ForceInline
@Override
final
ByteVector fromArray0(byte[] a, int offset) {
return super.fromArray0Template(a, offset); // specialize
}
@ForceInline
@Override
final
ByteVector fromByteArray0(byte[] a, int offset) {
return super.fromByteArray0Template(a, offset); // specialize
}
@ForceInline
@Override
final
ByteVector fromByteBuffer0(ByteBuffer bb, int offset) {
return super.fromByteBuffer0Template(bb, offset); // specialize
}
@ForceInline
@Override
final
void intoArray0(byte[] a, int offset) {
super.intoArray0Template(a, offset); // specialize
}
@ForceInline
@Override
final
void intoByteArray0(byte[] a, int offset) {
super.intoByteArray0Template(a, offset); // specialize
}
// End of specialized low-level memory operations.
// ================================================
}
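The class above is one of several shape-specialized vector classes generated from a common template; user code never instantiates it directly. Application code goes through the public ByteVector and VectorSpecies API, and the JIT binds calls to the Byte128Vector overrides shown here. A minimal usage sketch follows, assuming the jdk.incubator.vector module is available at compile and run time; the class and method names (ByteVectorAddDemo, addArrays) are illustrative only and are not part of this patch.

import jdk.incubator.vector.ByteVector;
import jdk.incubator.vector.VectorOperators;
import jdk.incubator.vector.VectorSpecies;

public class ByteVectorAddDemo {
    // 128-bit byte species; on hardware preferring this shape the calls below
    // resolve to the Byte128Vector specializations defined in the file above.
    static final VectorSpecies<Byte> SPECIES = ByteVector.SPECIES_128;

    // Element-wise c[i] = a[i] + b[i] (wrapping byte arithmetic).
    static void addArrays(byte[] a, byte[] b, byte[] c) {
        int i = 0;
        int upper = SPECIES.loopBound(a.length);
        for (; i < upper; i += SPECIES.length()) {
            ByteVector va = ByteVector.fromArray(SPECIES, a, i);
            ByteVector vb = ByteVector.fromArray(SPECIES, b, i);
            va.lanewise(VectorOperators.ADD, vb).intoArray(c, i);
        }
        for (; i < a.length; i++) {  // scalar tail for the remaining elements
            c[i] = (byte) (a[i] + b[i]);
        }
    }
}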

View file

@ -0,0 +1,872 @@
/*
* Copyright (c) 2017, 2020, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation. Oracle designates this
* particular file as subject to the "Classpath" exception as provided
* by Oracle in the LICENSE file that accompanied this code.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*/
package jdk.incubator.vector;
import java.nio.ByteBuffer;
import java.util.Arrays;
import java.util.Objects;
import java.util.function.IntUnaryOperator;
import jdk.internal.vm.annotation.ForceInline;
import jdk.internal.vm.vector.VectorSupport;
import static jdk.internal.vm.vector.VectorSupport.*;
import static jdk.incubator.vector.VectorOperators.*;
// -- This file was mechanically generated: Do not edit! -- //
@SuppressWarnings("cast") // warning: redundant cast
final class Byte256Vector extends ByteVector {
static final ByteSpecies VSPECIES =
(ByteSpecies) ByteVector.SPECIES_256;
static final VectorShape VSHAPE =
VSPECIES.vectorShape();
static final Class<Byte256Vector> VCLASS = Byte256Vector.class;
static final int VSIZE = VSPECIES.vectorBitSize();
static final int VLENGTH = VSPECIES.laneCount(); // used by the JVM
static final Class<Byte> ETYPE = byte.class; // used by the JVM
Byte256Vector(byte[] v) {
super(v);
}
// For compatibility as Byte256Vector::new,
// stored into species.vectorFactory.
Byte256Vector(Object v) {
this((byte[]) v);
}
static final Byte256Vector ZERO = new Byte256Vector(new byte[VLENGTH]);
static final Byte256Vector IOTA = new Byte256Vector(VSPECIES.iotaArray());
static {
// Warm up a few species caches.
// If we do this too much we will
// get NPEs from bootstrap circularity.
VSPECIES.dummyVector();
VSPECIES.withLanes(LaneType.BYTE);
}
// Specialized extractors
@ForceInline
final @Override
public ByteSpecies vspecies() {
// ISSUE: This should probably be a @Stable
// field inside AbstractVector, rather than
// a megamorphic method.
return VSPECIES;
}
@ForceInline
@Override
public final Class<Byte> elementType() { return byte.class; }
@ForceInline
@Override
public final int elementSize() { return Byte.SIZE; }
@ForceInline
@Override
public final VectorShape shape() { return VSHAPE; }
@ForceInline
@Override
public final int length() { return VLENGTH; }
@ForceInline
@Override
public final int bitSize() { return VSIZE; }
@ForceInline
@Override
public final int byteSize() { return VSIZE / Byte.SIZE; }
/*package-private*/
@ForceInline
final @Override
byte[] vec() {
return (byte[])getPayload();
}
// Virtualized constructors
@Override
@ForceInline
public final Byte256Vector broadcast(byte e) {
return (Byte256Vector) super.broadcastTemplate(e); // specialize
}
@Override
@ForceInline
public final Byte256Vector broadcast(long e) {
return (Byte256Vector) super.broadcastTemplate(e); // specialize
}
@Override
@ForceInline
Byte256Mask maskFromArray(boolean[] bits) {
return new Byte256Mask(bits);
}
@Override
@ForceInline
Byte256Shuffle iotaShuffle() { return Byte256Shuffle.IOTA; }
@ForceInline
Byte256Shuffle iotaShuffle(int start, int step, boolean wrap) {
if (wrap) {
return (Byte256Shuffle)VectorSupport.shuffleIota(ETYPE, Byte256Shuffle.class, VSPECIES, VLENGTH, start, step, 1,
(l, lstart, lstep, s) -> s.shuffleFromOp(i -> (VectorIntrinsics.wrapToRange(i*lstep + lstart, l))));
} else {
return (Byte256Shuffle)VectorSupport.shuffleIota(ETYPE, Byte256Shuffle.class, VSPECIES, VLENGTH, start, step, 0,
(l, lstart, lstep, s) -> s.shuffleFromOp(i -> (i*lstep + lstart)));
}
}
@Override
@ForceInline
Byte256Shuffle shuffleFromBytes(byte[] reorder) { return new Byte256Shuffle(reorder); }
@Override
@ForceInline
Byte256Shuffle shuffleFromArray(int[] indexes, int i) { return new Byte256Shuffle(indexes, i); }
@Override
@ForceInline
Byte256Shuffle shuffleFromOp(IntUnaryOperator fn) { return new Byte256Shuffle(fn); }
// Make a vector of the same species but the given elements:
@ForceInline
final @Override
Byte256Vector vectorFactory(byte[] vec) {
return new Byte256Vector(vec);
}
@ForceInline
final @Override
Byte256Vector asByteVectorRaw() {
return (Byte256Vector) super.asByteVectorRawTemplate(); // specialize
}
@ForceInline
final @Override
AbstractVector<?> asVectorRaw(LaneType laneType) {
return super.asVectorRawTemplate(laneType); // specialize
}
// Unary operator
@ForceInline
final @Override
Byte256Vector uOp(FUnOp f) {
return (Byte256Vector) super.uOpTemplate(f); // specialize
}
@ForceInline
final @Override
Byte256Vector uOp(VectorMask<Byte> m, FUnOp f) {
return (Byte256Vector)
super.uOpTemplate((Byte256Mask)m, f); // specialize
}
// Binary operator
@ForceInline
final @Override
Byte256Vector bOp(Vector<Byte> v, FBinOp f) {
return (Byte256Vector) super.bOpTemplate((Byte256Vector)v, f); // specialize
}
@ForceInline
final @Override
Byte256Vector bOp(Vector<Byte> v,
VectorMask<Byte> m, FBinOp f) {
return (Byte256Vector)
super.bOpTemplate((Byte256Vector)v, (Byte256Mask)m,
f); // specialize
}
// Ternary operator
@ForceInline
final @Override
Byte256Vector tOp(Vector<Byte> v1, Vector<Byte> v2, FTriOp f) {
return (Byte256Vector)
super.tOpTemplate((Byte256Vector)v1, (Byte256Vector)v2,
f); // specialize
}
@ForceInline
final @Override
Byte256Vector tOp(Vector<Byte> v1, Vector<Byte> v2,
VectorMask<Byte> m, FTriOp f) {
return (Byte256Vector)
super.tOpTemplate((Byte256Vector)v1, (Byte256Vector)v2,
(Byte256Mask)m, f); // specialize
}
@ForceInline
final @Override
byte rOp(byte v, FBinOp f) {
return super.rOpTemplate(v, f); // specialize
}
@Override
@ForceInline
public final <F>
Vector<F> convertShape(VectorOperators.Conversion<Byte,F> conv,
VectorSpecies<F> rsp, int part) {
return super.convertShapeTemplate(conv, rsp, part); // specialize
}
@Override
@ForceInline
public final <F>
Vector<F> reinterpretShape(VectorSpecies<F> toSpecies, int part) {
return super.reinterpretShapeTemplate(toSpecies, part); // specialize
}
// Specialized algebraic operations:
// The following definition forces a specialized version of this
// crucial method into the v-table of this class. A call to add()
// will inline to a call to lanewise(ADD,), at which point the JIT
// intrinsic will have the opcode of ADD, plus all the metadata
// for this particular class, enabling it to generate precise
// code.
//
// There is probably no benefit to the JIT to specialize the
// masked or broadcast versions of the lanewise method.
@Override
@ForceInline
public Byte256Vector lanewise(Unary op) {
return (Byte256Vector) super.lanewiseTemplate(op); // specialize
}
@Override
@ForceInline
public Byte256Vector lanewise(Binary op, Vector<Byte> v) {
return (Byte256Vector) super.lanewiseTemplate(op, v); // specialize
}
/*package-private*/
@Override
@ForceInline Byte256Vector
lanewiseShift(VectorOperators.Binary op, int e) {
return (Byte256Vector) super.lanewiseShiftTemplate(op, e); // specialize
}
/*package-private*/
@Override
@ForceInline
public final
Byte256Vector
lanewise(VectorOperators.Ternary op, Vector<Byte> v1, Vector<Byte> v2) {
return (Byte256Vector) super.lanewiseTemplate(op, v1, v2); // specialize
}
@Override
@ForceInline
public final
Byte256Vector addIndex(int scale) {
return (Byte256Vector) super.addIndexTemplate(scale); // specialize
}
// Type specific horizontal reductions
@Override
@ForceInline
public final byte reduceLanes(VectorOperators.Associative op) {
return super.reduceLanesTemplate(op); // specialized
}
@Override
@ForceInline
public final byte reduceLanes(VectorOperators.Associative op,
VectorMask<Byte> m) {
return super.reduceLanesTemplate(op, m); // specialized
}
@Override
@ForceInline
public final long reduceLanesToLong(VectorOperators.Associative op) {
return (long) super.reduceLanesTemplate(op); // specialized
}
@Override
@ForceInline
public final long reduceLanesToLong(VectorOperators.Associative op,
VectorMask<Byte> m) {
return (long) super.reduceLanesTemplate(op, m); // specialized
}
@Override
@ForceInline
public VectorShuffle<Byte> toShuffle() {
byte[] a = toArray();
int[] sa = new int[a.length];
for (int i = 0; i < a.length; i++) {
sa[i] = (int) a[i];
}
return VectorShuffle.fromArray(VSPECIES, sa, 0);
}
// Specialized unary testing
@Override
@ForceInline
public final Byte256Mask test(Test op) {
return super.testTemplate(Byte256Mask.class, op); // specialize
}
// Specialized comparisons
@Override
@ForceInline
public final Byte256Mask compare(Comparison op, Vector<Byte> v) {
return super.compareTemplate(Byte256Mask.class, op, v); // specialize
}
@Override
@ForceInline
public final Byte256Mask compare(Comparison op, byte s) {
return super.compareTemplate(Byte256Mask.class, op, s); // specialize
}
@Override
@ForceInline
public final Byte256Mask compare(Comparison op, long s) {
return super.compareTemplate(Byte256Mask.class, op, s); // specialize
}
@Override
@ForceInline
public Byte256Vector blend(Vector<Byte> v, VectorMask<Byte> m) {
return (Byte256Vector)
super.blendTemplate(Byte256Mask.class,
(Byte256Vector) v,
(Byte256Mask) m); // specialize
}
@Override
@ForceInline
public Byte256Vector slice(int origin, Vector<Byte> v) {
return (Byte256Vector) super.sliceTemplate(origin, v); // specialize
}
@Override
@ForceInline
public Byte256Vector slice(int origin) {
if ((origin < 0) || (origin >= VLENGTH)) {
throw new ArrayIndexOutOfBoundsException("Index " + origin + " out of bounds for vector length " + VLENGTH);
} else {
Byte256Shuffle Iota = iotaShuffle();
VectorMask<Byte> BlendMask = Iota.toVector().compare(VectorOperators.LT, (broadcast((byte)(VLENGTH-origin))));
Iota = iotaShuffle(origin, 1, true);
return ZERO.blend(this.rearrange(Iota), BlendMask);
}
}
@Override
@ForceInline
public Byte256Vector unslice(int origin, Vector<Byte> w, int part) {
return (Byte256Vector) super.unsliceTemplate(origin, w, part); // specialize
}
@Override
@ForceInline
public Byte256Vector unslice(int origin, Vector<Byte> w, int part, VectorMask<Byte> m) {
return (Byte256Vector)
super.unsliceTemplate(Byte256Mask.class,
origin, w, part,
(Byte256Mask) m); // specialize
}
@Override
@ForceInline
public Byte256Vector unslice(int origin) {
if ((origin < 0) || (origin >= VLENGTH)) {
throw new ArrayIndexOutOfBoundsException("Index " + origin + " out of bounds for vector length " + VLENGTH);
} else {
Byte256Shuffle Iota = iotaShuffle();
VectorMask<Byte> BlendMask = Iota.toVector().compare(VectorOperators.GE, (broadcast((byte)(origin))));
Iota = iotaShuffle(-origin, 1, true);
return ZERO.blend(this.rearrange(Iota), BlendMask);
}
}
@Override
@ForceInline
public Byte256Vector rearrange(VectorShuffle<Byte> s) {
return (Byte256Vector)
super.rearrangeTemplate(Byte256Shuffle.class,
(Byte256Shuffle) s); // specialize
}
@Override
@ForceInline
public Byte256Vector rearrange(VectorShuffle<Byte> shuffle,
VectorMask<Byte> m) {
return (Byte256Vector)
super.rearrangeTemplate(Byte256Shuffle.class,
(Byte256Shuffle) shuffle,
(Byte256Mask) m); // specialize
}
@Override
@ForceInline
public Byte256Vector rearrange(VectorShuffle<Byte> s,
Vector<Byte> v) {
return (Byte256Vector)
super.rearrangeTemplate(Byte256Shuffle.class,
(Byte256Shuffle) s,
(Byte256Vector) v); // specialize
}
@Override
@ForceInline
public Byte256Vector selectFrom(Vector<Byte> v) {
return (Byte256Vector)
super.selectFromTemplate((Byte256Vector) v); // specialize
}
@Override
@ForceInline
public Byte256Vector selectFrom(Vector<Byte> v,
VectorMask<Byte> m) {
return (Byte256Vector)
super.selectFromTemplate((Byte256Vector) v,
(Byte256Mask) m); // specialize
}
@ForceInline
@Override
public byte lane(int i) {
switch(i) {
case 0: return laneHelper(0);
case 1: return laneHelper(1);
case 2: return laneHelper(2);
case 3: return laneHelper(3);
case 4: return laneHelper(4);
case 5: return laneHelper(5);
case 6: return laneHelper(6);
case 7: return laneHelper(7);
case 8: return laneHelper(8);
case 9: return laneHelper(9);
case 10: return laneHelper(10);
case 11: return laneHelper(11);
case 12: return laneHelper(12);
case 13: return laneHelper(13);
case 14: return laneHelper(14);
case 15: return laneHelper(15);
case 16: return laneHelper(16);
case 17: return laneHelper(17);
case 18: return laneHelper(18);
case 19: return laneHelper(19);
case 20: return laneHelper(20);
case 21: return laneHelper(21);
case 22: return laneHelper(22);
case 23: return laneHelper(23);
case 24: return laneHelper(24);
case 25: return laneHelper(25);
case 26: return laneHelper(26);
case 27: return laneHelper(27);
case 28: return laneHelper(28);
case 29: return laneHelper(29);
case 30: return laneHelper(30);
case 31: return laneHelper(31);
default: throw new IllegalArgumentException("Index " + i + " must be zero or positive, and less than " + VLENGTH);
}
}
public byte laneHelper(int i) {
return (byte) VectorSupport.extract(
VCLASS, ETYPE, VLENGTH,
this, i,
(vec, ix) -> {
byte[] vecarr = vec.vec();
return (long)vecarr[ix];
});
}
@ForceInline
@Override
public Byte256Vector withLane(int i, byte e) {
switch (i) {
case 0: return withLaneHelper(0, e);
case 1: return withLaneHelper(1, e);
case 2: return withLaneHelper(2, e);
case 3: return withLaneHelper(3, e);
case 4: return withLaneHelper(4, e);
case 5: return withLaneHelper(5, e);
case 6: return withLaneHelper(6, e);
case 7: return withLaneHelper(7, e);
case 8: return withLaneHelper(8, e);
case 9: return withLaneHelper(9, e);
case 10: return withLaneHelper(10, e);
case 11: return withLaneHelper(11, e);
case 12: return withLaneHelper(12, e);
case 13: return withLaneHelper(13, e);
case 14: return withLaneHelper(14, e);
case 15: return withLaneHelper(15, e);
case 16: return withLaneHelper(16, e);
case 17: return withLaneHelper(17, e);
case 18: return withLaneHelper(18, e);
case 19: return withLaneHelper(19, e);
case 20: return withLaneHelper(20, e);
case 21: return withLaneHelper(21, e);
case 22: return withLaneHelper(22, e);
case 23: return withLaneHelper(23, e);
case 24: return withLaneHelper(24, e);
case 25: return withLaneHelper(25, e);
case 26: return withLaneHelper(26, e);
case 27: return withLaneHelper(27, e);
case 28: return withLaneHelper(28, e);
case 29: return withLaneHelper(29, e);
case 30: return withLaneHelper(30, e);
case 31: return withLaneHelper(31, e);
default: throw new IllegalArgumentException("Index " + i + " must be zero or positive, and less than " + VLENGTH);
}
}
public Byte256Vector withLaneHelper(int i, byte e) {
return VectorSupport.insert(
VCLASS, ETYPE, VLENGTH,
this, i, (long)e,
(v, ix, bits) -> {
byte[] res = v.vec().clone();
res[ix] = (byte)bits;
return v.vectorFactory(res);
});
}
// Mask
static final class Byte256Mask extends AbstractMask<Byte> {
static final int VLENGTH = VSPECIES.laneCount(); // used by the JVM
static final Class<Byte> ETYPE = byte.class; // used by the JVM
Byte256Mask(boolean[] bits) {
this(bits, 0);
}
Byte256Mask(boolean[] bits, int offset) {
super(prepare(bits, offset));
}
Byte256Mask(boolean val) {
super(prepare(val));
}
private static boolean[] prepare(boolean[] bits, int offset) {
boolean[] newBits = new boolean[VSPECIES.laneCount()];
for (int i = 0; i < newBits.length; i++) {
newBits[i] = bits[offset + i];
}
return newBits;
}
private static boolean[] prepare(boolean val) {
boolean[] bits = new boolean[VSPECIES.laneCount()];
Arrays.fill(bits, val);
return bits;
}
@ForceInline
final @Override
public ByteSpecies vspecies() {
// ISSUE: This should probably be a @Stable
// field inside AbstractMask, rather than
// a megamorphic method.
return VSPECIES;
}
@ForceInline
boolean[] getBits() {
return (boolean[])getPayload();
}
@Override
Byte256Mask uOp(MUnOp f) {
boolean[] res = new boolean[vspecies().laneCount()];
boolean[] bits = getBits();
for (int i = 0; i < res.length; i++) {
res[i] = f.apply(i, bits[i]);
}
return new Byte256Mask(res);
}
@Override
Byte256Mask bOp(VectorMask<Byte> m, MBinOp f) {
boolean[] res = new boolean[vspecies().laneCount()];
boolean[] bits = getBits();
boolean[] mbits = ((Byte256Mask)m).getBits();
for (int i = 0; i < res.length; i++) {
res[i] = f.apply(i, bits[i], mbits[i]);
}
return new Byte256Mask(res);
}
@ForceInline
@Override
public final
Byte256Vector toVector() {
return (Byte256Vector) super.toVectorTemplate(); // specialize
}
@Override
@ForceInline
public <E> VectorMask<E> cast(VectorSpecies<E> s) {
AbstractSpecies<E> species = (AbstractSpecies<E>) s;
if (length() != species.laneCount())
throw new IllegalArgumentException("VectorMask length and species length differ");
boolean[] maskArray = toArray();
// enum-switches don't optimize properly JDK-8161245
switch (species.laneType.switchKey) {
case LaneType.SK_BYTE:
return new Byte256Vector.Byte256Mask(maskArray).check(species);
case LaneType.SK_SHORT:
return new Short256Vector.Short256Mask(maskArray).check(species);
case LaneType.SK_INT:
return new Int256Vector.Int256Mask(maskArray).check(species);
case LaneType.SK_LONG:
return new Long256Vector.Long256Mask(maskArray).check(species);
case LaneType.SK_FLOAT:
return new Float256Vector.Float256Mask(maskArray).check(species);
case LaneType.SK_DOUBLE:
return new Double256Vector.Double256Mask(maskArray).check(species);
}
// Should not reach here.
throw new AssertionError(species);
}
// Unary operations
@Override
@ForceInline
public Byte256Mask not() {
return xor(maskAll(true));
}
// Binary operations
@Override
@ForceInline
public Byte256Mask and(VectorMask<Byte> mask) {
Objects.requireNonNull(mask);
Byte256Mask m = (Byte256Mask)mask;
return VectorSupport.binaryOp(VECTOR_OP_AND, Byte256Mask.class, byte.class, VLENGTH,
this, m,
(m1, m2) -> m1.bOp(m2, (i, a, b) -> a & b));
}
@Override
@ForceInline
public Byte256Mask or(VectorMask<Byte> mask) {
Objects.requireNonNull(mask);
Byte256Mask m = (Byte256Mask)mask;
return VectorSupport.binaryOp(VECTOR_OP_OR, Byte256Mask.class, byte.class, VLENGTH,
this, m,
(m1, m2) -> m1.bOp(m2, (i, a, b) -> a | b));
}
@ForceInline
/* package-private */
Byte256Mask xor(VectorMask<Byte> mask) {
Objects.requireNonNull(mask);
Byte256Mask m = (Byte256Mask)mask;
return VectorSupport.binaryOp(VECTOR_OP_XOR, Byte256Mask.class, byte.class, VLENGTH,
this, m,
(m1, m2) -> m1.bOp(m2, (i, a, b) -> a ^ b));
}
// Reductions
@Override
@ForceInline
public boolean anyTrue() {
return VectorSupport.test(BT_ne, Byte256Mask.class, byte.class, VLENGTH,
this, vspecies().maskAll(true),
(m, __) -> anyTrueHelper(((Byte256Mask)m).getBits()));
}
@Override
@ForceInline
public boolean allTrue() {
return VectorSupport.test(BT_overflow, Byte256Mask.class, byte.class, VLENGTH,
this, vspecies().maskAll(true),
(m, __) -> allTrueHelper(((Byte256Mask)m).getBits()));
}
@ForceInline
/*package-private*/
static Byte256Mask maskAll(boolean bit) {
return VectorSupport.broadcastCoerced(Byte256Mask.class, byte.class, VLENGTH,
(bit ? -1 : 0), null,
(v, __) -> (v != 0 ? TRUE_MASK : FALSE_MASK));
}
private static final Byte256Mask TRUE_MASK = new Byte256Mask(true);
private static final Byte256Mask FALSE_MASK = new Byte256Mask(false);
}
// Shuffle
static final class Byte256Shuffle extends AbstractShuffle<Byte> {
static final int VLENGTH = VSPECIES.laneCount(); // used by the JVM
static final Class<Byte> ETYPE = byte.class; // used by the JVM
Byte256Shuffle(byte[] reorder) {
super(VLENGTH, reorder);
}
public Byte256Shuffle(int[] reorder) {
super(VLENGTH, reorder);
}
public Byte256Shuffle(int[] reorder, int i) {
super(VLENGTH, reorder, i);
}
public Byte256Shuffle(IntUnaryOperator fn) {
super(VLENGTH, fn);
}
@Override
public ByteSpecies vspecies() {
return VSPECIES;
}
static {
// There must be enough bits in the shuffle lanes to encode
// VLENGTH valid indexes and VLENGTH exceptional ones.
assert(VLENGTH < Byte.MAX_VALUE);
assert(Byte.MIN_VALUE <= -VLENGTH);
}
static final Byte256Shuffle IOTA = new Byte256Shuffle(IDENTITY);
@Override
@ForceInline
public Byte256Vector toVector() {
return VectorSupport.shuffleToVector(VCLASS, ETYPE, Byte256Shuffle.class, this, VLENGTH,
(s) -> ((Byte256Vector)(((AbstractShuffle<Byte>)(s)).toVectorTemplate())));
}
@Override
@ForceInline
public <F> VectorShuffle<F> cast(VectorSpecies<F> s) {
AbstractSpecies<F> species = (AbstractSpecies<F>) s;
if (length() != species.laneCount())
throw new IllegalArgumentException("VectorShuffle length and species length differ");
int[] shuffleArray = toArray();
// enum-switches don't optimize properly JDK-8161245
switch (species.laneType.switchKey) {
case LaneType.SK_BYTE:
return new Byte256Vector.Byte256Shuffle(shuffleArray).check(species);
case LaneType.SK_SHORT:
return new Short256Vector.Short256Shuffle(shuffleArray).check(species);
case LaneType.SK_INT:
return new Int256Vector.Int256Shuffle(shuffleArray).check(species);
case LaneType.SK_LONG:
return new Long256Vector.Long256Shuffle(shuffleArray).check(species);
case LaneType.SK_FLOAT:
return new Float256Vector.Float256Shuffle(shuffleArray).check(species);
case LaneType.SK_DOUBLE:
return new Double256Vector.Double256Shuffle(shuffleArray).check(species);
}
// Should not reach here.
throw new AssertionError(species);
}
@ForceInline
@Override
public Byte256Shuffle rearrange(VectorShuffle<Byte> shuffle) {
Byte256Shuffle s = (Byte256Shuffle) shuffle;
byte[] reorder1 = reorder();
byte[] reorder2 = s.reorder();
byte[] r = new byte[reorder1.length];
for (int i = 0; i < reorder1.length; i++) {
int ssi = reorder2[i];
r[i] = reorder1[ssi]; // throws on exceptional index
}
return new Byte256Shuffle(r);
}
}
// ================================================
// Specialized low-level memory operations.
@ForceInline
@Override
final
ByteVector fromArray0(byte[] a, int offset) {
return super.fromArray0Template(a, offset); // specialize
}
@ForceInline
@Override
final
ByteVector fromByteArray0(byte[] a, int offset) {
return super.fromByteArray0Template(a, offset); // specialize
}
@ForceInline
@Override
final
ByteVector fromByteBuffer0(ByteBuffer bb, int offset) {
return super.fromByteBuffer0Template(bb, offset); // specialize
}
@ForceInline
@Override
final
void intoArray0(byte[] a, int offset) {
super.intoArray0Template(a, offset); // specialize
}
@ForceInline
@Override
final
void intoByteArray0(byte[] a, int offset) {
super.intoByteArray0Template(a, offset); // specialize
}
// End of specialized low-level memory operations.
// ================================================
}
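The 256-bit variant repeats the same structure, including the Byte256Mask specializations for compare, blend, and the mask reductions. A short sketch of how those surface through the public API, again illustrative rather than part of the patch (clampNegativesToZero is a made-up helper name):

import jdk.incubator.vector.ByteVector;
import jdk.incubator.vector.VectorMask;
import jdk.incubator.vector.VectorOperators;
import jdk.incubator.vector.VectorSpecies;

public class ByteMaskDemo {
    static final VectorSpecies<Byte> SPECIES = ByteVector.SPECIES_256;

    // Replace negative lanes with zero; compare() and blend() dispatch to the
    // Byte256Mask/Byte256Vector overrides when the 256-bit shape is selected.
    static void clampNegativesToZero(byte[] a) {
        int i = 0;
        int upper = SPECIES.loopBound(a.length);
        for (; i < upper; i += SPECIES.length()) {
            ByteVector v = ByteVector.fromArray(SPECIES, a, i);
            VectorMask<Byte> negative = v.compare(VectorOperators.LT, (byte) 0);
            // Lanes where the mask is set are taken from the zero vector.
            v.blend(SPECIES.zero(), negative).intoArray(a, i);
        }
        for (; i < a.length; i++) {  // scalar tail
            if (a[i] < 0) a[i] = 0;
        }
    }
}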

View file

@ -0,0 +1,936 @@
/*
* Copyright (c) 2017, 2020, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation. Oracle designates this
* particular file as subject to the "Classpath" exception as provided
* by Oracle in the LICENSE file that accompanied this code.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*/
package jdk.incubator.vector;
import java.nio.ByteBuffer;
import java.util.Arrays;
import java.util.Objects;
import java.util.function.IntUnaryOperator;
import jdk.internal.vm.annotation.ForceInline;
import jdk.internal.vm.vector.VectorSupport;
import static jdk.internal.vm.vector.VectorSupport.*;
import static jdk.incubator.vector.VectorOperators.*;
// -- This file was mechanically generated: Do not edit! -- //
@SuppressWarnings("cast") // warning: redundant cast
final class Byte512Vector extends ByteVector {
static final ByteSpecies VSPECIES =
(ByteSpecies) ByteVector.SPECIES_512;
static final VectorShape VSHAPE =
VSPECIES.vectorShape();
static final Class<Byte512Vector> VCLASS = Byte512Vector.class;
static final int VSIZE = VSPECIES.vectorBitSize();
static final int VLENGTH = VSPECIES.laneCount(); // used by the JVM
static final Class<Byte> ETYPE = byte.class; // used by the JVM
Byte512Vector(byte[] v) {
super(v);
}
// For compatibility as Byte512Vector::new,
// stored into species.vectorFactory.
Byte512Vector(Object v) {
this((byte[]) v);
}
static final Byte512Vector ZERO = new Byte512Vector(new byte[VLENGTH]);
static final Byte512Vector IOTA = new Byte512Vector(VSPECIES.iotaArray());
static {
// Warm up a few species caches.
// If we do this too much we will
// get NPEs from bootstrap circularity.
VSPECIES.dummyVector();
VSPECIES.withLanes(LaneType.BYTE);
}
// Specialized extractors
@ForceInline
final @Override
public ByteSpecies vspecies() {
// ISSUE: This should probably be a @Stable
// field inside AbstractVector, rather than
// a megamorphic method.
return VSPECIES;
}
@ForceInline
@Override
public final Class<Byte> elementType() { return byte.class; }
@ForceInline
@Override
public final int elementSize() { return Byte.SIZE; }
@ForceInline
@Override
public final VectorShape shape() { return VSHAPE; }
@ForceInline
@Override
public final int length() { return VLENGTH; }
@ForceInline
@Override
public final int bitSize() { return VSIZE; }
@ForceInline
@Override
public final int byteSize() { return VSIZE / Byte.SIZE; }
/*package-private*/
@ForceInline
final @Override
byte[] vec() {
return (byte[])getPayload();
}
// Virtualized constructors
@Override
@ForceInline
public final Byte512Vector broadcast(byte e) {
return (Byte512Vector) super.broadcastTemplate(e); // specialize
}
@Override
@ForceInline
public final Byte512Vector broadcast(long e) {
return (Byte512Vector) super.broadcastTemplate(e); // specialize
}
@Override
@ForceInline
Byte512Mask maskFromArray(boolean[] bits) {
return new Byte512Mask(bits);
}
@Override
@ForceInline
Byte512Shuffle iotaShuffle() { return Byte512Shuffle.IOTA; }
@ForceInline
Byte512Shuffle iotaShuffle(int start, int step, boolean wrap) {
if (wrap) {
return (Byte512Shuffle)VectorSupport.shuffleIota(ETYPE, Byte512Shuffle.class, VSPECIES, VLENGTH, start, step, 1,
(l, lstart, lstep, s) -> s.shuffleFromOp(i -> (VectorIntrinsics.wrapToRange(i*lstep + lstart, l))));
} else {
return (Byte512Shuffle)VectorSupport.shuffleIota(ETYPE, Byte512Shuffle.class, VSPECIES, VLENGTH, start, step, 0,
(l, lstart, lstep, s) -> s.shuffleFromOp(i -> (i*lstep + lstart)));
}
}
@Override
@ForceInline
Byte512Shuffle shuffleFromBytes(byte[] reorder) { return new Byte512Shuffle(reorder); }
@Override
@ForceInline
Byte512Shuffle shuffleFromArray(int[] indexes, int i) { return new Byte512Shuffle(indexes, i); }
@Override
@ForceInline
Byte512Shuffle shuffleFromOp(IntUnaryOperator fn) { return new Byte512Shuffle(fn); }
// Make a vector of the same species but the given elements:
@ForceInline
final @Override
Byte512Vector vectorFactory(byte[] vec) {
return new Byte512Vector(vec);
}
@ForceInline
final @Override
Byte512Vector asByteVectorRaw() {
return (Byte512Vector) super.asByteVectorRawTemplate(); // specialize
}
@ForceInline
final @Override
AbstractVector<?> asVectorRaw(LaneType laneType) {
return super.asVectorRawTemplate(laneType); // specialize
}
// Unary operator
@ForceInline
final @Override
Byte512Vector uOp(FUnOp f) {
return (Byte512Vector) super.uOpTemplate(f); // specialize
}
@ForceInline
final @Override
Byte512Vector uOp(VectorMask<Byte> m, FUnOp f) {
return (Byte512Vector)
super.uOpTemplate((Byte512Mask)m, f); // specialize
}
// Binary operator
@ForceInline
final @Override
Byte512Vector bOp(Vector<Byte> v, FBinOp f) {
return (Byte512Vector) super.bOpTemplate((Byte512Vector)v, f); // specialize
}
@ForceInline
final @Override
Byte512Vector bOp(Vector<Byte> v,
VectorMask<Byte> m, FBinOp f) {
return (Byte512Vector)
super.bOpTemplate((Byte512Vector)v, (Byte512Mask)m,
f); // specialize
}
// Ternary operator
@ForceInline
final @Override
Byte512Vector tOp(Vector<Byte> v1, Vector<Byte> v2, FTriOp f) {
return (Byte512Vector)
super.tOpTemplate((Byte512Vector)v1, (Byte512Vector)v2,
f); // specialize
}
@ForceInline
final @Override
Byte512Vector tOp(Vector<Byte> v1, Vector<Byte> v2,
VectorMask<Byte> m, FTriOp f) {
return (Byte512Vector)
super.tOpTemplate((Byte512Vector)v1, (Byte512Vector)v2,
(Byte512Mask)m, f); // specialize
}
@ForceInline
final @Override
byte rOp(byte v, FBinOp f) {
return super.rOpTemplate(v, f); // specialize
}
@Override
@ForceInline
public final <F>
Vector<F> convertShape(VectorOperators.Conversion<Byte,F> conv,
VectorSpecies<F> rsp, int part) {
return super.convertShapeTemplate(conv, rsp, part); // specialize
}
@Override
@ForceInline
public final <F>
Vector<F> reinterpretShape(VectorSpecies<F> toSpecies, int part) {
return super.reinterpretShapeTemplate(toSpecies, part); // specialize
}
// Specialized algebraic operations:
// The following definition forces a specialized version of this
// crucial method into the v-table of this class. A call to add()
// will inline to a call to lanewise(ADD,), at which point the JIT
// intrinsic will have the opcode of ADD, plus all the metadata
// for this particular class, enabling it to generate precise
// code.
//
// There is probably no benefit to the JIT to specialize the
// masked or broadcast versions of the lanewise method.
@Override
@ForceInline
public Byte512Vector lanewise(Unary op) {
return (Byte512Vector) super.lanewiseTemplate(op); // specialize
}
@Override
@ForceInline
public Byte512Vector lanewise(Binary op, Vector<Byte> v) {
return (Byte512Vector) super.lanewiseTemplate(op, v); // specialize
}
/*package-private*/
@Override
@ForceInline Byte512Vector
lanewiseShift(VectorOperators.Binary op, int e) {
return (Byte512Vector) super.lanewiseShiftTemplate(op, e); // specialize
}
/*package-private*/
@Override
@ForceInline
public final
Byte512Vector
lanewise(VectorOperators.Ternary op, Vector<Byte> v1, Vector<Byte> v2) {
return (Byte512Vector) super.lanewiseTemplate(op, v1, v2); // specialize
}
@Override
@ForceInline
public final
Byte512Vector addIndex(int scale) {
return (Byte512Vector) super.addIndexTemplate(scale); // specialize
}
// Type specific horizontal reductions
@Override
@ForceInline
public final byte reduceLanes(VectorOperators.Associative op) {
return super.reduceLanesTemplate(op); // specialized
}
@Override
@ForceInline
public final byte reduceLanes(VectorOperators.Associative op,
VectorMask<Byte> m) {
return super.reduceLanesTemplate(op, m); // specialized
}
@Override
@ForceInline
public final long reduceLanesToLong(VectorOperators.Associative op) {
return (long) super.reduceLanesTemplate(op); // specialized
}
@Override
@ForceInline
public final long reduceLanesToLong(VectorOperators.Associative op,
VectorMask<Byte> m) {
return (long) super.reduceLanesTemplate(op, m); // specialized
}
@Override
@ForceInline
public VectorShuffle<Byte> toShuffle() {
byte[] a = toArray();
int[] sa = new int[a.length];
for (int i = 0; i < a.length; i++) {
sa[i] = (int) a[i];
}
return VectorShuffle.fromArray(VSPECIES, sa, 0);
}
// Specialized unary testing
@Override
@ForceInline
public final Byte512Mask test(Test op) {
return super.testTemplate(Byte512Mask.class, op); // specialize
}
// Specialized comparisons
@Override
@ForceInline
public final Byte512Mask compare(Comparison op, Vector<Byte> v) {
return super.compareTemplate(Byte512Mask.class, op, v); // specialize
}
@Override
@ForceInline
public final Byte512Mask compare(Comparison op, byte s) {
return super.compareTemplate(Byte512Mask.class, op, s); // specialize
}
@Override
@ForceInline
public final Byte512Mask compare(Comparison op, long s) {
return super.compareTemplate(Byte512Mask.class, op, s); // specialize
}
@Override
@ForceInline
public Byte512Vector blend(Vector<Byte> v, VectorMask<Byte> m) {
return (Byte512Vector)
super.blendTemplate(Byte512Mask.class,
(Byte512Vector) v,
(Byte512Mask) m); // specialize
}
@Override
@ForceInline
public Byte512Vector slice(int origin, Vector<Byte> v) {
return (Byte512Vector) super.sliceTemplate(origin, v); // specialize
}
@Override
@ForceInline
public Byte512Vector slice(int origin) {
if ((origin < 0) || (origin >= VLENGTH)) {
throw new ArrayIndexOutOfBoundsException("Index " + origin + " out of bounds for vector length " + VLENGTH);
} else {
Byte512Shuffle Iota = iotaShuffle();
VectorMask<Byte> BlendMask = Iota.toVector().compare(VectorOperators.LT, (broadcast((byte)(VLENGTH-origin))));
Iota = iotaShuffle(origin, 1, true);
return ZERO.blend(this.rearrange(Iota), BlendMask);
}
}
@Override
@ForceInline
public Byte512Vector unslice(int origin, Vector<Byte> w, int part) {
return (Byte512Vector) super.unsliceTemplate(origin, w, part); // specialize
}
@Override
@ForceInline
public Byte512Vector unslice(int origin, Vector<Byte> w, int part, VectorMask<Byte> m) {
return (Byte512Vector)
super.unsliceTemplate(Byte512Mask.class,
origin, w, part,
(Byte512Mask) m); // specialize
}
@Override
@ForceInline
public Byte512Vector unslice(int origin) {
if ((origin < 0) || (origin >= VLENGTH)) {
throw new ArrayIndexOutOfBoundsException("Index " + origin + " out of bounds for vector length " + VLENGTH);
} else {
Byte512Shuffle Iota = iotaShuffle();
VectorMask<Byte> BlendMask = Iota.toVector().compare(VectorOperators.GE, (broadcast((byte)(origin))));
Iota = iotaShuffle(-origin, 1, true);
return ZERO.blend(this.rearrange(Iota), BlendMask);
}
}
@Override
@ForceInline
public Byte512Vector rearrange(VectorShuffle<Byte> s) {
return (Byte512Vector)
super.rearrangeTemplate(Byte512Shuffle.class,
(Byte512Shuffle) s); // specialize
}
@Override
@ForceInline
public Byte512Vector rearrange(VectorShuffle<Byte> shuffle,
VectorMask<Byte> m) {
return (Byte512Vector)
super.rearrangeTemplate(Byte512Shuffle.class,
(Byte512Shuffle) shuffle,
(Byte512Mask) m); // specialize
}
@Override
@ForceInline
public Byte512Vector rearrange(VectorShuffle<Byte> s,
Vector<Byte> v) {
return (Byte512Vector)
super.rearrangeTemplate(Byte512Shuffle.class,
(Byte512Shuffle) s,
(Byte512Vector) v); // specialize
}
@Override
@ForceInline
public Byte512Vector selectFrom(Vector<Byte> v) {
return (Byte512Vector)
super.selectFromTemplate((Byte512Vector) v); // specialize
}
@Override
@ForceInline
public Byte512Vector selectFrom(Vector<Byte> v,
VectorMask<Byte> m) {
return (Byte512Vector)
super.selectFromTemplate((Byte512Vector) v,
(Byte512Mask) m); // specialize
}
@ForceInline
@Override
public byte lane(int i) {
switch(i) {
case 0: return laneHelper(0);
case 1: return laneHelper(1);
case 2: return laneHelper(2);
case 3: return laneHelper(3);
case 4: return laneHelper(4);
case 5: return laneHelper(5);
case 6: return laneHelper(6);
case 7: return laneHelper(7);
case 8: return laneHelper(8);
case 9: return laneHelper(9);
case 10: return laneHelper(10);
case 11: return laneHelper(11);
case 12: return laneHelper(12);
case 13: return laneHelper(13);
case 14: return laneHelper(14);
case 15: return laneHelper(15);
case 16: return laneHelper(16);
case 17: return laneHelper(17);
case 18: return laneHelper(18);
case 19: return laneHelper(19);
case 20: return laneHelper(20);
case 21: return laneHelper(21);
case 22: return laneHelper(22);
case 23: return laneHelper(23);
case 24: return laneHelper(24);
case 25: return laneHelper(25);
case 26: return laneHelper(26);
case 27: return laneHelper(27);
case 28: return laneHelper(28);
case 29: return laneHelper(29);
case 30: return laneHelper(30);
case 31: return laneHelper(31);
case 32: return laneHelper(32);
case 33: return laneHelper(33);
case 34: return laneHelper(34);
case 35: return laneHelper(35);
case 36: return laneHelper(36);
case 37: return laneHelper(37);
case 38: return laneHelper(38);
case 39: return laneHelper(39);
case 40: return laneHelper(40);
case 41: return laneHelper(41);
case 42: return laneHelper(42);
case 43: return laneHelper(43);
case 44: return laneHelper(44);
case 45: return laneHelper(45);
case 46: return laneHelper(46);
case 47: return laneHelper(47);
case 48: return laneHelper(48);
case 49: return laneHelper(49);
case 50: return laneHelper(50);
case 51: return laneHelper(51);
case 52: return laneHelper(52);
case 53: return laneHelper(53);
case 54: return laneHelper(54);
case 55: return laneHelper(55);
case 56: return laneHelper(56);
case 57: return laneHelper(57);
case 58: return laneHelper(58);
case 59: return laneHelper(59);
case 60: return laneHelper(60);
case 61: return laneHelper(61);
case 62: return laneHelper(62);
case 63: return laneHelper(63);
default: throw new IllegalArgumentException("Index " + i + " must be zero or positive, and less than " + VLENGTH);
}
}
public byte laneHelper(int i) {
return (byte) VectorSupport.extract(
VCLASS, ETYPE, VLENGTH,
this, i,
(vec, ix) -> {
byte[] vecarr = vec.vec();
return (long)vecarr[ix];
});
}
@ForceInline
@Override
public Byte512Vector withLane(int i, byte e) {
switch (i) {
case 0: return withLaneHelper(0, e);
case 1: return withLaneHelper(1, e);
case 2: return withLaneHelper(2, e);
case 3: return withLaneHelper(3, e);
case 4: return withLaneHelper(4, e);
case 5: return withLaneHelper(5, e);
case 6: return withLaneHelper(6, e);
case 7: return withLaneHelper(7, e);
case 8: return withLaneHelper(8, e);
case 9: return withLaneHelper(9, e);
case 10: return withLaneHelper(10, e);
case 11: return withLaneHelper(11, e);
case 12: return withLaneHelper(12, e);
case 13: return withLaneHelper(13, e);
case 14: return withLaneHelper(14, e);
case 15: return withLaneHelper(15, e);
case 16: return withLaneHelper(16, e);
case 17: return withLaneHelper(17, e);
case 18: return withLaneHelper(18, e);
case 19: return withLaneHelper(19, e);
case 20: return withLaneHelper(20, e);
case 21: return withLaneHelper(21, e);
case 22: return withLaneHelper(22, e);
case 23: return withLaneHelper(23, e);
case 24: return withLaneHelper(24, e);
case 25: return withLaneHelper(25, e);
case 26: return withLaneHelper(26, e);
case 27: return withLaneHelper(27, e);
case 28: return withLaneHelper(28, e);
case 29: return withLaneHelper(29, e);
case 30: return withLaneHelper(30, e);
case 31: return withLaneHelper(31, e);
case 32: return withLaneHelper(32, e);
case 33: return withLaneHelper(33, e);
case 34: return withLaneHelper(34, e);
case 35: return withLaneHelper(35, e);
case 36: return withLaneHelper(36, e);
case 37: return withLaneHelper(37, e);
case 38: return withLaneHelper(38, e);
case 39: return withLaneHelper(39, e);
case 40: return withLaneHelper(40, e);
case 41: return withLaneHelper(41, e);
case 42: return withLaneHelper(42, e);
case 43: return withLaneHelper(43, e);
case 44: return withLaneHelper(44, e);
case 45: return withLaneHelper(45, e);
case 46: return withLaneHelper(46, e);
case 47: return withLaneHelper(47, e);
case 48: return withLaneHelper(48, e);
case 49: return withLaneHelper(49, e);
case 50: return withLaneHelper(50, e);
case 51: return withLaneHelper(51, e);
case 52: return withLaneHelper(52, e);
case 53: return withLaneHelper(53, e);
case 54: return withLaneHelper(54, e);
case 55: return withLaneHelper(55, e);
case 56: return withLaneHelper(56, e);
case 57: return withLaneHelper(57, e);
case 58: return withLaneHelper(58, e);
case 59: return withLaneHelper(59, e);
case 60: return withLaneHelper(60, e);
case 61: return withLaneHelper(61, e);
case 62: return withLaneHelper(62, e);
case 63: return withLaneHelper(63, e);
default: throw new IllegalArgumentException("Index " + i + " must be zero or positive, and less than " + VLENGTH);
}
}
public Byte512Vector withLaneHelper(int i, byte e) {
return VectorSupport.insert(
VCLASS, ETYPE, VLENGTH,
this, i, (long)e,
(v, ix, bits) -> {
byte[] res = v.vec().clone();
res[ix] = (byte)bits;
return v.vectorFactory(res);
});
}
// Mask
static final class Byte512Mask extends AbstractMask<Byte> {
static final int VLENGTH = VSPECIES.laneCount(); // used by the JVM
static final Class<Byte> ETYPE = byte.class; // used by the JVM
Byte512Mask(boolean[] bits) {
this(bits, 0);
}
Byte512Mask(boolean[] bits, int offset) {
super(prepare(bits, offset));
}
Byte512Mask(boolean val) {
super(prepare(val));
}
private static boolean[] prepare(boolean[] bits, int offset) {
boolean[] newBits = new boolean[VSPECIES.laneCount()];
for (int i = 0; i < newBits.length; i++) {
newBits[i] = bits[offset + i];
}
return newBits;
}
private static boolean[] prepare(boolean val) {
boolean[] bits = new boolean[VSPECIES.laneCount()];
Arrays.fill(bits, val);
return bits;
}
@ForceInline
final @Override
public ByteSpecies vspecies() {
// ISSUE: This should probably be a @Stable
// field inside AbstractMask, rather than
// a megamorphic method.
return VSPECIES;
}
@ForceInline
boolean[] getBits() {
return (boolean[])getPayload();
}
@Override
Byte512Mask uOp(MUnOp f) {
boolean[] res = new boolean[vspecies().laneCount()];
boolean[] bits = getBits();
for (int i = 0; i < res.length; i++) {
res[i] = f.apply(i, bits[i]);
}
return new Byte512Mask(res);
}
@Override
Byte512Mask bOp(VectorMask<Byte> m, MBinOp f) {
boolean[] res = new boolean[vspecies().laneCount()];
boolean[] bits = getBits();
boolean[] mbits = ((Byte512Mask)m).getBits();
for (int i = 0; i < res.length; i++) {
res[i] = f.apply(i, bits[i], mbits[i]);
}
return new Byte512Mask(res);
}
@ForceInline
@Override
public final
Byte512Vector toVector() {
return (Byte512Vector) super.toVectorTemplate(); // specialize
}
@Override
@ForceInline
public <E> VectorMask<E> cast(VectorSpecies<E> s) {
AbstractSpecies<E> species = (AbstractSpecies<E>) s;
if (length() != species.laneCount())
throw new IllegalArgumentException("VectorMask length and species length differ");
boolean[] maskArray = toArray();
// enum-switches don't optimize properly JDK-8161245
switch (species.laneType.switchKey) {
case LaneType.SK_BYTE:
return new Byte512Vector.Byte512Mask(maskArray).check(species);
case LaneType.SK_SHORT:
return new Short512Vector.Short512Mask(maskArray).check(species);
case LaneType.SK_INT:
return new Int512Vector.Int512Mask(maskArray).check(species);
case LaneType.SK_LONG:
return new Long512Vector.Long512Mask(maskArray).check(species);
case LaneType.SK_FLOAT:
return new Float512Vector.Float512Mask(maskArray).check(species);
case LaneType.SK_DOUBLE:
return new Double512Vector.Double512Mask(maskArray).check(species);
}
// Should not reach here.
throw new AssertionError(species);
}
// Unary operations
@Override
@ForceInline
public Byte512Mask not() {
return xor(maskAll(true));
}
// Binary operations
@Override
@ForceInline
public Byte512Mask and(VectorMask<Byte> mask) {
Objects.requireNonNull(mask);
Byte512Mask m = (Byte512Mask)mask;
return VectorSupport.binaryOp(VECTOR_OP_AND, Byte512Mask.class, byte.class, VLENGTH,
this, m,
(m1, m2) -> m1.bOp(m2, (i, a, b) -> a & b));
}
@Override
@ForceInline
public Byte512Mask or(VectorMask<Byte> mask) {
Objects.requireNonNull(mask);
Byte512Mask m = (Byte512Mask)mask;
return VectorSupport.binaryOp(VECTOR_OP_OR, Byte512Mask.class, byte.class, VLENGTH,
this, m,
(m1, m2) -> m1.bOp(m2, (i, a, b) -> a | b));
}
@ForceInline
/* package-private */
Byte512Mask xor(VectorMask<Byte> mask) {
Objects.requireNonNull(mask);
Byte512Mask m = (Byte512Mask)mask;
return VectorSupport.binaryOp(VECTOR_OP_XOR, Byte512Mask.class, byte.class, VLENGTH,
this, m,
(m1, m2) -> m1.bOp(m2, (i, a, b) -> a ^ b));
}
// Reductions
@Override
@ForceInline
public boolean anyTrue() {
return VectorSupport.test(BT_ne, Byte512Mask.class, byte.class, VLENGTH,
this, vspecies().maskAll(true),
(m, __) -> anyTrueHelper(((Byte512Mask)m).getBits()));
}
@Override
@ForceInline
public boolean allTrue() {
return VectorSupport.test(BT_overflow, Byte512Mask.class, byte.class, VLENGTH,
this, vspecies().maskAll(true),
(m, __) -> allTrueHelper(((Byte512Mask)m).getBits()));
}
@ForceInline
/*package-private*/
static Byte512Mask maskAll(boolean bit) {
return VectorSupport.broadcastCoerced(Byte512Mask.class, byte.class, VLENGTH,
(bit ? -1 : 0), null,
(v, __) -> (v != 0 ? TRUE_MASK : FALSE_MASK));
}
private static final Byte512Mask TRUE_MASK = new Byte512Mask(true);
private static final Byte512Mask FALSE_MASK = new Byte512Mask(false);
}
// Shuffle
static final class Byte512Shuffle extends AbstractShuffle<Byte> {
static final int VLENGTH = VSPECIES.laneCount(); // used by the JVM
static final Class<Byte> ETYPE = byte.class; // used by the JVM
Byte512Shuffle(byte[] reorder) {
super(VLENGTH, reorder);
}
public Byte512Shuffle(int[] reorder) {
super(VLENGTH, reorder);
}
public Byte512Shuffle(int[] reorder, int i) {
super(VLENGTH, reorder, i);
}
public Byte512Shuffle(IntUnaryOperator fn) {
super(VLENGTH, fn);
}
@Override
public ByteSpecies vspecies() {
return VSPECIES;
}
static {
// There must be enough bits in the shuffle lanes to encode
// VLENGTH valid indexes and VLENGTH exceptional ones.
assert(VLENGTH < Byte.MAX_VALUE);
assert(Byte.MIN_VALUE <= -VLENGTH);
}
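        // Illustrative note (not part of the generated file): for this shape VLENGTH is 64,
        // so valid source indexes are 0..63 and exceptional indexes are encoded as -64..-1;
        // both ranges fit in a signed byte lane, which is what the asserts above check.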
static final Byte512Shuffle IOTA = new Byte512Shuffle(IDENTITY);
@Override
@ForceInline
public Byte512Vector toVector() {
return VectorSupport.shuffleToVector(VCLASS, ETYPE, Byte512Shuffle.class, this, VLENGTH,
(s) -> ((Byte512Vector)(((AbstractShuffle<Byte>)(s)).toVectorTemplate())));
}
@Override
@ForceInline
public <F> VectorShuffle<F> cast(VectorSpecies<F> s) {
AbstractSpecies<F> species = (AbstractSpecies<F>) s;
if (length() != species.laneCount())
throw new IllegalArgumentException("VectorShuffle length and species length differ");
int[] shuffleArray = toArray();
// enum-switches don't optimize properly JDK-8161245
switch (species.laneType.switchKey) {
case LaneType.SK_BYTE:
return new Byte512Vector.Byte512Shuffle(shuffleArray).check(species);
case LaneType.SK_SHORT:
return new Short512Vector.Short512Shuffle(shuffleArray).check(species);
case LaneType.SK_INT:
return new Int512Vector.Int512Shuffle(shuffleArray).check(species);
case LaneType.SK_LONG:
return new Long512Vector.Long512Shuffle(shuffleArray).check(species);
case LaneType.SK_FLOAT:
return new Float512Vector.Float512Shuffle(shuffleArray).check(species);
case LaneType.SK_DOUBLE:
return new Double512Vector.Double512Shuffle(shuffleArray).check(species);
}
// Should not reach here.
throw new AssertionError(species);
}
@ForceInline
@Override
public Byte512Shuffle rearrange(VectorShuffle<Byte> shuffle) {
Byte512Shuffle s = (Byte512Shuffle) shuffle;
byte[] reorder1 = reorder();
byte[] reorder2 = s.reorder();
byte[] r = new byte[reorder1.length];
for (int i = 0; i < reorder1.length; i++) {
int ssi = reorder2[i];
r[i] = reorder1[ssi]; // throws on exceptional index
}
return new Byte512Shuffle(r);
}
}
// ================================================
// Specialized low-level memory operations.
@ForceInline
@Override
final
ByteVector fromArray0(byte[] a, int offset) {
return super.fromArray0Template(a, offset); // specialize
}
@ForceInline
@Override
final
ByteVector fromByteArray0(byte[] a, int offset) {
return super.fromByteArray0Template(a, offset); // specialize
}
@ForceInline
@Override
final
ByteVector fromByteBuffer0(ByteBuffer bb, int offset) {
return super.fromByteBuffer0Template(bb, offset); // specialize
}
@ForceInline
@Override
final
void intoArray0(byte[] a, int offset) {
super.intoArray0Template(a, offset); // specialize
}
@ForceInline
@Override
final
void intoByteArray0(byte[] a, int offset) {
super.intoByteArray0Template(a, offset); // specialize
}
// End of specialized low-level memory operations.
// ================================================
}
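The lane accessors defined above funnel into VectorSupport intrinsics; the following minimal sketch (illustrative only, not part of the patch; the class and method names are invented for the example, and it assumes the incubator module is added with --add-modules jdk.incubator.vector) shows how they are reached from the public API.

import jdk.incubator.vector.ByteVector;
import jdk.incubator.vector.VectorSpecies;

class Byte512LaneSketch {
    static byte demo() {
        VectorSpecies<Byte> species = ByteVector.SPECIES_512;  // 64 byte lanes
        ByteVector v = ByteVector.zero(species);               // all lanes zero
        v = v.withLane(3, (byte) 42);                          // dispatches to withLaneHelper(3, e)
        return v.lane(3);                                      // extracted via VectorSupport.extract
    }
}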

View file

@@ -0,0 +1,824 @@
/*
* Copyright (c) 2017, 2020, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation. Oracle designates this
* particular file as subject to the "Classpath" exception as provided
* by Oracle in the LICENSE file that accompanied this code.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*/
package jdk.incubator.vector;
import java.nio.ByteBuffer;
import java.util.Arrays;
import java.util.Objects;
import java.util.function.IntUnaryOperator;
import jdk.internal.vm.annotation.ForceInline;
import jdk.internal.vm.vector.VectorSupport;
import static jdk.internal.vm.vector.VectorSupport.*;
import static jdk.incubator.vector.VectorOperators.*;
// -- This file was mechanically generated: Do not edit! -- //
@SuppressWarnings("cast") // warning: redundant cast
final class Byte64Vector extends ByteVector {
static final ByteSpecies VSPECIES =
(ByteSpecies) ByteVector.SPECIES_64;
static final VectorShape VSHAPE =
VSPECIES.vectorShape();
static final Class<Byte64Vector> VCLASS = Byte64Vector.class;
static final int VSIZE = VSPECIES.vectorBitSize();
static final int VLENGTH = VSPECIES.laneCount(); // used by the JVM
static final Class<Byte> ETYPE = byte.class; // used by the JVM
Byte64Vector(byte[] v) {
super(v);
}
// For compatibility as Byte64Vector::new,
// stored into species.vectorFactory.
Byte64Vector(Object v) {
this((byte[]) v);
}
static final Byte64Vector ZERO = new Byte64Vector(new byte[VLENGTH]);
static final Byte64Vector IOTA = new Byte64Vector(VSPECIES.iotaArray());
static {
// Warm up a few species caches.
// If we do this too much we will
// get NPEs from bootstrap circularity.
VSPECIES.dummyVector();
VSPECIES.withLanes(LaneType.BYTE);
}
// Specialized extractors
@ForceInline
final @Override
public ByteSpecies vspecies() {
// ISSUE: This should probably be a @Stable
// field inside AbstractVector, rather than
// a megamorphic method.
return VSPECIES;
}
@ForceInline
@Override
public final Class<Byte> elementType() { return byte.class; }
@ForceInline
@Override
public final int elementSize() { return Byte.SIZE; }
@ForceInline
@Override
public final VectorShape shape() { return VSHAPE; }
@ForceInline
@Override
public final int length() { return VLENGTH; }
@ForceInline
@Override
public final int bitSize() { return VSIZE; }
@ForceInline
@Override
public final int byteSize() { return VSIZE / Byte.SIZE; }
/*package-private*/
@ForceInline
final @Override
byte[] vec() {
return (byte[])getPayload();
}
// Virtualized constructors
@Override
@ForceInline
public final Byte64Vector broadcast(byte e) {
return (Byte64Vector) super.broadcastTemplate(e); // specialize
}
@Override
@ForceInline
public final Byte64Vector broadcast(long e) {
return (Byte64Vector) super.broadcastTemplate(e); // specialize
}
@Override
@ForceInline
Byte64Mask maskFromArray(boolean[] bits) {
return new Byte64Mask(bits);
}
@Override
@ForceInline
Byte64Shuffle iotaShuffle() { return Byte64Shuffle.IOTA; }
@ForceInline
Byte64Shuffle iotaShuffle(int start, int step, boolean wrap) {
if (wrap) {
return (Byte64Shuffle)VectorSupport.shuffleIota(ETYPE, Byte64Shuffle.class, VSPECIES, VLENGTH, start, step, 1,
(l, lstart, lstep, s) -> s.shuffleFromOp(i -> (VectorIntrinsics.wrapToRange(i*lstep + lstart, l))));
} else {
return (Byte64Shuffle)VectorSupport.shuffleIota(ETYPE, Byte64Shuffle.class, VSPECIES, VLENGTH, start, step, 0,
(l, lstart, lstep, s) -> s.shuffleFromOp(i -> (i*lstep + lstart)));
}
}
@Override
@ForceInline
Byte64Shuffle shuffleFromBytes(byte[] reorder) { return new Byte64Shuffle(reorder); }
@Override
@ForceInline
Byte64Shuffle shuffleFromArray(int[] indexes, int i) { return new Byte64Shuffle(indexes, i); }
@Override
@ForceInline
Byte64Shuffle shuffleFromOp(IntUnaryOperator fn) { return new Byte64Shuffle(fn); }
// Make a vector of the same species but the given elements:
@ForceInline
final @Override
Byte64Vector vectorFactory(byte[] vec) {
return new Byte64Vector(vec);
}
@ForceInline
final @Override
Byte64Vector asByteVectorRaw() {
return (Byte64Vector) super.asByteVectorRawTemplate(); // specialize
}
@ForceInline
final @Override
AbstractVector<?> asVectorRaw(LaneType laneType) {
return super.asVectorRawTemplate(laneType); // specialize
}
// Unary operator
@ForceInline
final @Override
Byte64Vector uOp(FUnOp f) {
return (Byte64Vector) super.uOpTemplate(f); // specialize
}
@ForceInline
final @Override
Byte64Vector uOp(VectorMask<Byte> m, FUnOp f) {
return (Byte64Vector)
super.uOpTemplate((Byte64Mask)m, f); // specialize
}
// Binary operator
@ForceInline
final @Override
Byte64Vector bOp(Vector<Byte> v, FBinOp f) {
return (Byte64Vector) super.bOpTemplate((Byte64Vector)v, f); // specialize
}
@ForceInline
final @Override
Byte64Vector bOp(Vector<Byte> v,
VectorMask<Byte> m, FBinOp f) {
return (Byte64Vector)
super.bOpTemplate((Byte64Vector)v, (Byte64Mask)m,
f); // specialize
}
// Ternary operator
@ForceInline
final @Override
Byte64Vector tOp(Vector<Byte> v1, Vector<Byte> v2, FTriOp f) {
return (Byte64Vector)
super.tOpTemplate((Byte64Vector)v1, (Byte64Vector)v2,
f); // specialize
}
@ForceInline
final @Override
Byte64Vector tOp(Vector<Byte> v1, Vector<Byte> v2,
VectorMask<Byte> m, FTriOp f) {
return (Byte64Vector)
super.tOpTemplate((Byte64Vector)v1, (Byte64Vector)v2,
(Byte64Mask)m, f); // specialize
}
@ForceInline
final @Override
byte rOp(byte v, FBinOp f) {
return super.rOpTemplate(v, f); // specialize
}
@Override
@ForceInline
public final <F>
Vector<F> convertShape(VectorOperators.Conversion<Byte,F> conv,
VectorSpecies<F> rsp, int part) {
return super.convertShapeTemplate(conv, rsp, part); // specialize
}
@Override
@ForceInline
public final <F>
Vector<F> reinterpretShape(VectorSpecies<F> toSpecies, int part) {
return super.reinterpretShapeTemplate(toSpecies, part); // specialize
}
// Specialized algebraic operations:
// The following definition forces a specialized version of this
// crucial method into the v-table of this class. A call to add()
// will inline to a call to lanewise(ADD,), at which point the JIT
// intrinsic will have the opcode of ADD, plus all the metadata
// for this particular class, enabling it to generate precise
// code.
//
// There is probably no benefit to the JIT to specialize the
// masked or broadcast versions of the lanewise method.
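    // Illustrative note (not part of the generated file): with the overrides below in place,
    // user code such as
    //     ByteVector r = a.add(b);          // a, b created from ByteVector.SPECIES_64
    // inlines into a.lanewise(VectorOperators.ADD, b), so the intrinsic sees the concrete
    // class Byte64Vector together with the ADD opcode and can emit a single 64-bit vector add.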
@Override
@ForceInline
public Byte64Vector lanewise(Unary op) {
return (Byte64Vector) super.lanewiseTemplate(op); // specialize
}
@Override
@ForceInline
public Byte64Vector lanewise(Binary op, Vector<Byte> v) {
return (Byte64Vector) super.lanewiseTemplate(op, v); // specialize
}
/*package-private*/
@Override
@ForceInline Byte64Vector
lanewiseShift(VectorOperators.Binary op, int e) {
return (Byte64Vector) super.lanewiseShiftTemplate(op, e); // specialize
}
/*package-private*/
@Override
@ForceInline
public final
Byte64Vector
lanewise(VectorOperators.Ternary op, Vector<Byte> v1, Vector<Byte> v2) {
return (Byte64Vector) super.lanewiseTemplate(op, v1, v2); // specialize
}
@Override
@ForceInline
public final
Byte64Vector addIndex(int scale) {
return (Byte64Vector) super.addIndexTemplate(scale); // specialize
}
// Type specific horizontal reductions
@Override
@ForceInline
public final byte reduceLanes(VectorOperators.Associative op) {
return super.reduceLanesTemplate(op); // specialized
}
@Override
@ForceInline
public final byte reduceLanes(VectorOperators.Associative op,
VectorMask<Byte> m) {
return super.reduceLanesTemplate(op, m); // specialized
}
@Override
@ForceInline
public final long reduceLanesToLong(VectorOperators.Associative op) {
return (long) super.reduceLanesTemplate(op); // specialized
}
@Override
@ForceInline
public final long reduceLanesToLong(VectorOperators.Associative op,
VectorMask<Byte> m) {
return (long) super.reduceLanesTemplate(op, m); // specialized
}
@Override
@ForceInline
public VectorShuffle<Byte> toShuffle() {
byte[] a = toArray();
int[] sa = new int[a.length];
for (int i = 0; i < a.length; i++) {
sa[i] = (int) a[i];
}
return VectorShuffle.fromArray(VSPECIES, sa, 0);
}
// Specialized unary testing
@Override
@ForceInline
public final Byte64Mask test(Test op) {
return super.testTemplate(Byte64Mask.class, op); // specialize
}
// Specialized comparisons
@Override
@ForceInline
public final Byte64Mask compare(Comparison op, Vector<Byte> v) {
return super.compareTemplate(Byte64Mask.class, op, v); // specialize
}
@Override
@ForceInline
public final Byte64Mask compare(Comparison op, byte s) {
return super.compareTemplate(Byte64Mask.class, op, s); // specialize
}
@Override
@ForceInline
public final Byte64Mask compare(Comparison op, long s) {
return super.compareTemplate(Byte64Mask.class, op, s); // specialize
}
@Override
@ForceInline
public Byte64Vector blend(Vector<Byte> v, VectorMask<Byte> m) {
return (Byte64Vector)
super.blendTemplate(Byte64Mask.class,
(Byte64Vector) v,
(Byte64Mask) m); // specialize
}
@Override
@ForceInline
public Byte64Vector slice(int origin, Vector<Byte> v) {
return (Byte64Vector) super.sliceTemplate(origin, v); // specialize
}
@Override
@ForceInline
public Byte64Vector slice(int origin) {
if ((origin < 0) || (origin >= VLENGTH)) {
throw new ArrayIndexOutOfBoundsException("Index " + origin + " out of bounds for vector length " + VLENGTH);
} else {
Byte64Shuffle Iota = iotaShuffle();
VectorMask<Byte> BlendMask = Iota.toVector().compare(VectorOperators.LT, (broadcast((byte)(VLENGTH-origin))));
Iota = iotaShuffle(origin, 1, true);
return ZERO.blend(this.rearrange(Iota), BlendMask);
}
}
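    // Illustrative note (not part of the generated file): for VLENGTH == 8 and origin == 3,
    // the wrapped iota shuffle is [3,4,5,6,7,0,1,2] and the blend mask keeps the first
    // 5 lanes of the rearranged vector, so the result is lanes 3..7 of this vector
    // followed by zeros.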
@Override
@ForceInline
public Byte64Vector unslice(int origin, Vector<Byte> w, int part) {
return (Byte64Vector) super.unsliceTemplate(origin, w, part); // specialize
}
@Override
@ForceInline
public Byte64Vector unslice(int origin, Vector<Byte> w, int part, VectorMask<Byte> m) {
return (Byte64Vector)
super.unsliceTemplate(Byte64Mask.class,
origin, w, part,
(Byte64Mask) m); // specialize
}
@Override
@ForceInline
public Byte64Vector unslice(int origin) {
if ((origin < 0) || (origin >= VLENGTH)) {
throw new ArrayIndexOutOfBoundsException("Index " + origin + " out of bounds for vector length " + VLENGTH);
} else {
Byte64Shuffle Iota = iotaShuffle();
VectorMask<Byte> BlendMask = Iota.toVector().compare(VectorOperators.GE, (broadcast((byte)(origin))));
Iota = iotaShuffle(-origin, 1, true);
return ZERO.blend(this.rearrange(Iota), BlendMask);
}
}
@Override
@ForceInline
public Byte64Vector rearrange(VectorShuffle<Byte> s) {
return (Byte64Vector)
super.rearrangeTemplate(Byte64Shuffle.class,
(Byte64Shuffle) s); // specialize
}
@Override
@ForceInline
public Byte64Vector rearrange(VectorShuffle<Byte> shuffle,
VectorMask<Byte> m) {
return (Byte64Vector)
super.rearrangeTemplate(Byte64Shuffle.class,
(Byte64Shuffle) shuffle,
(Byte64Mask) m); // specialize
}
@Override
@ForceInline
public Byte64Vector rearrange(VectorShuffle<Byte> s,
Vector<Byte> v) {
return (Byte64Vector)
super.rearrangeTemplate(Byte64Shuffle.class,
(Byte64Shuffle) s,
(Byte64Vector) v); // specialize
}
@Override
@ForceInline
public Byte64Vector selectFrom(Vector<Byte> v) {
return (Byte64Vector)
super.selectFromTemplate((Byte64Vector) v); // specialize
}
@Override
@ForceInline
public Byte64Vector selectFrom(Vector<Byte> v,
VectorMask<Byte> m) {
return (Byte64Vector)
super.selectFromTemplate((Byte64Vector) v,
(Byte64Mask) m); // specialize
}
@ForceInline
@Override
public byte lane(int i) {
switch(i) {
case 0: return laneHelper(0);
case 1: return laneHelper(1);
case 2: return laneHelper(2);
case 3: return laneHelper(3);
case 4: return laneHelper(4);
case 5: return laneHelper(5);
case 6: return laneHelper(6);
case 7: return laneHelper(7);
default: throw new IllegalArgumentException("Index " + i + " must be zero or positive, and less than " + VLENGTH);
}
}
public byte laneHelper(int i) {
return (byte) VectorSupport.extract(
VCLASS, ETYPE, VLENGTH,
this, i,
(vec, ix) -> {
byte[] vecarr = vec.vec();
return (long)vecarr[ix];
});
}
@ForceInline
@Override
public Byte64Vector withLane(int i, byte e) {
switch (i) {
case 0: return withLaneHelper(0, e);
case 1: return withLaneHelper(1, e);
case 2: return withLaneHelper(2, e);
case 3: return withLaneHelper(3, e);
case 4: return withLaneHelper(4, e);
case 5: return withLaneHelper(5, e);
case 6: return withLaneHelper(6, e);
case 7: return withLaneHelper(7, e);
default: throw new IllegalArgumentException("Index " + i + " must be zero or positive, and less than " + VLENGTH);
}
}
public Byte64Vector withLaneHelper(int i, byte e) {
return VectorSupport.insert(
VCLASS, ETYPE, VLENGTH,
this, i, (long)e,
(v, ix, bits) -> {
byte[] res = v.vec().clone();
res[ix] = (byte)bits;
return v.vectorFactory(res);
});
}
// Mask
static final class Byte64Mask extends AbstractMask<Byte> {
static final int VLENGTH = VSPECIES.laneCount(); // used by the JVM
static final Class<Byte> ETYPE = byte.class; // used by the JVM
Byte64Mask(boolean[] bits) {
this(bits, 0);
}
Byte64Mask(boolean[] bits, int offset) {
super(prepare(bits, offset));
}
Byte64Mask(boolean val) {
super(prepare(val));
}
private static boolean[] prepare(boolean[] bits, int offset) {
boolean[] newBits = new boolean[VSPECIES.laneCount()];
for (int i = 0; i < newBits.length; i++) {
newBits[i] = bits[offset + i];
}
return newBits;
}
private static boolean[] prepare(boolean val) {
boolean[] bits = new boolean[VSPECIES.laneCount()];
Arrays.fill(bits, val);
return bits;
}
@ForceInline
final @Override
public ByteSpecies vspecies() {
// ISSUE: This should probably be a @Stable
// field inside AbstractMask, rather than
// a megamorphic method.
return VSPECIES;
}
@ForceInline
boolean[] getBits() {
return (boolean[])getPayload();
}
@Override
Byte64Mask uOp(MUnOp f) {
boolean[] res = new boolean[vspecies().laneCount()];
boolean[] bits = getBits();
for (int i = 0; i < res.length; i++) {
res[i] = f.apply(i, bits[i]);
}
return new Byte64Mask(res);
}
@Override
Byte64Mask bOp(VectorMask<Byte> m, MBinOp f) {
boolean[] res = new boolean[vspecies().laneCount()];
boolean[] bits = getBits();
boolean[] mbits = ((Byte64Mask)m).getBits();
for (int i = 0; i < res.length; i++) {
res[i] = f.apply(i, bits[i], mbits[i]);
}
return new Byte64Mask(res);
}
@ForceInline
@Override
public final
Byte64Vector toVector() {
return (Byte64Vector) super.toVectorTemplate(); // specialize
}
@Override
@ForceInline
public <E> VectorMask<E> cast(VectorSpecies<E> s) {
AbstractSpecies<E> species = (AbstractSpecies<E>) s;
if (length() != species.laneCount())
throw new IllegalArgumentException("VectorMask length and species length differ");
boolean[] maskArray = toArray();
// enum-switches don't optimize properly JDK-8161245
switch (species.laneType.switchKey) {
case LaneType.SK_BYTE:
return new Byte64Vector.Byte64Mask(maskArray).check(species);
case LaneType.SK_SHORT:
return new Short64Vector.Short64Mask(maskArray).check(species);
case LaneType.SK_INT:
return new Int64Vector.Int64Mask(maskArray).check(species);
case LaneType.SK_LONG:
return new Long64Vector.Long64Mask(maskArray).check(species);
case LaneType.SK_FLOAT:
return new Float64Vector.Float64Mask(maskArray).check(species);
case LaneType.SK_DOUBLE:
return new Double64Vector.Double64Mask(maskArray).check(species);
}
// Should not reach here.
throw new AssertionError(species);
}
// Unary operations
@Override
@ForceInline
public Byte64Mask not() {
return xor(maskAll(true));
}
// Binary operations
@Override
@ForceInline
public Byte64Mask and(VectorMask<Byte> mask) {
Objects.requireNonNull(mask);
Byte64Mask m = (Byte64Mask)mask;
return VectorSupport.binaryOp(VECTOR_OP_AND, Byte64Mask.class, byte.class, VLENGTH,
this, m,
(m1, m2) -> m1.bOp(m2, (i, a, b) -> a & b));
}
@Override
@ForceInline
public Byte64Mask or(VectorMask<Byte> mask) {
Objects.requireNonNull(mask);
Byte64Mask m = (Byte64Mask)mask;
return VectorSupport.binaryOp(VECTOR_OP_OR, Byte64Mask.class, byte.class, VLENGTH,
this, m,
(m1, m2) -> m1.bOp(m2, (i, a, b) -> a | b));
}
@ForceInline
/* package-private */
Byte64Mask xor(VectorMask<Byte> mask) {
Objects.requireNonNull(mask);
Byte64Mask m = (Byte64Mask)mask;
return VectorSupport.binaryOp(VECTOR_OP_XOR, Byte64Mask.class, byte.class, VLENGTH,
this, m,
(m1, m2) -> m1.bOp(m2, (i, a, b) -> a ^ b));
}
// Reductions
@Override
@ForceInline
public boolean anyTrue() {
return VectorSupport.test(BT_ne, Byte64Mask.class, byte.class, VLENGTH,
this, vspecies().maskAll(true),
(m, __) -> anyTrueHelper(((Byte64Mask)m).getBits()));
}
@Override
@ForceInline
public boolean allTrue() {
return VectorSupport.test(BT_overflow, Byte64Mask.class, byte.class, VLENGTH,
this, vspecies().maskAll(true),
(m, __) -> allTrueHelper(((Byte64Mask)m).getBits()));
}
@ForceInline
/*package-private*/
static Byte64Mask maskAll(boolean bit) {
return VectorSupport.broadcastCoerced(Byte64Mask.class, byte.class, VLENGTH,
(bit ? -1 : 0), null,
(v, __) -> (v != 0 ? TRUE_MASK : FALSE_MASK));
}
private static final Byte64Mask TRUE_MASK = new Byte64Mask(true);
private static final Byte64Mask FALSE_MASK = new Byte64Mask(false);
}
// Shuffle
static final class Byte64Shuffle extends AbstractShuffle<Byte> {
static final int VLENGTH = VSPECIES.laneCount(); // used by the JVM
static final Class<Byte> ETYPE = byte.class; // used by the JVM
Byte64Shuffle(byte[] reorder) {
super(VLENGTH, reorder);
}
public Byte64Shuffle(int[] reorder) {
super(VLENGTH, reorder);
}
public Byte64Shuffle(int[] reorder, int i) {
super(VLENGTH, reorder, i);
}
public Byte64Shuffle(IntUnaryOperator fn) {
super(VLENGTH, fn);
}
@Override
public ByteSpecies vspecies() {
return VSPECIES;
}
static {
// There must be enough bits in the shuffle lanes to encode
// VLENGTH valid indexes and VLENGTH exceptional ones.
assert(VLENGTH < Byte.MAX_VALUE);
assert(Byte.MIN_VALUE <= -VLENGTH);
}
static final Byte64Shuffle IOTA = new Byte64Shuffle(IDENTITY);
@Override
@ForceInline
public Byte64Vector toVector() {
return VectorSupport.shuffleToVector(VCLASS, ETYPE, Byte64Shuffle.class, this, VLENGTH,
(s) -> ((Byte64Vector)(((AbstractShuffle<Byte>)(s)).toVectorTemplate())));
}
@Override
@ForceInline
public <F> VectorShuffle<F> cast(VectorSpecies<F> s) {
AbstractSpecies<F> species = (AbstractSpecies<F>) s;
if (length() != species.laneCount())
throw new IllegalArgumentException("VectorShuffle length and species length differ");
int[] shuffleArray = toArray();
// enum-switches don't optimize properly JDK-8161245
switch (species.laneType.switchKey) {
case LaneType.SK_BYTE:
return new Byte64Vector.Byte64Shuffle(shuffleArray).check(species);
case LaneType.SK_SHORT:
return new Short64Vector.Short64Shuffle(shuffleArray).check(species);
case LaneType.SK_INT:
return new Int64Vector.Int64Shuffle(shuffleArray).check(species);
case LaneType.SK_LONG:
return new Long64Vector.Long64Shuffle(shuffleArray).check(species);
case LaneType.SK_FLOAT:
return new Float64Vector.Float64Shuffle(shuffleArray).check(species);
case LaneType.SK_DOUBLE:
return new Double64Vector.Double64Shuffle(shuffleArray).check(species);
}
// Should not reach here.
throw new AssertionError(species);
}
@ForceInline
@Override
public Byte64Shuffle rearrange(VectorShuffle<Byte> shuffle) {
Byte64Shuffle s = (Byte64Shuffle) shuffle;
byte[] reorder1 = reorder();
byte[] reorder2 = s.reorder();
byte[] r = new byte[reorder1.length];
for (int i = 0; i < reorder1.length; i++) {
int ssi = reorder2[i];
r[i] = reorder1[ssi]; // throws on exceptional index
}
return new Byte64Shuffle(r);
}
}
// ================================================
// Specialized low-level memory operations.
@ForceInline
@Override
final
ByteVector fromArray0(byte[] a, int offset) {
return super.fromArray0Template(a, offset); // specialize
}
@ForceInline
@Override
final
ByteVector fromByteArray0(byte[] a, int offset) {
return super.fromByteArray0Template(a, offset); // specialize
}
@ForceInline
@Override
final
ByteVector fromByteBuffer0(ByteBuffer bb, int offset) {
return super.fromByteBuffer0Template(bb, offset); // specialize
}
@ForceInline
@Override
final
void intoArray0(byte[] a, int offset) {
super.intoArray0Template(a, offset); // specialize
}
@ForceInline
@Override
final
void intoByteArray0(byte[] a, int offset) {
super.intoByteArray0Template(a, offset); // specialize
}
// End of specialized low-level memory operations.
// ================================================
}
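Masks produced by the compare methods above drive blending and the anyTrue/allTrue reductions; a minimal sketch follows (illustrative only, not part of the patch; the helper class name is invented, and the arrays are assumed to hold at least SPECIES_64.length() elements).

import jdk.incubator.vector.ByteVector;
import jdk.incubator.vector.VectorMask;
import jdk.incubator.vector.VectorOperators;

class Byte64MaskSketch {
    static boolean demo(byte[] a, byte[] b) {
        ByteVector va = ByteVector.fromArray(ByteVector.SPECIES_64, a, 0);
        ByteVector vb = ByteVector.fromArray(ByteVector.SPECIES_64, b, 0);
        VectorMask<Byte> m = va.compare(VectorOperators.GT, vb);  // a Byte64Mask underneath
        ByteVector max = vb.blend(va, m);                         // per-lane select of the larger value
        return m.anyTrue();                                       // lowered via VectorSupport.test
    }
}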

View file

@@ -0,0 +1,810 @@
/*
* Copyright (c) 2017, 2020, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation. Oracle designates this
* particular file as subject to the "Classpath" exception as provided
* by Oracle in the LICENSE file that accompanied this code.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*/
package jdk.incubator.vector;
import java.nio.ByteBuffer;
import java.util.Arrays;
import java.util.Objects;
import java.util.function.IntUnaryOperator;
import jdk.internal.vm.annotation.ForceInline;
import jdk.internal.vm.vector.VectorSupport;
import static jdk.internal.vm.vector.VectorSupport.*;
import static jdk.incubator.vector.VectorOperators.*;
// -- This file was mechanically generated: Do not edit! -- //
@SuppressWarnings("cast") // warning: redundant cast
final class ByteMaxVector extends ByteVector {
static final ByteSpecies VSPECIES =
(ByteSpecies) ByteVector.SPECIES_MAX;
static final VectorShape VSHAPE =
VSPECIES.vectorShape();
static final Class<ByteMaxVector> VCLASS = ByteMaxVector.class;
static final int VSIZE = VSPECIES.vectorBitSize();
static final int VLENGTH = VSPECIES.laneCount(); // used by the JVM
static final Class<Byte> ETYPE = byte.class; // used by the JVM
ByteMaxVector(byte[] v) {
super(v);
}
// For compatibility as ByteMaxVector::new,
// stored into species.vectorFactory.
ByteMaxVector(Object v) {
this((byte[]) v);
}
static final ByteMaxVector ZERO = new ByteMaxVector(new byte[VLENGTH]);
static final ByteMaxVector IOTA = new ByteMaxVector(VSPECIES.iotaArray());
static {
// Warm up a few species caches.
// If we do this too much we will
// get NPEs from bootstrap circularity.
VSPECIES.dummyVector();
VSPECIES.withLanes(LaneType.BYTE);
}
// Specialized extractors
@ForceInline
final @Override
public ByteSpecies vspecies() {
// ISSUE: This should probably be a @Stable
// field inside AbstractVector, rather than
// a megamorphic method.
return VSPECIES;
}
@ForceInline
@Override
public final Class<Byte> elementType() { return byte.class; }
@ForceInline
@Override
public final int elementSize() { return Byte.SIZE; }
@ForceInline
@Override
public final VectorShape shape() { return VSHAPE; }
@ForceInline
@Override
public final int length() { return VLENGTH; }
@ForceInline
@Override
public final int bitSize() { return VSIZE; }
@ForceInline
@Override
public final int byteSize() { return VSIZE / Byte.SIZE; }
/*package-private*/
@ForceInline
final @Override
byte[] vec() {
return (byte[])getPayload();
}
// Virtualized constructors
@Override
@ForceInline
public final ByteMaxVector broadcast(byte e) {
return (ByteMaxVector) super.broadcastTemplate(e); // specialize
}
@Override
@ForceInline
public final ByteMaxVector broadcast(long e) {
return (ByteMaxVector) super.broadcastTemplate(e); // specialize
}
@Override
@ForceInline
ByteMaxMask maskFromArray(boolean[] bits) {
return new ByteMaxMask(bits);
}
@Override
@ForceInline
ByteMaxShuffle iotaShuffle() { return ByteMaxShuffle.IOTA; }
@ForceInline
ByteMaxShuffle iotaShuffle(int start, int step, boolean wrap) {
if (wrap) {
return (ByteMaxShuffle)VectorSupport.shuffleIota(ETYPE, ByteMaxShuffle.class, VSPECIES, VLENGTH, start, step, 1,
(l, lstart, lstep, s) -> s.shuffleFromOp(i -> (VectorIntrinsics.wrapToRange(i*lstep + lstart, l))));
} else {
return (ByteMaxShuffle)VectorSupport.shuffleIota(ETYPE, ByteMaxShuffle.class, VSPECIES, VLENGTH, start, step, 0,
(l, lstart, lstep, s) -> s.shuffleFromOp(i -> (i*lstep + lstart)));
}
}
@Override
@ForceInline
ByteMaxShuffle shuffleFromBytes(byte[] reorder) { return new ByteMaxShuffle(reorder); }
@Override
@ForceInline
ByteMaxShuffle shuffleFromArray(int[] indexes, int i) { return new ByteMaxShuffle(indexes, i); }
@Override
@ForceInline
ByteMaxShuffle shuffleFromOp(IntUnaryOperator fn) { return new ByteMaxShuffle(fn); }
// Make a vector of the same species but the given elements:
@ForceInline
final @Override
ByteMaxVector vectorFactory(byte[] vec) {
return new ByteMaxVector(vec);
}
@ForceInline
final @Override
ByteMaxVector asByteVectorRaw() {
return (ByteMaxVector) super.asByteVectorRawTemplate(); // specialize
}
@ForceInline
final @Override
AbstractVector<?> asVectorRaw(LaneType laneType) {
return super.asVectorRawTemplate(laneType); // specialize
}
// Unary operator
@ForceInline
final @Override
ByteMaxVector uOp(FUnOp f) {
return (ByteMaxVector) super.uOpTemplate(f); // specialize
}
@ForceInline
final @Override
ByteMaxVector uOp(VectorMask<Byte> m, FUnOp f) {
return (ByteMaxVector)
super.uOpTemplate((ByteMaxMask)m, f); // specialize
}
// Binary operator
@ForceInline
final @Override
ByteMaxVector bOp(Vector<Byte> v, FBinOp f) {
return (ByteMaxVector) super.bOpTemplate((ByteMaxVector)v, f); // specialize
}
@ForceInline
final @Override
ByteMaxVector bOp(Vector<Byte> v,
VectorMask<Byte> m, FBinOp f) {
return (ByteMaxVector)
super.bOpTemplate((ByteMaxVector)v, (ByteMaxMask)m,
f); // specialize
}
// Ternary operator
@ForceInline
final @Override
ByteMaxVector tOp(Vector<Byte> v1, Vector<Byte> v2, FTriOp f) {
return (ByteMaxVector)
super.tOpTemplate((ByteMaxVector)v1, (ByteMaxVector)v2,
f); // specialize
}
@ForceInline
final @Override
ByteMaxVector tOp(Vector<Byte> v1, Vector<Byte> v2,
VectorMask<Byte> m, FTriOp f) {
return (ByteMaxVector)
super.tOpTemplate((ByteMaxVector)v1, (ByteMaxVector)v2,
(ByteMaxMask)m, f); // specialize
}
@ForceInline
final @Override
byte rOp(byte v, FBinOp f) {
return super.rOpTemplate(v, f); // specialize
}
@Override
@ForceInline
public final <F>
Vector<F> convertShape(VectorOperators.Conversion<Byte,F> conv,
VectorSpecies<F> rsp, int part) {
return super.convertShapeTemplate(conv, rsp, part); // specialize
}
@Override
@ForceInline
public final <F>
Vector<F> reinterpretShape(VectorSpecies<F> toSpecies, int part) {
return super.reinterpretShapeTemplate(toSpecies, part); // specialize
}
// Specialized algebraic operations:
// The following definition forces a specialized version of this
// crucial method into the v-table of this class. A call to add()
// will inline to a call to lanewise(ADD,), at which point the JIT
// intrinsic will have the opcode of ADD, plus all the metadata
// for this particular class, enabling it to generate precise
// code.
//
// There is probably no benefit to the JIT to specialize the
// masked or broadcast versions of the lanewise method.
@Override
@ForceInline
public ByteMaxVector lanewise(Unary op) {
return (ByteMaxVector) super.lanewiseTemplate(op); // specialize
}
@Override
@ForceInline
public ByteMaxVector lanewise(Binary op, Vector<Byte> v) {
return (ByteMaxVector) super.lanewiseTemplate(op, v); // specialize
}
/*package-private*/
@Override
@ForceInline ByteMaxVector
lanewiseShift(VectorOperators.Binary op, int e) {
return (ByteMaxVector) super.lanewiseShiftTemplate(op, e); // specialize
}
/*package-private*/
@Override
@ForceInline
public final
ByteMaxVector
lanewise(VectorOperators.Ternary op, Vector<Byte> v1, Vector<Byte> v2) {
return (ByteMaxVector) super.lanewiseTemplate(op, v1, v2); // specialize
}
@Override
@ForceInline
public final
ByteMaxVector addIndex(int scale) {
return (ByteMaxVector) super.addIndexTemplate(scale); // specialize
}
// Type specific horizontal reductions
@Override
@ForceInline
public final byte reduceLanes(VectorOperators.Associative op) {
return super.reduceLanesTemplate(op); // specialized
}
@Override
@ForceInline
public final byte reduceLanes(VectorOperators.Associative op,
VectorMask<Byte> m) {
return super.reduceLanesTemplate(op, m); // specialized
}
@Override
@ForceInline
public final long reduceLanesToLong(VectorOperators.Associative op) {
return (long) super.reduceLanesTemplate(op); // specialized
}
@Override
@ForceInline
public final long reduceLanesToLong(VectorOperators.Associative op,
VectorMask<Byte> m) {
return (long) super.reduceLanesTemplate(op, m); // specialized
}
@Override
@ForceInline
public VectorShuffle<Byte> toShuffle() {
byte[] a = toArray();
int[] sa = new int[a.length];
for (int i = 0; i < a.length; i++) {
sa[i] = (int) a[i];
}
return VectorShuffle.fromArray(VSPECIES, sa, 0);
}
// Specialized unary testing
@Override
@ForceInline
public final ByteMaxMask test(Test op) {
return super.testTemplate(ByteMaxMask.class, op); // specialize
}
// Specialized comparisons
@Override
@ForceInline
public final ByteMaxMask compare(Comparison op, Vector<Byte> v) {
return super.compareTemplate(ByteMaxMask.class, op, v); // specialize
}
@Override
@ForceInline
public final ByteMaxMask compare(Comparison op, byte s) {
return super.compareTemplate(ByteMaxMask.class, op, s); // specialize
}
@Override
@ForceInline
public final ByteMaxMask compare(Comparison op, long s) {
return super.compareTemplate(ByteMaxMask.class, op, s); // specialize
}
@Override
@ForceInline
public ByteMaxVector blend(Vector<Byte> v, VectorMask<Byte> m) {
return (ByteMaxVector)
super.blendTemplate(ByteMaxMask.class,
(ByteMaxVector) v,
(ByteMaxMask) m); // specialize
}
@Override
@ForceInline
public ByteMaxVector slice(int origin, Vector<Byte> v) {
return (ByteMaxVector) super.sliceTemplate(origin, v); // specialize
}
@Override
@ForceInline
public ByteMaxVector slice(int origin) {
if ((origin < 0) || (origin >= VLENGTH)) {
throw new ArrayIndexOutOfBoundsException("Index " + origin + " out of bounds for vector length " + VLENGTH);
} else {
ByteMaxShuffle Iota = iotaShuffle();
VectorMask<Byte> BlendMask = Iota.toVector().compare(VectorOperators.LT, (broadcast((byte)(VLENGTH-origin))));
Iota = iotaShuffle(origin, 1, true);
return ZERO.blend(this.rearrange(Iota), BlendMask);
}
}
@Override
@ForceInline
public ByteMaxVector unslice(int origin, Vector<Byte> w, int part) {
return (ByteMaxVector) super.unsliceTemplate(origin, w, part); // specialize
}
@Override
@ForceInline
public ByteMaxVector unslice(int origin, Vector<Byte> w, int part, VectorMask<Byte> m) {
return (ByteMaxVector)
super.unsliceTemplate(ByteMaxMask.class,
origin, w, part,
(ByteMaxMask) m); // specialize
}
@Override
@ForceInline
public ByteMaxVector unslice(int origin) {
if ((origin < 0) || (origin >= VLENGTH)) {
throw new ArrayIndexOutOfBoundsException("Index " + origin + " out of bounds for vector length " + VLENGTH);
} else {
ByteMaxShuffle Iota = iotaShuffle();
VectorMask<Byte> BlendMask = Iota.toVector().compare(VectorOperators.GE, (broadcast((byte)(origin))));
Iota = iotaShuffle(-origin, 1, true);
return ZERO.blend(this.rearrange(Iota), BlendMask);
}
}
@Override
@ForceInline
public ByteMaxVector rearrange(VectorShuffle<Byte> s) {
return (ByteMaxVector)
super.rearrangeTemplate(ByteMaxShuffle.class,
(ByteMaxShuffle) s); // specialize
}
@Override
@ForceInline
public ByteMaxVector rearrange(VectorShuffle<Byte> shuffle,
VectorMask<Byte> m) {
return (ByteMaxVector)
super.rearrangeTemplate(ByteMaxShuffle.class,
(ByteMaxShuffle) shuffle,
(ByteMaxMask) m); // specialize
}
@Override
@ForceInline
public ByteMaxVector rearrange(VectorShuffle<Byte> s,
Vector<Byte> v) {
return (ByteMaxVector)
super.rearrangeTemplate(ByteMaxShuffle.class,
(ByteMaxShuffle) s,
(ByteMaxVector) v); // specialize
}
@Override
@ForceInline
public ByteMaxVector selectFrom(Vector<Byte> v) {
return (ByteMaxVector)
super.selectFromTemplate((ByteMaxVector) v); // specialize
}
@Override
@ForceInline
public ByteMaxVector selectFrom(Vector<Byte> v,
VectorMask<Byte> m) {
return (ByteMaxVector)
super.selectFromTemplate((ByteMaxVector) v,
(ByteMaxMask) m); // specialize
}
@ForceInline
@Override
public byte lane(int i) {
if (i < 0 || i >= VLENGTH) {
throw new IllegalArgumentException("Index " + i + " must be zero or positive, and less than " + VLENGTH);
}
return laneHelper(i);
}
public byte laneHelper(int i) {
return (byte) VectorSupport.extract(
VCLASS, ETYPE, VLENGTH,
this, i,
(vec, ix) -> {
byte[] vecarr = vec.vec();
return (long)vecarr[ix];
});
}
@ForceInline
@Override
public ByteMaxVector withLane(int i, byte e) {
if (i < 0 || i >= VLENGTH) {
throw new IllegalArgumentException("Index " + i + " must be zero or positive, and less than " + VLENGTH);
}
return withLaneHelper(i, e);
}
public ByteMaxVector withLaneHelper(int i, byte e) {
return VectorSupport.insert(
VCLASS, ETYPE, VLENGTH,
this, i, (long)e,
(v, ix, bits) -> {
byte[] res = v.vec().clone();
res[ix] = (byte)bits;
return v.vectorFactory(res);
});
}
// Mask
static final class ByteMaxMask extends AbstractMask<Byte> {
static final int VLENGTH = VSPECIES.laneCount(); // used by the JVM
static final Class<Byte> ETYPE = byte.class; // used by the JVM
ByteMaxMask(boolean[] bits) {
this(bits, 0);
}
ByteMaxMask(boolean[] bits, int offset) {
super(prepare(bits, offset));
}
ByteMaxMask(boolean val) {
super(prepare(val));
}
private static boolean[] prepare(boolean[] bits, int offset) {
boolean[] newBits = new boolean[VSPECIES.laneCount()];
for (int i = 0; i < newBits.length; i++) {
newBits[i] = bits[offset + i];
}
return newBits;
}
private static boolean[] prepare(boolean val) {
boolean[] bits = new boolean[VSPECIES.laneCount()];
Arrays.fill(bits, val);
return bits;
}
@ForceInline
final @Override
public ByteSpecies vspecies() {
// ISSUE: This should probably be a @Stable
// field inside AbstractMask, rather than
// a megamorphic method.
return VSPECIES;
}
@ForceInline
boolean[] getBits() {
return (boolean[])getPayload();
}
@Override
ByteMaxMask uOp(MUnOp f) {
boolean[] res = new boolean[vspecies().laneCount()];
boolean[] bits = getBits();
for (int i = 0; i < res.length; i++) {
res[i] = f.apply(i, bits[i]);
}
return new ByteMaxMask(res);
}
@Override
ByteMaxMask bOp(VectorMask<Byte> m, MBinOp f) {
boolean[] res = new boolean[vspecies().laneCount()];
boolean[] bits = getBits();
boolean[] mbits = ((ByteMaxMask)m).getBits();
for (int i = 0; i < res.length; i++) {
res[i] = f.apply(i, bits[i], mbits[i]);
}
return new ByteMaxMask(res);
}
@ForceInline
@Override
public final
ByteMaxVector toVector() {
return (ByteMaxVector) super.toVectorTemplate(); // specialize
}
@Override
@ForceInline
public <E> VectorMask<E> cast(VectorSpecies<E> s) {
AbstractSpecies<E> species = (AbstractSpecies<E>) s;
if (length() != species.laneCount())
throw new IllegalArgumentException("VectorMask length and species length differ");
boolean[] maskArray = toArray();
// enum-switches don't optimize properly JDK-8161245
switch (species.laneType.switchKey) {
case LaneType.SK_BYTE:
return new ByteMaxVector.ByteMaxMask(maskArray).check(species);
case LaneType.SK_SHORT:
return new ShortMaxVector.ShortMaxMask(maskArray).check(species);
case LaneType.SK_INT:
return new IntMaxVector.IntMaxMask(maskArray).check(species);
case LaneType.SK_LONG:
return new LongMaxVector.LongMaxMask(maskArray).check(species);
case LaneType.SK_FLOAT:
return new FloatMaxVector.FloatMaxMask(maskArray).check(species);
case LaneType.SK_DOUBLE:
return new DoubleMaxVector.DoubleMaxMask(maskArray).check(species);
}
// Should not reach here.
throw new AssertionError(species);
}
// Unary operations
@Override
@ForceInline
public ByteMaxMask not() {
return xor(maskAll(true));
}
// Binary operations
@Override
@ForceInline
public ByteMaxMask and(VectorMask<Byte> mask) {
Objects.requireNonNull(mask);
ByteMaxMask m = (ByteMaxMask)mask;
return VectorSupport.binaryOp(VECTOR_OP_AND, ByteMaxMask.class, byte.class, VLENGTH,
this, m,
(m1, m2) -> m1.bOp(m2, (i, a, b) -> a & b));
}
@Override
@ForceInline
public ByteMaxMask or(VectorMask<Byte> mask) {
Objects.requireNonNull(mask);
ByteMaxMask m = (ByteMaxMask)mask;
return VectorSupport.binaryOp(VECTOR_OP_OR, ByteMaxMask.class, byte.class, VLENGTH,
this, m,
(m1, m2) -> m1.bOp(m2, (i, a, b) -> a | b));
}
@ForceInline
/* package-private */
ByteMaxMask xor(VectorMask<Byte> mask) {
Objects.requireNonNull(mask);
ByteMaxMask m = (ByteMaxMask)mask;
return VectorSupport.binaryOp(VECTOR_OP_XOR, ByteMaxMask.class, byte.class, VLENGTH,
this, m,
(m1, m2) -> m1.bOp(m2, (i, a, b) -> a ^ b));
}
// Reductions
@Override
@ForceInline
public boolean anyTrue() {
return VectorSupport.test(BT_ne, ByteMaxMask.class, byte.class, VLENGTH,
this, vspecies().maskAll(true),
(m, __) -> anyTrueHelper(((ByteMaxMask)m).getBits()));
}
@Override
@ForceInline
public boolean allTrue() {
return VectorSupport.test(BT_overflow, ByteMaxMask.class, byte.class, VLENGTH,
this, vspecies().maskAll(true),
(m, __) -> allTrueHelper(((ByteMaxMask)m).getBits()));
}
@ForceInline
/*package-private*/
static ByteMaxMask maskAll(boolean bit) {
return VectorSupport.broadcastCoerced(ByteMaxMask.class, byte.class, VLENGTH,
(bit ? -1 : 0), null,
(v, __) -> (v != 0 ? TRUE_MASK : FALSE_MASK));
}
private static final ByteMaxMask TRUE_MASK = new ByteMaxMask(true);
private static final ByteMaxMask FALSE_MASK = new ByteMaxMask(false);
}
// Shuffle
static final class ByteMaxShuffle extends AbstractShuffle<Byte> {
static final int VLENGTH = VSPECIES.laneCount(); // used by the JVM
static final Class<Byte> ETYPE = byte.class; // used by the JVM
ByteMaxShuffle(byte[] reorder) {
super(VLENGTH, reorder);
}
public ByteMaxShuffle(int[] reorder) {
super(VLENGTH, reorder);
}
public ByteMaxShuffle(int[] reorder, int i) {
super(VLENGTH, reorder, i);
}
public ByteMaxShuffle(IntUnaryOperator fn) {
super(VLENGTH, fn);
}
@Override
public ByteSpecies vspecies() {
return VSPECIES;
}
static {
// There must be enough bits in the shuffle lanes to encode
// VLENGTH valid indexes and VLENGTH exceptional ones.
assert(VLENGTH < Byte.MAX_VALUE);
assert(Byte.MIN_VALUE <= -VLENGTH);
}
static final ByteMaxShuffle IOTA = new ByteMaxShuffle(IDENTITY);
@Override
@ForceInline
public ByteMaxVector toVector() {
return VectorSupport.shuffleToVector(VCLASS, ETYPE, ByteMaxShuffle.class, this, VLENGTH,
(s) -> ((ByteMaxVector)(((AbstractShuffle<Byte>)(s)).toVectorTemplate())));
}
@Override
@ForceInline
public <F> VectorShuffle<F> cast(VectorSpecies<F> s) {
AbstractSpecies<F> species = (AbstractSpecies<F>) s;
if (length() != species.laneCount())
throw new IllegalArgumentException("VectorShuffle length and species length differ");
int[] shuffleArray = toArray();
// enum-switches don't optimize properly JDK-8161245
switch (species.laneType.switchKey) {
case LaneType.SK_BYTE:
return new ByteMaxVector.ByteMaxShuffle(shuffleArray).check(species);
case LaneType.SK_SHORT:
return new ShortMaxVector.ShortMaxShuffle(shuffleArray).check(species);
case LaneType.SK_INT:
return new IntMaxVector.IntMaxShuffle(shuffleArray).check(species);
case LaneType.SK_LONG:
return new LongMaxVector.LongMaxShuffle(shuffleArray).check(species);
case LaneType.SK_FLOAT:
return new FloatMaxVector.FloatMaxShuffle(shuffleArray).check(species);
case LaneType.SK_DOUBLE:
return new DoubleMaxVector.DoubleMaxShuffle(shuffleArray).check(species);
}
// Should not reach here.
throw new AssertionError(species);
}
@ForceInline
@Override
public ByteMaxShuffle rearrange(VectorShuffle<Byte> shuffle) {
ByteMaxShuffle s = (ByteMaxShuffle) shuffle;
byte[] reorder1 = reorder();
byte[] reorder2 = s.reorder();
byte[] r = new byte[reorder1.length];
for (int i = 0; i < reorder1.length; i++) {
int ssi = reorder2[i];
r[i] = reorder1[ssi]; // throws on exceptional index
}
return new ByteMaxShuffle(r);
}
}
// ================================================
// Specialized low-level memory operations.
@ForceInline
@Override
final
ByteVector fromArray0(byte[] a, int offset) {
return super.fromArray0Template(a, offset); // specialize
}
@ForceInline
@Override
final
ByteVector fromByteArray0(byte[] a, int offset) {
return super.fromByteArray0Template(a, offset); // specialize
}
@ForceInline
@Override
final
ByteVector fromByteBuffer0(ByteBuffer bb, int offset) {
return super.fromByteBuffer0Template(bb, offset); // specialize
}
@ForceInline
@Override
final
void intoArray0(byte[] a, int offset) {
super.intoArray0Template(a, offset); // specialize
}
@ForceInline
@Override
final
void intoByteArray0(byte[] a, int offset) {
super.intoByteArray0Template(a, offset); // specialize
}
// End of specialized low-level memory operations.
// ================================================
}
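Because SPECIES_MAX resolves to the widest shape the platform supports, VLENGTH is only a runtime constant here, which is why lane() and withLane() above bounds-check instead of switching on the index. A short loop sketch of the length-agnostic pattern follows (illustrative only, not part of the patch; the class name is invented, and dst and src are assumed to be the same length).

import jdk.incubator.vector.ByteVector;
import jdk.incubator.vector.VectorSpecies;

class ByteMaxLoopSketch {
    static final VectorSpecies<Byte> SPECIES = ByteVector.SPECIES_MAX;

    static void addInPlace(byte[] dst, byte[] src) {
        int i = 0;
        int upper = SPECIES.loopBound(dst.length);     // largest multiple of SPECIES.length()
        for (; i < upper; i += SPECIES.length()) {
            ByteVector.fromArray(SPECIES, dst, i)
                      .add(ByteVector.fromArray(SPECIES, src, i))
                      .intoArray(dst, i);
        }
        for (; i < dst.length; i++) {                  // scalar tail
            dst[i] += src[i];
        }
    }
}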

File diff suppressed because it is too large

View file

@@ -0,0 +1,808 @@
/*
* Copyright (c) 2017, 2020, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation. Oracle designates this
* particular file as subject to the "Classpath" exception as provided
* by Oracle in the LICENSE file that accompanied this code.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*/
package jdk.incubator.vector;
import java.nio.ByteBuffer;
import java.util.Arrays;
import java.util.Objects;
import java.util.function.IntUnaryOperator;
import jdk.internal.vm.annotation.ForceInline;
import jdk.internal.vm.vector.VectorSupport;
import static jdk.internal.vm.vector.VectorSupport.*;
import static jdk.incubator.vector.VectorOperators.*;
// -- This file was mechanically generated: Do not edit! -- //
@SuppressWarnings("cast") // warning: redundant cast
final class Double128Vector extends DoubleVector {
static final DoubleSpecies VSPECIES =
(DoubleSpecies) DoubleVector.SPECIES_128;
static final VectorShape VSHAPE =
VSPECIES.vectorShape();
static final Class<Double128Vector> VCLASS = Double128Vector.class;
static final int VSIZE = VSPECIES.vectorBitSize();
static final int VLENGTH = VSPECIES.laneCount(); // used by the JVM
static final Class<Double> ETYPE = double.class; // used by the JVM
Double128Vector(double[] v) {
super(v);
}
// For compatibility as Double128Vector::new,
// stored into species.vectorFactory.
Double128Vector(Object v) {
this((double[]) v);
}
static final Double128Vector ZERO = new Double128Vector(new double[VLENGTH]);
static final Double128Vector IOTA = new Double128Vector(VSPECIES.iotaArray());
static {
// Warm up a few species caches.
// If we do this too much we will
// get NPEs from bootstrap circularity.
VSPECIES.dummyVector();
VSPECIES.withLanes(LaneType.BYTE);
}
// Specialized extractors
@ForceInline
final @Override
public DoubleSpecies vspecies() {
// ISSUE: This should probably be a @Stable
// field inside AbstractVector, rather than
// a megamorphic method.
return VSPECIES;
}
@ForceInline
@Override
public final Class<Double> elementType() { return double.class; }
@ForceInline
@Override
public final int elementSize() { return Double.SIZE; }
@ForceInline
@Override
public final VectorShape shape() { return VSHAPE; }
@ForceInline
@Override
public final int length() { return VLENGTH; }
@ForceInline
@Override
public final int bitSize() { return VSIZE; }
@ForceInline
@Override
public final int byteSize() { return VSIZE / Byte.SIZE; }
/*package-private*/
@ForceInline
final @Override
double[] vec() {
return (double[])getPayload();
}
// Virtualized constructors
@Override
@ForceInline
public final Double128Vector broadcast(double e) {
return (Double128Vector) super.broadcastTemplate(e); // specialize
}
@Override
@ForceInline
public final Double128Vector broadcast(long e) {
return (Double128Vector) super.broadcastTemplate(e); // specialize
}
@Override
@ForceInline
Double128Mask maskFromArray(boolean[] bits) {
return new Double128Mask(bits);
}
@Override
@ForceInline
Double128Shuffle iotaShuffle() { return Double128Shuffle.IOTA; }
@ForceInline
Double128Shuffle iotaShuffle(int start, int step, boolean wrap) {
if (wrap) {
return (Double128Shuffle)VectorSupport.shuffleIota(ETYPE, Double128Shuffle.class, VSPECIES, VLENGTH, start, step, 1,
(l, lstart, lstep, s) -> s.shuffleFromOp(i -> (VectorIntrinsics.wrapToRange(i*lstep + lstart, l))));
} else {
return (Double128Shuffle)VectorSupport.shuffleIota(ETYPE, Double128Shuffle.class, VSPECIES, VLENGTH, start, step, 0,
(l, lstart, lstep, s) -> s.shuffleFromOp(i -> (i*lstep + lstart)));
}
}
@Override
@ForceInline
Double128Shuffle shuffleFromBytes(byte[] reorder) { return new Double128Shuffle(reorder); }
@Override
@ForceInline
Double128Shuffle shuffleFromArray(int[] indexes, int i) { return new Double128Shuffle(indexes, i); }
@Override
@ForceInline
Double128Shuffle shuffleFromOp(IntUnaryOperator fn) { return new Double128Shuffle(fn); }
// Make a vector of the same species but the given elements:
@ForceInline
final @Override
Double128Vector vectorFactory(double[] vec) {
return new Double128Vector(vec);
}
@ForceInline
final @Override
Byte128Vector asByteVectorRaw() {
return (Byte128Vector) super.asByteVectorRawTemplate(); // specialize
}
@ForceInline
final @Override
AbstractVector<?> asVectorRaw(LaneType laneType) {
return super.asVectorRawTemplate(laneType); // specialize
}
// Unary operator
@ForceInline
final @Override
Double128Vector uOp(FUnOp f) {
return (Double128Vector) super.uOpTemplate(f); // specialize
}
@ForceInline
final @Override
Double128Vector uOp(VectorMask<Double> m, FUnOp f) {
return (Double128Vector)
super.uOpTemplate((Double128Mask)m, f); // specialize
}
// Binary operator
@ForceInline
final @Override
Double128Vector bOp(Vector<Double> v, FBinOp f) {
return (Double128Vector) super.bOpTemplate((Double128Vector)v, f); // specialize
}
@ForceInline
final @Override
Double128Vector bOp(Vector<Double> v,
VectorMask<Double> m, FBinOp f) {
return (Double128Vector)
super.bOpTemplate((Double128Vector)v, (Double128Mask)m,
f); // specialize
}
// Ternary operator
@ForceInline
final @Override
Double128Vector tOp(Vector<Double> v1, Vector<Double> v2, FTriOp f) {
return (Double128Vector)
super.tOpTemplate((Double128Vector)v1, (Double128Vector)v2,
f); // specialize
}
@ForceInline
final @Override
Double128Vector tOp(Vector<Double> v1, Vector<Double> v2,
VectorMask<Double> m, FTriOp f) {
return (Double128Vector)
super.tOpTemplate((Double128Vector)v1, (Double128Vector)v2,
(Double128Mask)m, f); // specialize
}
@ForceInline
final @Override
double rOp(double v, FBinOp f) {
return super.rOpTemplate(v, f); // specialize
}
@Override
@ForceInline
public final <F>
Vector<F> convertShape(VectorOperators.Conversion<Double,F> conv,
VectorSpecies<F> rsp, int part) {
return super.convertShapeTemplate(conv, rsp, part); // specialize
}
@Override
@ForceInline
public final <F>
Vector<F> reinterpretShape(VectorSpecies<F> toSpecies, int part) {
return super.reinterpretShapeTemplate(toSpecies, part); // specialize
}
// Specialized algebraic operations:
// The following definition forces a specialized version of this
// crucial method into the v-table of this class. A call to add()
// will inline to a call to lanewise(ADD,), at which point the JIT
// intrinsic will have the opcode of ADD, plus all the metadata
// for this particular class, enabling it to generate precise
// code.
//
// There is probably no benefit to the JIT to specialize the
// masked or broadcast versions of the lanewise method.
@Override
@ForceInline
public Double128Vector lanewise(Unary op) {
return (Double128Vector) super.lanewiseTemplate(op); // specialize
}
@Override
@ForceInline
public Double128Vector lanewise(Binary op, Vector<Double> v) {
return (Double128Vector) super.lanewiseTemplate(op, v); // specialize
}
/*package-private*/
@Override
@ForceInline
public final
Double128Vector
lanewise(VectorOperators.Ternary op, Vector<Double> v1, Vector<Double> v2) {
return (Double128Vector) super.lanewiseTemplate(op, v1, v2); // specialize
}
@Override
@ForceInline
public final
Double128Vector addIndex(int scale) {
return (Double128Vector) super.addIndexTemplate(scale); // specialize
}
// Type specific horizontal reductions
@Override
@ForceInline
public final double reduceLanes(VectorOperators.Associative op) {
return super.reduceLanesTemplate(op); // specialized
}
@Override
@ForceInline
public final double reduceLanes(VectorOperators.Associative op,
VectorMask<Double> m) {
return super.reduceLanesTemplate(op, m); // specialized
}
@Override
@ForceInline
public final long reduceLanesToLong(VectorOperators.Associative op) {
return (long) super.reduceLanesTemplate(op); // specialized
}
@Override
@ForceInline
public final long reduceLanesToLong(VectorOperators.Associative op,
VectorMask<Double> m) {
return (long) super.reduceLanesTemplate(op, m); // specialized
}
@Override
@ForceInline
public VectorShuffle<Double> toShuffle() {
double[] a = toArray();
int[] sa = new int[a.length];
for (int i = 0; i < a.length; i++) {
sa[i] = (int) a[i];
}
return VectorShuffle.fromArray(VSPECIES, sa, 0);
}
// Specialized unary testing
@Override
@ForceInline
public final Double128Mask test(Test op) {
return super.testTemplate(Double128Mask.class, op); // specialize
}
// Specialized comparisons
@Override
@ForceInline
public final Double128Mask compare(Comparison op, Vector<Double> v) {
return super.compareTemplate(Double128Mask.class, op, v); // specialize
}
@Override
@ForceInline
public final Double128Mask compare(Comparison op, double s) {
return super.compareTemplate(Double128Mask.class, op, s); // specialize
}
@Override
@ForceInline
public final Double128Mask compare(Comparison op, long s) {
return super.compareTemplate(Double128Mask.class, op, s); // specialize
}
@Override
@ForceInline
public Double128Vector blend(Vector<Double> v, VectorMask<Double> m) {
return (Double128Vector)
super.blendTemplate(Double128Mask.class,
(Double128Vector) v,
(Double128Mask) m); // specialize
}
@Override
@ForceInline
public Double128Vector slice(int origin, Vector<Double> v) {
return (Double128Vector) super.sliceTemplate(origin, v); // specialize
}
@Override
@ForceInline
public Double128Vector slice(int origin) {
if ((origin < 0) || (origin >= VLENGTH)) {
throw new ArrayIndexOutOfBoundsException("Index " + origin + " out of bounds for vector length " + VLENGTH);
} else {
Double128Shuffle Iota = iotaShuffle();
VectorMask<Double> BlendMask = Iota.toVector().compare(VectorOperators.LT, (broadcast((double)(VLENGTH-origin))));
Iota = iotaShuffle(origin, 1, true);
return ZERO.blend(this.rearrange(Iota), BlendMask);
}
}
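    // A worked sketch of the blend-based slice above for this 2-lane shape,
    // assuming a vector holding the hypothetical values [a, b] and origin == 1:
    //
    //   BlendMask = iota [0, 1] < (VLENGTH - origin) = 1   ->  [true, false]
    //   Iota      = iotaShuffle(1, 1, wrap = true)         ->  indexes [1, 0]
    //   rearrange(Iota)                                     ->  [b, a]
    //   ZERO.blend([b, a], [true, false])                   ->  [b, 0.0]
    //
    // so the lane shifted in from past the end is filled with zero, matching the
    // single-argument slice contract.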
@Override
@ForceInline
public Double128Vector unslice(int origin, Vector<Double> w, int part) {
return (Double128Vector) super.unsliceTemplate(origin, w, part); // specialize
}
@Override
@ForceInline
public Double128Vector unslice(int origin, Vector<Double> w, int part, VectorMask<Double> m) {
return (Double128Vector)
super.unsliceTemplate(Double128Mask.class,
origin, w, part,
(Double128Mask) m); // specialize
}
@Override
@ForceInline
public Double128Vector unslice(int origin) {
if ((origin < 0) || (origin >= VLENGTH)) {
throw new ArrayIndexOutOfBoundsException("Index " + origin + " out of bounds for vector length " + VLENGTH);
} else {
Double128Shuffle Iota = iotaShuffle();
VectorMask<Double> BlendMask = Iota.toVector().compare(VectorOperators.GE, (broadcast((double)(origin))));
Iota = iotaShuffle(-origin, 1, true);
return ZERO.blend(this.rearrange(Iota), BlendMask);
}
}
@Override
@ForceInline
public Double128Vector rearrange(VectorShuffle<Double> s) {
return (Double128Vector)
super.rearrangeTemplate(Double128Shuffle.class,
(Double128Shuffle) s); // specialize
}
@Override
@ForceInline
public Double128Vector rearrange(VectorShuffle<Double> shuffle,
VectorMask<Double> m) {
return (Double128Vector)
super.rearrangeTemplate(Double128Shuffle.class,
(Double128Shuffle) shuffle,
(Double128Mask) m); // specialize
}
@Override
@ForceInline
public Double128Vector rearrange(VectorShuffle<Double> s,
Vector<Double> v) {
return (Double128Vector)
super.rearrangeTemplate(Double128Shuffle.class,
(Double128Shuffle) s,
(Double128Vector) v); // specialize
}
@Override
@ForceInline
public Double128Vector selectFrom(Vector<Double> v) {
return (Double128Vector)
super.selectFromTemplate((Double128Vector) v); // specialize
}
@Override
@ForceInline
public Double128Vector selectFrom(Vector<Double> v,
VectorMask<Double> m) {
return (Double128Vector)
super.selectFromTemplate((Double128Vector) v,
(Double128Mask) m); // specialize
}
@ForceInline
@Override
public double lane(int i) {
long bits;
switch(i) {
case 0: bits = laneHelper(0); break;
case 1: bits = laneHelper(1); break;
default: throw new IllegalArgumentException("Index " + i + " must be zero or positive, and less than " + VLENGTH);
}
return Double.longBitsToDouble(bits);
}
public long laneHelper(int i) {
return (long) VectorSupport.extract(
VCLASS, ETYPE, VLENGTH,
this, i,
(vec, ix) -> {
double[] vecarr = vec.vec();
return (long)Double.doubleToLongBits(vecarr[ix]);
});
}
@ForceInline
@Override
public Double128Vector withLane(int i, double e) {
switch(i) {
case 0: return withLaneHelper(0, e);
case 1: return withLaneHelper(1, e);
default: throw new IllegalArgumentException("Index " + i + " must be zero or positive, and less than " + VLENGTH);
}
}
public Double128Vector withLaneHelper(int i, double e) {
return VectorSupport.insert(
VCLASS, ETYPE, VLENGTH,
this, i, (long)Double.doubleToLongBits(e),
(v, ix, bits) -> {
double[] res = v.vec().clone();
res[ix] = Double.longBitsToDouble((long)bits);
return v.vectorFactory(res);
});
}
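    // A small sketch of the round trip above, with hypothetical locals v, w, x:
    // both helpers traffic in the raw long bits of the double so that
    // VectorSupport.extract/insert can pass the lane value as a single long.
    //
    //   DoubleVector v = DoubleVector.zero(DoubleVector.SPECIES_128);
    //   DoubleVector w = v.withLane(1, 3.5);   // doubleToLongBits(3.5) travels as a long
    //   double x = w.lane(1);                  // longBitsToDouble(...) yields 3.5 again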
// Mask
static final class Double128Mask extends AbstractMask<Double> {
static final int VLENGTH = VSPECIES.laneCount(); // used by the JVM
static final Class<Double> ETYPE = double.class; // used by the JVM
Double128Mask(boolean[] bits) {
this(bits, 0);
}
Double128Mask(boolean[] bits, int offset) {
super(prepare(bits, offset));
}
Double128Mask(boolean val) {
super(prepare(val));
}
private static boolean[] prepare(boolean[] bits, int offset) {
boolean[] newBits = new boolean[VSPECIES.laneCount()];
for (int i = 0; i < newBits.length; i++) {
newBits[i] = bits[offset + i];
}
return newBits;
}
private static boolean[] prepare(boolean val) {
boolean[] bits = new boolean[VSPECIES.laneCount()];
Arrays.fill(bits, val);
return bits;
}
@ForceInline
final @Override
public DoubleSpecies vspecies() {
// ISSUE: This should probably be a @Stable
// field inside AbstractMask, rather than
// a megamorphic method.
return VSPECIES;
}
@ForceInline
boolean[] getBits() {
return (boolean[])getPayload();
}
@Override
Double128Mask uOp(MUnOp f) {
boolean[] res = new boolean[vspecies().laneCount()];
boolean[] bits = getBits();
for (int i = 0; i < res.length; i++) {
res[i] = f.apply(i, bits[i]);
}
return new Double128Mask(res);
}
@Override
Double128Mask bOp(VectorMask<Double> m, MBinOp f) {
boolean[] res = new boolean[vspecies().laneCount()];
boolean[] bits = getBits();
boolean[] mbits = ((Double128Mask)m).getBits();
for (int i = 0; i < res.length; i++) {
res[i] = f.apply(i, bits[i], mbits[i]);
}
return new Double128Mask(res);
}
@ForceInline
@Override
public final
Double128Vector toVector() {
return (Double128Vector) super.toVectorTemplate(); // specialize
}
@Override
@ForceInline
public <E> VectorMask<E> cast(VectorSpecies<E> s) {
AbstractSpecies<E> species = (AbstractSpecies<E>) s;
if (length() != species.laneCount())
throw new IllegalArgumentException("VectorMask length and species length differ");
boolean[] maskArray = toArray();
// enum-switches don't optimize properly JDK-8161245
switch (species.laneType.switchKey) {
case LaneType.SK_BYTE:
return new Byte128Vector.Byte128Mask(maskArray).check(species);
case LaneType.SK_SHORT:
return new Short128Vector.Short128Mask(maskArray).check(species);
case LaneType.SK_INT:
return new Int128Vector.Int128Mask(maskArray).check(species);
case LaneType.SK_LONG:
return new Long128Vector.Long128Mask(maskArray).check(species);
case LaneType.SK_FLOAT:
return new Float128Vector.Float128Mask(maskArray).check(species);
case LaneType.SK_DOUBLE:
return new Double128Vector.Double128Mask(maskArray).check(species);
}
// Should not reach here.
throw new AssertionError(species);
}
// Unary operations
@Override
@ForceInline
public Double128Mask not() {
return xor(maskAll(true));
}
// Binary operations
@Override
@ForceInline
public Double128Mask and(VectorMask<Double> mask) {
Objects.requireNonNull(mask);
Double128Mask m = (Double128Mask)mask;
return VectorSupport.binaryOp(VECTOR_OP_AND, Double128Mask.class, long.class, VLENGTH,
this, m,
(m1, m2) -> m1.bOp(m2, (i, a, b) -> a & b));
}
@Override
@ForceInline
public Double128Mask or(VectorMask<Double> mask) {
Objects.requireNonNull(mask);
Double128Mask m = (Double128Mask)mask;
return VectorSupport.binaryOp(VECTOR_OP_OR, Double128Mask.class, long.class, VLENGTH,
this, m,
(m1, m2) -> m1.bOp(m2, (i, a, b) -> a | b));
}
@ForceInline
/* package-private */
Double128Mask xor(VectorMask<Double> mask) {
Objects.requireNonNull(mask);
Double128Mask m = (Double128Mask)mask;
return VectorSupport.binaryOp(VECTOR_OP_XOR, Double128Mask.class, long.class, VLENGTH,
this, m,
(m1, m2) -> m1.bOp(m2, (i, a, b) -> a ^ b));
}
// Reductions
@Override
@ForceInline
public boolean anyTrue() {
return VectorSupport.test(BT_ne, Double128Mask.class, long.class, VLENGTH,
this, vspecies().maskAll(true),
(m, __) -> anyTrueHelper(((Double128Mask)m).getBits()));
}
@Override
@ForceInline
public boolean allTrue() {
return VectorSupport.test(BT_overflow, Double128Mask.class, long.class, VLENGTH,
this, vspecies().maskAll(true),
(m, __) -> allTrueHelper(((Double128Mask)m).getBits()));
}
@ForceInline
/*package-private*/
static Double128Mask maskAll(boolean bit) {
return VectorSupport.broadcastCoerced(Double128Mask.class, long.class, VLENGTH,
(bit ? -1 : 0), null,
(v, __) -> (v != 0 ? TRUE_MASK : FALSE_MASK));
}
private static final Double128Mask TRUE_MASK = new Double128Mask(true);
private static final Double128Mask FALSE_MASK = new Double128Mask(false);
}
// Shuffle
static final class Double128Shuffle extends AbstractShuffle<Double> {
static final int VLENGTH = VSPECIES.laneCount(); // used by the JVM
static final Class<Double> ETYPE = double.class; // used by the JVM
Double128Shuffle(byte[] reorder) {
super(VLENGTH, reorder);
}
public Double128Shuffle(int[] reorder) {
super(VLENGTH, reorder);
}
public Double128Shuffle(int[] reorder, int i) {
super(VLENGTH, reorder, i);
}
public Double128Shuffle(IntUnaryOperator fn) {
super(VLENGTH, fn);
}
@Override
public DoubleSpecies vspecies() {
return VSPECIES;
}
static {
// There must be enough bits in the shuffle lanes to encode
// VLENGTH valid indexes and VLENGTH exceptional ones.
assert(VLENGTH < Byte.MAX_VALUE);
assert(Byte.MIN_VALUE <= -VLENGTH);
}
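        // With VLENGTH == 2 a byte-sized shuffle lane can hold the valid source
        // indexes {0, 1} and the exceptional (wrapped) indexes {-2, -1}, which is
        // what the asserts above guarantee. A minimal sketch using only valid
        // indexes (the local s is hypothetical):
        //
        //   VectorShuffle<Double> s = VectorShuffle.fromValues(VSPECIES, 1, 0);
        //   // an out-of-range index would instead be stored partially wrapped,
        //   // i.e. as a negative value in [-VLENGTH, -1]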
static final Double128Shuffle IOTA = new Double128Shuffle(IDENTITY);
@Override
@ForceInline
public Double128Vector toVector() {
return VectorSupport.shuffleToVector(VCLASS, ETYPE, Double128Shuffle.class, this, VLENGTH,
(s) -> ((Double128Vector)(((AbstractShuffle<Double>)(s)).toVectorTemplate())));
}
@Override
@ForceInline
public <F> VectorShuffle<F> cast(VectorSpecies<F> s) {
AbstractSpecies<F> species = (AbstractSpecies<F>) s;
if (length() != species.laneCount())
throw new IllegalArgumentException("VectorShuffle length and species length differ");
int[] shuffleArray = toArray();
// enum-switches don't optimize properly JDK-8161245
switch (species.laneType.switchKey) {
case LaneType.SK_BYTE:
return new Byte128Vector.Byte128Shuffle(shuffleArray).check(species);
case LaneType.SK_SHORT:
return new Short128Vector.Short128Shuffle(shuffleArray).check(species);
case LaneType.SK_INT:
return new Int128Vector.Int128Shuffle(shuffleArray).check(species);
case LaneType.SK_LONG:
return new Long128Vector.Long128Shuffle(shuffleArray).check(species);
case LaneType.SK_FLOAT:
return new Float128Vector.Float128Shuffle(shuffleArray).check(species);
case LaneType.SK_DOUBLE:
return new Double128Vector.Double128Shuffle(shuffleArray).check(species);
}
// Should not reach here.
throw new AssertionError(species);
}
@ForceInline
@Override
public Double128Shuffle rearrange(VectorShuffle<Double> shuffle) {
Double128Shuffle s = (Double128Shuffle) shuffle;
byte[] reorder1 = reorder();
byte[] reorder2 = s.reorder();
byte[] r = new byte[reorder1.length];
for (int i = 0; i < reorder1.length; i++) {
int ssi = reorder2[i];
r[i] = reorder1[ssi]; // throws on exceptional index
}
return new Double128Shuffle(r);
}
}
// ================================================
// Specialized low-level memory operations.
@ForceInline
@Override
final
DoubleVector fromArray0(double[] a, int offset) {
return super.fromArray0Template(a, offset); // specialize
}
@ForceInline
@Override
final
DoubleVector fromByteArray0(byte[] a, int offset) {
return super.fromByteArray0Template(a, offset); // specialize
}
@ForceInline
@Override
final
DoubleVector fromByteBuffer0(ByteBuffer bb, int offset) {
return super.fromByteBuffer0Template(bb, offset); // specialize
}
@ForceInline
@Override
final
void intoArray0(double[] a, int offset) {
super.intoArray0Template(a, offset); // specialize
}
@ForceInline
@Override
final
void intoByteArray0(byte[] a, int offset) {
super.intoByteArray0Template(a, offset); // specialize
}
// End of specialized low-level memory operations.
// ================================================
}


@ -0,0 +1,812 @@
/*
* Copyright (c) 2017, 2020, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation. Oracle designates this
* particular file as subject to the "Classpath" exception as provided
* by Oracle in the LICENSE file that accompanied this code.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*/
package jdk.incubator.vector;
import java.nio.ByteBuffer;
import java.util.Arrays;
import java.util.Objects;
import java.util.function.IntUnaryOperator;
import jdk.internal.vm.annotation.ForceInline;
import jdk.internal.vm.vector.VectorSupport;
import static jdk.internal.vm.vector.VectorSupport.*;
import static jdk.incubator.vector.VectorOperators.*;
// -- This file was mechanically generated: Do not edit! -- //
@SuppressWarnings("cast") // warning: redundant cast
final class Double256Vector extends DoubleVector {
static final DoubleSpecies VSPECIES =
(DoubleSpecies) DoubleVector.SPECIES_256;
static final VectorShape VSHAPE =
VSPECIES.vectorShape();
static final Class<Double256Vector> VCLASS = Double256Vector.class;
static final int VSIZE = VSPECIES.vectorBitSize();
static final int VLENGTH = VSPECIES.laneCount(); // used by the JVM
static final Class<Double> ETYPE = double.class; // used by the JVM
Double256Vector(double[] v) {
super(v);
}
// For compatibility as Double256Vector::new,
// stored into species.vectorFactory.
Double256Vector(Object v) {
this((double[]) v);
}
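    // A brief sketch of why the Object overload above is useful: the constructor
    // reference can then stand in for a generic payload-to-vector factory (the
    // local name factory is hypothetical, and the exact functional type stored by
    // the species is not spelled out here):
    //
    //   java.util.function.Function<Object, Double256Vector> factory = Double256Vector::new;
    //   Double256Vector v = factory.apply(new double[VLENGTH]);   // payload arrives as Object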
static final Double256Vector ZERO = new Double256Vector(new double[VLENGTH]);
static final Double256Vector IOTA = new Double256Vector(VSPECIES.iotaArray());
static {
// Warm up a few species caches.
// If we do this too much we will
// get NPEs from bootstrap circularity.
VSPECIES.dummyVector();
VSPECIES.withLanes(LaneType.BYTE);
}
// Specialized extractors
@ForceInline
final @Override
public DoubleSpecies vspecies() {
// ISSUE: This should probably be a @Stable
// field inside AbstractVector, rather than
// a megamorphic method.
return VSPECIES;
}
@ForceInline
@Override
public final Class<Double> elementType() { return double.class; }
@ForceInline
@Override
public final int elementSize() { return Double.SIZE; }
@ForceInline
@Override
public final VectorShape shape() { return VSHAPE; }
@ForceInline
@Override
public final int length() { return VLENGTH; }
@ForceInline
@Override
public final int bitSize() { return VSIZE; }
@ForceInline
@Override
public final int byteSize() { return VSIZE / Byte.SIZE; }
/*package-private*/
@ForceInline
final @Override
double[] vec() {
return (double[])getPayload();
}
// Virtualized constructors
@Override
@ForceInline
public final Double256Vector broadcast(double e) {
return (Double256Vector) super.broadcastTemplate(e); // specialize
}
@Override
@ForceInline
public final Double256Vector broadcast(long e) {
return (Double256Vector) super.broadcastTemplate(e); // specialize
}
@Override
@ForceInline
Double256Mask maskFromArray(boolean[] bits) {
return new Double256Mask(bits);
}
@Override
@ForceInline
Double256Shuffle iotaShuffle() { return Double256Shuffle.IOTA; }
@ForceInline
Double256Shuffle iotaShuffle(int start, int step, boolean wrap) {
if (wrap) {
return (Double256Shuffle)VectorSupport.shuffleIota(ETYPE, Double256Shuffle.class, VSPECIES, VLENGTH, start, step, 1,
(l, lstart, lstep, s) -> s.shuffleFromOp(i -> (VectorIntrinsics.wrapToRange(i*lstep + lstart, l))));
} else {
return (Double256Shuffle)VectorSupport.shuffleIota(ETYPE, Double256Shuffle.class, VSPECIES, VLENGTH, start, step, 0,
(l, lstart, lstep, s) -> s.shuffleFromOp(i -> (i*lstep + lstart)));
}
}
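    // A worked sketch of the wrapping path above for this 4-lane shape, assuming
    // start == 1, step == 1, wrap == true: the fallback lambda computes
    // i*step + start for i = 0..3, giving 1, 2, 3, 4, and wrapToRange reduces 4
    // back into range, so the resulting shuffle conceptually holds the indexes
    // [1, 2, 3, 0].
    //
    //   Double256Shuffle s = iotaShuffle(1, 1, true);   // conceptually [1, 2, 3, 0]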
@Override
@ForceInline
Double256Shuffle shuffleFromBytes(byte[] reorder) { return new Double256Shuffle(reorder); }
@Override
@ForceInline
Double256Shuffle shuffleFromArray(int[] indexes, int i) { return new Double256Shuffle(indexes, i); }
@Override
@ForceInline
Double256Shuffle shuffleFromOp(IntUnaryOperator fn) { return new Double256Shuffle(fn); }
// Make a vector of the same species but the given elements:
@ForceInline
final @Override
Double256Vector vectorFactory(double[] vec) {
return new Double256Vector(vec);
}
@ForceInline
final @Override
Byte256Vector asByteVectorRaw() {
return (Byte256Vector) super.asByteVectorRawTemplate(); // specialize
}
@ForceInline
final @Override
AbstractVector<?> asVectorRaw(LaneType laneType) {
return super.asVectorRawTemplate(laneType); // specialize
}
// Unary operator
@ForceInline
final @Override
Double256Vector uOp(FUnOp f) {
return (Double256Vector) super.uOpTemplate(f); // specialize
}
@ForceInline
final @Override
Double256Vector uOp(VectorMask<Double> m, FUnOp f) {
return (Double256Vector)
super.uOpTemplate((Double256Mask)m, f); // specialize
}
// Binary operator
@ForceInline
final @Override
Double256Vector bOp(Vector<Double> v, FBinOp f) {
return (Double256Vector) super.bOpTemplate((Double256Vector)v, f); // specialize
}
@ForceInline
final @Override
Double256Vector bOp(Vector<Double> v,
VectorMask<Double> m, FBinOp f) {
return (Double256Vector)
super.bOpTemplate((Double256Vector)v, (Double256Mask)m,
f); // specialize
}
// Ternary operator
@ForceInline
final @Override
Double256Vector tOp(Vector<Double> v1, Vector<Double> v2, FTriOp f) {
return (Double256Vector)
super.tOpTemplate((Double256Vector)v1, (Double256Vector)v2,
f); // specialize
}
@ForceInline
final @Override
Double256Vector tOp(Vector<Double> v1, Vector<Double> v2,
VectorMask<Double> m, FTriOp f) {
return (Double256Vector)
super.tOpTemplate((Double256Vector)v1, (Double256Vector)v2,
(Double256Mask)m, f); // specialize
}
@ForceInline
final @Override
double rOp(double v, FBinOp f) {
return super.rOpTemplate(v, f); // specialize
}
@Override
@ForceInline
public final <F>
Vector<F> convertShape(VectorOperators.Conversion<Double,F> conv,
VectorSpecies<F> rsp, int part) {
return super.convertShapeTemplate(conv, rsp, part); // specialize
}
@Override
@ForceInline
public final <F>
Vector<F> reinterpretShape(VectorSpecies<F> toSpecies, int part) {
return super.reinterpretShapeTemplate(toSpecies, part); // specialize
}
// Specialized algebraic operations:
// The following definition forces a specialized version of this
// crucial method into the v-table of this class. A call to add()
    // will inline to a call to lanewise(ADD, v), at which point the JIT
// intrinsic will have the opcode of ADD, plus all the metadata
// for this particular class, enabling it to generate precise
// code.
//
// There is probably no benefit to the JIT to specialize the
// masked or broadcast versions of the lanewise method.
@Override
@ForceInline
public Double256Vector lanewise(Unary op) {
return (Double256Vector) super.lanewiseTemplate(op); // specialize
}
@Override
@ForceInline
public Double256Vector lanewise(Binary op, Vector<Double> v) {
return (Double256Vector) super.lanewiseTemplate(op, v); // specialize
}
/*package-private*/
@Override
@ForceInline
public final
Double256Vector
lanewise(VectorOperators.Ternary op, Vector<Double> v1, Vector<Double> v2) {
return (Double256Vector) super.lanewiseTemplate(op, v1, v2); // specialize
}
@Override
@ForceInline
public final
Double256Vector addIndex(int scale) {
return (Double256Vector) super.addIndexTemplate(scale); // specialize
}
// Type specific horizontal reductions
@Override
@ForceInline
public final double reduceLanes(VectorOperators.Associative op) {
return super.reduceLanesTemplate(op); // specialized
}
@Override
@ForceInline
public final double reduceLanes(VectorOperators.Associative op,
VectorMask<Double> m) {
return super.reduceLanesTemplate(op, m); // specialized
}
@Override
@ForceInline
public final long reduceLanesToLong(VectorOperators.Associative op) {
return (long) super.reduceLanesTemplate(op); // specialized
}
@Override
@ForceInline
public final long reduceLanesToLong(VectorOperators.Associative op,
VectorMask<Double> m) {
return (long) super.reduceLanesTemplate(op, m); // specialized
}
@Override
@ForceInline
public VectorShuffle<Double> toShuffle() {
double[] a = toArray();
int[] sa = new int[a.length];
for (int i = 0; i < a.length; i++) {
sa[i] = (int) a[i];
}
return VectorShuffle.fromArray(VSPECIES, sa, 0);
}
// Specialized unary testing
@Override
@ForceInline
public final Double256Mask test(Test op) {
return super.testTemplate(Double256Mask.class, op); // specialize
}
// Specialized comparisons
@Override
@ForceInline
public final Double256Mask compare(Comparison op, Vector<Double> v) {
return super.compareTemplate(Double256Mask.class, op, v); // specialize
}
@Override
@ForceInline
public final Double256Mask compare(Comparison op, double s) {
return super.compareTemplate(Double256Mask.class, op, s); // specialize
}
@Override
@ForceInline
public final Double256Mask compare(Comparison op, long s) {
return super.compareTemplate(Double256Mask.class, op, s); // specialize
}
@Override
@ForceInline
public Double256Vector blend(Vector<Double> v, VectorMask<Double> m) {
return (Double256Vector)
super.blendTemplate(Double256Mask.class,
(Double256Vector) v,
(Double256Mask) m); // specialize
}
@Override
@ForceInline
public Double256Vector slice(int origin, Vector<Double> v) {
return (Double256Vector) super.sliceTemplate(origin, v); // specialize
}
@Override
@ForceInline
public Double256Vector slice(int origin) {
if ((origin < 0) || (origin >= VLENGTH)) {
throw new ArrayIndexOutOfBoundsException("Index " + origin + " out of bounds for vector length " + VLENGTH);
} else {
Double256Shuffle Iota = iotaShuffle();
VectorMask<Double> BlendMask = Iota.toVector().compare(VectorOperators.LT, (broadcast((double)(VLENGTH-origin))));
Iota = iotaShuffle(origin, 1, true);
return ZERO.blend(this.rearrange(Iota), BlendMask);
}
}
@Override
@ForceInline
public Double256Vector unslice(int origin, Vector<Double> w, int part) {
return (Double256Vector) super.unsliceTemplate(origin, w, part); // specialize
}
@Override
@ForceInline
public Double256Vector unslice(int origin, Vector<Double> w, int part, VectorMask<Double> m) {
return (Double256Vector)
super.unsliceTemplate(Double256Mask.class,
origin, w, part,
(Double256Mask) m); // specialize
}
@Override
@ForceInline
public Double256Vector unslice(int origin) {
if ((origin < 0) || (origin >= VLENGTH)) {
throw new ArrayIndexOutOfBoundsException("Index " + origin + " out of bounds for vector length " + VLENGTH);
} else {
Double256Shuffle Iota = iotaShuffle();
VectorMask<Double> BlendMask = Iota.toVector().compare(VectorOperators.GE, (broadcast((double)(origin))));
Iota = iotaShuffle(-origin, 1, true);
return ZERO.blend(this.rearrange(Iota), BlendMask);
}
}
@Override
@ForceInline
public Double256Vector rearrange(VectorShuffle<Double> s) {
return (Double256Vector)
super.rearrangeTemplate(Double256Shuffle.class,
(Double256Shuffle) s); // specialize
}
@Override
@ForceInline
public Double256Vector rearrange(VectorShuffle<Double> shuffle,
VectorMask<Double> m) {
return (Double256Vector)
super.rearrangeTemplate(Double256Shuffle.class,
(Double256Shuffle) shuffle,
(Double256Mask) m); // specialize
}
@Override
@ForceInline
public Double256Vector rearrange(VectorShuffle<Double> s,
Vector<Double> v) {
return (Double256Vector)
super.rearrangeTemplate(Double256Shuffle.class,
(Double256Shuffle) s,
(Double256Vector) v); // specialize
}
@Override
@ForceInline
public Double256Vector selectFrom(Vector<Double> v) {
return (Double256Vector)
super.selectFromTemplate((Double256Vector) v); // specialize
}
@Override
@ForceInline
public Double256Vector selectFrom(Vector<Double> v,
VectorMask<Double> m) {
return (Double256Vector)
super.selectFromTemplate((Double256Vector) v,
(Double256Mask) m); // specialize
}
@ForceInline
@Override
public double lane(int i) {
long bits;
switch(i) {
case 0: bits = laneHelper(0); break;
case 1: bits = laneHelper(1); break;
case 2: bits = laneHelper(2); break;
case 3: bits = laneHelper(3); break;
default: throw new IllegalArgumentException("Index " + i + " must be zero or positive, and less than " + VLENGTH);
}
return Double.longBitsToDouble(bits);
}
public long laneHelper(int i) {
return (long) VectorSupport.extract(
VCLASS, ETYPE, VLENGTH,
this, i,
(vec, ix) -> {
double[] vecarr = vec.vec();
return (long)Double.doubleToLongBits(vecarr[ix]);
});
}
@ForceInline
@Override
public Double256Vector withLane(int i, double e) {
switch(i) {
case 0: return withLaneHelper(0, e);
case 1: return withLaneHelper(1, e);
case 2: return withLaneHelper(2, e);
case 3: return withLaneHelper(3, e);
default: throw new IllegalArgumentException("Index " + i + " must be zero or positive, and less than " + VLENGTH);
}
}
public Double256Vector withLaneHelper(int i, double e) {
return VectorSupport.insert(
VCLASS, ETYPE, VLENGTH,
this, i, (long)Double.doubleToLongBits(e),
(v, ix, bits) -> {
double[] res = v.vec().clone();
res[ix] = Double.longBitsToDouble((long)bits);
return v.vectorFactory(res);
});
}
// Mask
static final class Double256Mask extends AbstractMask<Double> {
static final int VLENGTH = VSPECIES.laneCount(); // used by the JVM
static final Class<Double> ETYPE = double.class; // used by the JVM
Double256Mask(boolean[] bits) {
this(bits, 0);
}
Double256Mask(boolean[] bits, int offset) {
super(prepare(bits, offset));
}
Double256Mask(boolean val) {
super(prepare(val));
}
private static boolean[] prepare(boolean[] bits, int offset) {
boolean[] newBits = new boolean[VSPECIES.laneCount()];
for (int i = 0; i < newBits.length; i++) {
newBits[i] = bits[offset + i];
}
return newBits;
}
private static boolean[] prepare(boolean val) {
boolean[] bits = new boolean[VSPECIES.laneCount()];
Arrays.fill(bits, val);
return bits;
}
@ForceInline
final @Override
public DoubleSpecies vspecies() {
// ISSUE: This should probably be a @Stable
// field inside AbstractMask, rather than
// a megamorphic method.
return VSPECIES;
}
@ForceInline
boolean[] getBits() {
return (boolean[])getPayload();
}
@Override
Double256Mask uOp(MUnOp f) {
boolean[] res = new boolean[vspecies().laneCount()];
boolean[] bits = getBits();
for (int i = 0; i < res.length; i++) {
res[i] = f.apply(i, bits[i]);
}
return new Double256Mask(res);
}
@Override
Double256Mask bOp(VectorMask<Double> m, MBinOp f) {
boolean[] res = new boolean[vspecies().laneCount()];
boolean[] bits = getBits();
boolean[] mbits = ((Double256Mask)m).getBits();
for (int i = 0; i < res.length; i++) {
res[i] = f.apply(i, bits[i], mbits[i]);
}
return new Double256Mask(res);
}
@ForceInline
@Override
public final
Double256Vector toVector() {
return (Double256Vector) super.toVectorTemplate(); // specialize
}
@Override
@ForceInline
public <E> VectorMask<E> cast(VectorSpecies<E> s) {
AbstractSpecies<E> species = (AbstractSpecies<E>) s;
if (length() != species.laneCount())
throw new IllegalArgumentException("VectorMask length and species length differ");
boolean[] maskArray = toArray();
// enum-switches don't optimize properly JDK-8161245
switch (species.laneType.switchKey) {
case LaneType.SK_BYTE:
return new Byte256Vector.Byte256Mask(maskArray).check(species);
case LaneType.SK_SHORT:
return new Short256Vector.Short256Mask(maskArray).check(species);
case LaneType.SK_INT:
return new Int256Vector.Int256Mask(maskArray).check(species);
case LaneType.SK_LONG:
return new Long256Vector.Long256Mask(maskArray).check(species);
case LaneType.SK_FLOAT:
return new Float256Vector.Float256Mask(maskArray).check(species);
case LaneType.SK_DOUBLE:
return new Double256Vector.Double256Mask(maskArray).check(species);
}
// Should not reach here.
throw new AssertionError(species);
}
// Unary operations
@Override
@ForceInline
public Double256Mask not() {
return xor(maskAll(true));
}
// Binary operations
@Override
@ForceInline
public Double256Mask and(VectorMask<Double> mask) {
Objects.requireNonNull(mask);
Double256Mask m = (Double256Mask)mask;
return VectorSupport.binaryOp(VECTOR_OP_AND, Double256Mask.class, long.class, VLENGTH,
this, m,
(m1, m2) -> m1.bOp(m2, (i, a, b) -> a & b));
}
@Override
@ForceInline
public Double256Mask or(VectorMask<Double> mask) {
Objects.requireNonNull(mask);
Double256Mask m = (Double256Mask)mask;
return VectorSupport.binaryOp(VECTOR_OP_OR, Double256Mask.class, long.class, VLENGTH,
this, m,
(m1, m2) -> m1.bOp(m2, (i, a, b) -> a | b));
}
@ForceInline
/* package-private */
Double256Mask xor(VectorMask<Double> mask) {
Objects.requireNonNull(mask);
Double256Mask m = (Double256Mask)mask;
return VectorSupport.binaryOp(VECTOR_OP_XOR, Double256Mask.class, long.class, VLENGTH,
this, m,
(m1, m2) -> m1.bOp(m2, (i, a, b) -> a ^ b));
}
// Reductions
@Override
@ForceInline
public boolean anyTrue() {
return VectorSupport.test(BT_ne, Double256Mask.class, long.class, VLENGTH,
this, vspecies().maskAll(true),
(m, __) -> anyTrueHelper(((Double256Mask)m).getBits()));
}
@Override
@ForceInline
public boolean allTrue() {
return VectorSupport.test(BT_overflow, Double256Mask.class, long.class, VLENGTH,
this, vspecies().maskAll(true),
(m, __) -> allTrueHelper(((Double256Mask)m).getBits()));
}
@ForceInline
/*package-private*/
static Double256Mask maskAll(boolean bit) {
return VectorSupport.broadcastCoerced(Double256Mask.class, long.class, VLENGTH,
(bit ? -1 : 0), null,
(v, __) -> (v != 0 ? TRUE_MASK : FALSE_MASK));
}
private static final Double256Mask TRUE_MASK = new Double256Mask(true);
private static final Double256Mask FALSE_MASK = new Double256Mask(false);
}
// Shuffle
static final class Double256Shuffle extends AbstractShuffle<Double> {
static final int VLENGTH = VSPECIES.laneCount(); // used by the JVM
static final Class<Double> ETYPE = double.class; // used by the JVM
Double256Shuffle(byte[] reorder) {
super(VLENGTH, reorder);
}
public Double256Shuffle(int[] reorder) {
super(VLENGTH, reorder);
}
public Double256Shuffle(int[] reorder, int i) {
super(VLENGTH, reorder, i);
}
public Double256Shuffle(IntUnaryOperator fn) {
super(VLENGTH, fn);
}
@Override
public DoubleSpecies vspecies() {
return VSPECIES;
}
static {
// There must be enough bits in the shuffle lanes to encode
// VLENGTH valid indexes and VLENGTH exceptional ones.
assert(VLENGTH < Byte.MAX_VALUE);
assert(Byte.MIN_VALUE <= -VLENGTH);
}
static final Double256Shuffle IOTA = new Double256Shuffle(IDENTITY);
@Override
@ForceInline
public Double256Vector toVector() {
return VectorSupport.shuffleToVector(VCLASS, ETYPE, Double256Shuffle.class, this, VLENGTH,
(s) -> ((Double256Vector)(((AbstractShuffle<Double>)(s)).toVectorTemplate())));
}
@Override
@ForceInline
public <F> VectorShuffle<F> cast(VectorSpecies<F> s) {
AbstractSpecies<F> species = (AbstractSpecies<F>) s;
if (length() != species.laneCount())
throw new IllegalArgumentException("VectorShuffle length and species length differ");
int[] shuffleArray = toArray();
// enum-switches don't optimize properly JDK-8161245
switch (species.laneType.switchKey) {
case LaneType.SK_BYTE:
return new Byte256Vector.Byte256Shuffle(shuffleArray).check(species);
case LaneType.SK_SHORT:
return new Short256Vector.Short256Shuffle(shuffleArray).check(species);
case LaneType.SK_INT:
return new Int256Vector.Int256Shuffle(shuffleArray).check(species);
case LaneType.SK_LONG:
return new Long256Vector.Long256Shuffle(shuffleArray).check(species);
case LaneType.SK_FLOAT:
return new Float256Vector.Float256Shuffle(shuffleArray).check(species);
case LaneType.SK_DOUBLE:
return new Double256Vector.Double256Shuffle(shuffleArray).check(species);
}
// Should not reach here.
throw new AssertionError(species);
}
@ForceInline
@Override
public Double256Shuffle rearrange(VectorShuffle<Double> shuffle) {
Double256Shuffle s = (Double256Shuffle) shuffle;
byte[] reorder1 = reorder();
byte[] reorder2 = s.reorder();
byte[] r = new byte[reorder1.length];
for (int i = 0; i < reorder1.length; i++) {
int ssi = reorder2[i];
r[i] = reorder1[ssi]; // throws on exceptional index
}
return new Double256Shuffle(r);
}
}
// ================================================
// Specialized low-level memory operations.
@ForceInline
@Override
final
DoubleVector fromArray0(double[] a, int offset) {
return super.fromArray0Template(a, offset); // specialize
}
@ForceInline
@Override
final
DoubleVector fromByteArray0(byte[] a, int offset) {
return super.fromByteArray0Template(a, offset); // specialize
}
@ForceInline
@Override
final
DoubleVector fromByteBuffer0(ByteBuffer bb, int offset) {
return super.fromByteBuffer0Template(bb, offset); // specialize
}
@ForceInline
@Override
final
void intoArray0(double[] a, int offset) {
super.intoArray0Template(a, offset); // specialize
}
@ForceInline
@Override
final
void intoByteArray0(byte[] a, int offset) {
super.intoByteArray0Template(a, offset); // specialize
}
// End of specialized low-level memory operations.
// ================================================
}


@ -0,0 +1,820 @@
/*
* Copyright (c) 2017, 2020, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation. Oracle designates this
* particular file as subject to the "Classpath" exception as provided
* by Oracle in the LICENSE file that accompanied this code.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*/
package jdk.incubator.vector;
import java.nio.ByteBuffer;
import java.util.Arrays;
import java.util.Objects;
import java.util.function.IntUnaryOperator;
import jdk.internal.vm.annotation.ForceInline;
import jdk.internal.vm.vector.VectorSupport;
import static jdk.internal.vm.vector.VectorSupport.*;
import static jdk.incubator.vector.VectorOperators.*;
// -- This file was mechanically generated: Do not edit! -- //
@SuppressWarnings("cast") // warning: redundant cast
final class Double512Vector extends DoubleVector {
static final DoubleSpecies VSPECIES =
(DoubleSpecies) DoubleVector.SPECIES_512;
static final VectorShape VSHAPE =
VSPECIES.vectorShape();
static final Class<Double512Vector> VCLASS = Double512Vector.class;
static final int VSIZE = VSPECIES.vectorBitSize();
static final int VLENGTH = VSPECIES.laneCount(); // used by the JVM
static final Class<Double> ETYPE = double.class; // used by the JVM
Double512Vector(double[] v) {
super(v);
}
// For compatibility as Double512Vector::new,
// stored into species.vectorFactory.
Double512Vector(Object v) {
this((double[]) v);
}
static final Double512Vector ZERO = new Double512Vector(new double[VLENGTH]);
static final Double512Vector IOTA = new Double512Vector(VSPECIES.iotaArray());
static {
// Warm up a few species caches.
// If we do this too much we will
// get NPEs from bootstrap circularity.
VSPECIES.dummyVector();
VSPECIES.withLanes(LaneType.BYTE);
}
// Specialized extractors
@ForceInline
final @Override
public DoubleSpecies vspecies() {
// ISSUE: This should probably be a @Stable
// field inside AbstractVector, rather than
// a megamorphic method.
return VSPECIES;
}
@ForceInline
@Override
public final Class<Double> elementType() { return double.class; }
@ForceInline
@Override
public final int elementSize() { return Double.SIZE; }
@ForceInline
@Override
public final VectorShape shape() { return VSHAPE; }
@ForceInline
@Override
public final int length() { return VLENGTH; }
@ForceInline
@Override
public final int bitSize() { return VSIZE; }
@ForceInline
@Override
public final int byteSize() { return VSIZE / Byte.SIZE; }
/*package-private*/
@ForceInline
final @Override
double[] vec() {
return (double[])getPayload();
}
// Virtualized constructors
@Override
@ForceInline
public final Double512Vector broadcast(double e) {
return (Double512Vector) super.broadcastTemplate(e); // specialize
}
@Override
@ForceInline
public final Double512Vector broadcast(long e) {
return (Double512Vector) super.broadcastTemplate(e); // specialize
}
@Override
@ForceInline
Double512Mask maskFromArray(boolean[] bits) {
return new Double512Mask(bits);
}
@Override
@ForceInline
Double512Shuffle iotaShuffle() { return Double512Shuffle.IOTA; }
@ForceInline
Double512Shuffle iotaShuffle(int start, int step, boolean wrap) {
if (wrap) {
return (Double512Shuffle)VectorSupport.shuffleIota(ETYPE, Double512Shuffle.class, VSPECIES, VLENGTH, start, step, 1,
(l, lstart, lstep, s) -> s.shuffleFromOp(i -> (VectorIntrinsics.wrapToRange(i*lstep + lstart, l))));
} else {
return (Double512Shuffle)VectorSupport.shuffleIota(ETYPE, Double512Shuffle.class, VSPECIES, VLENGTH, start, step, 0,
(l, lstart, lstep, s) -> s.shuffleFromOp(i -> (i*lstep + lstart)));
}
}
@Override
@ForceInline
Double512Shuffle shuffleFromBytes(byte[] reorder) { return new Double512Shuffle(reorder); }
@Override
@ForceInline
Double512Shuffle shuffleFromArray(int[] indexes, int i) { return new Double512Shuffle(indexes, i); }
@Override
@ForceInline
Double512Shuffle shuffleFromOp(IntUnaryOperator fn) { return new Double512Shuffle(fn); }
// Make a vector of the same species but the given elements:
@ForceInline
final @Override
Double512Vector vectorFactory(double[] vec) {
return new Double512Vector(vec);
}
@ForceInline
final @Override
Byte512Vector asByteVectorRaw() {
return (Byte512Vector) super.asByteVectorRawTemplate(); // specialize
}
@ForceInline
final @Override
AbstractVector<?> asVectorRaw(LaneType laneType) {
return super.asVectorRawTemplate(laneType); // specialize
}
// Unary operator
@ForceInline
final @Override
Double512Vector uOp(FUnOp f) {
return (Double512Vector) super.uOpTemplate(f); // specialize
}
@ForceInline
final @Override
Double512Vector uOp(VectorMask<Double> m, FUnOp f) {
return (Double512Vector)
super.uOpTemplate((Double512Mask)m, f); // specialize
}
// Binary operator
@ForceInline
final @Override
Double512Vector bOp(Vector<Double> v, FBinOp f) {
return (Double512Vector) super.bOpTemplate((Double512Vector)v, f); // specialize
}
@ForceInline
final @Override
Double512Vector bOp(Vector<Double> v,
VectorMask<Double> m, FBinOp f) {
return (Double512Vector)
super.bOpTemplate((Double512Vector)v, (Double512Mask)m,
f); // specialize
}
// Ternary operator
@ForceInline
final @Override
Double512Vector tOp(Vector<Double> v1, Vector<Double> v2, FTriOp f) {
return (Double512Vector)
super.tOpTemplate((Double512Vector)v1, (Double512Vector)v2,
f); // specialize
}
@ForceInline
final @Override
Double512Vector tOp(Vector<Double> v1, Vector<Double> v2,
VectorMask<Double> m, FTriOp f) {
return (Double512Vector)
super.tOpTemplate((Double512Vector)v1, (Double512Vector)v2,
(Double512Mask)m, f); // specialize
}
@ForceInline
final @Override
double rOp(double v, FBinOp f) {
return super.rOpTemplate(v, f); // specialize
}
@Override
@ForceInline
public final <F>
Vector<F> convertShape(VectorOperators.Conversion<Double,F> conv,
VectorSpecies<F> rsp, int part) {
return super.convertShapeTemplate(conv, rsp, part); // specialize
}
@Override
@ForceInline
public final <F>
Vector<F> reinterpretShape(VectorSpecies<F> toSpecies, int part) {
return super.reinterpretShapeTemplate(toSpecies, part); // specialize
}
// Specialized algebraic operations:
// The following definition forces a specialized version of this
// crucial method into the v-table of this class. A call to add()
    // will inline to a call to lanewise(ADD, v), at which point the JIT
// intrinsic will have the opcode of ADD, plus all the metadata
// for this particular class, enabling it to generate precise
// code.
//
// There is probably no benefit to the JIT to specialize the
// masked or broadcast versions of the lanewise method.
@Override
@ForceInline
public Double512Vector lanewise(Unary op) {
return (Double512Vector) super.lanewiseTemplate(op); // specialize
}
@Override
@ForceInline
public Double512Vector lanewise(Binary op, Vector<Double> v) {
return (Double512Vector) super.lanewiseTemplate(op, v); // specialize
}
/*package-private*/
@Override
@ForceInline
public final
Double512Vector
lanewise(VectorOperators.Ternary op, Vector<Double> v1, Vector<Double> v2) {
return (Double512Vector) super.lanewiseTemplate(op, v1, v2); // specialize
}
@Override
@ForceInline
public final
Double512Vector addIndex(int scale) {
return (Double512Vector) super.addIndexTemplate(scale); // specialize
}
// Type specific horizontal reductions
@Override
@ForceInline
public final double reduceLanes(VectorOperators.Associative op) {
return super.reduceLanesTemplate(op); // specialized
}
@Override
@ForceInline
public final double reduceLanes(VectorOperators.Associative op,
VectorMask<Double> m) {
return super.reduceLanesTemplate(op, m); // specialized
}
@Override
@ForceInline
public final long reduceLanesToLong(VectorOperators.Associative op) {
return (long) super.reduceLanesTemplate(op); // specialized
}
@Override
@ForceInline
public final long reduceLanesToLong(VectorOperators.Associative op,
VectorMask<Double> m) {
return (long) super.reduceLanesTemplate(op, m); // specialized
}
@Override
@ForceInline
public VectorShuffle<Double> toShuffle() {
double[] a = toArray();
int[] sa = new int[a.length];
for (int i = 0; i < a.length; i++) {
sa[i] = (int) a[i];
}
return VectorShuffle.fromArray(VSPECIES, sa, 0);
}
// Specialized unary testing
@Override
@ForceInline
public final Double512Mask test(Test op) {
return super.testTemplate(Double512Mask.class, op); // specialize
}
// Specialized comparisons
@Override
@ForceInline
public final Double512Mask compare(Comparison op, Vector<Double> v) {
return super.compareTemplate(Double512Mask.class, op, v); // specialize
}
@Override
@ForceInline
public final Double512Mask compare(Comparison op, double s) {
return super.compareTemplate(Double512Mask.class, op, s); // specialize
}
@Override
@ForceInline
public final Double512Mask compare(Comparison op, long s) {
return super.compareTemplate(Double512Mask.class, op, s); // specialize
}
@Override
@ForceInline
public Double512Vector blend(Vector<Double> v, VectorMask<Double> m) {
return (Double512Vector)
super.blendTemplate(Double512Mask.class,
(Double512Vector) v,
(Double512Mask) m); // specialize
}
@Override
@ForceInline
public Double512Vector slice(int origin, Vector<Double> v) {
return (Double512Vector) super.sliceTemplate(origin, v); // specialize
}
@Override
@ForceInline
public Double512Vector slice(int origin) {
if ((origin < 0) || (origin >= VLENGTH)) {
throw new ArrayIndexOutOfBoundsException("Index " + origin + " out of bounds for vector length " + VLENGTH);
} else {
Double512Shuffle Iota = iotaShuffle();
VectorMask<Double> BlendMask = Iota.toVector().compare(VectorOperators.LT, (broadcast((double)(VLENGTH-origin))));
Iota = iotaShuffle(origin, 1, true);
return ZERO.blend(this.rearrange(Iota), BlendMask);
}
}
@Override
@ForceInline
public Double512Vector unslice(int origin, Vector<Double> w, int part) {
return (Double512Vector) super.unsliceTemplate(origin, w, part); // specialize
}
@Override
@ForceInline
public Double512Vector unslice(int origin, Vector<Double> w, int part, VectorMask<Double> m) {
return (Double512Vector)
super.unsliceTemplate(Double512Mask.class,
origin, w, part,
(Double512Mask) m); // specialize
}
@Override
@ForceInline
public Double512Vector unslice(int origin) {
if ((origin < 0) || (origin >= VLENGTH)) {
throw new ArrayIndexOutOfBoundsException("Index " + origin + " out of bounds for vector length " + VLENGTH);
} else {
Double512Shuffle Iota = iotaShuffle();
VectorMask<Double> BlendMask = Iota.toVector().compare(VectorOperators.GE, (broadcast((double)(origin))));
Iota = iotaShuffle(-origin, 1, true);
return ZERO.blend(this.rearrange(Iota), BlendMask);
}
}
@Override
@ForceInline
public Double512Vector rearrange(VectorShuffle<Double> s) {
return (Double512Vector)
super.rearrangeTemplate(Double512Shuffle.class,
(Double512Shuffle) s); // specialize
}
@Override
@ForceInline
public Double512Vector rearrange(VectorShuffle<Double> shuffle,
VectorMask<Double> m) {
return (Double512Vector)
super.rearrangeTemplate(Double512Shuffle.class,
(Double512Shuffle) shuffle,
(Double512Mask) m); // specialize
}
@Override
@ForceInline
public Double512Vector rearrange(VectorShuffle<Double> s,
Vector<Double> v) {
return (Double512Vector)
super.rearrangeTemplate(Double512Shuffle.class,
(Double512Shuffle) s,
(Double512Vector) v); // specialize
}
@Override
@ForceInline
public Double512Vector selectFrom(Vector<Double> v) {
return (Double512Vector)
super.selectFromTemplate((Double512Vector) v); // specialize
}
@Override
@ForceInline
public Double512Vector selectFrom(Vector<Double> v,
VectorMask<Double> m) {
return (Double512Vector)
super.selectFromTemplate((Double512Vector) v,
(Double512Mask) m); // specialize
}
@ForceInline
@Override
public double lane(int i) {
long bits;
switch(i) {
case 0: bits = laneHelper(0); break;
case 1: bits = laneHelper(1); break;
case 2: bits = laneHelper(2); break;
case 3: bits = laneHelper(3); break;
case 4: bits = laneHelper(4); break;
case 5: bits = laneHelper(5); break;
case 6: bits = laneHelper(6); break;
case 7: bits = laneHelper(7); break;
default: throw new IllegalArgumentException("Index " + i + " must be zero or positive, and less than " + VLENGTH);
}
return Double.longBitsToDouble(bits);
}
public long laneHelper(int i) {
return (long) VectorSupport.extract(
VCLASS, ETYPE, VLENGTH,
this, i,
(vec, ix) -> {
double[] vecarr = vec.vec();
return (long)Double.doubleToLongBits(vecarr[ix]);
});
}
@ForceInline
@Override
public Double512Vector withLane(int i, double e) {
switch(i) {
case 0: return withLaneHelper(0, e);
case 1: return withLaneHelper(1, e);
case 2: return withLaneHelper(2, e);
case 3: return withLaneHelper(3, e);
case 4: return withLaneHelper(4, e);
case 5: return withLaneHelper(5, e);
case 6: return withLaneHelper(6, e);
case 7: return withLaneHelper(7, e);
default: throw new IllegalArgumentException("Index " + i + " must be zero or positive, and less than " + VLENGTH);
}
}
public Double512Vector withLaneHelper(int i, double e) {
return VectorSupport.insert(
VCLASS, ETYPE, VLENGTH,
this, i, (long)Double.doubleToLongBits(e),
(v, ix, bits) -> {
double[] res = v.vec().clone();
res[ix] = Double.longBitsToDouble((long)bits);
return v.vectorFactory(res);
});
}
// Mask
static final class Double512Mask extends AbstractMask<Double> {
static final int VLENGTH = VSPECIES.laneCount(); // used by the JVM
static final Class<Double> ETYPE = double.class; // used by the JVM
Double512Mask(boolean[] bits) {
this(bits, 0);
}
Double512Mask(boolean[] bits, int offset) {
super(prepare(bits, offset));
}
Double512Mask(boolean val) {
super(prepare(val));
}
private static boolean[] prepare(boolean[] bits, int offset) {
boolean[] newBits = new boolean[VSPECIES.laneCount()];
for (int i = 0; i < newBits.length; i++) {
newBits[i] = bits[offset + i];
}
return newBits;
}
private static boolean[] prepare(boolean val) {
boolean[] bits = new boolean[VSPECIES.laneCount()];
Arrays.fill(bits, val);
return bits;
}
@ForceInline
final @Override
public DoubleSpecies vspecies() {
// ISSUE: This should probably be a @Stable
// field inside AbstractMask, rather than
// a megamorphic method.
return VSPECIES;
}
@ForceInline
boolean[] getBits() {
return (boolean[])getPayload();
}
@Override
Double512Mask uOp(MUnOp f) {
boolean[] res = new boolean[vspecies().laneCount()];
boolean[] bits = getBits();
for (int i = 0; i < res.length; i++) {
res[i] = f.apply(i, bits[i]);
}
return new Double512Mask(res);
}
@Override
Double512Mask bOp(VectorMask<Double> m, MBinOp f) {
boolean[] res = new boolean[vspecies().laneCount()];
boolean[] bits = getBits();
boolean[] mbits = ((Double512Mask)m).getBits();
for (int i = 0; i < res.length; i++) {
res[i] = f.apply(i, bits[i], mbits[i]);
}
return new Double512Mask(res);
}
@ForceInline
@Override
public final
Double512Vector toVector() {
return (Double512Vector) super.toVectorTemplate(); // specialize
}
@Override
@ForceInline
public <E> VectorMask<E> cast(VectorSpecies<E> s) {
AbstractSpecies<E> species = (AbstractSpecies<E>) s;
if (length() != species.laneCount())
throw new IllegalArgumentException("VectorMask length and species length differ");
boolean[] maskArray = toArray();
// enum-switches don't optimize properly JDK-8161245
switch (species.laneType.switchKey) {
case LaneType.SK_BYTE:
return new Byte512Vector.Byte512Mask(maskArray).check(species);
case LaneType.SK_SHORT:
return new Short512Vector.Short512Mask(maskArray).check(species);
case LaneType.SK_INT:
return new Int512Vector.Int512Mask(maskArray).check(species);
case LaneType.SK_LONG:
return new Long512Vector.Long512Mask(maskArray).check(species);
case LaneType.SK_FLOAT:
return new Float512Vector.Float512Mask(maskArray).check(species);
case LaneType.SK_DOUBLE:
return new Double512Vector.Double512Mask(maskArray).check(species);
}
// Should not reach here.
throw new AssertionError(species);
}
// Unary operations
@Override
@ForceInline
public Double512Mask not() {
return xor(maskAll(true));
}
// Binary operations
@Override
@ForceInline
public Double512Mask and(VectorMask<Double> mask) {
Objects.requireNonNull(mask);
Double512Mask m = (Double512Mask)mask;
return VectorSupport.binaryOp(VECTOR_OP_AND, Double512Mask.class, long.class, VLENGTH,
this, m,
(m1, m2) -> m1.bOp(m2, (i, a, b) -> a & b));
}
@Override
@ForceInline
public Double512Mask or(VectorMask<Double> mask) {
Objects.requireNonNull(mask);
Double512Mask m = (Double512Mask)mask;
return VectorSupport.binaryOp(VECTOR_OP_OR, Double512Mask.class, long.class, VLENGTH,
this, m,
(m1, m2) -> m1.bOp(m2, (i, a, b) -> a | b));
}
@ForceInline
/* package-private */
Double512Mask xor(VectorMask<Double> mask) {
Objects.requireNonNull(mask);
Double512Mask m = (Double512Mask)mask;
return VectorSupport.binaryOp(VECTOR_OP_XOR, Double512Mask.class, long.class, VLENGTH,
this, m,
(m1, m2) -> m1.bOp(m2, (i, a, b) -> a ^ b));
}
// Reductions
@Override
@ForceInline
public boolean anyTrue() {
return VectorSupport.test(BT_ne, Double512Mask.class, long.class, VLENGTH,
this, vspecies().maskAll(true),
(m, __) -> anyTrueHelper(((Double512Mask)m).getBits()));
}
@Override
@ForceInline
public boolean allTrue() {
return VectorSupport.test(BT_overflow, Double512Mask.class, long.class, VLENGTH,
this, vspecies().maskAll(true),
(m, __) -> allTrueHelper(((Double512Mask)m).getBits()));
}
@ForceInline
/*package-private*/
static Double512Mask maskAll(boolean bit) {
return VectorSupport.broadcastCoerced(Double512Mask.class, long.class, VLENGTH,
(bit ? -1 : 0), null,
(v, __) -> (v != 0 ? TRUE_MASK : FALSE_MASK));
}
private static final Double512Mask TRUE_MASK = new Double512Mask(true);
private static final Double512Mask FALSE_MASK = new Double512Mask(false);
}
// Shuffle
static final class Double512Shuffle extends AbstractShuffle<Double> {
static final int VLENGTH = VSPECIES.laneCount(); // used by the JVM
static final Class<Double> ETYPE = double.class; // used by the JVM
Double512Shuffle(byte[] reorder) {
super(VLENGTH, reorder);
}
public Double512Shuffle(int[] reorder) {
super(VLENGTH, reorder);
}
public Double512Shuffle(int[] reorder, int i) {
super(VLENGTH, reorder, i);
}
public Double512Shuffle(IntUnaryOperator fn) {
super(VLENGTH, fn);
}
@Override
public DoubleSpecies vspecies() {
return VSPECIES;
}
static {
// There must be enough bits in the shuffle lanes to encode
// VLENGTH valid indexes and VLENGTH exceptional ones.
assert(VLENGTH < Byte.MAX_VALUE);
assert(Byte.MIN_VALUE <= -VLENGTH);
}
static final Double512Shuffle IOTA = new Double512Shuffle(IDENTITY);
@Override
@ForceInline
public Double512Vector toVector() {
return VectorSupport.shuffleToVector(VCLASS, ETYPE, Double512Shuffle.class, this, VLENGTH,
(s) -> ((Double512Vector)(((AbstractShuffle<Double>)(s)).toVectorTemplate())));
}
@Override
@ForceInline
public <F> VectorShuffle<F> cast(VectorSpecies<F> s) {
AbstractSpecies<F> species = (AbstractSpecies<F>) s;
if (length() != species.laneCount())
throw new IllegalArgumentException("VectorShuffle length and species length differ");
int[] shuffleArray = toArray();
// enum-switches don't optimize properly JDK-8161245
switch (species.laneType.switchKey) {
case LaneType.SK_BYTE:
return new Byte512Vector.Byte512Shuffle(shuffleArray).check(species);
case LaneType.SK_SHORT:
return new Short512Vector.Short512Shuffle(shuffleArray).check(species);
case LaneType.SK_INT:
return new Int512Vector.Int512Shuffle(shuffleArray).check(species);
case LaneType.SK_LONG:
return new Long512Vector.Long512Shuffle(shuffleArray).check(species);
case LaneType.SK_FLOAT:
return new Float512Vector.Float512Shuffle(shuffleArray).check(species);
case LaneType.SK_DOUBLE:
return new Double512Vector.Double512Shuffle(shuffleArray).check(species);
}
// Should not reach here.
throw new AssertionError(species);
}
@ForceInline
@Override
public Double512Shuffle rearrange(VectorShuffle<Double> shuffle) {
Double512Shuffle s = (Double512Shuffle) shuffle;
byte[] reorder1 = reorder();
byte[] reorder2 = s.reorder();
byte[] r = new byte[reorder1.length];
for (int i = 0; i < reorder1.length; i++) {
int ssi = reorder2[i];
r[i] = reorder1[ssi]; // throws on exceptional index
}
return new Double512Shuffle(r);
}
}
// ================================================
// Specialized low-level memory operations.
@ForceInline
@Override
final
DoubleVector fromArray0(double[] a, int offset) {
return super.fromArray0Template(a, offset); // specialize
}
@ForceInline
@Override
final
DoubleVector fromByteArray0(byte[] a, int offset) {
return super.fromByteArray0Template(a, offset); // specialize
}
@ForceInline
@Override
final
DoubleVector fromByteBuffer0(ByteBuffer bb, int offset) {
return super.fromByteBuffer0Template(bb, offset); // specialize
}
@ForceInline
@Override
final
void intoArray0(double[] a, int offset) {
super.intoArray0Template(a, offset); // specialize
}
@ForceInline
@Override
final
void intoByteArray0(byte[] a, int offset) {
super.intoByteArray0Template(a, offset); // specialize
}
// End of specialized low-level memory operations.
// ================================================
}


@ -0,0 +1,806 @@
/*
* Copyright (c) 2017, 2020, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation. Oracle designates this
* particular file as subject to the "Classpath" exception as provided
* by Oracle in the LICENSE file that accompanied this code.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*/
package jdk.incubator.vector;
import java.nio.ByteBuffer;
import java.util.Arrays;
import java.util.Objects;
import java.util.function.IntUnaryOperator;
import jdk.internal.vm.annotation.ForceInline;
import jdk.internal.vm.vector.VectorSupport;
import static jdk.internal.vm.vector.VectorSupport.*;
import static jdk.incubator.vector.VectorOperators.*;
// -- This file was mechanically generated: Do not edit! -- //
@SuppressWarnings("cast") // warning: redundant cast
final class Double64Vector extends DoubleVector {
static final DoubleSpecies VSPECIES =
(DoubleSpecies) DoubleVector.SPECIES_64;
static final VectorShape VSHAPE =
VSPECIES.vectorShape();
static final Class<Double64Vector> VCLASS = Double64Vector.class;
static final int VSIZE = VSPECIES.vectorBitSize();
static final int VLENGTH = VSPECIES.laneCount(); // used by the JVM
static final Class<Double> ETYPE = double.class; // used by the JVM
Double64Vector(double[] v) {
super(v);
}
// For compatibility as Double64Vector::new,
// stored into species.vectorFactory.
Double64Vector(Object v) {
this((double[]) v);
}
static final Double64Vector ZERO = new Double64Vector(new double[VLENGTH]);
static final Double64Vector IOTA = new Double64Vector(VSPECIES.iotaArray());
static {
// Warm up a few species caches.
// If we do this too much we will
// get NPEs from bootstrap circularity.
VSPECIES.dummyVector();
VSPECIES.withLanes(LaneType.BYTE);
}
// Specialized extractors
@ForceInline
final @Override
public DoubleSpecies vspecies() {
// ISSUE: This should probably be a @Stable
// field inside AbstractVector, rather than
// a megamorphic method.
return VSPECIES;
}
@ForceInline
@Override
public final Class<Double> elementType() { return double.class; }
@ForceInline
@Override
public final int elementSize() { return Double.SIZE; }
@ForceInline
@Override
public final VectorShape shape() { return VSHAPE; }
@ForceInline
@Override
public final int length() { return VLENGTH; }
@ForceInline
@Override
public final int bitSize() { return VSIZE; }
@ForceInline
@Override
public final int byteSize() { return VSIZE / Byte.SIZE; }
/*package-private*/
@ForceInline
final @Override
double[] vec() {
return (double[])getPayload();
}
// Virtualized constructors
@Override
@ForceInline
public final Double64Vector broadcast(double e) {
return (Double64Vector) super.broadcastTemplate(e); // specialize
}
@Override
@ForceInline
public final Double64Vector broadcast(long e) {
return (Double64Vector) super.broadcastTemplate(e); // specialize
}
@Override
@ForceInline
Double64Mask maskFromArray(boolean[] bits) {
return new Double64Mask(bits);
}
@Override
@ForceInline
Double64Shuffle iotaShuffle() { return Double64Shuffle.IOTA; }
@ForceInline
Double64Shuffle iotaShuffle(int start, int step, boolean wrap) {
if (wrap) {
return (Double64Shuffle)VectorSupport.shuffleIota(ETYPE, Double64Shuffle.class, VSPECIES, VLENGTH, start, step, 1,
(l, lstart, lstep, s) -> s.shuffleFromOp(i -> (VectorIntrinsics.wrapToRange(i*lstep + lstart, l))));
} else {
return (Double64Shuffle)VectorSupport.shuffleIota(ETYPE, Double64Shuffle.class, VSPECIES, VLENGTH, start, step, 0,
(l, lstart, lstep, s) -> s.shuffleFromOp(i -> (i*lstep + lstart)));
}
}
@Override
@ForceInline
Double64Shuffle shuffleFromBytes(byte[] reorder) { return new Double64Shuffle(reorder); }
@Override
@ForceInline
Double64Shuffle shuffleFromArray(int[] indexes, int i) { return new Double64Shuffle(indexes, i); }
@Override
@ForceInline
Double64Shuffle shuffleFromOp(IntUnaryOperator fn) { return new Double64Shuffle(fn); }
// Make a vector of the same species but the given elements:
@ForceInline
final @Override
Double64Vector vectorFactory(double[] vec) {
return new Double64Vector(vec);
}
@ForceInline
final @Override
Byte64Vector asByteVectorRaw() {
return (Byte64Vector) super.asByteVectorRawTemplate(); // specialize
}
@ForceInline
final @Override
AbstractVector<?> asVectorRaw(LaneType laneType) {
return super.asVectorRawTemplate(laneType); // specialize
}
// Unary operator
@ForceInline
final @Override
Double64Vector uOp(FUnOp f) {
return (Double64Vector) super.uOpTemplate(f); // specialize
}
@ForceInline
final @Override
Double64Vector uOp(VectorMask<Double> m, FUnOp f) {
return (Double64Vector)
super.uOpTemplate((Double64Mask)m, f); // specialize
}
// Binary operator
@ForceInline
final @Override
Double64Vector bOp(Vector<Double> v, FBinOp f) {
return (Double64Vector) super.bOpTemplate((Double64Vector)v, f); // specialize
}
@ForceInline
final @Override
Double64Vector bOp(Vector<Double> v,
VectorMask<Double> m, FBinOp f) {
return (Double64Vector)
super.bOpTemplate((Double64Vector)v, (Double64Mask)m,
f); // specialize
}
// Ternary operator
@ForceInline
final @Override
Double64Vector tOp(Vector<Double> v1, Vector<Double> v2, FTriOp f) {
return (Double64Vector)
super.tOpTemplate((Double64Vector)v1, (Double64Vector)v2,
f); // specialize
}
@ForceInline
final @Override
Double64Vector tOp(Vector<Double> v1, Vector<Double> v2,
VectorMask<Double> m, FTriOp f) {
return (Double64Vector)
super.tOpTemplate((Double64Vector)v1, (Double64Vector)v2,
(Double64Mask)m, f); // specialize
}
@ForceInline
final @Override
double rOp(double v, FBinOp f) {
return super.rOpTemplate(v, f); // specialize
}
@Override
@ForceInline
public final <F>
Vector<F> convertShape(VectorOperators.Conversion<Double,F> conv,
VectorSpecies<F> rsp, int part) {
return super.convertShapeTemplate(conv, rsp, part); // specialize
}
@Override
@ForceInline
public final <F>
Vector<F> reinterpretShape(VectorSpecies<F> toSpecies, int part) {
return super.reinterpretShapeTemplate(toSpecies, part); // specialize
}
// Specialized algebraic operations:
// The following definition forces a specialized version of this
// crucial method into the v-table of this class. A call to add()
// will inline to a call to lanewise(ADD,), at which point the JIT
// intrinsic will have the opcode of ADD, plus all the metadata
// for this particular class, enabling it to generate precise
// code.
//
// There is probably no benefit to the JIT to specialize the
// masked or broadcast versions of the lanewise method.
@Override
@ForceInline
public Double64Vector lanewise(Unary op) {
return (Double64Vector) super.lanewiseTemplate(op); // specialize
}
@Override
@ForceInline
public Double64Vector lanewise(Binary op, Vector<Double> v) {
return (Double64Vector) super.lanewiseTemplate(op, v); // specialize
}
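// Editorial sketch (not part of the generated source): assuming the public
// species handle DoubleVector.SPECIES_64 and caller arrays a, b, r of
// suitable length, the specialization above is what serves user code such as
//   DoubleVector va = DoubleVector.fromArray(DoubleVector.SPECIES_64, a, 0);
//   DoubleVector vb = DoubleVector.fromArray(DoubleVector.SPECIES_64, b, 0);
//   va.lanewise(VectorOperators.ADD, vb).intoArray(r, 0);
// A call to va.add(vb) is expected to inline into the same
// lanewise(ADD, vb) entry point, as described in the comment above.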
/*package-private*/
@Override
@ForceInline
public final
Double64Vector
lanewise(VectorOperators.Ternary op, Vector<Double> v1, Vector<Double> v2) {
return (Double64Vector) super.lanewiseTemplate(op, v1, v2); // specialize
}
@Override
@ForceInline
public final
Double64Vector addIndex(int scale) {
return (Double64Vector) super.addIndexTemplate(scale); // specialize
}
// Type specific horizontal reductions
@Override
@ForceInline
public final double reduceLanes(VectorOperators.Associative op) {
return super.reduceLanesTemplate(op); // specialized
}
@Override
@ForceInline
public final double reduceLanes(VectorOperators.Associative op,
VectorMask<Double> m) {
return super.reduceLanesTemplate(op, m); // specialized
}
@Override
@ForceInline
public final long reduceLanesToLong(VectorOperators.Associative op) {
return (long) super.reduceLanesTemplate(op); // specialized
}
@Override
@ForceInline
public final long reduceLanesToLong(VectorOperators.Associative op,
VectorMask<Double> m) {
return (long) super.reduceLanesTemplate(op, m); // specialized
}
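// Usage sketch (editorial, not from this file): with a vector v of this
// species, a horizontal sum can be requested as
//   double sum = v.reduceLanes(VectorOperators.ADD);
// and a masked variant as v.reduceLanes(VectorOperators.ADD, m); both are
// routed through the shared reduction templates specialized above.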
@Override
@ForceInline
public VectorShuffle<Double> toShuffle() {
double[] a = toArray();
int[] sa = new int[a.length];
for (int i = 0; i < a.length; i++) {
sa[i] = (int) a[i];
}
return VectorShuffle.fromArray(VSPECIES, sa, 0);
}
// Specialized unary testing
@Override
@ForceInline
public final Double64Mask test(Test op) {
return super.testTemplate(Double64Mask.class, op); // specialize
}
// Specialized comparisons
@Override
@ForceInline
public final Double64Mask compare(Comparison op, Vector<Double> v) {
return super.compareTemplate(Double64Mask.class, op, v); // specialize
}
@Override
@ForceInline
public final Double64Mask compare(Comparison op, double s) {
return super.compareTemplate(Double64Mask.class, op, s); // specialize
}
@Override
@ForceInline
public final Double64Mask compare(Comparison op, long s) {
return super.compareTemplate(Double64Mask.class, op, s); // specialize
}
@Override
@ForceInline
public Double64Vector blend(Vector<Double> v, VectorMask<Double> m) {
return (Double64Vector)
super.blendTemplate(Double64Mask.class,
(Double64Vector) v,
(Double64Mask) m); // specialize
}
@Override
@ForceInline
public Double64Vector slice(int origin, Vector<Double> v) {
return (Double64Vector) super.sliceTemplate(origin, v); // specialize
}
@Override
@ForceInline
public Double64Vector slice(int origin) {
if ((origin < 0) || (origin >= VLENGTH)) {
throw new ArrayIndexOutOfBoundsException("Index " + origin + " out of bounds for vector length " + VLENGTH);
} else {
Double64Shuffle Iota = iotaShuffle();
VectorMask<Double> BlendMask = Iota.toVector().compare(VectorOperators.LT, (broadcast((double)(VLENGTH-origin))));
Iota = iotaShuffle(origin, 1, true);
return ZERO.blend(this.rearrange(Iota), BlendMask);
}
}
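// Editorial note (hedged): for this one-lane species, slice(0) effectively
// returns the original lane, since the iota index is 0 and the blend mask
// selects it; for any other origin the bounds check above throws.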
@Override
@ForceInline
public Double64Vector unslice(int origin, Vector<Double> w, int part) {
return (Double64Vector) super.unsliceTemplate(origin, w, part); // specialize
}
@Override
@ForceInline
public Double64Vector unslice(int origin, Vector<Double> w, int part, VectorMask<Double> m) {
return (Double64Vector)
super.unsliceTemplate(Double64Mask.class,
origin, w, part,
(Double64Mask) m); // specialize
}
@Override
@ForceInline
public Double64Vector unslice(int origin) {
if ((origin < 0) || (origin >= VLENGTH)) {
throw new ArrayIndexOutOfBoundsException("Index " + origin + " out of bounds for vector length " + VLENGTH);
} else {
Double64Shuffle Iota = iotaShuffle();
VectorMask<Double> BlendMask = Iota.toVector().compare(VectorOperators.GE, (broadcast((double)(origin))));
Iota = iotaShuffle(-origin, 1, true);
return ZERO.blend(this.rearrange(Iota), BlendMask);
}
}
@Override
@ForceInline
public Double64Vector rearrange(VectorShuffle<Double> s) {
return (Double64Vector)
super.rearrangeTemplate(Double64Shuffle.class,
(Double64Shuffle) s); // specialize
}
@Override
@ForceInline
public Double64Vector rearrange(VectorShuffle<Double> shuffle,
VectorMask<Double> m) {
return (Double64Vector)
super.rearrangeTemplate(Double64Shuffle.class,
(Double64Shuffle) shuffle,
(Double64Mask) m); // specialize
}
@Override
@ForceInline
public Double64Vector rearrange(VectorShuffle<Double> s,
Vector<Double> v) {
return (Double64Vector)
super.rearrangeTemplate(Double64Shuffle.class,
(Double64Shuffle) s,
(Double64Vector) v); // specialize
}
@Override
@ForceInline
public Double64Vector selectFrom(Vector<Double> v) {
return (Double64Vector)
super.selectFromTemplate((Double64Vector) v); // specialize
}
@Override
@ForceInline
public Double64Vector selectFrom(Vector<Double> v,
VectorMask<Double> m) {
return (Double64Vector)
super.selectFromTemplate((Double64Vector) v,
(Double64Mask) m); // specialize
}
@ForceInline
@Override
public double lane(int i) {
long bits;
switch(i) {
case 0: bits = laneHelper(0); break;
default: throw new IllegalArgumentException("Index " + i + " must be zero or positive, and less than " + VLENGTH);
}
return Double.longBitsToDouble(bits);
}
public long laneHelper(int i) {
return (long) VectorSupport.extract(
VCLASS, ETYPE, VLENGTH,
this, i,
(vec, ix) -> {
double[] vecarr = vec.vec();
return (long)Double.doubleToLongBits(vecarr[ix]);
});
}
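// Editorial note: this species holds a single double lane, so lane(0) is the
// only index that does not throw; the value travels through the extract
// intrinsic as long bits and is rebuilt with Double.longBitsToDouble.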
@ForceInline
@Override
public Double64Vector withLane(int i, double e) {
switch(i) {
case 0: return withLaneHelper(0, e);
default: throw new IllegalArgumentException("Index " + i + " must be zero or positive, and less than " + VLENGTH);
}
}
public Double64Vector withLaneHelper(int i, double e) {
return VectorSupport.insert(
VCLASS, ETYPE, VLENGTH,
this, i, (long)Double.doubleToLongBits(e),
(v, ix, bits) -> {
double[] res = v.vec().clone();
res[ix] = Double.longBitsToDouble((long)bits);
return v.vectorFactory(res);
});
}
// Mask
static final class Double64Mask extends AbstractMask<Double> {
static final int VLENGTH = VSPECIES.laneCount(); // used by the JVM
static final Class<Double> ETYPE = double.class; // used by the JVM
Double64Mask(boolean[] bits) {
this(bits, 0);
}
Double64Mask(boolean[] bits, int offset) {
super(prepare(bits, offset));
}
Double64Mask(boolean val) {
super(prepare(val));
}
private static boolean[] prepare(boolean[] bits, int offset) {
boolean[] newBits = new boolean[VSPECIES.laneCount()];
for (int i = 0; i < newBits.length; i++) {
newBits[i] = bits[offset + i];
}
return newBits;
}
private static boolean[] prepare(boolean val) {
boolean[] bits = new boolean[VSPECIES.laneCount()];
Arrays.fill(bits, val);
return bits;
}
@ForceInline
final @Override
public DoubleSpecies vspecies() {
// ISSUE: This should probably be a @Stable
// field inside AbstractMask, rather than
// a megamorphic method.
return VSPECIES;
}
@ForceInline
boolean[] getBits() {
return (boolean[])getPayload();
}
@Override
Double64Mask uOp(MUnOp f) {
boolean[] res = new boolean[vspecies().laneCount()];
boolean[] bits = getBits();
for (int i = 0; i < res.length; i++) {
res[i] = f.apply(i, bits[i]);
}
return new Double64Mask(res);
}
@Override
Double64Mask bOp(VectorMask<Double> m, MBinOp f) {
boolean[] res = new boolean[vspecies().laneCount()];
boolean[] bits = getBits();
boolean[] mbits = ((Double64Mask)m).getBits();
for (int i = 0; i < res.length; i++) {
res[i] = f.apply(i, bits[i], mbits[i]);
}
return new Double64Mask(res);
}
@ForceInline
@Override
public final
Double64Vector toVector() {
return (Double64Vector) super.toVectorTemplate(); // specialize
}
@Override
@ForceInline
public <E> VectorMask<E> cast(VectorSpecies<E> s) {
AbstractSpecies<E> species = (AbstractSpecies<E>) s;
if (length() != species.laneCount())
throw new IllegalArgumentException("VectorMask length and species length differ");
boolean[] maskArray = toArray();
// enum-switches don't optimize properly JDK-8161245
switch (species.laneType.switchKey) {
case LaneType.SK_BYTE:
return new Byte64Vector.Byte64Mask(maskArray).check(species);
case LaneType.SK_SHORT:
return new Short64Vector.Short64Mask(maskArray).check(species);
case LaneType.SK_INT:
return new Int64Vector.Int64Mask(maskArray).check(species);
case LaneType.SK_LONG:
return new Long64Vector.Long64Mask(maskArray).check(species);
case LaneType.SK_FLOAT:
return new Float64Vector.Float64Mask(maskArray).check(species);
case LaneType.SK_DOUBLE:
return new Double64Vector.Double64Mask(maskArray).check(species);
}
// Should not reach here.
throw new AssertionError(species);
}
// Unary operations
@Override
@ForceInline
public Double64Mask not() {
return xor(maskAll(true));
}
// Binary operations
@Override
@ForceInline
public Double64Mask and(VectorMask<Double> mask) {
Objects.requireNonNull(mask);
Double64Mask m = (Double64Mask)mask;
return VectorSupport.binaryOp(VECTOR_OP_AND, Double64Mask.class, long.class, VLENGTH,
this, m,
(m1, m2) -> m1.bOp(m2, (i, a, b) -> a & b));
}
@Override
@ForceInline
public Double64Mask or(VectorMask<Double> mask) {
Objects.requireNonNull(mask);
Double64Mask m = (Double64Mask)mask;
return VectorSupport.binaryOp(VECTOR_OP_OR, Double64Mask.class, long.class, VLENGTH,
this, m,
(m1, m2) -> m1.bOp(m2, (i, a, b) -> a | b));
}
@ForceInline
/* package-private */
Double64Mask xor(VectorMask<Double> mask) {
Objects.requireNonNull(mask);
Double64Mask m = (Double64Mask)mask;
return VectorSupport.binaryOp(VECTOR_OP_XOR, Double64Mask.class, long.class, VLENGTH,
this, m,
(m1, m2) -> m1.bOp(m2, (i, a, b) -> a ^ b));
}
// Reductions
@Override
@ForceInline
public boolean anyTrue() {
return VectorSupport.test(BT_ne, Double64Mask.class, long.class, VLENGTH,
this, vspecies().maskAll(true),
(m, __) -> anyTrueHelper(((Double64Mask)m).getBits()));
}
@Override
@ForceInline
public boolean allTrue() {
return VectorSupport.test(BT_overflow, Double64Mask.class, long.class, VLENGTH,
this, vspecies().maskAll(true),
(m, __) -> allTrueHelper(((Double64Mask)m).getBits()));
}
@ForceInline
/*package-private*/
static Double64Mask maskAll(boolean bit) {
return VectorSupport.broadcastCoerced(Double64Mask.class, long.class, VLENGTH,
(bit ? -1 : 0), null,
(v, __) -> (v != 0 ? TRUE_MASK : FALSE_MASK));
}
private static final Double64Mask TRUE_MASK = new Double64Mask(true);
private static final Double64Mask FALSE_MASK = new Double64Mask(false);
}
// Shuffle
static final class Double64Shuffle extends AbstractShuffle<Double> {
static final int VLENGTH = VSPECIES.laneCount(); // used by the JVM
static final Class<Double> ETYPE = double.class; // used by the JVM
Double64Shuffle(byte[] reorder) {
super(VLENGTH, reorder);
}
public Double64Shuffle(int[] reorder) {
super(VLENGTH, reorder);
}
public Double64Shuffle(int[] reorder, int i) {
super(VLENGTH, reorder, i);
}
public Double64Shuffle(IntUnaryOperator fn) {
super(VLENGTH, fn);
}
@Override
public DoubleSpecies vspecies() {
return VSPECIES;
}
static {
// There must be enough bits in the shuffle lanes to encode
// VLENGTH valid indexes and VLENGTH exceptional ones.
assert(VLENGTH < Byte.MAX_VALUE);
assert(Byte.MIN_VALUE <= -VLENGTH);
}
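// Editorial note: with VLENGTH == 1 the only valid shuffle index is 0; an
// out-of-range source index is wrapped to a negative exceptional value
// (here -1), which rearrange() reports by throwing.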
static final Double64Shuffle IOTA = new Double64Shuffle(IDENTITY);
@Override
@ForceInline
public Double64Vector toVector() {
return VectorSupport.shuffleToVector(VCLASS, ETYPE, Double64Shuffle.class, this, VLENGTH,
(s) -> ((Double64Vector)(((AbstractShuffle<Double>)(s)).toVectorTemplate())));
}
@Override
@ForceInline
public <F> VectorShuffle<F> cast(VectorSpecies<F> s) {
AbstractSpecies<F> species = (AbstractSpecies<F>) s;
if (length() != species.laneCount())
throw new IllegalArgumentException("VectorShuffle length and species length differ");
int[] shuffleArray = toArray();
// enum-switches don't optimize properly JDK-8161245
switch (species.laneType.switchKey) {
case LaneType.SK_BYTE:
return new Byte64Vector.Byte64Shuffle(shuffleArray).check(species);
case LaneType.SK_SHORT:
return new Short64Vector.Short64Shuffle(shuffleArray).check(species);
case LaneType.SK_INT:
return new Int64Vector.Int64Shuffle(shuffleArray).check(species);
case LaneType.SK_LONG:
return new Long64Vector.Long64Shuffle(shuffleArray).check(species);
case LaneType.SK_FLOAT:
return new Float64Vector.Float64Shuffle(shuffleArray).check(species);
case LaneType.SK_DOUBLE:
return new Double64Vector.Double64Shuffle(shuffleArray).check(species);
}
// Should not reach here.
throw new AssertionError(species);
}
@ForceInline
@Override
public Double64Shuffle rearrange(VectorShuffle<Double> shuffle) {
Double64Shuffle s = (Double64Shuffle) shuffle;
byte[] reorder1 = reorder();
byte[] reorder2 = s.reorder();
byte[] r = new byte[reorder1.length];
for (int i = 0; i < reorder1.length; i++) {
int ssi = reorder2[i];
r[i] = reorder1[ssi]; // throws on exceptional index
}
return new Double64Shuffle(r);
}
}
// ================================================
// Specialized low-level memory operations.
@ForceInline
@Override
final
DoubleVector fromArray0(double[] a, int offset) {
return super.fromArray0Template(a, offset); // specialize
}
@ForceInline
@Override
final
DoubleVector fromByteArray0(byte[] a, int offset) {
return super.fromByteArray0Template(a, offset); // specialize
}
@ForceInline
@Override
final
DoubleVector fromByteBuffer0(ByteBuffer bb, int offset) {
return super.fromByteBuffer0Template(bb, offset); // specialize
}
@ForceInline
@Override
final
void intoArray0(double[] a, int offset) {
super.intoArray0Template(a, offset); // specialize
}
@ForceInline
@Override
final
void intoByteArray0(byte[] a, int offset) {
super.intoByteArray0Template(a, offset); // specialize
}
// End of specialized low-level memory operations.
// ================================================
}

Some files were not shown because too many files have changed in this diff.