6827605: new String intrinsics may prevent EA scalar replacement

6875866: Intrinsic for String.indexOf() is broken on x86 with SSE4.2

Modify String intrinsic methods to pass char[] pointers instead of string oops.

Reviewed-by: never
This commit is contained in:
Vladimir Kozlov 2009-09-14 12:14:20 -07:00
parent c492f4cde5
commit 243514d483
13 changed files with 788 additions and 1214 deletions

View file

@ -8404,6 +8404,319 @@ void MacroAssembler::reinit_heapbase() {
}
#endif // _LP64
// IndexOf substring.
void MacroAssembler::string_indexof(Register str1, Register str2,
Register cnt1, Register cnt2, Register result,
XMMRegister vec, Register tmp) {
assert(UseSSE42Intrinsics, "SSE4.2 is required");
Label RELOAD_SUBSTR, PREP_FOR_SCAN, SCAN_TO_SUBSTR,
SCAN_SUBSTR, RET_NOT_FOUND, CLEANUP;
push(str1); // string addr
push(str2); // substr addr
push(cnt2); // substr count
jmpb(PREP_FOR_SCAN);
// Substr count saved at sp
// Substr saved at sp+1*wordSize
// String saved at sp+2*wordSize
// Reload substr for rescan
bind(RELOAD_SUBSTR);
movl(cnt2, Address(rsp, 0));
movptr(str2, Address(rsp, wordSize));
// We came here after the beginninig of the substring was
// matched but the rest of it was not so we need to search
// again. Start from the next element after the previous match.
subptr(str1, result); // Restore counter
shrl(str1, 1);
addl(cnt1, str1);
lea(str1, Address(result, 2)); // Reload string
// Load substr
bind(PREP_FOR_SCAN);
movdqu(vec, Address(str2, 0));
addl(cnt1, 8); // prime the loop
subptr(str1, 16);
// Scan string for substr in 16-byte vectors
bind(SCAN_TO_SUBSTR);
subl(cnt1, 8);
addptr(str1, 16);
// pcmpestri
// inputs:
// xmm - substring
// rax - substring length (elements count)
// mem - scaned string
// rdx - string length (elements count)
// 0xd - mode: 1100 (substring search) + 01 (unsigned shorts)
// outputs:
// rcx - matched index in string
assert(cnt1 == rdx && cnt2 == rax && tmp == rcx, "pcmpestri");
pcmpestri(vec, Address(str1, 0), 0x0d);
jcc(Assembler::above, SCAN_TO_SUBSTR); // CF == 0 && ZF == 0
jccb(Assembler::aboveEqual, RET_NOT_FOUND); // CF == 0
// Fallthrough: found a potential substr
// Make sure string is still long enough
subl(cnt1, tmp);
cmpl(cnt1, cnt2);
jccb(Assembler::negative, RET_NOT_FOUND);
// Compute start addr of substr
lea(str1, Address(str1, tmp, Address::times_2));
movptr(result, str1); // save
// Compare potential substr
addl(cnt1, 8); // prime the loop
addl(cnt2, 8);
subptr(str1, 16);
subptr(str2, 16);
// Scan 16-byte vectors of string and substr
bind(SCAN_SUBSTR);
subl(cnt1, 8);
subl(cnt2, 8);
addptr(str1, 16);
addptr(str2, 16);
movdqu(vec, Address(str2, 0));
pcmpestri(vec, Address(str1, 0), 0x0d);
jcc(Assembler::noOverflow, RELOAD_SUBSTR); // OF == 0
jcc(Assembler::positive, SCAN_SUBSTR); // SF == 0
// Compute substr offset
subptr(result, Address(rsp, 2*wordSize));
shrl(result, 1); // index
jmpb(CLEANUP);
bind(RET_NOT_FOUND);
movl(result, -1);
bind(CLEANUP);
addptr(rsp, 3*wordSize);
}
// Compare strings.
void MacroAssembler::string_compare(Register str1, Register str2,
Register cnt1, Register cnt2, Register result,
XMMRegister vec1, XMMRegister vec2) {
Label LENGTH_DIFF_LABEL, POP_LABEL, DONE_LABEL, WHILE_HEAD_LABEL;
// Compute the minimum of the string lengths and the
// difference of the string lengths (stack).
// Do the conditional move stuff
movl(result, cnt1);
subl(cnt1, cnt2);
push(cnt1);
if (VM_Version::supports_cmov()) {
cmovl(Assembler::lessEqual, cnt2, result);
} else {
Label GT_LABEL;
jccb(Assembler::greater, GT_LABEL);
movl(cnt2, result);
bind(GT_LABEL);
}
// Is the minimum length zero?
testl(cnt2, cnt2);
jcc(Assembler::zero, LENGTH_DIFF_LABEL);
// Load first characters
load_unsigned_short(result, Address(str1, 0));
load_unsigned_short(cnt1, Address(str2, 0));
// Compare first characters
subl(result, cnt1);
jcc(Assembler::notZero, POP_LABEL);
decrementl(cnt2);
jcc(Assembler::zero, LENGTH_DIFF_LABEL);
{
// Check after comparing first character to see if strings are equivalent
Label LSkip2;
// Check if the strings start at same location
cmpptr(str1, str2);
jccb(Assembler::notEqual, LSkip2);
// Check if the length difference is zero (from stack)
cmpl(Address(rsp, 0), 0x0);
jcc(Assembler::equal, LENGTH_DIFF_LABEL);
// Strings might not be equivalent
bind(LSkip2);
}
// Advance to next character
addptr(str1, 2);
addptr(str2, 2);
if (UseSSE42Intrinsics) {
// With SSE4.2, use double quad vector compare
Label COMPARE_VECTORS, VECTOR_NOT_EQUAL, COMPARE_TAIL;
// Setup to compare 16-byte vectors
movl(cnt1, cnt2);
andl(cnt2, 0xfffffff8); // cnt2 holds the vector count
andl(cnt1, 0x00000007); // cnt1 holds the tail count
testl(cnt2, cnt2);
jccb(Assembler::zero, COMPARE_TAIL);
lea(str2, Address(str2, cnt2, Address::times_2));
lea(str1, Address(str1, cnt2, Address::times_2));
negptr(cnt2);
bind(COMPARE_VECTORS);
movdqu(vec1, Address(str1, cnt2, Address::times_2));
movdqu(vec2, Address(str2, cnt2, Address::times_2));
pxor(vec1, vec2);
ptest(vec1, vec1);
jccb(Assembler::notZero, VECTOR_NOT_EQUAL);
addptr(cnt2, 8);
jcc(Assembler::notZero, COMPARE_VECTORS);
jmpb(COMPARE_TAIL);
// Mismatched characters in the vectors
bind(VECTOR_NOT_EQUAL);
lea(str1, Address(str1, cnt2, Address::times_2));
lea(str2, Address(str2, cnt2, Address::times_2));
movl(cnt1, 8);
// Compare tail (< 8 chars), or rescan last vectors to
// find 1st mismatched characters
bind(COMPARE_TAIL);
testl(cnt1, cnt1);
jccb(Assembler::zero, LENGTH_DIFF_LABEL);
movl(cnt2, cnt1);
// Fallthru to tail compare
}
// Shift str2 and str1 to the end of the arrays, negate min
lea(str1, Address(str1, cnt2, Address::times_2, 0));
lea(str2, Address(str2, cnt2, Address::times_2, 0));
negptr(cnt2);
// Compare the rest of the characters
bind(WHILE_HEAD_LABEL);
load_unsigned_short(result, Address(str1, cnt2, Address::times_2, 0));
load_unsigned_short(cnt1, Address(str2, cnt2, Address::times_2, 0));
subl(result, cnt1);
jccb(Assembler::notZero, POP_LABEL);
increment(cnt2);
jcc(Assembler::notZero, WHILE_HEAD_LABEL);
// Strings are equal up to min length. Return the length difference.
bind(LENGTH_DIFF_LABEL);
pop(result);
jmpb(DONE_LABEL);
// Discard the stored length difference
bind(POP_LABEL);
addptr(rsp, wordSize);
// That's it
bind(DONE_LABEL);
}
// Compare char[] arrays aligned to 4 bytes or substrings.
void MacroAssembler::char_arrays_equals(bool is_array_equ, Register ary1, Register ary2,
Register limit, Register result, Register chr,
XMMRegister vec1, XMMRegister vec2) {
Label TRUE_LABEL, FALSE_LABEL, DONE, COMPARE_VECTORS, COMPARE_CHAR;
int length_offset = arrayOopDesc::length_offset_in_bytes();
int base_offset = arrayOopDesc::base_offset_in_bytes(T_CHAR);
// Check the input args
cmpptr(ary1, ary2);
jcc(Assembler::equal, TRUE_LABEL);
if (is_array_equ) {
// Need additional checks for arrays_equals.
andptr(ary1, ary2);
jcc(Assembler::zero, FALSE_LABEL); // One pointer is NULL
// Check the lengths
movl(limit, Address(ary1, length_offset));
cmpl(limit, Address(ary2, length_offset));
jcc(Assembler::notEqual, FALSE_LABEL);
}
// count == 0
testl(limit, limit);
jcc(Assembler::zero, TRUE_LABEL);
if (is_array_equ) {
// Load array address
lea(ary1, Address(ary1, base_offset));
lea(ary2, Address(ary2, base_offset));
}
shll(limit, 1); // byte count != 0
movl(result, limit); // copy
if (UseSSE42Intrinsics) {
// With SSE4.2, use double quad vector compare
Label COMPARE_WIDE_VECTORS, COMPARE_TAIL;
// Compare 16-byte vectors
andl(result, 0x0000000e); // tail count (in bytes)
andl(limit, 0xfffffff0); // vector count (in bytes)
jccb(Assembler::zero, COMPARE_TAIL);
lea(ary1, Address(ary1, limit, Address::times_1));
lea(ary2, Address(ary2, limit, Address::times_1));
negptr(limit);
bind(COMPARE_WIDE_VECTORS);
movdqu(vec1, Address(ary1, limit, Address::times_1));
movdqu(vec2, Address(ary2, limit, Address::times_1));
pxor(vec1, vec2);
ptest(vec1, vec1);
jccb(Assembler::notZero, FALSE_LABEL);
addptr(limit, 16);
jcc(Assembler::notZero, COMPARE_WIDE_VECTORS);
bind(COMPARE_TAIL); // limit is zero
movl(limit, result);
// Fallthru to tail compare
}
// Compare 4-byte vectors
andl(limit, 0xfffffffc); // vector count (in bytes)
jccb(Assembler::zero, COMPARE_CHAR);
lea(ary1, Address(ary1, limit, Address::times_1));
lea(ary2, Address(ary2, limit, Address::times_1));
negptr(limit);
bind(COMPARE_VECTORS);
movl(chr, Address(ary1, limit, Address::times_1));
cmpl(chr, Address(ary2, limit, Address::times_1));
jccb(Assembler::notEqual, FALSE_LABEL);
addptr(limit, 4);
jcc(Assembler::notZero, COMPARE_VECTORS);
// Compare trailing char (final 2 bytes), if any
bind(COMPARE_CHAR);
testl(result, 0x2); // tail char
jccb(Assembler::zero, TRUE_LABEL);
load_unsigned_short(chr, Address(ary1, 0));
load_unsigned_short(limit, Address(ary2, 0));
cmpl(chr, limit);
jccb(Assembler::notEqual, FALSE_LABEL);
bind(TRUE_LABEL);
movl(result, 1); // return true
jmpb(DONE);
bind(FALSE_LABEL);
xorl(result, result); // return false
// That's it
bind(DONE);
}
Assembler::Condition MacroAssembler::negate_condition(Assembler::Condition cond) {
switch (cond) {
// Note some conditions are synonyms for others