8214451: PPC64/s390: Clean up unused CRC32 prototype and function

Reviewed-by: mdoerr, lucy
Author: Gustavo Romero
Date:   2018-11-28 13:16:54 -05:00
parent d92085431b
commit d6b70fa2ab
5 changed files with 0 additions and 176 deletions
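
For context on what is being removed: kernel_crc32_2word emitted a table-driven ("slicing-by-four") CRC-32 kernel that consumes eight bytes (two 32-bit words) per iteration via two update_1word_crc32 steps and leaves any remaining bytes to update_byteLoop_crc32. A rough C++ sketch of that scheme is shown below; the table name crc_table, its column order, and the little-endian word loads are illustrative assumptions, not taken from the HotSpot sources.

#include <stdint.h>
#include <string.h>
#include <stddef.h>

// Hypothetical slicing-by-four table: crc_table[0] is the classic
// byte-at-a-time CRC-32 table, crc_table[k] folds k extra zero bytes.
extern const uint32_t crc_table[4][256];

// One "1-word" step, roughly what update_1word_crc32 emits as machine code
// (reflected CRC-32 polynomial, as used by java.util.zip.CRC32).
static inline uint32_t crc32_update_word(uint32_t crc, uint32_t word) {
  crc ^= word;                              // fold 4 data bytes into the crc
  return crc_table[3][ crc        & 0xff] ^
         crc_table[2][(crc >>  8) & 0xff] ^
         crc_table[1][(crc >> 16) & 0xff] ^
         crc_table[0][(crc >> 24) & 0xff];
}

// Two-word main loop: 8 bytes per iteration, as in kernel_crc32_2word.
// Assumes a little-endian host; on big-endian the removed PPC/s390 code
// byte-reverses the CRC around the loop (load_reverse_32 / z_lrvr).
static uint32_t crc32_main_loop_sketch(uint32_t crc, const uint8_t* buf, size_t len) {
  while (len >= 8) {
    uint32_t w0, w1;
    memcpy(&w0, buf, 4);                    // native little-endian word load
    memcpy(&w1, buf + 4, 4);
    crc = crc32_update_word(crc, w0);
    crc = crc32_update_word(crc, w1);
    buf += 8;
    len -= 8;
  }
  return crc;                               // < 8 leftover bytes go to the tail loop
}

The removed PPC variant additionally pre-aligns buf to the 8-byte stepping and caches the four table columns in registers (crc32_table_columns); the s390 variant keeps an equivalent alignment block under #if 0. The byte-wise tail loop is sketched after the s390 removal below.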

src/hotspot/cpu/ppc/macroAssembler_ppc.cpp

@@ -4013,105 +4013,6 @@ void MacroAssembler::update_1word_crc32(Register crc, Register buf, Register tab
  xorr(crc, t0, t2);                      // Now crc contains the final checksum value.
}
/**
 * @param crc register containing existing CRC (32-bit)
 * @param buf register pointing to input byte buffer (byte*)
 * @param len register containing number of bytes
 * @param table register pointing to CRC table
 *
 * Uses R9..R12 as work register. Must be saved/restored by caller!
 */
void MacroAssembler::kernel_crc32_2word(Register crc, Register buf, Register len, Register table,
                                        Register t0, Register t1, Register t2, Register t3,
                                        Register tc0, Register tc1, Register tc2, Register tc3,
                                        bool invertCRC) {
  assert_different_registers(crc, buf, len, table);

  Label L_mainLoop, L_tail;
  Register tmp = t0;
  Register data = t0;
  Register tmp2 = t1;
  const int mainLoop_stepping = 8;
  const int tailLoop_stepping = 1;
  const int log_stepping = exact_log2(mainLoop_stepping);
  const int mainLoop_alignment = 32; // InputForNewCode > 4 ? InputForNewCode : 32;
  const int complexThreshold = 2*mainLoop_stepping;

  // Don't test for len <= 0 here. This pathological case should not occur anyway.
  // Optimizing for it by adding a test and a branch seems to be a waste of CPU cycles
  // for all well-behaved cases. The situation itself is detected and handled correctly
  // within update_byteLoop_crc32.
  assert(tailLoop_stepping == 1, "check tailLoop_stepping!");

  BLOCK_COMMENT("kernel_crc32_2word {");

  if (invertCRC) {
    nand(crc, crc, crc);                  // 1s complement of crc
  }

  // Check for short (<mainLoop_stepping) buffer.
  cmpdi(CCR0, len, complexThreshold);
  blt(CCR0, L_tail);

  // Pre-mainLoop alignment did show a slight (1%) positive effect on performance.
  // We leave the code in for reference. Maybe we need alignment when we exploit vector instructions.
  {
    // Align buf addr to mainLoop_stepping boundary.
    neg(tmp2, buf);                       // Calculate # preLoop iterations for alignment.
    rldicl(tmp2, tmp2, 0, 64-log_stepping); // Rotate tmp2 0 bits, insert into tmp2, anding with mask with 1s from 62..63.

    if (complexThreshold > mainLoop_stepping) {
      sub(len, len, tmp2);                // Remaining bytes for main loop (>=mainLoop_stepping is guaranteed).
    } else {
      sub(tmp, len, tmp2);                // Remaining bytes for main loop.
      cmpdi(CCR0, tmp, mainLoop_stepping);
      blt(CCR0, L_tail);                  // For less than one mainloop_stepping left, do only tail processing
      mr(len, tmp);                       // remaining bytes for main loop (>=mainLoop_stepping is guaranteed).
    }
    update_byteLoop_crc32(crc, buf, tmp2, table, data, false);
  }

  srdi(tmp2, len, log_stepping);          // #iterations for mainLoop
  andi(len, len, mainLoop_stepping-1);    // remaining bytes for tailLoop
  mtctr(tmp2);

#ifdef VM_LITTLE_ENDIAN
  Register crc_rv = crc;
#else
  Register crc_rv = tmp;                  // Load_reverse needs separate registers to work on.
                                          // Occupies tmp, but frees up crc.
  load_reverse_32(crc_rv, crc);           // Revert byte order because we are dealing with big-endian data.
  tmp = crc;
#endif

  int reconstructTableOffset = crc32_table_columns(table, tc0, tc1, tc2, tc3);

  align(mainLoop_alignment);              // Octoword-aligned loop address. Shows 2% improvement.
  BIND(L_mainLoop);
    update_1word_crc32(crc_rv, buf, table, 0, 0, crc_rv, t1, t2, t3, tc0, tc1, tc2, tc3);
    update_1word_crc32(crc_rv, buf, table, 4, mainLoop_stepping, crc_rv, t1, t2, t3, tc0, tc1, tc2, tc3);
  bdnz(L_mainLoop);

#ifndef VM_LITTLE_ENDIAN
  load_reverse_32(crc, crc_rv);           // Revert byte order because we are dealing with big-endian data.
  tmp = crc_rv;                           // Tmp uses it's original register again.
#endif

  // Restore original table address for tailLoop.
  if (reconstructTableOffset != 0) {
    addi(table, table, -reconstructTableOffset);
  }

  // Process last few (<complexThreshold) bytes of buffer.
  BIND(L_tail);
  update_byteLoop_crc32(crc, buf, len, table, data, false);

  if (invertCRC) {
    nand(crc, crc, crc);                  // 1s complement of crc
  }

  BLOCK_COMMENT("} kernel_crc32_2word");
}
/**
 * @param crc register containing existing CRC (32-bit)
 * @param buf register pointing to input byte buffer (byte*)

src/hotspot/cpu/ppc/macroAssembler_ppc.hpp

@@ -835,10 +835,6 @@ class MacroAssembler: public Assembler {
  void update_1word_crc32(Register crc, Register buf, Register table, int bufDisp, int bufInc,
                          Register t0, Register t1, Register t2, Register t3,
                          Register tc0, Register tc1, Register tc2, Register tc3);
  void kernel_crc32_2word(Register crc, Register buf, Register len, Register table,
                          Register t0, Register t1, Register t2, Register t3,
                          Register tc0, Register tc1, Register tc2, Register tc3,
                          bool invertCRC);
  void kernel_crc32_1word(Register crc, Register buf, Register len, Register table,
                          Register t0, Register t1, Register t2, Register t3,
                          Register tc0, Register tc1, Register tc2, Register tc3,

src/hotspot/cpu/ppc/stubRoutines_ppc.hpp

@@ -62,7 +62,6 @@ class ppc64 {
 public:
  // CRC32 Intrinsics.
  static void generate_load_table_addr(MacroAssembler* masm, Register table, address table_addr, uint64_t table_contents);
  static void generate_load_crc_table_addr(MacroAssembler* masm, Register table);
  static void generate_load_crc_constants_addr(MacroAssembler* masm, Register table);
  static void generate_load_crc_barret_constants_addr(MacroAssembler* masm, Register table);

src/hotspot/cpu/s390/macroAssembler_s390.cpp

@@ -6325,75 +6325,6 @@ void MacroAssembler::update_1word_crc32(Register crc, Register buf, Register tab
  lgr_if_needed(crc, t0);
}
/**
 * @param crc register containing existing CRC (32-bit)
 * @param buf register pointing to input byte buffer (byte*)
 * @param len register containing number of bytes
 * @param table register pointing to CRC table
 *
 * uses Z_R10..Z_R13 as work register. Must be saved/restored by caller!
 */
void MacroAssembler::kernel_crc32_2word(Register crc, Register buf, Register len, Register table,
                                        Register t0, Register t1, Register t2, Register t3,
                                        bool invertCRC) {
  assert_different_registers(crc, buf, len, table);

  Label L_mainLoop, L_tail;
  Register data = t0;
  Register ctr = Z_R0;
  const int mainLoop_stepping = 8;
  const int tailLoop_stepping = 1;
  const int log_stepping = exact_log2(mainLoop_stepping);

  // Don't test for len <= 0 here. This pathological case should not occur anyway.
  // Optimizing for it by adding a test and a branch seems to be a waste of CPU cycles.
  // The situation itself is detected and handled correctly by the conditional branches
  // following aghi(len, -stepping) and aghi(len, +stepping).

  if (invertCRC) {
    not_(crc, noreg, false);              // 1s complement of crc
  }

#if 0
  {
    // Pre-mainLoop alignment did not show any positive effect on performance.
    // We leave the code in for reference. Maybe the vector instructions in z13 depend on alignment.

    z_cghi(len, mainLoop_stepping);       // Alignment is useless for short data streams.
    z_brnh(L_tail);

    // Align buf to word (4-byte) boundary.
    z_lcr(ctr, buf);
    rotate_then_insert(ctr, ctr, 62, 63, 0, true); // TODO: should set cc
    z_sgfr(len, ctr);                     // Remaining len after alignment.

    update_byteLoop_crc32(crc, buf, ctr, table, data);
  }
#endif

  // Check for short (<mainLoop_stepping bytes) buffer.
  z_srag(ctr, len, log_stepping);
  z_brnh(L_tail);

  z_lrvr(crc, crc);                       // Revert byte order because we are dealing with big-endian data.
  rotate_then_insert(len, len, 64-log_stepping, 63, 0, true); // #bytes for tailLoop

  BIND(L_mainLoop);
    update_1word_crc32(crc, buf, table, 0, 0, crc, t1, t2, t3);
    update_1word_crc32(crc, buf, table, 4, mainLoop_stepping, crc, t1, t2, t3);
  z_brct(ctr, L_mainLoop);                // Iterate.

  z_lrvr(crc, crc);                       // Revert byte order back to original.

  // Process last few (<8) bytes of buffer.
  BIND(L_tail);
  update_byteLoop_crc32(crc, buf, len, table, data);

  if (invertCRC) {
    not_(crc, noreg, false);              // 1s complement of crc
  }
}
/**
 * @param crc register containing existing CRC (32-bit)
 * @param buf register pointing to input byte buffer (byte*)
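
Both removed variants finish the last few (< 8) bytes with update_byteLoop_crc32 and, when invertCRC is set, take the 1s complement of the CRC once on entry and once on exit of the kernel (nand on PPC, not_ on s390). A byte-wise sketch of that fallback, reusing the hypothetical crc_table[0] column from the sketch near the top of this commit:

// Byte-at-a-time CRC-32 update, roughly what update_byteLoop_crc32 emits;
// crc_table[0] is the standard single-column table assumed earlier.
static uint32_t crc32_byte_loop_sketch(uint32_t crc, const uint8_t* buf, size_t len) {
  while (len-- > 0) {
    crc = (crc >> 8) ^ crc_table[0][(crc ^ *buf++) & 0xff];
  }
  return crc;
}

// Shape of the removed kernel when the pieces are combined:
//   if (invertCRC) crc = ~crc;                      // 1s complement on entry
//   crc = crc32_main_loop_sketch(crc, buf, len & ~size_t(7));
//   crc = crc32_byte_loop_sketch(crc, buf + (len & ~size_t(7)), len & 7);
//   if (invertCRC) crc = ~crc;                      // 1s complement on exit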

src/hotspot/cpu/s390/macroAssembler_s390.hpp

@@ -1056,9 +1056,6 @@ class MacroAssembler: public Assembler {
  void kernel_crc32_1word(Register crc, Register buf, Register len, Register table,
                          Register t0, Register t1, Register t2, Register t3,
                          bool invertCRC);
  void kernel_crc32_2word(Register crc, Register buf, Register len, Register table,
                          Register t0, Register t1, Register t2, Register t3,
                          bool invertCRC);
// Emitters for BigInteger.multiplyToLen intrinsic
// note: length of result array (zlen) is passed on the stack // note: length of result array (zlen) is passed on the stack