mirror of
https://github.com/openjdk/jdk.git
synced 2025-09-19 02:24:40 +02:00
8214451: PPC64/s390: Clean up unused CRC32 prototype and function
Reviewed-by: mdoerr, lucy
This commit is contained in:
parent
d92085431b
commit
d6b70fa2ab
5 changed files with 0 additions and 176 deletions
|
@ -4013,105 +4013,6 @@ void MacroAssembler::update_1word_crc32(Register crc, Register buf, Register tab
|
||||||
xorr(crc, t0, t2); // Now crc contains the final checksum value.
|
xorr(crc, t0, t2); // Now crc contains the final checksum value.
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
* Emits the CRC32 kernel whose main loop consumes mainLoop_stepping (8) bytes per
* iteration, using two update_1word_crc32 steps at buffer displacements 0 and 4.
* Buffers shorter than complexThreshold bytes, the bytes needed to align buf before
* the main loop, and the bytes left over after it go through update_byteLoop_crc32.
|
|
||||||
* @param crc register containing existing CRC (32-bit)
|
|
||||||
* @param buf register pointing to input byte buffer (byte*)
|
|
||||||
* @param len register containing number of bytes
|
|
||||||
* @param table register pointing to CRC table
* @param t0..t3 work registers (t0 doubles as tmp/data, t1 as tmp2)
* @param tc0..tc3 registers handed to crc32_table_columns and update_1word_crc32
* @param invertCRC if true, crc is 1s-complemented both before and after processing
|
|
||||||
*
|
|
||||||
* Uses R9..R12 as work register. Must be saved/restored by caller!
|
|
||||||
*/
|
|
||||||
void MacroAssembler::kernel_crc32_2word(Register crc, Register buf, Register len, Register table,
|
|
||||||
Register t0, Register t1, Register t2, Register t3,
|
|
||||||
Register tc0, Register tc1, Register tc2, Register tc3,
|
|
||||||
bool invertCRC) {
|
|
||||||
assert_different_registers(crc, buf, len, table);
|
|
||||||
|
|
||||||
Label L_mainLoop, L_tail;
|
|
||||||
// tmp and data are both aliases of t0; tmp2 is an alias of t1.
Register tmp = t0;
|
|
||||||
Register data = t0;
|
|
||||||
Register tmp2 = t1;
|
|
||||||
const int mainLoop_stepping = 8;
|
|
||||||
const int tailLoop_stepping = 1;
|
|
||||||
const int log_stepping = exact_log2(mainLoop_stepping);
|
|
||||||
const int mainLoop_alignment = 32; // InputForNewCode > 4 ? InputForNewCode : 32;
|
|
||||||
// Buffers shorter than complexThreshold bytes branch straight to L_tail (see cmpdi/blt below).
const int complexThreshold = 2*mainLoop_stepping;
|
|
||||||
|
|
||||||
// Don't test for len <= 0 here. This pathological case should not occur anyway.
|
|
||||||
// Optimizing for it by adding a test and a branch seems to be a waste of CPU cycles
|
|
||||||
// for all well-behaved cases. The situation itself is detected and handled correctly
|
|
||||||
// within update_byteLoop_crc32.
|
|
||||||
assert(tailLoop_stepping == 1, "check tailLoop_stepping!");
|
|
||||||
|
|
||||||
BLOCK_COMMENT("kernel_crc32_2word {");
|
|
||||||
|
|
||||||
if (invertCRC) {
|
|
||||||
nand(crc, crc, crc); // 1s complement of crc
|
|
||||||
}
|
|
||||||
|
|
||||||
// Check for short (<complexThreshold) buffer.
|
|
||||||
cmpdi(CCR0, len, complexThreshold);
|
|
||||||
blt(CCR0, L_tail);
|
|
||||||
|
|
||||||
// Pre-mainLoop alignment did show a slight (1%) positive effect on performance.
|
|
||||||
// We leave the code in for reference. Maybe we need alignment when we exploit vector instructions.
|
|
||||||
{
|
|
||||||
// Align buf addr to mainLoop_stepping boundary.
|
|
||||||
neg(tmp2, buf); // Calculate # preLoop iterations for alignment.
|
|
||||||
rldicl(tmp2, tmp2, 0, 64-log_stepping); // Rotate tmp2 0 bits, insert into tmp2, anding with mask with 1s from 61..63 (the log_stepping == 3 low bits).
|
|
||||||
|
|
||||||
// complexThreshold and mainLoop_stepping are compile-time constants, so only one branch is emitted.
if (complexThreshold > mainLoop_stepping) {
|
|
||||||
sub(len, len, tmp2); // Remaining bytes for main loop (>=mainLoop_stepping is guaranteed).
|
|
||||||
} else {
|
|
||||||
sub(tmp, len, tmp2); // Remaining bytes for main loop.
|
|
||||||
cmpdi(CCR0, tmp, mainLoop_stepping);
|
|
||||||
blt(CCR0, L_tail); // For less than one mainloop_stepping left, do only tail processing
|
|
||||||
mr(len, tmp); // remaining bytes for main loop (>=mainLoop_stepping is guaranteed).
|
|
||||||
}
|
|
||||||
// Byte-wise pre-loop over the tmp2 alignment bytes.
update_byteLoop_crc32(crc, buf, tmp2, table, data, false);
|
|
||||||
}
|
|
||||||
|
|
||||||
srdi(tmp2, len, log_stepping); // #iterations for mainLoop
|
|
||||||
andi(len, len, mainLoop_stepping-1); // remaining bytes for tailLoop
|
|
||||||
mtctr(tmp2);
|
|
||||||
|
|
||||||
#ifdef VM_LITTLE_ENDIAN
|
|
||||||
Register crc_rv = crc;
|
|
||||||
#else
|
|
||||||
Register crc_rv = tmp; // Load_reverse needs separate registers to work on.
|
|
||||||
// Occupies tmp, but frees up crc.
|
|
||||||
load_reverse_32(crc_rv, crc); // Revert byte order because we are dealing with big-endian data.
|
|
||||||
tmp = crc;
|
|
||||||
#endif
|
|
||||||
|
|
||||||
// A non-zero return value is the amount that is subtracted from table again after the main loop.
int reconstructTableOffset = crc32_table_columns(table, tc0, tc1, tc2, tc3);
|
|
||||||
|
|
||||||
align(mainLoop_alignment); // Octoword-aligned loop address. Shows 2% improvement.
|
|
||||||
BIND(L_mainLoop);
|
|
||||||
// First step: bufDisp 0, bufInc 0. Second step: bufDisp 4, bufInc mainLoop_stepping.
update_1word_crc32(crc_rv, buf, table, 0, 0, crc_rv, t1, t2, t3, tc0, tc1, tc2, tc3);
|
|
||||||
update_1word_crc32(crc_rv, buf, table, 4, mainLoop_stepping, crc_rv, t1, t2, t3, tc0, tc1, tc2, tc3);
|
|
||||||
bdnz(L_mainLoop);
|
|
||||||
|
|
||||||
#ifndef VM_LITTLE_ENDIAN
|
|
||||||
load_reverse_32(crc, crc_rv); // Revert byte order because we are dealing with big-endian data.
|
|
||||||
tmp = crc_rv; // Tmp uses its original register again.
|
|
||||||
#endif
|
|
||||||
|
|
||||||
// Restore original table address for tailLoop.
|
|
||||||
if (reconstructTableOffset != 0) {
|
|
||||||
addi(table, table, -reconstructTableOffset);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Process last few (<complexThreshold) bytes of buffer.
|
|
||||||
BIND(L_tail);
|
|
||||||
update_byteLoop_crc32(crc, buf, len, table, data, false);
|
|
||||||
|
|
||||||
if (invertCRC) {
|
|
||||||
nand(crc, crc, crc); // 1s complement of crc
|
|
||||||
}
|
|
||||||
BLOCK_COMMENT("} kernel_crc32_2word");
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @param crc register containing existing CRC (32-bit)
|
* @param crc register containing existing CRC (32-bit)
|
||||||
* @param buf register pointing to input byte buffer (byte*)
|
* @param buf register pointing to input byte buffer (byte*)
|
||||||
|
|
|
@ -835,10 +835,6 @@ class MacroAssembler: public Assembler {
|
||||||
void update_1word_crc32(Register crc, Register buf, Register table, int bufDisp, int bufInc,
|
void update_1word_crc32(Register crc, Register buf, Register table, int bufDisp, int bufInc,
|
||||||
Register t0, Register t1, Register t2, Register t3,
|
Register t0, Register t1, Register t2, Register t3,
|
||||||
Register tc0, Register tc1, Register tc2, Register tc3);
|
Register tc0, Register tc1, Register tc2, Register tc3);
|
||||||
// CRC32 kernel processing 8 bytes per main-loop iteration (two update_1word_crc32 steps).
// crc/buf/len/table: CRC value, input buffer, byte count, CRC table; t0..t3 and tc0..tc3: work registers;
// invertCRC: 1s-complement crc before and after processing.
void kernel_crc32_2word(Register crc, Register buf, Register len, Register table,
|
|
||||||
Register t0, Register t1, Register t2, Register t3,
|
|
||||||
Register tc0, Register tc1, Register tc2, Register tc3,
|
|
||||||
bool invertCRC);
|
|
||||||
void kernel_crc32_1word(Register crc, Register buf, Register len, Register table,
|
void kernel_crc32_1word(Register crc, Register buf, Register len, Register table,
|
||||||
Register t0, Register t1, Register t2, Register t3,
|
Register t0, Register t1, Register t2, Register t3,
|
||||||
Register tc0, Register tc1, Register tc2, Register tc3,
|
Register tc0, Register tc1, Register tc2, Register tc3,
|
||||||
|
|
|
@ -62,7 +62,6 @@ class ppc64 {
|
||||||
public:
|
public:
|
||||||
|
|
||||||
// CRC32 Intrinsics.
|
// CRC32 Intrinsics.
|
||||||
// Declaration only: neither a definition nor a caller is visible in this view.
// NOTE(review): presumably emits code that loads table_addr into `table`; the role of table_contents is not shown here - confirm.
static void generate_load_table_addr(MacroAssembler* masm, Register table, address table_addr, uint64_t table_contents);
|
|
||||||
static void generate_load_crc_table_addr(MacroAssembler* masm, Register table);
|
static void generate_load_crc_table_addr(MacroAssembler* masm, Register table);
|
||||||
static void generate_load_crc_constants_addr(MacroAssembler* masm, Register table);
|
static void generate_load_crc_constants_addr(MacroAssembler* masm, Register table);
|
||||||
static void generate_load_crc_barret_constants_addr(MacroAssembler* masm, Register table);
|
static void generate_load_crc_barret_constants_addr(MacroAssembler* masm, Register table);
|
||||||
|
|
|
@ -6325,75 +6325,6 @@ void MacroAssembler::update_1word_crc32(Register crc, Register buf, Register tab
|
||||||
lgr_if_needed(crc, t0);
|
lgr_if_needed(crc, t0);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
* Emits the CRC32 kernel whose main loop consumes mainLoop_stepping (8) bytes per
* iteration, using two update_1word_crc32 steps at buffer displacements 0 and 4.
* Buffers shorter than mainLoop_stepping bytes and the bytes left over after the
* main loop go through update_byteLoop_crc32.
|
|
||||||
* @param crc register containing existing CRC (32-bit)
|
|
||||||
* @param buf register pointing to input byte buffer (byte*)
|
|
||||||
* @param len register containing number of bytes
|
|
||||||
* @param table register pointing to CRC table
* @param t0..t3 work registers (t0 doubles as data)
* @param invertCRC if true, crc is 1s-complemented both before and after processing
|
|
||||||
*
|
|
||||||
* uses Z_R10..Z_R13 as work register. Must be saved/restored by caller!
|
|
||||||
*/
|
|
||||||
void MacroAssembler::kernel_crc32_2word(Register crc, Register buf, Register len, Register table,
|
|
||||||
Register t0, Register t1, Register t2, Register t3,
|
|
||||||
bool invertCRC) {
|
|
||||||
assert_different_registers(crc, buf, len, table);
|
|
||||||
|
|
||||||
Label L_mainLoop, L_tail;
|
|
||||||
Register data = t0;
|
|
||||||
// Z_R0 holds the main-loop iteration count (and the alignment count in the disabled block).
Register ctr = Z_R0;
|
|
||||||
const int mainLoop_stepping = 8;
|
|
||||||
// NOTE(review): tailLoop_stepping is not referenced anywhere else in this function.
const int tailLoop_stepping = 1;
|
|
||||||
const int log_stepping = exact_log2(mainLoop_stepping);
|
|
||||||
|
|
||||||
// Don't test for len <= 0 here. This pathological case should not occur anyway.
|
|
||||||
// Optimizing for it by adding a test and a branch seems to be a waste of CPU cycles.
|
|
||||||
// NOTE(review): this comment used to refer to aghi(len, -stepping)/aghi(len, +stepping), which are
|
|
||||||
// not emitted in this function; the guard visible here is the z_srag/z_brnh(L_tail) pair below.
|
|
||||||
|
|
||||||
if (invertCRC) {
|
|
||||||
not_(crc, noreg, false); // 1s complement of crc
|
|
||||||
}
|
|
||||||
|
|
||||||
#if 0
|
|
||||||
{
|
|
||||||
// Pre-mainLoop alignment did not show any positive effect on performance.
|
|
||||||
// We leave the code in for reference. Maybe the vector instructions in z13 depend on alignment.
|
|
||||||
|
|
||||||
z_cghi(len, mainLoop_stepping); // Alignment is useless for short data streams.
|
|
||||||
z_brnh(L_tail);
|
|
||||||
|
|
||||||
// Align buf to word (4-byte) boundary.
|
|
||||||
z_lcr(ctr, buf);
|
|
||||||
rotate_then_insert(ctr, ctr, 62, 63, 0, true); // TODO: should set cc
|
|
||||||
z_sgfr(len, ctr); // Remaining len after alignment.
|
|
||||||
|
|
||||||
update_byteLoop_crc32(crc, buf, ctr, table, data);
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
|
|
||||||
// Check for short (<mainLoop_stepping bytes) buffer.
|
|
||||||
// ctr = len >> log_stepping is the main-loop iteration count; a result that is not > 0 goes to L_tail.
z_srag(ctr, len, log_stepping);
|
|
||||||
z_brnh(L_tail);
|
|
||||||
|
|
||||||
z_lrvr(crc, crc); // Revert byte order because we are dealing with big-endian data.
|
|
||||||
rotate_then_insert(len, len, 64-log_stepping, 63, 0, true); // #bytes for tailLoop
|
|
||||||
|
|
||||||
BIND(L_mainLoop);
|
|
||||||
// First step: bufDisp 0, bufInc 0. Second step: bufDisp 4, bufInc mainLoop_stepping.
update_1word_crc32(crc, buf, table, 0, 0, crc, t1, t2, t3);
|
|
||||||
update_1word_crc32(crc, buf, table, 4, mainLoop_stepping, crc, t1, t2, t3);
|
|
||||||
z_brct(ctr, L_mainLoop); // Iterate.
|
|
||||||
|
|
||||||
z_lrvr(crc, crc); // Revert byte order back to original.
|
|
||||||
|
|
||||||
// Process last few (<8) bytes of buffer.
|
|
||||||
BIND(L_tail);
|
|
||||||
update_byteLoop_crc32(crc, buf, len, table, data);
|
|
||||||
|
|
||||||
if (invertCRC) {
|
|
||||||
not_(crc, noreg, false); // 1s complement of crc
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @param crc register containing existing CRC (32-bit)
|
* @param crc register containing existing CRC (32-bit)
|
||||||
* @param buf register pointing to input byte buffer (byte*)
|
* @param buf register pointing to input byte buffer (byte*)
|
||||||
|
|
|
@ -1056,9 +1056,6 @@ class MacroAssembler: public Assembler {
|
||||||
void kernel_crc32_1word(Register crc, Register buf, Register len, Register table,
|
void kernel_crc32_1word(Register crc, Register buf, Register len, Register table,
|
||||||
Register t0, Register t1, Register t2, Register t3,
|
Register t0, Register t1, Register t2, Register t3,
|
||||||
bool invertCRC);
|
bool invertCRC);
|
||||||
// CRC32 kernel processing 8 bytes per main-loop iteration (two update_1word_crc32 steps).
// crc/buf/len/table: CRC value, input buffer, byte count, CRC table; t0..t3: work registers;
// invertCRC: 1s-complement crc before and after processing.
void kernel_crc32_2word(Register crc, Register buf, Register len, Register table,
|
|
||||||
Register t0, Register t1, Register t2, Register t3,
|
|
||||||
bool invertCRC);
|
|
||||||
|
|
||||||
// Emitters for BigInteger.multiplyToLen intrinsic
|
// Emitters for BigInteger.multiplyToLen intrinsic
|
||||||
// note: length of result array (zlen) is passed on the stack
|
// note: length of result array (zlen) is passed on the stack
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue