mirror of
https://github.com/openjdk/jdk.git
synced 2025-08-28 23:34:52 +02:00
8241040: Support for AVX-512 Ternary Logic Instruction
A new pass has been added which folds expression tree involving vector boolean logic operations into a MacroLogic node. Reviewed-by: vlivanov, neliasso
This commit is contained in:
parent
fb56759d08
commit
5532b27d22
15 changed files with 842 additions and 1 deletions
|
@ -76,6 +76,7 @@
|
|||
#include "utilities/align.hpp"
|
||||
#include "utilities/copy.hpp"
|
||||
#include "utilities/macros.hpp"
|
||||
#include "utilities/resourceHash.hpp"
|
||||
|
||||
|
||||
// -------------------- Compile::mach_constant_base_node -----------------------
|
||||
|
@ -2228,6 +2229,11 @@ void Compile::Optimize() {
|
|||
igvn.optimize();
|
||||
}
|
||||
|
||||
if (C->max_vector_size() > 0) {
|
||||
C->optimize_logic_cones(igvn);
|
||||
igvn.optimize();
|
||||
}
|
||||
|
||||
DEBUG_ONLY( _modified_nodes = NULL; )
|
||||
} // (End scope of igvn; run destructor if necessary for asserts.)
|
||||
|
||||
|
@ -2244,6 +2250,317 @@ void Compile::Optimize() {
|
|||
print_method(PHASE_OPTIMIZE_FINISHED, 2);
|
||||
}
|
||||
|
||||
//---------------------------- Bitwise operation packing optimization ---------------------------
|
||||
|
||||
static bool is_vector_unary_bitwise_op(Node* n) {
|
||||
return n->Opcode() == Op_XorV &&
|
||||
VectorNode::is_vector_bitwise_not_pattern(n);
|
||||
}
|
||||
|
||||
static bool is_vector_binary_bitwise_op(Node* n) {
|
||||
switch (n->Opcode()) {
|
||||
case Op_AndV:
|
||||
case Op_OrV:
|
||||
return true;
|
||||
|
||||
case Op_XorV:
|
||||
return !is_vector_unary_bitwise_op(n);
|
||||
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
static bool is_vector_ternary_bitwise_op(Node* n) {
|
||||
return n->Opcode() == Op_MacroLogicV;
|
||||
}
|
||||
|
||||
static bool is_vector_bitwise_op(Node* n) {
|
||||
return is_vector_unary_bitwise_op(n) ||
|
||||
is_vector_binary_bitwise_op(n) ||
|
||||
is_vector_ternary_bitwise_op(n);
|
||||
}
|
||||
|
||||
static bool is_vector_bitwise_cone_root(Node* n) {
|
||||
if (!is_vector_bitwise_op(n)) {
|
||||
return false;
|
||||
}
|
||||
for (DUIterator_Fast imax, i = n->fast_outs(imax); i < imax; i++) {
|
||||
if (is_vector_bitwise_op(n->fast_out(i))) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
static uint collect_unique_inputs(Node* n, Unique_Node_List& partition, Unique_Node_List& inputs) {
|
||||
uint cnt = 0;
|
||||
if (is_vector_bitwise_op(n)) {
|
||||
if (VectorNode::is_vector_bitwise_not_pattern(n)) {
|
||||
for (uint i = 1; i < n->req(); i++) {
|
||||
Node* in = n->in(i);
|
||||
bool skip = VectorNode::is_all_ones_vector(in);
|
||||
if (!skip && !inputs.member(in)) {
|
||||
inputs.push(in);
|
||||
cnt++;
|
||||
}
|
||||
}
|
||||
assert(cnt <= 1, "not unary");
|
||||
} else {
|
||||
uint last_req = n->req();
|
||||
if (is_vector_ternary_bitwise_op(n)) {
|
||||
last_req = n->req() - 1; // skip last input
|
||||
}
|
||||
for (uint i = 1; i < last_req; i++) {
|
||||
Node* def = n->in(i);
|
||||
if (!inputs.member(def)) {
|
||||
inputs.push(def);
|
||||
cnt++;
|
||||
}
|
||||
}
|
||||
}
|
||||
partition.push(n);
|
||||
} else { // not a bitwise operations
|
||||
if (!inputs.member(n)) {
|
||||
inputs.push(n);
|
||||
cnt++;
|
||||
}
|
||||
}
|
||||
return cnt;
|
||||
}
|
||||
|
||||
void Compile::collect_logic_cone_roots(Unique_Node_List& list) {
|
||||
Unique_Node_List useful_nodes;
|
||||
C->identify_useful_nodes(useful_nodes);
|
||||
|
||||
for (uint i = 0; i < useful_nodes.size(); i++) {
|
||||
Node* n = useful_nodes.at(i);
|
||||
if (is_vector_bitwise_cone_root(n)) {
|
||||
list.push(n);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Node* Compile::xform_to_MacroLogicV(PhaseIterGVN& igvn,
|
||||
const TypeVect* vt,
|
||||
Unique_Node_List& partition,
|
||||
Unique_Node_List& inputs) {
|
||||
assert(partition.size() == 2 || partition.size() == 3, "not supported");
|
||||
assert(inputs.size() == 2 || inputs.size() == 3, "not supported");
|
||||
assert(Matcher::match_rule_supported_vector(Op_MacroLogicV, vt->length(), vt->element_basic_type()), "not supported");
|
||||
|
||||
Node* in1 = inputs.at(0);
|
||||
Node* in2 = inputs.at(1);
|
||||
Node* in3 = (inputs.size() == 3 ? inputs.at(2) : in2);
|
||||
|
||||
uint func = compute_truth_table(partition, inputs);
|
||||
return igvn.transform(MacroLogicVNode::make(igvn, in3, in2, in1, func, vt));
|
||||
}
|
||||
|
||||
static uint extract_bit(uint func, uint pos) {
|
||||
return (func & (1 << pos)) >> pos;
|
||||
}
|
||||
|
||||
//
|
||||
// A macro logic node represents a truth table. It has 4 inputs,
|
||||
// First three inputs corresponds to 3 columns of a truth table
|
||||
// and fourth input captures the logic function.
|
||||
//
|
||||
// eg. fn = (in1 AND in2) OR in3;
|
||||
//
|
||||
// MacroNode(in1,in2,in3,fn)
|
||||
//
|
||||
// -----------------
|
||||
// in1 in2 in3 fn
|
||||
// -----------------
|
||||
// 0 0 0 0
|
||||
// 0 0 1 1
|
||||
// 0 1 0 0
|
||||
// 0 1 1 1
|
||||
// 1 0 0 0
|
||||
// 1 0 1 1
|
||||
// 1 1 0 1
|
||||
// 1 1 1 1
|
||||
//
|
||||
|
||||
uint Compile::eval_macro_logic_op(uint func, uint in1 , uint in2, uint in3) {
|
||||
int res = 0;
|
||||
for (int i = 0; i < 8; i++) {
|
||||
int bit1 = extract_bit(in1, i);
|
||||
int bit2 = extract_bit(in2, i);
|
||||
int bit3 = extract_bit(in3, i);
|
||||
|
||||
int func_bit_pos = (bit1 << 2 | bit2 << 1 | bit3);
|
||||
int func_bit = extract_bit(func, func_bit_pos);
|
||||
|
||||
res |= func_bit << i;
|
||||
}
|
||||
return res;
|
||||
}
|
||||
|
||||
static uint eval_operand(Node* n, ResourceHashtable<Node*,uint>& eval_map) {
|
||||
assert(n != NULL, "");
|
||||
assert(eval_map.contains(n), "absent");
|
||||
return *(eval_map.get(n));
|
||||
}
|
||||
|
||||
static void eval_operands(Node* n,
|
||||
uint& func1, uint& func2, uint& func3,
|
||||
ResourceHashtable<Node*,uint>& eval_map) {
|
||||
assert(is_vector_bitwise_op(n), "");
|
||||
func1 = eval_operand(n->in(1), eval_map);
|
||||
|
||||
if (is_vector_binary_bitwise_op(n)) {
|
||||
func2 = eval_operand(n->in(2), eval_map);
|
||||
} else if (is_vector_ternary_bitwise_op(n)) {
|
||||
func2 = eval_operand(n->in(2), eval_map);
|
||||
func3 = eval_operand(n->in(3), eval_map);
|
||||
} else {
|
||||
assert(is_vector_unary_bitwise_op(n), "not unary");
|
||||
}
|
||||
}
|
||||
|
||||
uint Compile::compute_truth_table(Unique_Node_List& partition, Unique_Node_List& inputs) {
|
||||
assert(inputs.size() <= 3, "sanity");
|
||||
ResourceMark rm;
|
||||
uint res = 0;
|
||||
ResourceHashtable<Node*,uint> eval_map;
|
||||
|
||||
// Populate precomputed functions for inputs.
|
||||
// Each input corresponds to one column of 3 input truth-table.
|
||||
uint input_funcs[] = { 0xAA, // (_, _, a) -> a
|
||||
0xCC, // (_, b, _) -> b
|
||||
0xF0 }; // (c, _, _) -> c
|
||||
for (uint i = 0; i < inputs.size(); i++) {
|
||||
eval_map.put(inputs.at(i), input_funcs[i]);
|
||||
}
|
||||
|
||||
for (uint i = 0; i < partition.size(); i++) {
|
||||
Node* n = partition.at(i);
|
||||
|
||||
uint func1 = 0, func2 = 0, func3 = 0;
|
||||
eval_operands(n, func1, func2, func3, eval_map);
|
||||
|
||||
switch (n->Opcode()) {
|
||||
case Op_OrV:
|
||||
assert(func3 == 0, "not binary");
|
||||
res = func1 | func2;
|
||||
break;
|
||||
case Op_AndV:
|
||||
assert(func3 == 0, "not binary");
|
||||
res = func1 & func2;
|
||||
break;
|
||||
case Op_XorV:
|
||||
if (VectorNode::is_vector_bitwise_not_pattern(n)) {
|
||||
assert(func2 == 0 && func3 == 0, "not unary");
|
||||
res = (~func1) & 0xFF;
|
||||
} else {
|
||||
assert(func3 == 0, "not binary");
|
||||
res = func1 ^ func2;
|
||||
}
|
||||
break;
|
||||
case Op_MacroLogicV:
|
||||
// Ordering of inputs may change during evaluation of sub-tree
|
||||
// containing MacroLogic node as a child node, thus a re-evaluation
|
||||
// makes sure that function is evaluated in context of current
|
||||
// inputs.
|
||||
res = eval_macro_logic_op(n->in(4)->get_int(), func1, func2, func3);
|
||||
break;
|
||||
|
||||
default: assert(false, "not supported: %s", n->Name());
|
||||
}
|
||||
assert(res <= 0xFF, "invalid");
|
||||
eval_map.put(n, res);
|
||||
}
|
||||
return res;
|
||||
}
|
||||
|
||||
bool Compile::compute_logic_cone(Node* n, Unique_Node_List& partition, Unique_Node_List& inputs) {
|
||||
assert(partition.size() == 0, "not empty");
|
||||
assert(inputs.size() == 0, "not empty");
|
||||
assert(!is_vector_ternary_bitwise_op(n), "not supported");
|
||||
|
||||
bool is_unary_op = is_vector_unary_bitwise_op(n);
|
||||
if (is_unary_op) {
|
||||
assert(collect_unique_inputs(n, partition, inputs) == 1, "not unary");
|
||||
return false; // too few inputs
|
||||
}
|
||||
|
||||
assert(is_vector_binary_bitwise_op(n), "not binary");
|
||||
Node* in1 = n->in(1);
|
||||
Node* in2 = n->in(2);
|
||||
|
||||
int in1_unique_inputs_cnt = collect_unique_inputs(in1, partition, inputs);
|
||||
int in2_unique_inputs_cnt = collect_unique_inputs(in2, partition, inputs);
|
||||
partition.push(n);
|
||||
|
||||
// Too many inputs?
|
||||
if (inputs.size() > 3) {
|
||||
partition.clear();
|
||||
inputs.clear();
|
||||
{ // Recompute in2 inputs
|
||||
Unique_Node_List not_used;
|
||||
in2_unique_inputs_cnt = collect_unique_inputs(in2, not_used, not_used);
|
||||
}
|
||||
// Pick the node with minimum number of inputs.
|
||||
if (in1_unique_inputs_cnt >= 3 && in2_unique_inputs_cnt >= 3) {
|
||||
return false; // still too many inputs
|
||||
}
|
||||
// Recompute partition & inputs.
|
||||
Node* child = (in1_unique_inputs_cnt < in2_unique_inputs_cnt ? in1 : in2);
|
||||
collect_unique_inputs(child, partition, inputs);
|
||||
|
||||
Node* other_input = (in1_unique_inputs_cnt < in2_unique_inputs_cnt ? in2 : in1);
|
||||
inputs.push(other_input);
|
||||
|
||||
partition.push(n);
|
||||
}
|
||||
|
||||
return (partition.size() == 2 || partition.size() == 3) &&
|
||||
(inputs.size() == 2 || inputs.size() == 3);
|
||||
}
|
||||
|
||||
void Compile::process_logic_cone_root(PhaseIterGVN &igvn, Node *n, VectorSet &visited) {
|
||||
assert(is_vector_bitwise_op(n), "not a root");
|
||||
|
||||
visited.set(n->_idx);
|
||||
|
||||
// 1) Do a DFS walk over the logic cone.
|
||||
for (uint i = 1; i < n->req(); i++) {
|
||||
Node* in = n->in(i);
|
||||
if (!visited.test(in->_idx) && is_vector_bitwise_op(in)) {
|
||||
process_logic_cone_root(igvn, in, visited);
|
||||
}
|
||||
}
|
||||
|
||||
// 2) Bottom up traversal: Merge node[s] with
|
||||
// the parent to form macro logic node.
|
||||
Unique_Node_List partition;
|
||||
Unique_Node_List inputs;
|
||||
if (compute_logic_cone(n, partition, inputs)) {
|
||||
const TypeVect* vt = n->bottom_type()->is_vect();
|
||||
Node* macro_logic = xform_to_MacroLogicV(igvn, vt, partition, inputs);
|
||||
igvn.replace_node(n, macro_logic);
|
||||
}
|
||||
}
|
||||
|
||||
void Compile::optimize_logic_cones(PhaseIterGVN &igvn) {
|
||||
ResourceMark rm;
|
||||
if (Matcher::match_rule_supported(Op_MacroLogicV)) {
|
||||
Unique_Node_List list;
|
||||
collect_logic_cone_roots(list);
|
||||
|
||||
while (list.size() > 0) {
|
||||
Node* n = list.pop();
|
||||
const TypeVect* vt = n->bottom_type()->is_vect();
|
||||
bool supported = Matcher::match_rule_supported_vector(Op_MacroLogicV, vt->length(), vt->element_basic_type());
|
||||
if (supported) {
|
||||
VectorSet visited(comp_arena());
|
||||
process_logic_cone_root(igvn, n, visited);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
//------------------------------Code_Gen---------------------------------------
|
||||
// Given a graph, generate code for it
|
||||
|
@ -4225,6 +4542,7 @@ void CloneMap::dump(node_idx_t key) const {
|
|||
}
|
||||
}
|
||||
|
||||
|
||||
// Move Allocate nodes to the start of the list
|
||||
void Compile::sort_macro_nodes() {
|
||||
int count = macro_count();
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue