8241040: Support for AVX-512 Ternary Logic Instruction

A new pass has been added which folds expression tree involving vector boolean logic operations into a MacroLogic node.

Reviewed-by: vlivanov, neliasso
This commit is contained in:
Jatin Bhateja 2020-04-02 22:38:23 +05:30
parent fb56759d08
commit 5532b27d22
15 changed files with 842 additions and 1 deletions

View file

@ -76,6 +76,7 @@
#include "utilities/align.hpp"
#include "utilities/copy.hpp"
#include "utilities/macros.hpp"
#include "utilities/resourceHash.hpp"
// -------------------- Compile::mach_constant_base_node -----------------------
@ -2228,6 +2229,11 @@ void Compile::Optimize() {
igvn.optimize();
}
if (C->max_vector_size() > 0) {
C->optimize_logic_cones(igvn);
igvn.optimize();
}
DEBUG_ONLY( _modified_nodes = NULL; )
} // (End scope of igvn; run destructor if necessary for asserts.)
@ -2244,6 +2250,317 @@ void Compile::Optimize() {
print_method(PHASE_OPTIMIZE_FINISHED, 2);
}
//---------------------------- Bitwise operation packing optimization ---------------------------
static bool is_vector_unary_bitwise_op(Node* n) {
return n->Opcode() == Op_XorV &&
VectorNode::is_vector_bitwise_not_pattern(n);
}
static bool is_vector_binary_bitwise_op(Node* n) {
switch (n->Opcode()) {
case Op_AndV:
case Op_OrV:
return true;
case Op_XorV:
return !is_vector_unary_bitwise_op(n);
default:
return false;
}
}
static bool is_vector_ternary_bitwise_op(Node* n) {
return n->Opcode() == Op_MacroLogicV;
}
static bool is_vector_bitwise_op(Node* n) {
return is_vector_unary_bitwise_op(n) ||
is_vector_binary_bitwise_op(n) ||
is_vector_ternary_bitwise_op(n);
}
static bool is_vector_bitwise_cone_root(Node* n) {
if (!is_vector_bitwise_op(n)) {
return false;
}
for (DUIterator_Fast imax, i = n->fast_outs(imax); i < imax; i++) {
if (is_vector_bitwise_op(n->fast_out(i))) {
return false;
}
}
return true;
}
static uint collect_unique_inputs(Node* n, Unique_Node_List& partition, Unique_Node_List& inputs) {
uint cnt = 0;
if (is_vector_bitwise_op(n)) {
if (VectorNode::is_vector_bitwise_not_pattern(n)) {
for (uint i = 1; i < n->req(); i++) {
Node* in = n->in(i);
bool skip = VectorNode::is_all_ones_vector(in);
if (!skip && !inputs.member(in)) {
inputs.push(in);
cnt++;
}
}
assert(cnt <= 1, "not unary");
} else {
uint last_req = n->req();
if (is_vector_ternary_bitwise_op(n)) {
last_req = n->req() - 1; // skip last input
}
for (uint i = 1; i < last_req; i++) {
Node* def = n->in(i);
if (!inputs.member(def)) {
inputs.push(def);
cnt++;
}
}
}
partition.push(n);
} else { // not a bitwise operations
if (!inputs.member(n)) {
inputs.push(n);
cnt++;
}
}
return cnt;
}
void Compile::collect_logic_cone_roots(Unique_Node_List& list) {
Unique_Node_List useful_nodes;
C->identify_useful_nodes(useful_nodes);
for (uint i = 0; i < useful_nodes.size(); i++) {
Node* n = useful_nodes.at(i);
if (is_vector_bitwise_cone_root(n)) {
list.push(n);
}
}
}
Node* Compile::xform_to_MacroLogicV(PhaseIterGVN& igvn,
const TypeVect* vt,
Unique_Node_List& partition,
Unique_Node_List& inputs) {
assert(partition.size() == 2 || partition.size() == 3, "not supported");
assert(inputs.size() == 2 || inputs.size() == 3, "not supported");
assert(Matcher::match_rule_supported_vector(Op_MacroLogicV, vt->length(), vt->element_basic_type()), "not supported");
Node* in1 = inputs.at(0);
Node* in2 = inputs.at(1);
Node* in3 = (inputs.size() == 3 ? inputs.at(2) : in2);
uint func = compute_truth_table(partition, inputs);
return igvn.transform(MacroLogicVNode::make(igvn, in3, in2, in1, func, vt));
}
static uint extract_bit(uint func, uint pos) {
return (func & (1 << pos)) >> pos;
}
//
// A macro logic node represents a truth table. It has 4 inputs,
// First three inputs corresponds to 3 columns of a truth table
// and fourth input captures the logic function.
//
// eg. fn = (in1 AND in2) OR in3;
//
// MacroNode(in1,in2,in3,fn)
//
// -----------------
// in1 in2 in3 fn
// -----------------
// 0 0 0 0
// 0 0 1 1
// 0 1 0 0
// 0 1 1 1
// 1 0 0 0
// 1 0 1 1
// 1 1 0 1
// 1 1 1 1
//
uint Compile::eval_macro_logic_op(uint func, uint in1 , uint in2, uint in3) {
int res = 0;
for (int i = 0; i < 8; i++) {
int bit1 = extract_bit(in1, i);
int bit2 = extract_bit(in2, i);
int bit3 = extract_bit(in3, i);
int func_bit_pos = (bit1 << 2 | bit2 << 1 | bit3);
int func_bit = extract_bit(func, func_bit_pos);
res |= func_bit << i;
}
return res;
}
static uint eval_operand(Node* n, ResourceHashtable<Node*,uint>& eval_map) {
assert(n != NULL, "");
assert(eval_map.contains(n), "absent");
return *(eval_map.get(n));
}
static void eval_operands(Node* n,
uint& func1, uint& func2, uint& func3,
ResourceHashtable<Node*,uint>& eval_map) {
assert(is_vector_bitwise_op(n), "");
func1 = eval_operand(n->in(1), eval_map);
if (is_vector_binary_bitwise_op(n)) {
func2 = eval_operand(n->in(2), eval_map);
} else if (is_vector_ternary_bitwise_op(n)) {
func2 = eval_operand(n->in(2), eval_map);
func3 = eval_operand(n->in(3), eval_map);
} else {
assert(is_vector_unary_bitwise_op(n), "not unary");
}
}
uint Compile::compute_truth_table(Unique_Node_List& partition, Unique_Node_List& inputs) {
assert(inputs.size() <= 3, "sanity");
ResourceMark rm;
uint res = 0;
ResourceHashtable<Node*,uint> eval_map;
// Populate precomputed functions for inputs.
// Each input corresponds to one column of 3 input truth-table.
uint input_funcs[] = { 0xAA, // (_, _, a) -> a
0xCC, // (_, b, _) -> b
0xF0 }; // (c, _, _) -> c
for (uint i = 0; i < inputs.size(); i++) {
eval_map.put(inputs.at(i), input_funcs[i]);
}
for (uint i = 0; i < partition.size(); i++) {
Node* n = partition.at(i);
uint func1 = 0, func2 = 0, func3 = 0;
eval_operands(n, func1, func2, func3, eval_map);
switch (n->Opcode()) {
case Op_OrV:
assert(func3 == 0, "not binary");
res = func1 | func2;
break;
case Op_AndV:
assert(func3 == 0, "not binary");
res = func1 & func2;
break;
case Op_XorV:
if (VectorNode::is_vector_bitwise_not_pattern(n)) {
assert(func2 == 0 && func3 == 0, "not unary");
res = (~func1) & 0xFF;
} else {
assert(func3 == 0, "not binary");
res = func1 ^ func2;
}
break;
case Op_MacroLogicV:
// Ordering of inputs may change during evaluation of sub-tree
// containing MacroLogic node as a child node, thus a re-evaluation
// makes sure that function is evaluated in context of current
// inputs.
res = eval_macro_logic_op(n->in(4)->get_int(), func1, func2, func3);
break;
default: assert(false, "not supported: %s", n->Name());
}
assert(res <= 0xFF, "invalid");
eval_map.put(n, res);
}
return res;
}
bool Compile::compute_logic_cone(Node* n, Unique_Node_List& partition, Unique_Node_List& inputs) {
assert(partition.size() == 0, "not empty");
assert(inputs.size() == 0, "not empty");
assert(!is_vector_ternary_bitwise_op(n), "not supported");
bool is_unary_op = is_vector_unary_bitwise_op(n);
if (is_unary_op) {
assert(collect_unique_inputs(n, partition, inputs) == 1, "not unary");
return false; // too few inputs
}
assert(is_vector_binary_bitwise_op(n), "not binary");
Node* in1 = n->in(1);
Node* in2 = n->in(2);
int in1_unique_inputs_cnt = collect_unique_inputs(in1, partition, inputs);
int in2_unique_inputs_cnt = collect_unique_inputs(in2, partition, inputs);
partition.push(n);
// Too many inputs?
if (inputs.size() > 3) {
partition.clear();
inputs.clear();
{ // Recompute in2 inputs
Unique_Node_List not_used;
in2_unique_inputs_cnt = collect_unique_inputs(in2, not_used, not_used);
}
// Pick the node with minimum number of inputs.
if (in1_unique_inputs_cnt >= 3 && in2_unique_inputs_cnt >= 3) {
return false; // still too many inputs
}
// Recompute partition & inputs.
Node* child = (in1_unique_inputs_cnt < in2_unique_inputs_cnt ? in1 : in2);
collect_unique_inputs(child, partition, inputs);
Node* other_input = (in1_unique_inputs_cnt < in2_unique_inputs_cnt ? in2 : in1);
inputs.push(other_input);
partition.push(n);
}
return (partition.size() == 2 || partition.size() == 3) &&
(inputs.size() == 2 || inputs.size() == 3);
}
void Compile::process_logic_cone_root(PhaseIterGVN &igvn, Node *n, VectorSet &visited) {
assert(is_vector_bitwise_op(n), "not a root");
visited.set(n->_idx);
// 1) Do a DFS walk over the logic cone.
for (uint i = 1; i < n->req(); i++) {
Node* in = n->in(i);
if (!visited.test(in->_idx) && is_vector_bitwise_op(in)) {
process_logic_cone_root(igvn, in, visited);
}
}
// 2) Bottom up traversal: Merge node[s] with
// the parent to form macro logic node.
Unique_Node_List partition;
Unique_Node_List inputs;
if (compute_logic_cone(n, partition, inputs)) {
const TypeVect* vt = n->bottom_type()->is_vect();
Node* macro_logic = xform_to_MacroLogicV(igvn, vt, partition, inputs);
igvn.replace_node(n, macro_logic);
}
}
void Compile::optimize_logic_cones(PhaseIterGVN &igvn) {
ResourceMark rm;
if (Matcher::match_rule_supported(Op_MacroLogicV)) {
Unique_Node_List list;
collect_logic_cone_roots(list);
while (list.size() > 0) {
Node* n = list.pop();
const TypeVect* vt = n->bottom_type()->is_vect();
bool supported = Matcher::match_rule_supported_vector(Op_MacroLogicV, vt->length(), vt->element_basic_type());
if (supported) {
VectorSet visited(comp_arena());
process_logic_cone_root(igvn, n, visited);
}
}
}
}
//------------------------------Code_Gen---------------------------------------
// Given a graph, generate code for it
@ -4225,6 +4542,7 @@ void CloneMap::dump(node_idx_t key) const {
}
}
// Move Allocate nodes to the start of the list
void Compile::sort_macro_nodes() {
int count = macro_count();