mirror of
https://github.com/openjdk/jdk.git
synced 2025-08-27 14:54:52 +02:00
8325651
: C2 SuperWord: refactor the dependency graph
Reviewed-by: chagedorn, vlivanov
This commit is contained in:
parent
d451f818cf
commit
ca5ca85d24
5 changed files with 309 additions and 428 deletions
|
@ -46,7 +46,6 @@ SuperWord::SuperWord(const VLoopAnalyzer &vloop_analyzer) :
|
||||||
_node_info(arena(), _vloop.estimated_body_length(), 0, SWNodeInfo::initial), // info needed per node
|
_node_info(arena(), _vloop.estimated_body_length(), 0, SWNodeInfo::initial), // info needed per node
|
||||||
_clone_map(phase()->C->clone_map()), // map of nodes created in cloning
|
_clone_map(phase()->C->clone_map()), // map of nodes created in cloning
|
||||||
_align_to_ref(nullptr), // memory reference to align vectors to
|
_align_to_ref(nullptr), // memory reference to align vectors to
|
||||||
_dg(arena()), // dependence graph
|
|
||||||
_race_possible(false), // cases where SDMU is true
|
_race_possible(false), // cases where SDMU is true
|
||||||
_do_vector_loop(phase()->C->do_vector_loop()), // whether to do vectorization/simd style
|
_do_vector_loop(phase()->C->do_vector_loop()), // whether to do vectorization/simd style
|
||||||
_num_work_vecs(0), // amount of vector work we have
|
_num_work_vecs(0), // amount of vector work we have
|
||||||
|
@ -452,12 +451,6 @@ bool SuperWord::SLP_extract() {
|
||||||
// Ensure extra info is allocated.
|
// Ensure extra info is allocated.
|
||||||
initialize_node_info();
|
initialize_node_info();
|
||||||
|
|
||||||
// build _dg
|
|
||||||
dependence_graph();
|
|
||||||
|
|
||||||
// compute function depth(Node*)
|
|
||||||
compute_max_depth();
|
|
||||||
|
|
||||||
// Attempt vectorization
|
// Attempt vectorization
|
||||||
find_adjacent_refs();
|
find_adjacent_refs();
|
||||||
|
|
||||||
|
@ -749,86 +742,6 @@ int SuperWord::get_iv_adjustment(MemNode* mem_ref) {
|
||||||
return iv_adjustment;
|
return iv_adjustment;
|
||||||
}
|
}
|
||||||
|
|
||||||
//---------------------------dependence_graph---------------------------
|
|
||||||
// Construct dependency graph.
|
|
||||||
// Add dependence edges to load/store nodes for memory dependence
|
|
||||||
// A.out()->DependNode.in(1) and DependNode.out()->B.prec(x)
|
|
||||||
void SuperWord::dependence_graph() {
|
|
||||||
CountedLoopNode *cl = lpt()->_head->as_CountedLoop();
|
|
||||||
assert(cl->is_main_loop(), "SLP should only work on main loops");
|
|
||||||
|
|
||||||
// First, assign a dependence node to each memory node
|
|
||||||
for (int i = 0; i < body().length(); i++ ) {
|
|
||||||
Node* n = body().at(i);
|
|
||||||
if (n->is_Mem() || n->is_memory_phi()) {
|
|
||||||
_dg.make_node(n);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
const GrowableArray<PhiNode*>& mem_slice_head = _vloop_analyzer.memory_slices().heads();
|
|
||||||
const GrowableArray<MemNode*>& mem_slice_tail = _vloop_analyzer.memory_slices().tails();
|
|
||||||
|
|
||||||
ResourceMark rm;
|
|
||||||
GrowableArray<Node*> slice_nodes;
|
|
||||||
|
|
||||||
// For each memory slice, create the dependences
|
|
||||||
for (int i = 0; i < mem_slice_head.length(); i++) {
|
|
||||||
PhiNode* head = mem_slice_head.at(i);
|
|
||||||
MemNode* tail = mem_slice_tail.at(i);
|
|
||||||
|
|
||||||
// Get slice in predecessor order (last is first)
|
|
||||||
_vloop_analyzer.memory_slices().get_slice_in_reverse_order(head, tail, slice_nodes);
|
|
||||||
|
|
||||||
// Make the slice dependent on the root
|
|
||||||
DepMem* slice = _dg.dep(head);
|
|
||||||
_dg.make_edge(_dg.root(), slice);
|
|
||||||
|
|
||||||
// Create a sink for the slice
|
|
||||||
DepMem* slice_sink = _dg.make_node(nullptr);
|
|
||||||
_dg.make_edge(slice_sink, _dg.tail());
|
|
||||||
|
|
||||||
// Now visit each pair of memory ops, creating the edges
|
|
||||||
for (int j = slice_nodes.length() - 1; j >= 0 ; j--) {
|
|
||||||
Node* s1 = slice_nodes.at(j);
|
|
||||||
|
|
||||||
// If no dependency yet, use slice
|
|
||||||
if (_dg.dep(s1)->in_cnt() == 0) {
|
|
||||||
_dg.make_edge(slice, s1);
|
|
||||||
}
|
|
||||||
VPointer p1(s1->as_Mem(), _vloop);
|
|
||||||
bool sink_dependent = true;
|
|
||||||
for (int k = j - 1; k >= 0; k--) {
|
|
||||||
Node* s2 = slice_nodes.at(k);
|
|
||||||
if (s1->is_Load() && s2->is_Load())
|
|
||||||
continue;
|
|
||||||
VPointer p2(s2->as_Mem(), _vloop);
|
|
||||||
|
|
||||||
int cmp = p1.cmp(p2);
|
|
||||||
if (!VPointer::not_equal(cmp)) {
|
|
||||||
// Possibly same address
|
|
||||||
_dg.make_edge(s1, s2);
|
|
||||||
sink_dependent = false;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if (sink_dependent) {
|
|
||||||
_dg.make_edge(s1, slice_sink);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
#ifndef PRODUCT
|
|
||||||
if (is_trace_superword_dependence_graph()) {
|
|
||||||
tty->print_cr("\nDependence graph for slice: %d", head->_idx);
|
|
||||||
for (int q = 0; q < slice_nodes.length(); q++) {
|
|
||||||
_dg.print(slice_nodes.at(q));
|
|
||||||
}
|
|
||||||
tty->cr();
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
|
|
||||||
slice_nodes.clear();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
void VLoopMemorySlices::find_memory_slices() {
|
void VLoopMemorySlices::find_memory_slices() {
|
||||||
assert(_heads.is_empty(), "not yet computed");
|
assert(_heads.is_empty(), "not yet computed");
|
||||||
assert(_tails.is_empty(), "not yet computed");
|
assert(_tails.is_empty(), "not yet computed");
|
||||||
|
@ -861,7 +774,7 @@ void VLoopMemorySlices::print() const {
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
// Get all memory nodes of a slice, in reverse order
|
// Get all memory nodes of a slice, in reverse order
|
||||||
void VLoopMemorySlices::get_slice_in_reverse_order(PhiNode* head, MemNode* tail, GrowableArray<Node*> &slice) const {
|
void VLoopMemorySlices::get_slice_in_reverse_order(PhiNode* head, MemNode* tail, GrowableArray<MemNode*> &slice) const {
|
||||||
assert(slice.is_empty(), "start empty");
|
assert(slice.is_empty(), "start empty");
|
||||||
Node* n = tail;
|
Node* n = tail;
|
||||||
Node* prev = nullptr;
|
Node* prev = nullptr;
|
||||||
|
@ -871,7 +784,7 @@ void VLoopMemorySlices::get_slice_in_reverse_order(PhiNode* head, MemNode* tail,
|
||||||
Node* out = n->fast_out(i);
|
Node* out = n->fast_out(i);
|
||||||
if (out->is_Load()) {
|
if (out->is_Load()) {
|
||||||
if (_vloop.in_bb(out)) {
|
if (_vloop.in_bb(out)) {
|
||||||
slice.push(out);
|
slice.push(out->as_Load());
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
// FIXME
|
// FIXME
|
||||||
|
@ -889,7 +802,7 @@ void VLoopMemorySlices::get_slice_in_reverse_order(PhiNode* head, MemNode* tail,
|
||||||
}//else
|
}//else
|
||||||
}//for
|
}//for
|
||||||
if (n == head) { break; }
|
if (n == head) { break; }
|
||||||
slice.push(n);
|
slice.push(n->as_Mem());
|
||||||
prev = n;
|
prev = n;
|
||||||
assert(n->is_Mem(), "unexpected node %s", n->Name());
|
assert(n->is_Mem(), "unexpected node %s", n->Name());
|
||||||
n = n->in(MemNode::Memory);
|
n = n->in(MemNode::Memory);
|
||||||
|
@ -1001,9 +914,8 @@ bool SuperWord::isomorphic(Node* s1, Node* s2) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
//------------------------------independent---------------------------
|
|
||||||
// Is there no data path from s1 to s2 or s2 to s1?
|
// Is there no data path from s1 to s2 or s2 to s1?
|
||||||
bool SuperWord::independent(Node* s1, Node* s2) {
|
bool VLoopDependencyGraph::independent(Node* s1, Node* s2) const {
|
||||||
int d1 = depth(s1);
|
int d1 = depth(s1);
|
||||||
int d2 = depth(s2);
|
int d2 = depth(s2);
|
||||||
|
|
||||||
|
@ -1024,9 +936,9 @@ bool SuperWord::independent(Node* s1, Node* s2) {
|
||||||
worklist.push(deep);
|
worklist.push(deep);
|
||||||
for (uint i = 0; i < worklist.size(); i++) {
|
for (uint i = 0; i < worklist.size(); i++) {
|
||||||
Node* n = worklist.at(i);
|
Node* n = worklist.at(i);
|
||||||
for (DepPreds preds(n, _dg); !preds.done(); preds.next()) {
|
for (PredsIterator preds(*this, n); !preds.done(); preds.next()) {
|
||||||
Node* pred = preds.current();
|
Node* pred = preds.current();
|
||||||
if (in_bb(pred) && depth(pred) >= min_d) {
|
if (_vloop.in_bb(pred) && depth(pred) >= min_d) {
|
||||||
if (pred == shallow) {
|
if (pred == shallow) {
|
||||||
return false; // found it -> dependent
|
return false; // found it -> dependent
|
||||||
}
|
}
|
||||||
|
@ -1045,7 +957,7 @@ bool SuperWord::independent(Node* s1, Node* s2) {
|
||||||
// is the smallest depth of all nodes from the nodes list. Once we have
|
// is the smallest depth of all nodes from the nodes list. Once we have
|
||||||
// traversed all those nodes, and have not found another node from the
|
// traversed all those nodes, and have not found another node from the
|
||||||
// nodes list, we know that all nodes in the nodes list are independent.
|
// nodes list, we know that all nodes in the nodes list are independent.
|
||||||
bool SuperWord::mutually_independent(const Node_List* nodes) const {
|
bool VLoopDependencyGraph::mutually_independent(const Node_List* nodes) const {
|
||||||
ResourceMark rm;
|
ResourceMark rm;
|
||||||
Unique_Node_List worklist;
|
Unique_Node_List worklist;
|
||||||
VectorSet nodes_set;
|
VectorSet nodes_set;
|
||||||
|
@ -1054,14 +966,14 @@ bool SuperWord::mutually_independent(const Node_List* nodes) const {
|
||||||
Node* n = nodes->at(k);
|
Node* n = nodes->at(k);
|
||||||
min_d = MIN2(min_d, depth(n));
|
min_d = MIN2(min_d, depth(n));
|
||||||
worklist.push(n); // start traversal at all nodes in nodes list
|
worklist.push(n); // start traversal at all nodes in nodes list
|
||||||
nodes_set.set(bb_idx(n));
|
nodes_set.set(_body.bb_idx(n));
|
||||||
}
|
}
|
||||||
for (uint i = 0; i < worklist.size(); i++) {
|
for (uint i = 0; i < worklist.size(); i++) {
|
||||||
Node* n = worklist.at(i);
|
Node* n = worklist.at(i);
|
||||||
for (DepPreds preds(n, _dg); !preds.done(); preds.next()) {
|
for (PredsIterator preds(*this, n); !preds.done(); preds.next()) {
|
||||||
Node* pred = preds.current();
|
Node* pred = preds.current();
|
||||||
if (in_bb(pred) && depth(pred) >= min_d) {
|
if (_vloop.in_bb(pred) && depth(pred) >= min_d) {
|
||||||
if (nodes_set.test(bb_idx(pred))) {
|
if (nodes_set.test(_body.bb_idx(pred))) {
|
||||||
return false; // found one -> dependent
|
return false; // found one -> dependent
|
||||||
}
|
}
|
||||||
worklist.push(pred);
|
worklist.push(pred);
|
||||||
|
@ -1982,16 +1894,16 @@ void SuperWord::verify_packs() {
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
// The PacksetGraph combines the DepPreds graph with the packset. In the PackSet
|
// The PacksetGraph combines the dependency graph with the packset. In the PackSet
|
||||||
// graph, we have two kinds of nodes:
|
// graph, we have two kinds of nodes:
|
||||||
// (1) pack-node: Represents all nodes of some pack p in a single node, which
|
// (1) pack-node: Represents all nodes of some pack p in a single node, which
|
||||||
// shall later become a vector node.
|
// shall later become a vector node.
|
||||||
// (2) scalar-node: Represents a node that is not in any pack.
|
// (2) scalar-node: Represents a node that is not in any pack.
|
||||||
// For any edge (n1, n2) in DepPreds, we add an edge to the PacksetGraph for the
|
// For any edge (n1, n2) in the dependency graph, we add an edge to the PacksetGraph for
|
||||||
// PacksetGraph nodes corresponding to n1 and n2.
|
// the PacksetGraph nodes corresponding to n1 and n2.
|
||||||
// We work from the DepPreds graph, because it gives us all the data-dependencies,
|
// We work from the dependency graph, because it gives us all the data-dependencies,
|
||||||
// as well as more refined memory-dependencies than the C2 graph. DepPreds does
|
// as well as more refined memory-dependencies than the C2 graph. The dependency graph
|
||||||
// not have cycles. But packing nodes can introduce cyclic dependencies. Example:
|
// does not have cycles. But packing nodes can introduce cyclic dependencies. Example:
|
||||||
//
|
//
|
||||||
// +--------+
|
// +--------+
|
||||||
// A -> X | v
|
// A -> X | v
|
||||||
|
@ -2055,11 +1967,10 @@ public:
|
||||||
GrowableArray<int>& out(int pid) { return _out.at(pid - 1); }
|
GrowableArray<int>& out(int pid) { return _out.at(pid - 1); }
|
||||||
bool schedule_success() const { return _schedule_success; }
|
bool schedule_success() const { return _schedule_success; }
|
||||||
|
|
||||||
// Create nodes (from packs and scalar-nodes), and add edges, based on DepPreds.
|
// Create nodes (from packs and scalar-nodes), and add edges, based on the dependency graph.
|
||||||
void build() {
|
void build() {
|
||||||
const GrowableArray<Node_List*>& packset = _slp->packset();
|
const GrowableArray<Node_List*>& packset = _slp->packset();
|
||||||
const GrowableArray<Node*>& body = _slp->body();
|
const GrowableArray<Node*>& body = _slp->body();
|
||||||
const DepGraph& dg = _slp->dg();
|
|
||||||
// Map nodes in packsets
|
// Map nodes in packsets
|
||||||
for (int i = 0; i < packset.length(); i++) {
|
for (int i = 0; i < packset.length(); i++) {
|
||||||
Node_List* p = packset.at(i);
|
Node_List* p = packset.at(i);
|
||||||
|
@ -2096,7 +2007,7 @@ public:
|
||||||
for (uint k = 0; k < p->size(); k++) {
|
for (uint k = 0; k < p->size(); k++) {
|
||||||
Node* n = p->at(k);
|
Node* n = p->at(k);
|
||||||
assert(pid == get_pid(n), "all nodes in pack have same pid");
|
assert(pid == get_pid(n), "all nodes in pack have same pid");
|
||||||
for (DepPreds preds(n, dg); !preds.done(); preds.next()) {
|
for (VLoopDependencyGraph::PredsIterator preds(_slp->dependency_graph(), n); !preds.done(); preds.next()) {
|
||||||
Node* pred = preds.current();
|
Node* pred = preds.current();
|
||||||
int pred_pid = get_pid_or_zero(pred);
|
int pred_pid = get_pid_or_zero(pred);
|
||||||
if (pred_pid == pid && _slp->is_marked_reduction(n)) {
|
if (pred_pid == pid && _slp->is_marked_reduction(n)) {
|
||||||
|
@ -2118,7 +2029,7 @@ public:
|
||||||
if (pid <= max_pid_packset) {
|
if (pid <= max_pid_packset) {
|
||||||
continue; // Only scalar-nodes
|
continue; // Only scalar-nodes
|
||||||
}
|
}
|
||||||
for (DepPreds preds(n, dg); !preds.done(); preds.next()) {
|
for (VLoopDependencyGraph::PredsIterator preds(_slp->dependency_graph(), n); !preds.done(); preds.next()) {
|
||||||
Node* pred = preds.current();
|
Node* pred = preds.current();
|
||||||
int pred_pid = get_pid_or_zero(pred);
|
int pred_pid = get_pid_or_zero(pred);
|
||||||
// Only add edges for mapped nodes (in body)
|
// Only add edges for mapped nodes (in body)
|
||||||
|
@ -2209,7 +2120,7 @@ public:
|
||||||
};
|
};
|
||||||
|
|
||||||
// The C2 graph (specifically the memory graph), needs to be re-ordered.
|
// The C2 graph (specifically the memory graph), needs to be re-ordered.
|
||||||
// (1) Build the PacksetGraph. It combines the DepPreds graph with the
|
// (1) Build the PacksetGraph. It combines the dependency graph with the
|
||||||
// packset. The PacksetGraph gives us the dependencies that must be
|
// packset. The PacksetGraph gives us the dependencies that must be
|
||||||
// respected after scheduling.
|
// respected after scheduling.
|
||||||
// (2) Schedule the PacksetGraph to the memops_schedule, which represents
|
// (2) Schedule the PacksetGraph to the memops_schedule, which represents
|
||||||
|
@ -3042,41 +2953,6 @@ void SuperWord::initialize_node_info() {
|
||||||
grow_node_info(bb_idx(last));
|
grow_node_info(bb_idx(last));
|
||||||
}
|
}
|
||||||
|
|
||||||
//------------------------------compute_max_depth---------------------------
|
|
||||||
// Compute max depth for expressions from beginning of block
|
|
||||||
// Use to prune search paths during test for independence.
|
|
||||||
void SuperWord::compute_max_depth() {
|
|
||||||
int ct = 0;
|
|
||||||
bool again;
|
|
||||||
do {
|
|
||||||
again = false;
|
|
||||||
for (int i = 0; i < body().length(); i++) {
|
|
||||||
Node* n = body().at(i);
|
|
||||||
if (!n->is_Phi()) {
|
|
||||||
int d_orig = depth(n);
|
|
||||||
int d_in = 0;
|
|
||||||
for (DepPreds preds(n, _dg); !preds.done(); preds.next()) {
|
|
||||||
Node* pred = preds.current();
|
|
||||||
if (in_bb(pred)) {
|
|
||||||
d_in = MAX2(d_in, depth(pred));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if (d_in + 1 != d_orig) {
|
|
||||||
set_depth(n, d_in + 1);
|
|
||||||
again = true;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
ct++;
|
|
||||||
} while (again);
|
|
||||||
|
|
||||||
#ifndef PRODUCT
|
|
||||||
if (is_trace_superword_dependence_graph()) {
|
|
||||||
tty->print_cr("compute_max_depth iterated: %d times", ct);
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
}
|
|
||||||
|
|
||||||
BasicType SuperWord::longer_type_for_conversion(Node* n) {
|
BasicType SuperWord::longer_type_for_conversion(Node* n) {
|
||||||
if (!(VectorNode::is_convert_opcode(n->Opcode()) ||
|
if (!(VectorNode::is_convert_opcode(n->Opcode()) ||
|
||||||
requires_long_to_int_conversion(n->Opcode())) ||
|
requires_long_to_int_conversion(n->Opcode())) ||
|
||||||
|
@ -3734,141 +3610,6 @@ void SuperWord::print_stmt(Node* s) {
|
||||||
|
|
||||||
const SWNodeInfo SWNodeInfo::initial;
|
const SWNodeInfo SWNodeInfo::initial;
|
||||||
|
|
||||||
|
|
||||||
// ============================ DepGraph ===========================
|
|
||||||
|
|
||||||
//------------------------------make_node---------------------------
|
|
||||||
// Make a new dependence graph node for an ideal node.
|
|
||||||
DepMem* DepGraph::make_node(Node* node) {
|
|
||||||
DepMem* m = new (_arena) DepMem(node);
|
|
||||||
if (node != nullptr) {
|
|
||||||
assert(_map.at_grow(node->_idx) == nullptr, "one init only");
|
|
||||||
_map.at_put_grow(node->_idx, m);
|
|
||||||
}
|
|
||||||
return m;
|
|
||||||
}
|
|
||||||
|
|
||||||
//------------------------------make_edge---------------------------
|
|
||||||
// Make a new dependence graph edge from dpred -> dsucc
|
|
||||||
DepEdge* DepGraph::make_edge(DepMem* dpred, DepMem* dsucc) {
|
|
||||||
DepEdge* e = new (_arena) DepEdge(dpred, dsucc, dsucc->in_head(), dpred->out_head());
|
|
||||||
dpred->set_out_head(e);
|
|
||||||
dsucc->set_in_head(e);
|
|
||||||
return e;
|
|
||||||
}
|
|
||||||
|
|
||||||
// ========================== DepMem ========================
|
|
||||||
|
|
||||||
//------------------------------in_cnt---------------------------
|
|
||||||
int DepMem::in_cnt() {
|
|
||||||
int ct = 0;
|
|
||||||
for (DepEdge* e = _in_head; e != nullptr; e = e->next_in()) ct++;
|
|
||||||
return ct;
|
|
||||||
}
|
|
||||||
|
|
||||||
//------------------------------out_cnt---------------------------
|
|
||||||
int DepMem::out_cnt() {
|
|
||||||
int ct = 0;
|
|
||||||
for (DepEdge* e = _out_head; e != nullptr; e = e->next_out()) ct++;
|
|
||||||
return ct;
|
|
||||||
}
|
|
||||||
|
|
||||||
//------------------------------print-----------------------------
|
|
||||||
void DepMem::print() {
|
|
||||||
#ifndef PRODUCT
|
|
||||||
tty->print(" DepNode %d (", _node->_idx);
|
|
||||||
for (DepEdge* p = _in_head; p != nullptr; p = p->next_in()) {
|
|
||||||
Node* pred = p->pred()->node();
|
|
||||||
tty->print(" %d", pred != nullptr ? pred->_idx : 0);
|
|
||||||
}
|
|
||||||
tty->print(") [");
|
|
||||||
for (DepEdge* s = _out_head; s != nullptr; s = s->next_out()) {
|
|
||||||
Node* succ = s->succ()->node();
|
|
||||||
tty->print(" %d", succ != nullptr ? succ->_idx : 0);
|
|
||||||
}
|
|
||||||
tty->print_cr(" ]");
|
|
||||||
#endif
|
|
||||||
}
|
|
||||||
|
|
||||||
// =========================== DepEdge =========================
|
|
||||||
|
|
||||||
//------------------------------DepPreds---------------------------
|
|
||||||
void DepEdge::print() {
|
|
||||||
#ifndef PRODUCT
|
|
||||||
tty->print_cr("DepEdge: %d [ %d ]", _pred->node()->_idx, _succ->node()->_idx);
|
|
||||||
#endif
|
|
||||||
}
|
|
||||||
|
|
||||||
// =========================== DepPreds =========================
|
|
||||||
// Iterator over predecessor edges in the dependence graph.
|
|
||||||
|
|
||||||
//------------------------------DepPreds---------------------------
|
|
||||||
DepPreds::DepPreds(Node* n, const DepGraph& dg) {
|
|
||||||
_n = n;
|
|
||||||
_done = false;
|
|
||||||
if (_n->is_Store() || _n->is_Load()) {
|
|
||||||
_next_idx = MemNode::Address;
|
|
||||||
_end_idx = n->req();
|
|
||||||
_dep_next = dg.dep(_n)->in_head();
|
|
||||||
} else if (_n->is_Mem()) {
|
|
||||||
_next_idx = 0;
|
|
||||||
_end_idx = 0;
|
|
||||||
_dep_next = dg.dep(_n)->in_head();
|
|
||||||
} else {
|
|
||||||
_next_idx = 1;
|
|
||||||
_end_idx = _n->req();
|
|
||||||
_dep_next = nullptr;
|
|
||||||
}
|
|
||||||
next();
|
|
||||||
}
|
|
||||||
|
|
||||||
//------------------------------next---------------------------
|
|
||||||
void DepPreds::next() {
|
|
||||||
if (_dep_next != nullptr) {
|
|
||||||
_current = _dep_next->pred()->node();
|
|
||||||
_dep_next = _dep_next->next_in();
|
|
||||||
} else if (_next_idx < _end_idx) {
|
|
||||||
_current = _n->in(_next_idx++);
|
|
||||||
} else {
|
|
||||||
_done = true;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// =========================== DepSuccs =========================
|
|
||||||
// Iterator over successor edges in the dependence graph.
|
|
||||||
|
|
||||||
//------------------------------DepSuccs---------------------------
|
|
||||||
DepSuccs::DepSuccs(Node* n, DepGraph& dg) {
|
|
||||||
_n = n;
|
|
||||||
_done = false;
|
|
||||||
if (_n->is_Load()) {
|
|
||||||
_next_idx = 0;
|
|
||||||
_end_idx = _n->outcnt();
|
|
||||||
_dep_next = dg.dep(_n)->out_head();
|
|
||||||
} else if (_n->is_Mem() || _n->is_memory_phi()) {
|
|
||||||
_next_idx = 0;
|
|
||||||
_end_idx = 0;
|
|
||||||
_dep_next = dg.dep(_n)->out_head();
|
|
||||||
} else {
|
|
||||||
_next_idx = 0;
|
|
||||||
_end_idx = _n->outcnt();
|
|
||||||
_dep_next = nullptr;
|
|
||||||
}
|
|
||||||
next();
|
|
||||||
}
|
|
||||||
|
|
||||||
//-------------------------------next---------------------------
|
|
||||||
void DepSuccs::next() {
|
|
||||||
if (_dep_next != nullptr) {
|
|
||||||
_current = _dep_next->succ()->node();
|
|
||||||
_dep_next = _dep_next->next_out();
|
|
||||||
} else if (_next_idx < _end_idx) {
|
|
||||||
_current = _n->raw_out(_next_idx++);
|
|
||||||
} else {
|
|
||||||
_done = true;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
//
|
//
|
||||||
// --------------------------------- vectorization/simd -----------------------------------
|
// --------------------------------- vectorization/simd -----------------------------------
|
||||||
//
|
//
|
||||||
|
|
|
@ -57,128 +57,6 @@
|
||||||
|
|
||||||
class VPointer;
|
class VPointer;
|
||||||
|
|
||||||
// ========================= Dependence Graph =====================
|
|
||||||
|
|
||||||
class DepMem;
|
|
||||||
|
|
||||||
//------------------------------DepEdge---------------------------
|
|
||||||
// An edge in the dependence graph. The edges incident to a dependence
|
|
||||||
// node are threaded through _next_in for incoming edges and _next_out
|
|
||||||
// for outgoing edges.
|
|
||||||
class DepEdge : public ArenaObj {
|
|
||||||
protected:
|
|
||||||
DepMem* _pred;
|
|
||||||
DepMem* _succ;
|
|
||||||
DepEdge* _next_in; // list of in edges, null terminated
|
|
||||||
DepEdge* _next_out; // list of out edges, null terminated
|
|
||||||
|
|
||||||
public:
|
|
||||||
DepEdge(DepMem* pred, DepMem* succ, DepEdge* next_in, DepEdge* next_out) :
|
|
||||||
_pred(pred), _succ(succ), _next_in(next_in), _next_out(next_out) {}
|
|
||||||
|
|
||||||
DepEdge* next_in() { return _next_in; }
|
|
||||||
DepEdge* next_out() { return _next_out; }
|
|
||||||
DepMem* pred() { return _pred; }
|
|
||||||
DepMem* succ() { return _succ; }
|
|
||||||
|
|
||||||
void print();
|
|
||||||
};
|
|
||||||
|
|
||||||
//------------------------------DepMem---------------------------
|
|
||||||
// A node in the dependence graph. _in_head starts the threaded list of
|
|
||||||
// incoming edges, and _out_head starts the list of outgoing edges.
|
|
||||||
class DepMem : public ArenaObj {
|
|
||||||
protected:
|
|
||||||
Node* _node; // Corresponding ideal node
|
|
||||||
DepEdge* _in_head; // Head of list of in edges, null terminated
|
|
||||||
DepEdge* _out_head; // Head of list of out edges, null terminated
|
|
||||||
|
|
||||||
public:
|
|
||||||
DepMem(Node* node) : _node(node), _in_head(nullptr), _out_head(nullptr) {}
|
|
||||||
|
|
||||||
Node* node() { return _node; }
|
|
||||||
DepEdge* in_head() { return _in_head; }
|
|
||||||
DepEdge* out_head() { return _out_head; }
|
|
||||||
void set_in_head(DepEdge* hd) { _in_head = hd; }
|
|
||||||
void set_out_head(DepEdge* hd) { _out_head = hd; }
|
|
||||||
|
|
||||||
int in_cnt(); // Incoming edge count
|
|
||||||
int out_cnt(); // Outgoing edge count
|
|
||||||
|
|
||||||
void print();
|
|
||||||
};
|
|
||||||
|
|
||||||
//------------------------------DepGraph---------------------------
|
|
||||||
class DepGraph {
|
|
||||||
protected:
|
|
||||||
Arena* _arena;
|
|
||||||
GrowableArray<DepMem*> _map;
|
|
||||||
DepMem* _root;
|
|
||||||
DepMem* _tail;
|
|
||||||
|
|
||||||
public:
|
|
||||||
DepGraph(Arena* a) : _arena(a), _map(a, 8, 0, nullptr) {
|
|
||||||
_root = new (_arena) DepMem(nullptr);
|
|
||||||
_tail = new (_arena) DepMem(nullptr);
|
|
||||||
}
|
|
||||||
|
|
||||||
DepMem* root() { return _root; }
|
|
||||||
DepMem* tail() { return _tail; }
|
|
||||||
|
|
||||||
// Return dependence node corresponding to an ideal node
|
|
||||||
DepMem* dep(Node* node) const { return _map.at(node->_idx); }
|
|
||||||
|
|
||||||
// Make a new dependence graph node for an ideal node.
|
|
||||||
DepMem* make_node(Node* node);
|
|
||||||
|
|
||||||
// Make a new dependence graph edge dprec->dsucc
|
|
||||||
DepEdge* make_edge(DepMem* dpred, DepMem* dsucc);
|
|
||||||
|
|
||||||
DepEdge* make_edge(Node* pred, Node* succ) { return make_edge(dep(pred), dep(succ)); }
|
|
||||||
DepEdge* make_edge(DepMem* pred, Node* succ) { return make_edge(pred, dep(succ)); }
|
|
||||||
DepEdge* make_edge(Node* pred, DepMem* succ) { return make_edge(dep(pred), succ); }
|
|
||||||
|
|
||||||
void print(Node* n) { dep(n)->print(); }
|
|
||||||
void print(DepMem* d) { d->print(); }
|
|
||||||
};
|
|
||||||
|
|
||||||
//------------------------------DepPreds---------------------------
|
|
||||||
// Iterator over predecessors in the dependence graph and
|
|
||||||
// non-memory-graph inputs of ideal nodes.
|
|
||||||
class DepPreds : public StackObj {
|
|
||||||
private:
|
|
||||||
Node* _n;
|
|
||||||
int _next_idx, _end_idx;
|
|
||||||
DepEdge* _dep_next;
|
|
||||||
Node* _current;
|
|
||||||
bool _done;
|
|
||||||
|
|
||||||
public:
|
|
||||||
DepPreds(Node* n, const DepGraph& dg);
|
|
||||||
Node* current() { return _current; }
|
|
||||||
bool done() { return _done; }
|
|
||||||
void next();
|
|
||||||
};
|
|
||||||
|
|
||||||
//------------------------------DepSuccs---------------------------
|
|
||||||
// Iterator over successors in the dependence graph and
|
|
||||||
// non-memory-graph outputs of ideal nodes.
|
|
||||||
class DepSuccs : public StackObj {
|
|
||||||
private:
|
|
||||||
Node* _n;
|
|
||||||
int _next_idx, _end_idx;
|
|
||||||
DepEdge* _dep_next;
|
|
||||||
Node* _current;
|
|
||||||
bool _done;
|
|
||||||
|
|
||||||
public:
|
|
||||||
DepSuccs(Node* n, DepGraph& dg);
|
|
||||||
Node* current() { return _current; }
|
|
||||||
bool done() { return _done; }
|
|
||||||
void next();
|
|
||||||
};
|
|
||||||
|
|
||||||
|
|
||||||
// ========================= SuperWord =====================
|
// ========================= SuperWord =====================
|
||||||
|
|
||||||
// -----------------------------SWNodeInfo---------------------------------
|
// -----------------------------SWNodeInfo---------------------------------
|
||||||
|
@ -186,10 +64,9 @@ public:
|
||||||
class SWNodeInfo {
|
class SWNodeInfo {
|
||||||
public:
|
public:
|
||||||
int _alignment; // memory alignment for a node
|
int _alignment; // memory alignment for a node
|
||||||
int _depth; // Max expression (DAG) depth from block start
|
|
||||||
Node_List* _my_pack; // pack containing this node
|
Node_List* _my_pack; // pack containing this node
|
||||||
|
|
||||||
SWNodeInfo() : _alignment(-1), _depth(0), _my_pack(nullptr) {}
|
SWNodeInfo() : _alignment(-1), _my_pack(nullptr) {}
|
||||||
static const SWNodeInfo initial;
|
static const SWNodeInfo initial;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -212,8 +89,6 @@ class SuperWord : public ResourceObj {
|
||||||
CloneMap& _clone_map; // map of nodes created in cloning
|
CloneMap& _clone_map; // map of nodes created in cloning
|
||||||
MemNode const* _align_to_ref; // Memory reference that pre-loop will align to
|
MemNode const* _align_to_ref; // Memory reference that pre-loop will align to
|
||||||
|
|
||||||
DepGraph _dg; // Dependence graph
|
|
||||||
|
|
||||||
public:
|
public:
|
||||||
SuperWord(const VLoopAnalyzer &vloop_analyzer);
|
SuperWord(const VLoopAnalyzer &vloop_analyzer);
|
||||||
|
|
||||||
|
@ -280,6 +155,19 @@ class SuperWord : public ResourceObj {
|
||||||
return _vloop_analyzer.types().vector_width_in_bytes(n);
|
return _vloop_analyzer.types().vector_width_in_bytes(n);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// VLoopDependencyGraph Accessors
|
||||||
|
const VLoopDependencyGraph& dependency_graph() const {
|
||||||
|
return _vloop_analyzer.dependency_graph();
|
||||||
|
}
|
||||||
|
|
||||||
|
bool independent(Node* n1, Node* n2) const {
|
||||||
|
return _vloop_analyzer.dependency_graph().independent(n1, n2);
|
||||||
|
}
|
||||||
|
|
||||||
|
bool mutually_independent(const Node_List* nodes) const {
|
||||||
|
return _vloop_analyzer.dependency_graph().mutually_independent(nodes);
|
||||||
|
}
|
||||||
|
|
||||||
#ifndef PRODUCT
|
#ifndef PRODUCT
|
||||||
// TraceAutoVectorization and TraceSuperWord
|
// TraceAutoVectorization and TraceSuperWord
|
||||||
bool is_trace_superword_alignment() const {
|
bool is_trace_superword_alignment() const {
|
||||||
|
@ -287,11 +175,6 @@ class SuperWord : public ResourceObj {
|
||||||
return _vloop.vtrace().is_trace(TraceAutoVectorizationTag::SW_ALIGNMENT);
|
return _vloop.vtrace().is_trace(TraceAutoVectorizationTag::SW_ALIGNMENT);
|
||||||
}
|
}
|
||||||
|
|
||||||
bool is_trace_superword_dependence_graph() const {
|
|
||||||
return TraceSuperWord ||
|
|
||||||
_vloop.vtrace().is_trace(TraceAutoVectorizationTag::SW_DEPENDENCE_GRAPH);
|
|
||||||
}
|
|
||||||
|
|
||||||
bool is_trace_superword_adjacent_memops() const {
|
bool is_trace_superword_adjacent_memops() const {
|
||||||
return TraceSuperWord ||
|
return TraceSuperWord ||
|
||||||
_vloop.vtrace().is_trace(TraceAutoVectorizationTag::SW_ADJACENT_MEMOPS);
|
_vloop.vtrace().is_trace(TraceAutoVectorizationTag::SW_ADJACENT_MEMOPS);
|
||||||
|
@ -321,7 +204,6 @@ class SuperWord : public ResourceObj {
|
||||||
return TraceSuperWord ||
|
return TraceSuperWord ||
|
||||||
is_trace_align_vector() ||
|
is_trace_align_vector() ||
|
||||||
_vloop.vtrace().is_trace(TraceAutoVectorizationTag::SW_ALIGNMENT) ||
|
_vloop.vtrace().is_trace(TraceAutoVectorizationTag::SW_ALIGNMENT) ||
|
||||||
_vloop.vtrace().is_trace(TraceAutoVectorizationTag::SW_DEPENDENCE_GRAPH) ||
|
|
||||||
_vloop.vtrace().is_trace(TraceAutoVectorizationTag::SW_ADJACENT_MEMOPS) ||
|
_vloop.vtrace().is_trace(TraceAutoVectorizationTag::SW_ADJACENT_MEMOPS) ||
|
||||||
_vloop.vtrace().is_trace(TraceAutoVectorizationTag::SW_REJECTIONS) ||
|
_vloop.vtrace().is_trace(TraceAutoVectorizationTag::SW_REJECTIONS) ||
|
||||||
_vloop.vtrace().is_trace(TraceAutoVectorizationTag::SW_PACKSET) ||
|
_vloop.vtrace().is_trace(TraceAutoVectorizationTag::SW_PACKSET) ||
|
||||||
|
@ -338,7 +220,6 @@ class SuperWord : public ResourceObj {
|
||||||
bool do_vector_loop() { return _do_vector_loop; }
|
bool do_vector_loop() { return _do_vector_loop; }
|
||||||
|
|
||||||
const GrowableArray<Node_List*>& packset() const { return _packset; }
|
const GrowableArray<Node_List*>& packset() const { return _packset; }
|
||||||
const DepGraph& dg() const { return _dg; }
|
|
||||||
private:
|
private:
|
||||||
bool _race_possible; // In cases where SDMU is true
|
bool _race_possible; // In cases where SDMU is true
|
||||||
bool _do_vector_loop; // whether to do vectorization/simd style
|
bool _do_vector_loop; // whether to do vectorization/simd style
|
||||||
|
@ -362,10 +243,6 @@ class SuperWord : public ResourceObj {
|
||||||
int alignment(Node* n) { return _node_info.adr_at(bb_idx(n))->_alignment; }
|
int alignment(Node* n) { return _node_info.adr_at(bb_idx(n))->_alignment; }
|
||||||
void set_alignment(Node* n, int a) { int i = bb_idx(n); grow_node_info(i); _node_info.adr_at(i)->_alignment = a; }
|
void set_alignment(Node* n, int a) { int i = bb_idx(n); grow_node_info(i); _node_info.adr_at(i)->_alignment = a; }
|
||||||
|
|
||||||
// Max expression (DAG) depth from beginning of the block for each node
|
|
||||||
int depth(Node* n) const { return _node_info.adr_at(bb_idx(n))->_depth; }
|
|
||||||
void set_depth(Node* n, int d) { int i = bb_idx(n); grow_node_info(i); _node_info.adr_at(i)->_depth = d; }
|
|
||||||
|
|
||||||
// my_pack
|
// my_pack
|
||||||
public:
|
public:
|
||||||
Node_List* my_pack(Node* n) { return !in_bb(n) ? nullptr : _node_info.adr_at(bb_idx(n))->_my_pack; }
|
Node_List* my_pack(Node* n) { return !in_bb(n) ? nullptr : _node_info.adr_at(bb_idx(n))->_my_pack; }
|
||||||
|
@ -387,8 +264,6 @@ private:
|
||||||
MemNode* find_align_to_ref(Node_List &memops, int &idx);
|
MemNode* find_align_to_ref(Node_List &memops, int &idx);
|
||||||
// Calculate loop's iv adjustment for this memory ops.
|
// Calculate loop's iv adjustment for this memory ops.
|
||||||
int get_iv_adjustment(MemNode* mem);
|
int get_iv_adjustment(MemNode* mem);
|
||||||
// Construct dependency graph.
|
|
||||||
void dependence_graph();
|
|
||||||
|
|
||||||
// Can s1 and s2 be in a pack with s1 immediately preceding s2 and s1 aligned at "align"
|
// Can s1 and s2 be in a pack with s1 immediately preceding s2 and s1 aligned at "align"
|
||||||
bool stmts_can_pack(Node* s1, Node* s2, int align);
|
bool stmts_can_pack(Node* s1, Node* s2, int align);
|
||||||
|
@ -398,10 +273,6 @@ private:
|
||||||
bool are_adjacent_refs(Node* s1, Node* s2);
|
bool are_adjacent_refs(Node* s1, Node* s2);
|
||||||
// Are s1 and s2 similar?
|
// Are s1 and s2 similar?
|
||||||
bool isomorphic(Node* s1, Node* s2);
|
bool isomorphic(Node* s1, Node* s2);
|
||||||
// Is there no data path from s1 to s2 or s2 to s1?
|
|
||||||
bool independent(Node* s1, Node* s2);
|
|
||||||
// Are all nodes in nodes list mutually independent?
|
|
||||||
bool mutually_independent(const Node_List* nodes) const;
|
|
||||||
// For a node pair (s1, s2) which is isomorphic and independent,
|
// For a node pair (s1, s2) which is isomorphic and independent,
|
||||||
// do s1 and s2 have similar input edges?
|
// do s1 and s2 have similar input edges?
|
||||||
bool have_similar_inputs(Node* s1, Node* s2);
|
bool have_similar_inputs(Node* s1, Node* s2);
|
||||||
|
|
|
@ -35,8 +35,8 @@
|
||||||
flags(MEMORY_SLICES, "Trace VLoopMemorySlices") \
|
flags(MEMORY_SLICES, "Trace VLoopMemorySlices") \
|
||||||
flags(BODY, "Trace VLoopBody") \
|
flags(BODY, "Trace VLoopBody") \
|
||||||
flags(TYPES, "Trace VLoopTypes") \
|
flags(TYPES, "Trace VLoopTypes") \
|
||||||
|
flags(DEPENDENCY_GRAPH, "Trace VLoopDependencyGraph") \
|
||||||
flags(SW_ALIGNMENT, "Trace SuperWord alignment analysis") \
|
flags(SW_ALIGNMENT, "Trace SuperWord alignment analysis") \
|
||||||
flags(SW_DEPENDENCE_GRAPH, "Trace SuperWord::dependence_graph") \
|
|
||||||
flags(SW_ADJACENT_MEMOPS, "Trace SuperWord::find_adjacent_refs") \
|
flags(SW_ADJACENT_MEMOPS, "Trace SuperWord::find_adjacent_refs") \
|
||||||
flags(SW_REJECTIONS, "Trace SuperWord rejections (non vectorizations)") \
|
flags(SW_REJECTIONS, "Trace SuperWord rejections (non vectorizations)") \
|
||||||
flags(SW_PACKSET, "Trace SuperWord packset at different stages") \
|
flags(SW_PACKSET, "Trace SuperWord packset at different stages") \
|
||||||
|
@ -115,14 +115,12 @@ class TraceAutoVectorizationTagValidator {
|
||||||
_tags.set_range(0, TRACE_AUTO_VECTORIZATION_TAG_NUM);
|
_tags.set_range(0, TRACE_AUTO_VECTORIZATION_TAG_NUM);
|
||||||
} else if (SW_VERBOSE == tag) {
|
} else if (SW_VERBOSE == tag) {
|
||||||
_tags.at_put(SW_ALIGNMENT, set_bit);
|
_tags.at_put(SW_ALIGNMENT, set_bit);
|
||||||
_tags.at_put(SW_DEPENDENCE_GRAPH, set_bit);
|
|
||||||
_tags.at_put(SW_ADJACENT_MEMOPS, set_bit);
|
_tags.at_put(SW_ADJACENT_MEMOPS, set_bit);
|
||||||
_tags.at_put(SW_REJECTIONS, set_bit);
|
_tags.at_put(SW_REJECTIONS, set_bit);
|
||||||
_tags.at_put(SW_PACKSET, set_bit);
|
_tags.at_put(SW_PACKSET, set_bit);
|
||||||
_tags.at_put(SW_INFO, set_bit);
|
_tags.at_put(SW_INFO, set_bit);
|
||||||
_tags.at_put(SW_VERBOSE, set_bit);
|
_tags.at_put(SW_VERBOSE, set_bit);
|
||||||
} else if (SW_INFO == tag) {
|
} else if (SW_INFO == tag) {
|
||||||
_tags.at_put(SW_DEPENDENCE_GRAPH, set_bit);
|
|
||||||
_tags.at_put(SW_ADJACENT_MEMOPS, set_bit);
|
_tags.at_put(SW_ADJACENT_MEMOPS, set_bit);
|
||||||
_tags.at_put(SW_REJECTIONS, set_bit);
|
_tags.at_put(SW_REJECTIONS, set_bit);
|
||||||
_tags.at_put(SW_PACKSET, set_bit);
|
_tags.at_put(SW_PACKSET, set_bit);
|
||||||
|
|
|
@ -161,9 +161,170 @@ VStatus VLoopAnalyzer::setup_submodules_helper() {
|
||||||
|
|
||||||
_types.compute_vector_element_type();
|
_types.compute_vector_element_type();
|
||||||
|
|
||||||
|
_dependency_graph.construct();
|
||||||
|
|
||||||
return VStatus::make_success();
|
return VStatus::make_success();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Construct the dependency graph:
|
||||||
|
// - Data-dependencies: implicit (taken from C2 node inputs).
|
||||||
|
// - Memory-dependencies:
|
||||||
|
// - No edges between different slices.
|
||||||
|
// - No Load-Load edges.
|
||||||
|
// - Inside a slice, add all Store-Load, Load-Store, Store-Store edges,
|
||||||
|
// except if we can prove that the memory does not overlap.
|
||||||
|
void VLoopDependencyGraph::construct() {
|
||||||
|
const GrowableArray<PhiNode*>& mem_slice_heads = _memory_slices.heads();
|
||||||
|
const GrowableArray<MemNode*>& mem_slice_tails = _memory_slices.tails();
|
||||||
|
|
||||||
|
ResourceMark rm;
|
||||||
|
GrowableArray<MemNode*> slice_nodes;
|
||||||
|
GrowableArray<int> memory_pred_edges;
|
||||||
|
|
||||||
|
// For each memory slice, create the memory subgraph
|
||||||
|
for (int i = 0; i < mem_slice_heads.length(); i++) {
|
||||||
|
PhiNode* head = mem_slice_heads.at(i);
|
||||||
|
MemNode* tail = mem_slice_tails.at(i);
|
||||||
|
|
||||||
|
_memory_slices.get_slice_in_reverse_order(head, tail, slice_nodes);
|
||||||
|
|
||||||
|
// In forward order (reverse of reverse), visit all memory nodes in the slice.
|
||||||
|
for (int j = slice_nodes.length() - 1; j >= 0 ; j--) {
|
||||||
|
MemNode* n1 = slice_nodes.at(j);
|
||||||
|
memory_pred_edges.clear();
|
||||||
|
|
||||||
|
VPointer p1(n1, _vloop);
|
||||||
|
// For all memory nodes before it, check if we need to add a memory edge.
|
||||||
|
for (int k = slice_nodes.length() - 1; k > j; k--) {
|
||||||
|
MemNode* n2 = slice_nodes.at(k);
|
||||||
|
|
||||||
|
// Ignore Load-Load dependencies:
|
||||||
|
if (n1->is_Load() && n2->is_Load()) { continue; }
|
||||||
|
|
||||||
|
VPointer p2(n2, _vloop);
|
||||||
|
if (!VPointer::not_equal(p1.cmp(p2))) {
|
||||||
|
// Possibly overlapping memory
|
||||||
|
memory_pred_edges.append(_body.bb_idx(n2));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (memory_pred_edges.is_nonempty()) {
|
||||||
|
// Data edges are taken implicitly from the C2 graph, thus we only add
|
||||||
|
// a dependency node if we have memory edges.
|
||||||
|
add_node(n1, memory_pred_edges);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
slice_nodes.clear();
|
||||||
|
}
|
||||||
|
|
||||||
|
compute_depth();
|
||||||
|
|
||||||
|
NOT_PRODUCT( if (_vloop.is_trace_dependency_graph()) { print(); } )
|
||||||
|
}
|
||||||
|
|
||||||
|
// Allocate a DependencyNode for the memory node n in the arena, giving it the
// (non-empty) list of memory pred edges, and store it under the bb_idx of n.
// A dependency node must not have been created for n before.
void VLoopDependencyGraph::add_node(MemNode* n, GrowableArray<int>& memory_pred_edges) {
  const int idx = _body.bb_idx(n);
  assert(_dependency_nodes.at_grow(idx, nullptr) == nullptr, "not yet created");
  assert(memory_pred_edges.is_nonempty(), "no need to create a node without edges");
  DependencyNode* dep = new (_arena) DependencyNode(n, memory_pred_edges, _arena);
  _dependency_nodes.at_put_grow(idx, dep, nullptr);
}
|
||||||
|
|
||||||
|
// We iterate over the body, which is already ordered by the dependencies, i.e. pred comes
|
||||||
|
// before use. With a single pass, we can compute the depth of every node, since we can
|
||||||
|
// assume that the depth of all preds is already computed when we compute the depth of use.
|
||||||
|
void VLoopDependencyGraph::compute_depth() {
|
||||||
|
for (int i = 0; i < _body.body().length(); i++) {
|
||||||
|
Node* n = _body.body().at(i);
|
||||||
|
int max_pred_depth = 0;
|
||||||
|
if (n->is_Phi()) {
|
||||||
|
for (PredsIterator it(*this, n); !it.done(); it.next()) {
|
||||||
|
Node* pred = it.current();
|
||||||
|
if (_vloop.in_bb(pred)) {
|
||||||
|
max_pred_depth = MAX2(max_pred_depth, depth(pred));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
set_depth(n, max_pred_depth + 1);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#ifndef PRODUCT
|
||||||
|
void VLoopDependencyGraph::print() const {
|
||||||
|
tty->print_cr("\nVLoopDependencyGraph::print:");
|
||||||
|
|
||||||
|
tty->print_cr(" Memory pred edges:");
|
||||||
|
for (int i = 0; i < _body.body().length(); i++) {
|
||||||
|
Node* n = _body.body().at(i);
|
||||||
|
const DependencyNode* dn = dependency_node(n);
|
||||||
|
if (dn != nullptr) {
|
||||||
|
tty->print(" DependencyNode[%d %s:", n->_idx, n->Name());
|
||||||
|
for (uint j = 0; j < dn->memory_pred_edges_length(); j++) {
|
||||||
|
Node* pred = _body.body().at(dn->memory_pred_edge(j));
|
||||||
|
tty->print(" %d %s", pred->_idx, pred->Name());
|
||||||
|
}
|
||||||
|
tty->print_cr("]");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
tty->cr();
|
||||||
|
|
||||||
|
tty->print_cr(" Complete dependency graph:");
|
||||||
|
for (int i = 0; i < _body.body().length(); i++) {
|
||||||
|
Node* n = _body.body().at(i);
|
||||||
|
tty->print(" d%02d Dependencies[%d %s:", depth(n), n->_idx, n->Name());
|
||||||
|
for (PredsIterator it(*this, n); !it.done(); it.next()) {
|
||||||
|
Node* pred = it.current();
|
||||||
|
tty->print(" %d %s", pred->_idx, pred->Name());
|
||||||
|
}
|
||||||
|
tty->print_cr("]");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
// Create the dependency node of memory node n. The given memory pred edges
// (bb_idx values, must not be empty) are copied into an array allocated from
// the arena, so the caller may reuse its list afterwards.
VLoopDependencyGraph::DependencyNode::DependencyNode(MemNode* n,
                                                     GrowableArray<int>& memory_pred_edges,
                                                     Arena* arena) :
  _node(n),
  _memory_pred_edges_length(memory_pred_edges.length()),
  _memory_pred_edges(nullptr)
{
  assert(memory_pred_edges.is_nonempty(), "not empty");
  const uint num_bytes = memory_pred_edges.length() * sizeof(int);
  int* edges = (int*)arena->Amalloc(num_bytes);
  memcpy(edges, memory_pred_edges.adr_at(0), num_bytes);
  _memory_pred_edges = edges;
}
|
||||||
|
|
||||||
|
// Set up the iteration over the preds of node: first the C2 inputs of the
// node, then the memory pred edges of its dependency node (if it has one).
// The iterator is positioned on the first pred right away.
VLoopDependencyGraph::PredsIterator::PredsIterator(const VLoopDependencyGraph& dependency_graph,
                                                   const Node* node) :
  _dependency_graph(dependency_graph),
  _node(node),
  _dependency_node(dependency_graph.dependency_node(node)),
  _current(nullptr),
  _next_pred(0),
  _end_pred(node->req()),
  _next_memory_pred(0),
  _end_memory_pred((_dependency_node != nullptr) ? _dependency_node->memory_pred_edges_length() : 0)
{
  const bool is_load_or_store = _node->is_Store() || _node->is_Load();
  assert(is_load_or_store || !_node->is_Mem(), "only loads and stores are expected mem nodes");

  // Loads and stores start at the address input:
  //   Load:  address
  //   Store: address, value
  // All other nodes only skip the control input.
  _next_pred = is_load_or_store ? MemNode::Address : 1;

  next();
}
|
||||||
|
|
||||||
|
void VLoopDependencyGraph::PredsIterator::next() {
|
||||||
|
if (_next_pred < _end_pred) {
|
||||||
|
_current = _node->in(_next_pred++);
|
||||||
|
} else if (_next_memory_pred < _end_memory_pred) {
|
||||||
|
int pred_bb_idx = _dependency_node->memory_pred_edge(_next_memory_pred++);
|
||||||
|
_current = _dependency_graph._body.body().at(pred_bb_idx);
|
||||||
|
} else {
|
||||||
|
_current = nullptr; // done
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
#ifndef PRODUCT
|
#ifndef PRODUCT
|
||||||
int VPointer::Tracer::_depth = 0;
|
int VPointer::Tracer::_depth = 0;
|
||||||
#endif
|
#endif
|
||||||
|
|
|
@ -150,6 +150,10 @@ public:
|
||||||
return _vtrace.is_trace(TraceAutoVectorizationTag::TYPES);
|
return _vtrace.is_trace(TraceAutoVectorizationTag::TYPES);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
bool is_trace_dependency_graph() const {
|
||||||
|
return _vtrace.is_trace(TraceAutoVectorizationTag::DEPENDENCY_GRAPH);
|
||||||
|
}
|
||||||
|
|
||||||
bool is_trace_pointer_analysis() const {
|
bool is_trace_pointer_analysis() const {
|
||||||
return _vtrace.is_trace(TraceAutoVectorizationTag::POINTER_ANALYSIS);
|
return _vtrace.is_trace(TraceAutoVectorizationTag::POINTER_ANALYSIS);
|
||||||
}
|
}
|
||||||
|
@ -308,7 +312,7 @@ public:
|
||||||
const GrowableArray<MemNode*>& tails() const { return _tails; }
|
const GrowableArray<MemNode*>& tails() const { return _tails; }
|
||||||
|
|
||||||
// Get all memory nodes of a slice, in reverse order
|
// Get all memory nodes of a slice, in reverse order
|
||||||
void get_slice_in_reverse_order(PhiNode* head, MemNode* tail, GrowableArray<Node*>& slice) const;
|
void get_slice_in_reverse_order(PhiNode* head, MemNode* tail, GrowableArray<MemNode*>& slice) const;
|
||||||
|
|
||||||
bool same_memory_slice(MemNode* m1, MemNode* m2) const;
|
bool same_memory_slice(MemNode* m1, MemNode* m2) const;
|
||||||
|
|
||||||
|
@ -441,6 +445,109 @@ private:
|
||||||
const Type* container_type(Node* n) const;
|
const Type* container_type(Node* n) const;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
// Submodule of VLoopAnalyzer.
|
||||||
|
// The dependency graph is used to determine if nodes are independent, and can thus potentially
|
||||||
|
// be executed in parallel. That is a prerequisite for packing nodes into vector operations.
|
||||||
|
// The dependency graph is a combination:
|
||||||
|
// - Data-dependencies: they can directly be taken from the C2 node inputs.
|
||||||
|
// - Memory-dependencies: the edges in the C2 memory-slice are too restrictive: for example all
|
||||||
|
// stores are serialized, even if their memory does not overlap. Thus,
|
||||||
|
// we refine the memory-dependencies (see construct method).
|
||||||
|
class VLoopDependencyGraph : public StackObj {
|
||||||
|
private:
|
||||||
|
class DependencyNode;
|
||||||
|
|
||||||
|
Arena* _arena;
|
||||||
|
const VLoop& _vloop;
|
||||||
|
const VLoopBody& _body;
|
||||||
|
const VLoopMemorySlices& _memory_slices;
|
||||||
|
|
||||||
|
// bb_idx -> DependenceNode*
|
||||||
|
GrowableArray<DependencyNode*> _dependency_nodes;
|
||||||
|
|
||||||
|
// Node depth in DAG: bb_idx -> depth
|
||||||
|
GrowableArray<int> _depths;
|
||||||
|
|
||||||
|
public:
|
||||||
|
VLoopDependencyGraph(Arena* arena,
|
||||||
|
const VLoop& vloop,
|
||||||
|
const VLoopBody& body,
|
||||||
|
const VLoopMemorySlices& memory_slices) :
|
||||||
|
_arena(arena),
|
||||||
|
_vloop(vloop),
|
||||||
|
_body(body),
|
||||||
|
_memory_slices(memory_slices),
|
||||||
|
_dependency_nodes(arena,
|
||||||
|
vloop.estimated_body_length(),
|
||||||
|
vloop.estimated_body_length(),
|
||||||
|
nullptr),
|
||||||
|
_depths(arena,
|
||||||
|
vloop.estimated_body_length(),
|
||||||
|
vloop.estimated_body_length(),
|
||||||
|
0) {}
|
||||||
|
NONCOPYABLE(VLoopDependencyGraph);
|
||||||
|
|
||||||
|
void construct();
|
||||||
|
bool independent(Node* s1, Node* s2) const;
|
||||||
|
bool mutually_independent(const Node_List* nodes) const;
|
||||||
|
|
||||||
|
private:
|
||||||
|
void add_node(MemNode* n, GrowableArray<int>& memory_pred_edges);
|
||||||
|
int depth(const Node* n) const { return _depths.at(_body.bb_idx(n)); }
|
||||||
|
void set_depth(const Node* n, int d) { _depths.at_put(_body.bb_idx(n), d); }
|
||||||
|
void compute_depth();
|
||||||
|
NOT_PRODUCT( void print() const; )
|
||||||
|
|
||||||
|
const DependencyNode* dependency_node(const Node* n) const {
|
||||||
|
return _dependency_nodes.at(_body.bb_idx(n));
|
||||||
|
}
|
||||||
|
|
||||||
|
class DependencyNode : public ArenaObj {
|
||||||
|
private:
|
||||||
|
MemNode* _node; // Corresponding ideal node
|
||||||
|
const uint _memory_pred_edges_length;
|
||||||
|
int* _memory_pred_edges; // memory pred-edges, mapping to bb_idx
|
||||||
|
public:
|
||||||
|
DependencyNode(MemNode* n, GrowableArray<int>& memory_pred_edges, Arena* arena);
|
||||||
|
NONCOPYABLE(DependencyNode);
|
||||||
|
uint memory_pred_edges_length() const { return _memory_pred_edges_length; }
|
||||||
|
|
||||||
|
int memory_pred_edge(uint i) const {
|
||||||
|
assert(i < _memory_pred_edges_length, "bounds check");
|
||||||
|
return _memory_pred_edges[i];
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
public:
|
||||||
|
// Iterator for dependency graph predecessors of a node.
|
||||||
|
class PredsIterator : public StackObj {
|
||||||
|
private:
|
||||||
|
const VLoopDependencyGraph& _dependency_graph;
|
||||||
|
|
||||||
|
const Node* _node;
|
||||||
|
const DependencyNode* _dependency_node;
|
||||||
|
|
||||||
|
Node* _current;
|
||||||
|
|
||||||
|
// Iterate in node->in(i)
|
||||||
|
int _next_pred;
|
||||||
|
int _end_pred;
|
||||||
|
|
||||||
|
// Iterate in dependency_node->memory_pred_edge(i)
|
||||||
|
int _next_memory_pred;
|
||||||
|
int _end_memory_pred;
|
||||||
|
public:
|
||||||
|
PredsIterator(const VLoopDependencyGraph& dependency_graph, const Node* node);
|
||||||
|
NONCOPYABLE(PredsIterator);
|
||||||
|
void next();
|
||||||
|
bool done() const { return _current == nullptr; }
|
||||||
|
Node* current() const {
|
||||||
|
assert(!done(), "not done yet");
|
||||||
|
return _current;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
};
|
||||||
|
|
||||||
// Analyze the loop in preparation for auto-vectorization. This class is
|
// Analyze the loop in preparation for auto-vectorization. This class is
|
||||||
// deliberately structured into many submodules, which are as independent
|
// deliberately structured into many submodules, which are as independent
|
||||||
// as possible, though some submodules do require other submodules.
|
// as possible, though some submodules do require other submodules.
|
||||||
|
@ -463,6 +570,7 @@ private:
|
||||||
VLoopMemorySlices _memory_slices;
|
VLoopMemorySlices _memory_slices;
|
||||||
VLoopBody _body;
|
VLoopBody _body;
|
||||||
VLoopTypes _types;
|
VLoopTypes _types;
|
||||||
|
VLoopDependencyGraph _dependency_graph;
|
||||||
|
|
||||||
public:
|
public:
|
||||||
VLoopAnalyzer(const VLoop& vloop, VSharedData& vshared) :
|
VLoopAnalyzer(const VLoop& vloop, VSharedData& vshared) :
|
||||||
|
@ -472,7 +580,8 @@ public:
|
||||||
_reductions (&_arena, vloop),
|
_reductions (&_arena, vloop),
|
||||||
_memory_slices (&_arena, vloop),
|
_memory_slices (&_arena, vloop),
|
||||||
_body (&_arena, vloop, vshared),
|
_body (&_arena, vloop, vshared),
|
||||||
_types (&_arena, vloop, _body)
|
_types (&_arena, vloop, _body),
|
||||||
|
_dependency_graph(&_arena, vloop, _body, _memory_slices)
|
||||||
{
|
{
|
||||||
_success = setup_submodules();
|
_success = setup_submodules();
|
||||||
}
|
}
|
||||||
|
@ -486,6 +595,7 @@ public:
|
||||||
const VLoopMemorySlices& memory_slices() const { return _memory_slices; }
|
const VLoopMemorySlices& memory_slices() const { return _memory_slices; }
|
||||||
const VLoopBody& body() const { return _body; }
|
const VLoopBody& body() const { return _body; }
|
||||||
const VLoopTypes& types() const { return _types; }
|
const VLoopTypes& types() const { return _types; }
|
||||||
|
const VLoopDependencyGraph& dependency_graph() const { return _dependency_graph; }
|
||||||
|
|
||||||
private:
|
private:
|
||||||
bool setup_submodules();
|
bool setup_submodules();
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue