Mirror of https://github.com/openjdk/jdk.git (synced 2025-09-21 03:24:38 +02:00)

6743900: frequency based block layout

Post-register allocation pass that drives block layout by edge frequencies.
Reviewed-by: never, kvn

Parent: 7bcfb5965d
Commit: 0e63b7609a
9 changed files with 1003 additions and 134 deletions
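For orientation before reading the diff: the new pass groups basic blocks into traces by walking CFG edges hottest-first, joining two traces whenever a hot edge runs from the tail of one trace to the head of another, and finally orders the traces by frequency (see find_edges, grow_traces, merge_traces and reorder_traces below; the pass is gated by the new BlockLayoutByFrequency flag). The standalone C++ sketch below illustrates only that greedy trace-growing idea under simplified assumptions; BasicBlock, Edge and the vector-based trace bookkeeping are invented for the example and are not HotSpot code.

// Illustrative sketch of frequency-driven trace growth (not HotSpot code).
#include <algorithm>
#include <cstdio>
#include <vector>

struct BasicBlock { int id; double freq; };   // hypothetical block
struct Edge { int from, to; double freq; };   // frequency-weighted CFG edge

static bool hotter(const Edge &a, const Edge &b) { return a.freq > b.freq; }

int main() {
  // Toy CFG: 0 -> 1 (hot), 0 -> 2 (cold), 1 -> 3, 2 -> 3
  std::vector<BasicBlock> blocks = { {0, 1.0}, {1, 0.9}, {2, 0.1}, {3, 1.0} };
  std::vector<Edge> edges = { {0, 1, 0.9}, {0, 2, 0.1}, {1, 3, 0.9}, {2, 3, 0.1} };

  // Every block starts as its own trace; trace_of[] plays the role that
  // the UnionFind plays in PhaseBlockLayout.
  std::vector<std::vector<int> > traces(blocks.size());
  std::vector<int> trace_of(blocks.size());
  for (size_t i = 0; i < blocks.size(); i++) {
    traces[i].push_back((int)i);
    trace_of[i] = (int)i;
  }

  // Visit edges hottest-first, like sorting the CFGEdge list with edge_order().
  std::sort(edges.begin(), edges.end(), hotter);

  for (size_t k = 0; k < edges.size(); k++) {
    const Edge &e = edges[k];
    int ta = trace_of[e.from], tb = trace_of[e.to];
    if (ta == tb) continue;                     // already in the same trace
    if (traces[ta].back() != e.from) continue;  // source must end its trace
    if (traces[tb].front() != e.to) continue;   // target must start its trace
    // Concatenate tb onto ta so the hot edge becomes a fall-through.
    for (size_t j = 0; j < traces[tb].size(); j++) {
      int b = traces[tb][j];
      traces[ta].push_back(b);
      trace_of[b] = ta;
    }
    traces[tb].clear();
  }

  // Print the surviving traces; the real pass then orders them by frequency.
  for (size_t t = 0; t < traces.size(); t++) {
    if (traces[t].empty()) continue;
    for (size_t j = 0; j < traces[t].size(); j++) std::printf("B%d ", traces[t][j]);
    std::printf("\n");
  }
  return 0;
}

For the toy CFG above this prints the hot chain "B0 B1 B3" as one trace and leaves the cold block "B2" in its own trace, which is the fall-through layout the pass tries to produce.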
@@ -57,6 +57,14 @@ void Block_List::insert(uint i, Block *b) {
  _blocks[i] = b;
}

#ifndef PRODUCT
void Block_List::print() {
  for (uint i=0; i < size(); i++) {
    tty->print("B%d ", _blocks[i]->_pre_order);
  }
  tty->print("size = %d\n", size());
}
#endif

//=============================================================================

@@ -66,6 +74,12 @@ uint Block::code_alignment() {
  // Check for Start block
  if( _pre_order == 1 ) return InteriorEntryAlignment;
  // Check for loop alignment
  if (has_loop_alignment()) return loop_alignment();

  return 1; // no particular alignment
}

uint Block::compute_loop_alignment() {
  Node *h = head();
  if( h->is_Loop() && h->as_Loop()->is_inner_loop() ) {
    // Pre- and post-loops have low trip count so do not bother with
@@ -83,13 +97,15 @@ uint Block::code_alignment() {
    }
    return OptoLoopAlignment; // Otherwise align loop head
  }

  return 1; // no particular alignment
}

//-----------------------------------------------------------------------------
// Compute the size of first 'inst_cnt' instructions in this block.
// Return the number of instructions left to compute if the block has
// less then 'inst_cnt' instructions.
// less then 'inst_cnt' instructions. Stop, and return 0 if sum_size
// exceeds OptoLoopAlignment.
uint Block::compute_first_inst_size(uint& sum_size, uint inst_cnt,
                                    PhaseRegAlloc* ra) {
  uint last_inst = _nodes.size();
@@ -307,6 +323,8 @@ void Block::dump_head( const Block_Array *bbs ) const {
      tty->print("\tLoop: B%d-B%d ", bhead->_pre_order, bx->_pre_order);
      // Dump any loop-specific bits, especially for CountedLoops.
      loop->dump_spec(tty);
    } else if (has_loop_alignment()) {
      tty->print(" top-of-loop");
    }
    tty->print(" Freq: %g",_freq);
    if( Verbose || WizardMode ) {
@@ -509,9 +527,11 @@ static bool no_flip_branch( Block *b ) {
  int branch_idx = b->_nodes.size() - b->_num_succs-1;
  if( branch_idx < 1 ) return false;
  Node *bra = b->_nodes[branch_idx];
  if( bra->is_Catch() ) return true;
  if( bra->is_Catch() )
    return true;
  if( bra->is_Mach() ) {
    if( bra->is_MachNullCheck() ) return true;
    if( bra->is_MachNullCheck() )
      return true;
    int iop = bra->as_Mach()->ideal_Opcode();
    if( iop == Op_FastLock || iop == Op_FastUnlock )
      return true;
@@ -557,10 +577,10 @@ void PhaseCFG::convert_NeverBranch_to_Goto(Block *b) {
      dead->_nodes[k]->del_req(j);
}

//------------------------------MoveToNext-------------------------------------
//------------------------------move_to_next-----------------------------------
// Helper function to move block bx to the slot following b_index. Return
// true if the move is successful, otherwise false
bool PhaseCFG::MoveToNext(Block* bx, uint b_index) {
bool PhaseCFG::move_to_next(Block* bx, uint b_index) {
  if (bx == NULL) return false;

  // Return false if bx is already scheduled.
@@ -591,9 +611,9 @@ bool PhaseCFG::MoveToNext(Block* bx, uint b_index) {
  return true;
}

//------------------------------MoveToEnd--------------------------------------
//------------------------------move_to_end------------------------------------
// Move empty and uncommon blocks to the end.
void PhaseCFG::MoveToEnd(Block *b, uint i) {
void PhaseCFG::move_to_end(Block *b, uint i) {
  int e = b->is_Empty();
  if (e != Block::not_empty) {
    if (e == Block::empty_with_goto) {
@@ -609,15 +629,31 @@ void PhaseCFG::MoveToEnd(Block *b, uint i) {
  _blocks.push(b);
}

//------------------------------RemoveEmpty------------------------------------
// Remove empty basic blocks and useless branches.
void PhaseCFG::RemoveEmpty() {
//---------------------------set_loop_alignment--------------------------------
// Set loop alignment for every block
void PhaseCFG::set_loop_alignment() {
  uint last = _num_blocks;
  assert( _blocks[0] == _broot, "" );

  for (uint i = 1; i < last; i++ ) {
    Block *b = _blocks[i];
    if (b->head()->is_Loop()) {
      b->set_loop_alignment(b);
    }
  }
}

//-----------------------------remove_empty------------------------------------
// Make empty basic blocks to be "connector" blocks, Move uncommon blocks
// to the end.
void PhaseCFG::remove_empty() {
  // Move uncommon blocks to the end
  uint last = _num_blocks;
  uint i;
  assert( _blocks[0] == _broot, "" );
  for( i = 1; i < last; i++ ) {

  for (uint i = 1; i < last; i++) {
    Block *b = _blocks[i];
    if (b->is_connector()) break;

    // Check for NeverBranch at block end. This needs to become a GOTO to the
    // true target. NeverBranch are treated as a conditional branch that
@@ -629,37 +665,40 @@ void PhaseCFG::RemoveEmpty() {
      convert_NeverBranch_to_Goto(b);

    // Look for uncommon blocks and move to end.
    if (!C->do_freq_based_layout()) {
      if( b->is_uncommon(_bbs) ) {
        MoveToEnd(b, i);
        move_to_end(b, i);
        last--; // No longer check for being uncommon!
        if( no_flip_branch(b) ) { // Fall-thru case must follow?
          b = _blocks[i]; // Find the fall-thru block
          MoveToEnd(b, i);
          move_to_end(b, i);
          last--;
        }
        i--; // backup block counter post-increment
      }
    }
  }

  // Remove empty blocks
  uint j1;
  // Move empty blocks to the end
  last = _num_blocks;
  for( i=0; i < last; i++ ) {
  for (uint i = 1; i < last; i++) {
    Block *b = _blocks[i];
    if (i > 0) {
      if (b->is_Empty() != Block::not_empty) {
        MoveToEnd(b, i);
        move_to_end(b, i);
        last--;
        i--;
      }
    }
  } // End of for all blocks
}

//-----------------------------fixup_flow--------------------------------------
// Fix up the final control flow for basic blocks.
void PhaseCFG::fixup_flow() {
  // Fixup final control flow for the blocks. Remove jump-to-next
  // block. If neither arm of a IF follows the conditional branch, we
  // have to add a second jump after the conditional. We place the
  // TRUE branch target in succs[0] for both GOTOs and IFs.
  for( i=0; i < _num_blocks; i++ ) {
  for (uint i=0; i < _num_blocks; i++) {
    Block *b = _blocks[i];
    b->_pre_order = i; // turn pre-order into block-index

@@ -700,7 +739,7 @@ void PhaseCFG::RemoveEmpty() {
        }
      }
      // Remove all CatchProjs
      for (j1 = 0; j1 < b->_num_succs; j1++) b->_nodes.pop();
      for (uint j1 = 0; j1 < b->_num_succs; j1++) b->_nodes.pop();

    } else if (b->_num_succs == 1) {
      // Block ends in a Goto?
@@ -730,8 +769,7 @@ void PhaseCFG::RemoveEmpty() {
      // successors after the current one, provided that the
      // successor was previously unscheduled, but moveable
      // (i.e., all paths to it involve a branch).
      if( bnext != bs0 && bnext != bs1 ) {

      if( !C->do_freq_based_layout() && bnext != bs0 && bnext != bs1 ) {
        // Choose the more common successor based on the probability
        // of the conditional branch.
        Block *bx = bs0;
@@ -751,9 +789,9 @@ void PhaseCFG::RemoveEmpty() {
        }

        // Attempt the more common successor first
        if (MoveToNext(bx, i)) {
        if (move_to_next(bx, i)) {
          bnext = bx;
        } else if (MoveToNext(by, i)) {
        } else if (move_to_next(by, i)) {
          bnext = by;
        }
      }
@@ -774,10 +812,8 @@ void PhaseCFG::RemoveEmpty() {
        // Flip projection for each target
        { ProjNode *tmp = proj0; proj0 = proj1; proj1 = tmp; }

      } else if( bnext == bs1 ) { // Fall-thru is already in succs[1]

      } else { // Else need a double-branch

      } else if( bnext != bs1 ) {
        // Need a double-branch
        // The existing conditional branch need not change.
        // Add a unconditional branch to the false target.
        // Alas, it must appear in its own block and adding a
@@ -786,8 +822,9 @@ void PhaseCFG::RemoveEmpty() {
      }

      // Make sure we TRUE branch to the target
      if( proj0->Opcode() == Op_IfFalse )
      if( proj0->Opcode() == Op_IfFalse ) {
        iff->negate();
      }

      b->_nodes.pop(); // Remove IfFalse & IfTrue projections
      b->_nodes.pop();
@@ -796,9 +833,7 @@ void PhaseCFG::RemoveEmpty() {
      // Multi-exit block, e.g. a switch statement
      // But we don't need to do anything here
    }

  } // End of for all blocks

}

@@ -905,7 +940,7 @@ void UnionFind::reset( uint max ) {
  // Force the Union-Find mapping to be at least this large
  extend(max,0);
  // Initialize to be the ID mapping.
  for( uint i=0; i<_max; i++ ) map(i,i);
  for( uint i=0; i<max; i++ ) map(i,i);
}

//------------------------------Find_compress----------------------------------
@@ -937,7 +972,6 @@ uint UnionFind::Find_const( uint idx ) const {
  if( idx >= _max ) return idx;
  uint next = lookup(idx);
  while( next != idx ) { // Scan chain of equivalences
    assert( next < idx, "always union smaller" );
    idx = next; // until find a fixed-point
    next = lookup(idx);
  }
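The block layout pass added below reuses this UnionFind to map each block's _pre_order index to the id of the trace that currently contains it (see PhaseBlockLayout::trace() further down, which calls Find_compress). As a reading aid only, here is a minimal standalone union-find with the same find-with-path-compression and union-toward-the-smaller-id behavior; ToyUnionFind is an invented name and this is not the HotSpot class.

#include <vector>

// Illustrative stand-in for the UnionFind used by PhaseBlockLayout: ids are
// merged so that lookups always resolve to the smaller (surviving) id,
// mirroring the "always union smaller" assert in the real code.
class ToyUnionFind {
  std::vector<unsigned> _map;
public:
  explicit ToyUnionFind(unsigned max) : _map(max) {
    for (unsigned i = 0; i < max; i++) _map[i] = i;   // identity mapping, like reset()
  }
  unsigned find(unsigned idx) {
    unsigned root = idx;
    while (_map[root] != root) root = _map[root];     // follow the chain to its root
    while (_map[idx] != root) {                       // path compression
      unsigned next = _map[idx];
      _map[idx] = root;
      idx = next;
    }
    return root;
  }
  void unite(unsigned a, unsigned b) {
    unsigned ra = find(a), rb = find(b);
    if (ra == rb) return;
    if (ra < rb) _map[rb] = ra; else _map[ra] = rb;   // the smaller id survives
  }
};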
@@ -956,3 +990,491 @@ void UnionFind::Union( uint idx1, uint idx2 ) {
  assert( src < dst, "always union smaller" );
  map(dst,src);
}

#ifndef PRODUCT
static void edge_dump(GrowableArray<CFGEdge *> *edges) {
  tty->print_cr("---- Edges ----");
  for (int i = 0; i < edges->length(); i++) {
    CFGEdge *e = edges->at(i);
    if (e != NULL) {
      edges->at(i)->dump();
    }
  }
}

static void trace_dump(Trace *traces[], int count) {
  tty->print_cr("---- Traces ----");
  for (int i = 0; i < count; i++) {
    Trace *tr = traces[i];
    if (tr != NULL) {
      tr->dump();
    }
  }
}

void Trace::dump( ) const {
  tty->print_cr("Trace (freq %f)", first_block()->_freq);
  for (Block *b = first_block(); b != NULL; b = next(b)) {
    tty->print(" B%d", b->_pre_order);
    if (b->head()->is_Loop()) {
      tty->print(" (L%d)", b->compute_loop_alignment());
    }
    if (b->has_loop_alignment()) {
      tty->print(" (T%d)", b->code_alignment());
    }
  }
  tty->cr();
}

void CFGEdge::dump( ) const {
  tty->print(" B%d --> B%d Freq: %f out:%3d%% in:%3d%% State: ",
             from()->_pre_order, to()->_pre_order, freq(), _from_pct, _to_pct);
  switch(state()) {
  case connected:
    tty->print("connected");
    break;
  case open:
    tty->print("open");
    break;
  case interior:
    tty->print("interior");
    break;
  }
  if (infrequent()) {
    tty->print(" infrequent");
  }
  tty->cr();
}
#endif

//=============================================================================

//------------------------------edge_order-------------------------------------
// Comparison function for edges
static int edge_order(CFGEdge **e0, CFGEdge **e1) {
  float freq0 = (*e0)->freq();
  float freq1 = (*e1)->freq();
  if (freq0 != freq1) {
    return freq0 > freq1 ? -1 : 1;
  }

  int dist0 = (*e0)->to()->_rpo - (*e0)->from()->_rpo;
  int dist1 = (*e1)->to()->_rpo - (*e1)->from()->_rpo;

  return dist1 - dist0;
}

//------------------------------trace_frequency_order--------------------------
// Comparison function for edges
static int trace_frequency_order(const void *p0, const void *p1) {
  Trace *tr0 = *(Trace **) p0;
  Trace *tr1 = *(Trace **) p1;
  Block *b0 = tr0->first_block();
  Block *b1 = tr1->first_block();

  // The trace of connector blocks goes at the end;
  // we only expect one such trace
  if (b0->is_connector() != b1->is_connector()) {
    return b1->is_connector() ? -1 : 1;
  }

  // Pull more frequently executed blocks to the beginning
  float freq0 = b0->_freq;
  float freq1 = b1->_freq;
  if (freq0 != freq1) {
    return freq0 > freq1 ? -1 : 1;
  }

  int diff = tr0->first_block()->_rpo - tr1->first_block()->_rpo;

  return diff;
}

//------------------------------find_edges-------------------------------------
// Find edges of interest, i.e, those which can fall through. Presumes that
// edges which don't fall through are of low frequency and can be generally
// ignored. Initialize the list of traces.
void PhaseBlockLayout::find_edges()
{
  // Walk the blocks, creating edges and Traces
  uint i;
  Trace *tr = NULL;
  for (i = 0; i < _cfg._num_blocks; i++) {
    Block *b = _cfg._blocks[i];
    tr = new Trace(b, next, prev);
    traces[tr->id()] = tr;

    // All connector blocks should be at the end of the list
    if (b->is_connector()) break;

    // If this block and the next one have a one-to-one successor
    // predecessor relationship, simply append the next block
    int nfallthru = b->num_fall_throughs();
    while (nfallthru == 1 &&
           b->succ_fall_through(0)) {
      Block *n = b->_succs[0];

      // Skip over single-entry connector blocks, we don't want to
      // add them to the trace.
      while (n->is_connector() && n->num_preds() == 1) {
        n = n->_succs[0];
      }

      // We see a merge point, so stop search for the next block
      if (n->num_preds() != 1) break;

      i++;
      assert(n = _cfg._blocks[i], "expecting next block");
      tr->append(n);
      uf->map(n->_pre_order, tr->id());
      traces[n->_pre_order] = NULL;
      nfallthru = b->num_fall_throughs();
      b = n;
    }

    if (nfallthru > 0) {
      // Create a CFGEdge for each outgoing
      // edge that could be a fall-through.
      for (uint j = 0; j < b->_num_succs; j++ ) {
        if (b->succ_fall_through(j)) {
          Block *target = b->non_connector_successor(j);
          float freq = b->_freq * b->succ_prob(j);
          int from_pct = (int) ((100 * freq) / b->_freq);
          int to_pct = (int) ((100 * freq) / target->_freq);
          edges->append(new CFGEdge(b, target, freq, from_pct, to_pct));
        }
      }
    }
  }

  // Group connector blocks into one trace
  for (i++; i < _cfg._num_blocks; i++) {
    Block *b = _cfg._blocks[i];
    assert(b->is_connector(), "connector blocks at the end");
    tr->append(b);
    uf->map(b->_pre_order, tr->id());
    traces[b->_pre_order] = NULL;
  }
}

//------------------------------union_traces----------------------------------
// Union two traces together in uf, and null out the trace in the list
void PhaseBlockLayout::union_traces(Trace* updated_trace, Trace* old_trace)
{
  uint old_id = old_trace->id();
  uint updated_id = updated_trace->id();

  uint lo_id = updated_id;
  uint hi_id = old_id;

  // If from is greater than to, swap values to meet
  // UnionFind guarantee.
  if (updated_id > old_id) {
    lo_id = old_id;
    hi_id = updated_id;

    // Fix up the trace ids
    traces[lo_id] = traces[updated_id];
    updated_trace->set_id(lo_id);
  }

  // Union the lower with the higher and remove the pointer
  // to the higher.
  uf->Union(lo_id, hi_id);
  traces[hi_id] = NULL;
}

//------------------------------grow_traces-------------------------------------
// Append traces together via the most frequently executed edges
void PhaseBlockLayout::grow_traces()
{
  // Order the edges, and drive the growth of Traces via the most
  // frequently executed edges.
  edges->sort(edge_order);
  for (int i = 0; i < edges->length(); i++) {
    CFGEdge *e = edges->at(i);

    if (e->state() != CFGEdge::open) continue;

    Block *src_block = e->from();
    Block *targ_block = e->to();

    // Don't grow traces along backedges?
    if (!BlockLayoutRotateLoops) {
      if (targ_block->_rpo <= src_block->_rpo) {
        targ_block->set_loop_alignment(targ_block);
        continue;
      }
    }

    Trace *src_trace = trace(src_block);
    Trace *targ_trace = trace(targ_block);

    // If the edge in question can join two traces at their ends,
    // append one trace to the other.
    if (src_trace->last_block() == src_block) {
      if (src_trace == targ_trace) {
        e->set_state(CFGEdge::interior);
        if (targ_trace->backedge(e)) {
          // Reset i to catch any newly eligible edge
          // (Or we could remember the first "open" edge, and reset there)
          i = 0;
        }
      } else if (targ_trace->first_block() == targ_block) {
        e->set_state(CFGEdge::connected);
        src_trace->append(targ_trace);
        union_traces(src_trace, targ_trace);
      }
    }
  }
}

//------------------------------merge_traces-----------------------------------
// Embed one trace into another, if the fork or join points are sufficiently
// balanced.
void PhaseBlockLayout::merge_traces(bool fall_thru_only)
{
  // Walk the edge list a another time, looking at unprocessed edges.
  // Fold in diamonds
  for (int i = 0; i < edges->length(); i++) {
    CFGEdge *e = edges->at(i);

    if (e->state() != CFGEdge::open) continue;
    if (fall_thru_only) {
      if (e->infrequent()) continue;
    }

    Block *src_block = e->from();
    Trace *src_trace = trace(src_block);
    bool src_at_tail = src_trace->last_block() == src_block;

    Block *targ_block = e->to();
    Trace *targ_trace = trace(targ_block);
    bool targ_at_start = targ_trace->first_block() == targ_block;

    if (src_trace == targ_trace) {
      // This may be a loop, but we can't do much about it.
      e->set_state(CFGEdge::interior);
      continue;
    }

    if (fall_thru_only) {
      // If the edge links the middle of two traces, we can't do anything.
      // Mark the edge and continue.
      if (!src_at_tail & !targ_at_start) {
        continue;
      }

      // Don't grow traces along backedges?
      if (!BlockLayoutRotateLoops && (targ_block->_rpo <= src_block->_rpo)) {
        continue;
      }

      // If both ends of the edge are available, why didn't we handle it earlier?
      assert(src_at_tail ^ targ_at_start, "Should have caught this edge earlier.");

      if (targ_at_start) {
        // Insert the "targ" trace in the "src" trace if the insertion point
        // is a two way branch.
        // Better profitability check possible, but may not be worth it.
        // Someday, see if the this "fork" has an associated "join";
        // then make a policy on merging this trace at the fork or join.
        // For example, other things being equal, it may be better to place this
        // trace at the join point if the "src" trace ends in a two-way, but
        // the insertion point is one-way.
        assert(src_block->num_fall_throughs() == 2, "unexpected diamond");
        e->set_state(CFGEdge::connected);
        src_trace->insert_after(src_block, targ_trace);
        union_traces(src_trace, targ_trace);
      } else if (src_at_tail) {
        if (src_trace != trace(_cfg._broot)) {
          e->set_state(CFGEdge::connected);
          targ_trace->insert_before(targ_block, src_trace);
          union_traces(targ_trace, src_trace);
        }
      }
    } else if (e->state() == CFGEdge::open) {
      // Append traces, even without a fall-thru connection.
      // But leave root entry at the begining of the block list.
      if (targ_trace != trace(_cfg._broot)) {
        e->set_state(CFGEdge::connected);
        src_trace->append(targ_trace);
        union_traces(src_trace, targ_trace);
      }
    }
  }
}

//----------------------------reorder_traces-----------------------------------
// Order the sequence of the traces in some desirable way, and fixup the
// jumps at the end of each block.
void PhaseBlockLayout::reorder_traces(int count)
{
  ResourceArea *area = Thread::current()->resource_area();
  Trace ** new_traces = NEW_ARENA_ARRAY(area, Trace *, count);
  Block_List worklist;
  int new_count = 0;

  // Compact the traces.
  for (int i = 0; i < count; i++) {
    Trace *tr = traces[i];
    if (tr != NULL) {
      new_traces[new_count++] = tr;
    }
  }

  // The entry block should be first on the new trace list.
  Trace *tr = trace(_cfg._broot);
  assert(tr == new_traces[0], "entry trace misplaced");

  // Sort the new trace list by frequency
  qsort(new_traces + 1, new_count - 1, sizeof(new_traces[0]), trace_frequency_order);

  // Patch up the successor blocks
  _cfg._blocks.reset();
  _cfg._num_blocks = 0;
  for (int i = 0; i < new_count; i++) {
    Trace *tr = new_traces[i];
    if (tr != NULL) {
      tr->fixup_blocks(_cfg);
    }
  }
}

//------------------------------PhaseBlockLayout-------------------------------
// Order basic blocks based on frequency
PhaseBlockLayout::PhaseBlockLayout(PhaseCFG &cfg) :
  Phase(BlockLayout),
  _cfg(cfg)
{
  ResourceMark rm;
  ResourceArea *area = Thread::current()->resource_area();

  // List of traces
  int size = _cfg._num_blocks + 1;
  traces = NEW_ARENA_ARRAY(area, Trace *, size);
  memset(traces, 0, size*sizeof(Trace*));
  next = NEW_ARENA_ARRAY(area, Block *, size);
  memset(next, 0, size*sizeof(Block *));
  prev = NEW_ARENA_ARRAY(area, Block *, size);
  memset(prev , 0, size*sizeof(Block *));

  // List of edges
  edges = new GrowableArray<CFGEdge*>;

  // Mapping block index --> block_trace
  uf = new UnionFind(size);
  uf->reset(size);

  // Find edges and create traces.
  find_edges();

  // Grow traces at their ends via most frequent edges.
  grow_traces();

  // Merge one trace into another, but only at fall-through points.
  // This may make diamonds and other related shapes in a trace.
  merge_traces(true);

  // Run merge again, allowing two traces to be catenated, even if
  // one does not fall through into the other. This appends loosely
  // related traces to be near each other.
  merge_traces(false);

  // Re-order all the remaining traces by frequency
  reorder_traces(size);

  assert(_cfg._num_blocks >= (uint) (size - 1), "number of blocks can not shrink");
}

//------------------------------backedge---------------------------------------
// Edge e completes a loop in a trace. If the target block is head of the
// loop, rotate the loop block so that the loop ends in a conditional branch.
bool Trace::backedge(CFGEdge *e) {
  bool loop_rotated = false;
  Block *src_block = e->from();
  Block *targ_block = e->to();

  assert(last_block() == src_block, "loop discovery at back branch");
  if (first_block() == targ_block) {
    if (BlockLayoutRotateLoops && last_block()->num_fall_throughs() < 2) {
      // Find the last block in the trace that has a conditional
      // branch.
      Block *b;
      for (b = last_block(); b != NULL; b = prev(b)) {
        if (b->num_fall_throughs() == 2) {
          break;
        }
      }

      if (b != last_block() && b != NULL) {
        loop_rotated = true;

        // Rotate the loop by doing two-part linked-list surgery.
        append(first_block());
        break_loop_after(b);
      }
    }

    // Backbranch to the top of a trace
    // Scroll foward through the trace from the targ_block. If we find
    // a loop head before another loop top, use the the loop head alignment.
    for (Block *b = targ_block; b != NULL; b = next(b)) {
      if (b->has_loop_alignment()) {
        break;
      }
      if (b->head()->is_Loop()) {
        targ_block = b;
        break;
      }
    }

    first_block()->set_loop_alignment(targ_block);

  } else {
    // Backbranch into the middle of a trace
    targ_block->set_loop_alignment(targ_block);
  }

  return loop_rotated;
}

//------------------------------fixup_blocks-----------------------------------
// push blocks onto the CFG list
// ensure that blocks have the correct two-way branch sense
void Trace::fixup_blocks(PhaseCFG &cfg) {
  Block *last = last_block();
  for (Block *b = first_block(); b != NULL; b = next(b)) {
    cfg._blocks.push(b);
    cfg._num_blocks++;
    if (!b->is_connector()) {
      int nfallthru = b->num_fall_throughs();
      if (b != last) {
        if (nfallthru == 2) {
          // Ensure that the sense of the branch is correct
          Block *bnext = next(b);
          Block *bs0 = b->non_connector_successor(0);

          MachNode *iff = b->_nodes[b->_nodes.size()-3]->as_Mach();
          ProjNode *proj0 = b->_nodes[b->_nodes.size()-2]->as_Proj();
          ProjNode *proj1 = b->_nodes[b->_nodes.size()-1]->as_Proj();

          if (bnext == bs0) {
            // Fall-thru case in succs[0], should be in succs[1]

            // Flip targets in _succs map
            Block *tbs0 = b->_succs[0];
            Block *tbs1 = b->_succs[1];
            b->_succs.map( 0, tbs1 );
            b->_succs.map( 1, tbs0 );

            // Flip projections to match targets
            b->_nodes.map(b->_nodes.size()-2, proj1);
            b->_nodes.map(b->_nodes.size()-1, proj0);
          }
        }
      }
    }
  }
}

@@ -75,6 +75,7 @@ public:
  void insert( uint i, Block *n );
  uint size() const { return _cnt; }
  void reset() { _cnt = 0; }
  void print();
};

@@ -130,6 +131,10 @@ class Block : public CFGElement {

  virtual bool is_block() { return true; }
  float succ_prob(uint i); // return probability of i'th successor
  int num_fall_throughs(); // How many fall-through candidate this block has
  void update_uncommon_branch(Block* un); // Lower branch prob to uncommon code
  bool succ_fall_through(uint i); // Is successor "i" is a fall-through candidate
  Block* lone_fall_through(); // Return lone fall-through Block or null

  Block* dom_lca(Block* that); // Compute LCA in dominator tree.
#ifdef ASSERT
@@ -144,6 +149,7 @@ class Block : public CFGElement {
  // Report the alignment required by this block. Must be a power of 2.
  // The previous block will insert nops to get this alignment.
  uint code_alignment();
  uint compute_loop_alignment();

  // BLOCK_FREQUENCY is a sentinel to mark uses of constant block frequencies.
  // It is currently also used to scale such frequencies relative to
@@ -184,11 +190,12 @@ class Block : public CFGElement {
    int current_alignment = current_offset & max_pad;
    if( current_alignment != 0 ) {
      uint padding = (block_alignment-current_alignment) & max_pad;
      if( !head()->is_Loop() ||
          padding <= (uint)MaxLoopPad ||
          first_inst_size() > padding ) {
        return padding;
      if( has_loop_alignment() &&
          padding > (uint)MaxLoopPad &&
          first_inst_size() <= padding ) {
        return 0;
      }
      return padding;
    }
  }
  return 0;
@@ -202,6 +209,21 @@ class Block : public CFGElement {
  void set_connector() { _connector = true; }
  bool is_connector() const { return _connector; };

  // Loop_alignment will be set for blocks which are at the top of loops.
  // The block layout pass may rotate loops such that the loop head may not
  // be the sequentially first block of the loop encountered in the linear
  // list of blocks. If the layout pass is not run, loop alignment is set
  // for each block which is the head of a loop.
  uint _loop_alignment;
  void set_loop_alignment(Block *loop_top) {
    uint new_alignment = loop_top->compute_loop_alignment();
    if (new_alignment > _loop_alignment) {
      _loop_alignment = new_alignment;
    }
  }
  uint loop_alignment() const { return _loop_alignment; }
  bool has_loop_alignment() const { return loop_alignment() > 0; }

  // Create a new Block with given head Node.
  // Creates the (empty) predecessor arrays.
  Block( Arena *a, Node *headnode )
@@ -219,7 +241,8 @@ class Block : public CFGElement {
      _raise_LCA_mark(0),
      _raise_LCA_visited(0),
      _first_inst_size(999999),
      _connector(false) {
      _connector(false),
      _loop_alignment(0) {
    _nodes.push(headnode);
  }

@@ -275,6 +298,16 @@ class Block : public CFGElement {
    return s;
  }

  // Return true if b is a successor of this block
  bool has_successor(Block* b) const {
    for (uint i = 0; i < _num_succs; i++ ) {
      if (non_connector_successor(i) == b) {
        return true;
      }
    }
    return false;
  }

  // Successor block, after forwarding through connectors
  Block* non_connector_successor(int i) const {
    return _succs[i]->non_connector();
@@ -319,7 +352,6 @@ class PhaseCFG : public Phase {

  // I'll need a few machine-specific GotoNodes. Clone from this one.
  MachNode *_goto;
  void insert_goto_at(uint block_no, uint succ_no);

  Block* insert_anti_dependences(Block* LCA, Node* load, bool verify = false);
  void verify_anti_dependences(Block* LCA, Node* load) {
@@ -379,10 +411,15 @@ class PhaseCFG : public Phase {
  // Compute the instruction global latency with a backwards walk
  void ComputeLatenciesBackwards(VectorSet &visited, Node_List &stack);

  // Set loop alignment
  void set_loop_alignment();

  // Remove empty basic blocks
  void RemoveEmpty();
  bool MoveToNext(Block* bx, uint b_index);
  void MoveToEnd(Block* bx, uint b_index);
  void remove_empty();
  void fixup_flow();
  bool move_to_next(Block* bx, uint b_index);
  void move_to_end(Block* bx, uint b_index);
  void insert_goto_at(uint block_no, uint succ_no);

  // Check for NeverBranch at block end. This needs to become a GOTO to the
  // true target. NeverBranch are treated as a conditional branch that always
@@ -413,7 +450,7 @@ class PhaseCFG : public Phase {
};


//------------------------------UnionFindInfo----------------------------------
//------------------------------UnionFind--------------------------------------
// Map Block indices to a block-index for a cfg-cover.
// Array lookup in the optimized case.
class UnionFind : public ResourceObj {
@@ -508,3 +545,166 @@ class CFGLoop : public CFGElement {
  void dump_tree() const;
#endif
};


//----------------------------------CFGEdge------------------------------------
// A edge between two basic blocks that will be embodied by a branch or a
// fall-through.
class CFGEdge : public ResourceObj {
 private:
  Block * _from; // Source basic block
  Block * _to; // Destination basic block
  float _freq; // Execution frequency (estimate)
  int _state;
  bool _infrequent;
  int _from_pct;
  int _to_pct;

  // Private accessors
  int from_pct() const { return _from_pct; }
  int to_pct() const { return _to_pct; }
  int from_infrequent() const { return from_pct() < BlockLayoutMinDiamondPercentage; }
  int to_infrequent() const { return to_pct() < BlockLayoutMinDiamondPercentage; }

 public:
  enum {
    open, // initial edge state; unprocessed
    connected, // edge used to connect two traces together
    interior // edge is interior to trace (could be backedge)
  };

  CFGEdge(Block *from, Block *to, float freq, int from_pct, int to_pct) :
    _from(from), _to(to), _freq(freq),
    _from_pct(from_pct), _to_pct(to_pct), _state(open) {
    _infrequent = from_infrequent() || to_infrequent();
  }

  float freq() const { return _freq; }
  Block* from() const { return _from; }
  Block* to () const { return _to; }
  int infrequent() const { return _infrequent; }
  int state() const { return _state; }

  void set_state(int state) { _state = state; }

#ifndef PRODUCT
  void dump( ) const;
#endif
};

//-----------------------------------Trace-------------------------------------
// An ordered list of basic blocks.
class Trace : public ResourceObj {
 private:
  uint _id; // Unique Trace id (derived from initial block)
  Block ** _next_list; // Array mapping index to next block
  Block ** _prev_list; // Array mapping index to previous block
  Block * _first; // First block in the trace
  Block * _last; // Last block in the trace

  // Return the block that follows "b" in the trace.
  Block * next(Block *b) const { return _next_list[b->_pre_order]; }
  void set_next(Block *b, Block *n) const { _next_list[b->_pre_order] = n; }

  // Return the block that preceeds "b" in the trace.
  Block * prev(Block *b) const { return _prev_list[b->_pre_order]; }
  void set_prev(Block *b, Block *p) const { _prev_list[b->_pre_order] = p; }

  // We've discovered a loop in this trace. Reset last to be "b", and first as
  // the block following "b
  void break_loop_after(Block *b) {
    _last = b;
    _first = next(b);
    set_prev(_first, NULL);
    set_next(_last, NULL);
  }

 public:

  Trace(Block *b, Block **next_list, Block **prev_list) :
    _first(b),
    _last(b),
    _next_list(next_list),
    _prev_list(prev_list),
    _id(b->_pre_order) {
    set_next(b, NULL);
    set_prev(b, NULL);
  };

  // Return the id number
  uint id() const { return _id; }
  void set_id(uint id) { _id = id; }

  // Return the first block in the trace
  Block * first_block() const { return _first; }

  // Return the last block in the trace
  Block * last_block() const { return _last; }

  // Insert a trace in the middle of this one after b
  void insert_after(Block *b, Trace *tr) {
    set_next(tr->last_block(), next(b));
    if (next(b) != NULL) {
      set_prev(next(b), tr->last_block());
    }

    set_next(b, tr->first_block());
    set_prev(tr->first_block(), b);

    if (b == _last) {
      _last = tr->last_block();
    }
  }

  void insert_before(Block *b, Trace *tr) {
    Block *p = prev(b);
    assert(p != NULL, "use append instead");
    insert_after(p, tr);
  }

  // Append another trace to this one.
  void append(Trace *tr) {
    insert_after(_last, tr);
  }

  // Append a block at the end of this trace
  void append(Block *b) {
    set_next(_last, b);
    set_prev(b, _last);
    _last = b;
  }

  // Adjust the the blocks in this trace
  void fixup_blocks(PhaseCFG &cfg);
  bool backedge(CFGEdge *e);

#ifndef PRODUCT
  void dump( ) const;
#endif
};

//------------------------------PhaseBlockLayout-------------------------------
// Rearrange blocks into some canonical order, based on edges and their frequencies
class PhaseBlockLayout : public Phase {
  PhaseCFG &_cfg; // Control flow graph

  GrowableArray<CFGEdge *> *edges;
  Trace **traces;
  Block **next;
  Block **prev;
  UnionFind *uf;

  // Given a block, find its encompassing Trace
  Trace * trace(Block *b) {
    return traces[uf->Find_compress(b->_pre_order)];
  }
 public:
  PhaseBlockLayout(PhaseCFG &cfg);

  void find_edges();
  void grow_traces();
  void merge_traces(bool loose_connections);
  void reorder_traces(int count);
  void union_traces(Trace* from, Trace* to);
};

@@ -396,5 +396,15 @@
                                                                            \
  diagnostic(intx, DominatorSearchLimit, 1000,                              \
          "Iterations limit in Node::dominates")                            \
                                                                            \
  product(bool, BlockLayoutByFrequency, true,                               \
          "Use edge frequencies to drive block ordering")                   \
                                                                            \
  product(intx, BlockLayoutMinDiamondPercentage, 20,                        \
          "Miniumum %% of a successor (predecessor) for which block layout "\
          "a will allow a fork (join) in a single chain")                   \
                                                                            \
  product(bool, BlockLayoutRotateLoops, false,                              \
          "Allow back branches to be fall throughs in the block layour")    \

C2_FLAGS(DECLARE_DEVELOPER_FLAG, DECLARE_PD_DEVELOPER_FLAG, DECLARE_PRODUCT_FLAG, DECLARE_PD_PRODUCT_FLAG, DECLARE_DIAGNOSTIC_FLAG, DECLARE_NOTPRODUCT_FLAG)

@@ -822,6 +822,7 @@ void Compile::Init(int aliaslevel) {
  Copy::zero_to_bytes(_trap_hist, sizeof(_trap_hist));
  set_decompile_count(0);

  set_do_freq_based_layout(BlockLayoutByFrequency || method_has_option("BlockLayoutByFrequency"));
  // Compilation level related initialization
  if (env()->comp_level() == CompLevel_fast_compile) {
    set_num_loop_opts(Tier1LoopOptsCount);
@@ -1701,8 +1702,14 @@ void Compile::Code_Gen() {
  // are not adding any new instructions. If any basic block is empty, we
  // can now safely remove it.
  {
    NOT_PRODUCT( TracePhase t2("removeEmpty", &_t_removeEmptyBlocks, TimeCompiler); )
    cfg.RemoveEmpty();
    NOT_PRODUCT( TracePhase t2("blockOrdering", &_t_blockOrdering, TimeCompiler); )
    cfg.remove_empty();
    if (do_freq_based_layout()) {
      PhaseBlockLayout layout(cfg);
    } else {
      cfg.set_loop_alignment();
    }
    cfg.fixup_flow();
  }

  // Perform any platform dependent postallocation verifications.

@@ -154,6 +154,7 @@ class Compile : public Phase {
  uint _decompile_count; // Cumulative decompilation counts.
  bool _do_inlining; // True if we intend to do inlining
  bool _do_scheduling; // True if we intend to do scheduling
  bool _do_freq_based_layout; // True if we intend to do frequency based block layout
  bool _do_count_invocations; // True if we generate code to count invocations
  bool _do_method_data_update; // True if we generate code to update methodDataOops
  int _AliasLevel; // Locally-adjusted version of AliasLevel flag.
@@ -307,6 +308,8 @@ class Compile : public Phase {
  void set_do_inlining(bool z) { _do_inlining = z; }
  bool do_scheduling() const { return _do_scheduling; }
  void set_do_scheduling(bool z) { _do_scheduling = z; }
  bool do_freq_based_layout() const{ return _do_freq_based_layout; }
  void set_do_freq_based_layout(bool z){ _do_freq_based_layout = z; }
  bool do_count_invocations() const{ return _do_count_invocations; }
  void set_do_count_invocations(bool z){ _do_count_invocations = z; }
  bool do_method_data_update() const { return _do_method_data_update; }

@@ -1319,11 +1319,33 @@ void PhaseCFG::GlobalCodeMotion( Matcher &matcher, uint unique, Node_List &proj_
//------------------------------Estimate_Block_Frequency-----------------------
// Estimate block frequencies based on IfNode probabilities.
void PhaseCFG::Estimate_Block_Frequency() {
  int cnts = C->method() ? C->method()->interpreter_invocation_count() : 1;
  // Most of our algorithms will die horribly if frequency can become
  // negative so make sure cnts is a sane value.
  if( cnts <= 0 ) cnts = 1;
  float f = (float)cnts/(float)FreqCountInvocations;

  // Force conditional branches leading to uncommon traps to be unlikely,
  // not because we get to the uncommon_trap with less relative frequency,
  // but because an uncommon_trap typically causes a deopt, so we only get
  // there once.
  if (C->do_freq_based_layout()) {
    Block_List worklist;
    Block* root_blk = _blocks[0];
    for (uint i = 1; i < root_blk->num_preds(); i++) {
      Block *pb = _bbs[root_blk->pred(i)->_idx];
      if (pb->has_uncommon_code()) {
        worklist.push(pb);
      }
    }
    while (worklist.size() > 0) {
      Block* uct = worklist.pop();
      if (uct == _broot) continue;
      for (uint i = 1; i < uct->num_preds(); i++) {
        Block *pb = _bbs[uct->pred(i)->_idx];
        if (pb->_num_succs == 1) {
          worklist.push(pb);
        } else if (pb->num_fall_throughs() == 2) {
          pb->update_uncommon_branch(uct);
        }
      }
    }
  }

  // Create the loop tree and calculate loop depth.
  _root_loop = create_loop_tree();
@@ -1333,13 +1355,14 @@ void PhaseCFG::Estimate_Block_Frequency() {
  _root_loop->compute_freq();

  // Adjust all frequencies to be relative to a single method entry
  _root_loop->_freq = f * 1.0;
  _root_loop->_freq = 1.0;
  _root_loop->scale_freq();

  // force paths ending at uncommon traps to be infrequent
  if (!C->do_freq_based_layout()) {
    Block_List worklist;
    Block* root_blk = _blocks[0];
    for (uint i = 0; i < root_blk->num_preds(); i++) {
    for (uint i = 1; i < root_blk->num_preds(); i++) {
      Block *pb = _bbs[root_blk->pred(i)->_idx];
      if (pb->has_uncommon_code()) {
        worklist.push(pb);
@@ -1348,13 +1371,14 @@ void PhaseCFG::Estimate_Block_Frequency() {
    while (worklist.size() > 0) {
      Block* uct = worklist.pop();
      uct->_freq = PROB_MIN;
      for (uint i = 0; i < uct->num_preds(); i++) {
      for (uint i = 1; i < uct->num_preds(); i++) {
        Block *pb = _bbs[uct->pred(i)->_idx];
        if (pb->_num_succs == 1 && pb->_freq > PROB_MIN) {
          worklist.push(pb);
        }
      }
    }
  }

#ifndef PRODUCT
  if (PrintCFGBlockFreq) {
@@ -1556,22 +1580,6 @@ void CFGLoop::compute_freq() {
    }
  }

#if 0
  // Raise frequency of the loop backedge block, in an effort
  // to keep it empty. Skip the method level "loop".
  if (_parent != NULL) {
    CFGElement* s = _members.at(_members.length() - 1);
    if (s->is_block()) {
      Block* bk = s->as_Block();
      if (bk->_num_succs == 1 && bk->_succs[0] == hd) {
        // almost any value >= 1.0f works
        // FIXME: raw constant
        bk->_freq = 1.05f;
      }
    }
  }
#endif

  // For all loops other than the outer, "method" loop,
  // sum and normalize the exit probability. The "method" loop
  // should keep the initial exit probability of 1, so that
@@ -1589,12 +1597,15 @@ void CFGLoop::compute_freq() {
  // the probability of exit per loop entry.
  for (int i = 0; i < _exits.length(); i++) {
    Block* et = _exits.at(i).get_target();
    float new_prob = _exits.at(i).get_prob() / exits_sum;
    float new_prob = 0.0f;
    if (_exits.at(i).get_prob() > 0.0f) {
      new_prob = _exits.at(i).get_prob() / exits_sum;
    }
    BlockProbPair bpp(et, new_prob);
    _exits.at_put(i, bpp);
  }

  // Save the total, but guard against unreasoable probability,
  // Save the total, but guard against unreasonable probability,
  // as the value is used to estimate the loop trip count.
  // An infinite trip count would blur relative block
  // frequencies.
@@ -1688,6 +1699,137 @@ float Block::succ_prob(uint i) {
  return 0.0f;
}

//------------------------------num_fall_throughs-----------------------------
// Return the number of fall-through candidates for a block
int Block::num_fall_throughs() {
  int eidx = end_idx();
  Node *n = _nodes[eidx]; // Get ending Node

  int op = n->Opcode();
  if (n->is_Mach()) {
    if (n->is_MachNullCheck()) {
      // In theory, either side can fall-thru, for simplicity sake,
      // let's say only the false branch can now.
      return 1;
    }
    op = n->as_Mach()->ideal_Opcode();
  }

  // Switch on branch type
  switch( op ) {
  case Op_CountedLoopEnd:
  case Op_If:
    return 2;

  case Op_Root:
  case Op_Goto:
    return 1;

  case Op_Catch: {
    for (uint i = 0; i < _num_succs; i++) {
      const CatchProjNode *ci = _nodes[i + eidx + 1]->as_CatchProj();
      if (ci->_con == CatchProjNode::fall_through_index) {
        return 1;
      }
    }
    return 0;
  }

  case Op_Jump:
  case Op_NeverBranch:
  case Op_TailCall:
  case Op_TailJump:
  case Op_Return:
  case Op_Halt:
  case Op_Rethrow:
    return 0;

  default:
    ShouldNotReachHere();
  }

  return 0;
}

//------------------------------succ_fall_through-----------------------------
// Return true if a specific successor could be fall-through target.
bool Block::succ_fall_through(uint i) {
  int eidx = end_idx();
  Node *n = _nodes[eidx]; // Get ending Node

  int op = n->Opcode();
  if (n->is_Mach()) {
    if (n->is_MachNullCheck()) {
      // In theory, either side can fall-thru, for simplicity sake,
      // let's say only the false branch can now.
      return _nodes[i + eidx + 1]->Opcode() == Op_IfFalse;
    }
    op = n->as_Mach()->ideal_Opcode();
  }

  // Switch on branch type
  switch( op ) {
  case Op_CountedLoopEnd:
  case Op_If:
  case Op_Root:
  case Op_Goto:
    return true;

  case Op_Catch: {
    const CatchProjNode *ci = _nodes[i + eidx + 1]->as_CatchProj();
    return ci->_con == CatchProjNode::fall_through_index;
  }

  case Op_Jump:
  case Op_NeverBranch:
  case Op_TailCall:
  case Op_TailJump:
  case Op_Return:
  case Op_Halt:
  case Op_Rethrow:
    return false;

  default:
    ShouldNotReachHere();
  }

  return false;
}

//------------------------------update_uncommon_branch------------------------
// Update the probability of a two-branch to be uncommon
void Block::update_uncommon_branch(Block* ub) {
  int eidx = end_idx();
  Node *n = _nodes[eidx]; // Get ending Node

  int op = n->as_Mach()->ideal_Opcode();

  assert(op == Op_CountedLoopEnd || op == Op_If, "must be a If");
  assert(num_fall_throughs() == 2, "must be a two way branch block");

  // Which successor is ub?
  uint s;
  for (s = 0; s <_num_succs; s++) {
    if (_succs[s] == ub) break;
  }
  assert(s < 2, "uncommon successor must be found");

  // If ub is the true path, make the proability small, else
  // ub is the false path, and make the probability large
  bool invert = (_nodes[s + eidx + 1]->Opcode() == Op_IfFalse);

  // Get existing probability
  float p = n->as_MachIf()->_prob;

  if (invert) p = 1.0 - p;
  if (p > PROB_MIN) {
    p = PROB_MIN;
  }
  if (invert) p = 1.0 - p;

  n->as_MachIf()->_prob = p;
}

//------------------------------update_succ_freq-------------------------------
// Update the appropriate frequency associated with block 'b', a succesor of
// a block in this loop.

@@ -263,7 +263,7 @@ bool Compile::is_node_getting_a_safepoint( Node* n) {
# endif // ENABLE_ZAP_DEAD_LOCALS

//------------------------------compute_loop_first_inst_sizes------------------
// Compute the size of first NumberOfLoopInstrToAlign instructions at head
// Compute the size of first NumberOfLoopInstrToAlign instructions at the top
// of a loop. When aligning a loop we need to provide enough instructions
// in cpu's fetch buffer to feed decoders. The loop alignment could be
// avoided if we have enough instructions in fetch buffer at the head of a loop.
@@ -284,34 +284,23 @@ void Compile::compute_loop_first_inst_sizes() {
    for( uint i=1; i <= last_block; i++ ) {
      Block *b = _cfg->_blocks[i];
      // Check the first loop's block which requires an alignment.
      if( b->head()->is_Loop() &&
          b->code_alignment() > (uint)relocInfo::addr_unit() ) {
      if( b->loop_alignment() > (uint)relocInfo::addr_unit() ) {
        uint sum_size = 0;
        uint inst_cnt = NumberOfLoopInstrToAlign;
        inst_cnt = b->compute_first_inst_size(sum_size, inst_cnt,
                                              _regalloc);
        // Check the next fallthrough block if first loop's block does not have
        // enough instructions.
        if( inst_cnt > 0 && i < last_block ) {
          // First, check if the first loop's block contains whole loop.
          // LoopNode::LoopBackControl == 2.
          Block *bx = _cfg->_bbs[b->pred(2)->_idx];
          // Skip connector blocks (with limit in case of irreducible loops).
          int search_limit = 16;
          while( bx->is_connector() && search_limit-- > 0) {
            bx = _cfg->_bbs[bx->pred(1)->_idx];
          }
          if( bx != b ) { // loop body is in several blocks.
            Block *nb = NULL;
            while( inst_cnt > 0 && i < last_block && nb != bx &&
                   !_cfg->_blocks[i+1]->head()->is_Loop() ) {
        inst_cnt = b->compute_first_inst_size(sum_size, inst_cnt, _regalloc);

        // Check subsequent fallthrough blocks if the loop's first
        // block(s) does not have enough instructions.
        Block *nb = b;
        while( inst_cnt > 0 &&
               i < last_block &&
               !_cfg->_blocks[i+1]->has_loop_alignment() &&
               !nb->has_successor(b) ) {
          i++;
          nb = _cfg->_blocks[i];
          inst_cnt = nb->compute_first_inst_size(sum_size, inst_cnt,
                                                 _regalloc);
          inst_cnt = nb->compute_first_inst_size(sum_size, inst_cnt, _regalloc);
        } // while( inst_cnt > 0 && i < last_block )
        } // if( bx != b )
        } // if( inst_cnt > 0 && i < last_block )

        b->set_first_inst_size(sum_size);
      } // f( b->head()->is_Loop() )
    } // for( i <= last_block )
@@ -512,7 +501,7 @@ void Compile::Shorten_branches(Label *labels, int& code_size, int& reloc_size, i
    // Get the size of the block
    uint blk_size = adr - blk_starts[i];

    // When the next block starts a loop, we may insert pad NOP
    // When the next block is the top of a loop, we may insert pad NOP
    // instructions.
    Block *nb = _cfg->_blocks[i+1];
    int current_offset = blk_starts[i] + blk_size;
@@ -1382,8 +1371,8 @@ void Compile::Fill_buffer() {

    } // End for all instructions in block

    // If the next block _starts_ a loop, pad this block out to align
    // the loop start a little. Helps prevent pipe stalls at loop starts
    // If the next block is the top of a loop, pad this block out to align
    // the loop top a little. Helps prevent pipe stalls at loop back branches.
    int nop_size = (new (this) MachNopNode())->size(_regalloc);
    if( i<_cfg->_num_blocks-1 ) {
      Block *nb = _cfg->_blocks[i+1];

@@ -46,7 +46,7 @@ elapsedTimer Phase::_t_output;
#ifndef PRODUCT
elapsedTimer Phase::_t_graphReshaping;
elapsedTimer Phase::_t_scheduler;
elapsedTimer Phase::_t_removeEmptyBlocks;
elapsedTimer Phase::_t_blockOrdering;
elapsedTimer Phase::_t_macroExpand;
elapsedTimer Phase::_t_peephole;
elapsedTimer Phase::_t_codeGeneration;
@@ -128,7 +128,7 @@ void Phase::print_timers() {
    tty->print_cr (" subtotal : %3.3f sec, %3.2f %%", regalloc_subtotal, percent_of_regalloc);
  }
  tty->print_cr (" macroExpand : %3.3f sec", Phase::_t_macroExpand.seconds());
  tty->print_cr (" removeEmpty : %3.3f sec", Phase::_t_removeEmptyBlocks.seconds());
  tty->print_cr (" blockOrdering: %3.3f sec", Phase::_t_blockOrdering.seconds());
  tty->print_cr (" peephole : %3.3f sec", Phase::_t_peephole.seconds());
  tty->print_cr (" codeGen : %3.3f sec", Phase::_t_codeGeneration.seconds());
  tty->print_cr (" install_code : %3.3f sec", Phase::_t_registerMethod.seconds());
@@ -137,7 +137,7 @@ void Phase::print_timers() {
    (DoEscapeAnalysis ? Phase::_t_escapeAnalysis.seconds() : 0.0) +
    Phase::_t_optimizer.seconds() + Phase::_t_graphReshaping.seconds() +
    Phase::_t_matcher.seconds() + Phase::_t_scheduler.seconds() +
    Phase::_t_registerAllocation.seconds() + Phase::_t_removeEmptyBlocks.seconds() +
    Phase::_t_registerAllocation.seconds() + Phase::_t_blockOrdering.seconds() +
    Phase::_t_macroExpand.seconds() + Phase::_t_peephole.seconds() +
    Phase::_t_codeGeneration.seconds() + Phase::_t_registerMethod.seconds();
  double percent_of_method_compile = ((phase_subtotal == 0.0) ? 0.0 : phase_subtotal / Phase::_t_methodCompilation.seconds()) * 100.0;

@@ -40,16 +40,12 @@ public:
  Optimistic, // Optimistic analysis phase
  GVN, // Pessimistic global value numbering phase
  Ins_Select, // Instruction selection phase
  Copy_Elimination, // Copy Elimination
  Dead_Code_Elimination, // DCE and compress Nodes
  Conditional_Constant, // Conditional Constant Propagation
  CFG, // Build a CFG
  DefUse, // Build Def->Use chains
  BlockLayout, // Linear ordering of blocks
  Register_Allocation, // Register allocation, duh
  LIVE, // Dragon-book LIVE range problem
  Interference_Graph, // Building the IFG
  Coalesce, // Coalescing copies
  Conditional_CProp, // Conditional Constant Propagation
  Ideal_Loop, // Find idealized trip-counted loops
  Macro_Expand, // Expand macro nodes
  Peephole, // Apply peephole optimizations
@@ -80,7 +76,7 @@ protected:
#ifndef PRODUCT
  static elapsedTimer _t_graphReshaping;
  static elapsedTimer _t_scheduler;
  static elapsedTimer _t_removeEmptyBlocks;
  static elapsedTimer _t_blockOrdering;
  static elapsedTimer _t_macroExpand;
  static elapsedTimer _t_peephole;
  static elapsedTimer _t_codeGeneration;