8149023: Event based tracing should cover safepoint begin and end

Add events to safepoint begin and end

Reviewed-by: dholmes, mgronlun, egahlin
David Simms 2016-03-23 10:52:35 +01:00
parent a3b37b6372
commit 7294935e3e
5 changed files with 285 additions and 166 deletions
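
Note: every event added below follows the same shape: a stack-allocated event object is constructed when its phase starts (the constructor takes the start timestamp), and at the end of the phase the code tests should_commit() and, only if tracing is enabled, fills in the fields and calls commit(). A minimal, self-contained sketch of that shape follows; TimedEvent is a hypothetical stand-in for the generated EventSafepoint* types, which come from HotSpot's trace framework.

// sketch.cpp: illustrative stand-in for the EventSafepoint* pattern used in this commit.
#include <chrono>
#include <cstdio>

class TimedEvent {
  std::chrono::steady_clock::time_point _start;
  long _safepoint_id;
public:
  TimedEvent() : _start(std::chrono::steady_clock::now()), _safepoint_id(-1) {}
  // The real implementation consults the tracing backend; here it is always on.
  bool should_commit() const { return true; }
  void set_safepointId(long id) { _safepoint_id = id; }
  void commit() {
    long long us = std::chrono::duration_cast<std::chrono::microseconds>(
        std::chrono::steady_clock::now() - _start).count();
    std::printf("safepoint %ld: phase took %lld us\n", _safepoint_id, us);
  }
};

int main() {
  TimedEvent begin_event;              // constructed when the phase begins
  // ... the traced phase would run here ...
  if (begin_event.should_commit()) {   // cheap guard, false when tracing is off
    begin_event.set_safepointId(42);
    begin_event.commit();              // duration = construction to commit
  }
  return 0;
}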

src/share/vm/runtime/safepoint.cpp

@@ -56,6 +56,8 @@
#include "runtime/thread.inline.hpp"
#include "runtime/timerTrace.hpp"
#include "services/runtimeService.hpp"
#include "trace/tracing.hpp"
#include "trace/traceMacros.hpp"
#include "utilities/events.hpp"
#include "utilities/macros.hpp"
#if INCLUDE_ALL_GCS
@@ -80,7 +82,7 @@ static bool timeout_error_printed = false;
// Roll all threads forward to a safepoint and suspend them all
void SafepointSynchronize::begin() {
EventSafepointBegin begin_event;
Thread* myThread = Thread::current();
assert(myThread->is_VM_thread(), "Only VM thread may execute a safepoint");
@@ -170,191 +172,218 @@ void SafepointSynchronize::begin() {
// between states, the safepointing code will wait for the thread to
// block itself when it attempts transitions to a new state.
//
{
EventSafepointStateSync sync_event;
int initial_running = 0;

_state = _synchronizing;
OrderAccess::fence();

// Flush all thread states to memory
if (!UseMembar) {
os::serialize_thread_states();
}

// Make interpreter safepoint aware
Interpreter::notice_safepoints();

if (DeferPollingPageLoopCount < 0) {
// Make polling safepoint aware
guarantee (PageArmed == 0, "invariant") ;
PageArmed = 1 ;
os::make_polling_page_unreadable();
}

// Consider using active_processor_count() ... but that call is expensive.
int ncpus = os::processor_count() ;
#ifdef ASSERT
for (JavaThread *cur = Threads::first(); cur != NULL; cur = cur->next()) {
assert(cur->safepoint_state()->is_running(), "Illegal initial state");
// Clear the visited flag to ensure that the critical counts are collected properly.
cur->set_visited_for_critical_count(false);
}
#endif // ASSERT
if (SafepointTimeout)
safepoint_limit_time = os::javaTimeNanos() + (jlong)SafepointTimeoutDelay * MICROUNITS;
// Iterate through all threads until it has been determined how to stop them all at a safepoint
unsigned int iterations = 0;
int steps = 0 ;
while(still_running > 0) {
for (JavaThread *cur = Threads::first(); cur != NULL; cur = cur->next()) {
assert(!cur->is_ConcurrentGC_thread(), "A concurrent GC thread is unexpectedly being suspended");
ThreadSafepointState *cur_state = cur->safepoint_state();
if (cur_state->is_running()) {
cur_state->examine_state_of_thread();
if (!cur_state->is_running()) {
still_running--;
// consider adjusting steps downward:
// steps = 0
// steps -= NNN
// steps >>= 1
// steps = MIN(steps, 2000-100)
// if (iterations != 0) steps -= NNN
}
if (log_is_enabled(Trace, safepoint)) {
ResourceMark rm;
cur_state->print_on(LogHandle(safepoint)::trace_stream());
}
}
}
}
if (iterations == 0) {
initial_running = still_running;
if (PrintSafepointStatistics) {
begin_statistics(nof_threads, still_running);
}
}
if (still_running > 0) {
// Check if it is taking too long
if (SafepointTimeout && safepoint_limit_time < os::javaTimeNanos()) {
print_safepoint_timeout(_spinning_timeout);
}
// Spin to avoid context switching.
// There's a tension between allowing the mutators to run (and rendezvous)
// vs spinning. As the VM thread spins, wasting cycles, it consumes CPU that
// a mutator might otherwise use profitably to reach a safepoint. Excessive
// spinning by the VM thread on a saturated system can increase rendezvous latency.
// Blocking or yielding incur their own penalties in the form of context switching
// and the resultant loss of $ residency.
//
// Further complicating matters is that yield() does not work as naively expected
// on many platforms -- yield() does not guarantee that any other ready threads
// will run. As such we revert to naked_short_sleep() after some number of iterations.
// naked_short_sleep() is implemented as a short unconditional sleep.
// Typical operating systems round a "short" sleep period up to 10 msecs, so sleeping
// can actually increase the time it takes the VM thread to detect that a system-wide
// stop-the-world safepoint has been reached. In a pathological scenario such as that
// described in CR6415670 the VMthread may sleep just before the mutator(s) become safe.
// In that case the mutators will be stalled waiting for the safepoint to complete and
// the VMthread will be sleeping, waiting for the mutators to rendezvous. The VMthread
// will eventually wake up and detect that all mutators are safe, at which point
// we'll again make progress.
//
// Beware too that the VMThread typically runs at elevated priority.
// Its default priority is higher than the default mutator priority.
// Obviously, this complicates spinning.
//
// Note too that on Windows XP SwitchThreadTo() has quite different behavior than Sleep(0).
// Sleep(0) will not yield to lower priority threads, while SwitchThreadTo() will.
//
// See the comments in synchronizer.cpp for additional remarks on spinning.
//
// In the future we might:
// 1. Modify the safepoint scheme to avoid potentially unbounded spinning.
// This is tricky as the path used by a thread exiting the JVM (say on
// a JNI call-out) simply stores into its state field. The burden
// is placed on the VM thread, which must poll (spin).
// 2. Find something useful to do while spinning. If the safepoint is GC-related
// we might aggressively scan the stacks of threads that are already safe.
// 3. Use Solaris schedctl to examine the state of the still-running mutators.
// If all the mutators are ONPROC there's no reason to sleep or yield.
// 4. YieldTo() any still-running mutators that are ready but OFFPROC.
// 5. Check system saturation. If the system is not fully saturated then
// simply spin and avoid sleep/yield.
// 6. As still-running mutators rendezvous they could unpark the sleeping
// VMthread. This works well for still-running mutators that become
// safe. The VMthread must still poll for mutators that call-out.
// 7. Drive the policy on time-since-begin instead of iterations.
// 8. Consider making the spin duration a function of the # of CPUs:
// Spin = (((ncpus-1) * M) + K) + F(still_running)
// Alternately, instead of counting iterations of the outer loop
// we could count the # of threads visited in the inner loop, above.
// 9. On windows consider using the return value from SwitchThreadTo()
// to drive subsequent spin/SwitchThreadTo()/Sleep(N) decisions.
if (int(iterations) == DeferPollingPageLoopCount) {
guarantee (PageArmed == 0, "invariant") ;
PageArmed = 1 ;
os::make_polling_page_unreadable();
}
// Instead of (ncpus > 1) consider either (still_running < (ncpus + EPSILON)) or
// ((still_running + _waiting_to_block - TryingToBlock)) < ncpus)
++steps ;
if (ncpus > 1 && steps < SafepointSpinBeforeYield) {
SpinPause() ; // MP-Polite spin
} else
if (steps < DeferThrSuspendLoopCount) {
os::naked_yield() ;
} else {
os::naked_short_sleep(1);
}
iterations ++ ;
}
assert(iterations < (uint)max_jint, "We have been iterating in the safepoint loop too long");
}

assert(still_running == 0, "sanity check");
if (PrintSafepointStatistics) {
update_statistics_on_spin_end();
}
if (sync_event.should_commit()) {
sync_event.set_safepointId(safepoint_counter());
sync_event.set_initialThreadCount(initial_running);
sync_event.set_runningThreadCount(_waiting_to_block);
sync_event.set_iterations(iterations);
sync_event.commit();
}
} //EventSafepointStateSync
// wait until all threads are stopped
{
EventSafepointWaitBlocked wait_blocked_event;
int initial_waiting_to_block = _waiting_to_block;
while (_waiting_to_block > 0) {
log_debug(safepoint)("Waiting for %d thread(s) to block", _waiting_to_block);
if (!SafepointTimeout || timeout_error_printed) {
Safepoint_lock->wait(true); // true, means with no safepoint checks
} else {
// Compute remaining time
jlong remaining_time = safepoint_limit_time - os::javaTimeNanos();
// If there is no remaining time, then there is an error
if (remaining_time < 0 || Safepoint_lock->wait(true, remaining_time / MICROUNITS)) {
print_safepoint_timeout(_blocking_timeout);
}
}
}
assert(_waiting_to_block == 0, "sanity check");
#ifndef PRODUCT
if (SafepointTimeout) {
jlong current_time = os::javaTimeNanos();
if (safepoint_limit_time < current_time) {
tty->print_cr("# SafepointSynchronize: Finished after "
INT64_FORMAT_W(6) " ms",
((current_time - safepoint_limit_time) / MICROUNITS +
(jlong)SafepointTimeoutDelay));
}
}
#endif
assert((_safepoint_counter & 0x1) == 0, "must be even");
assert(Threads_lock->owned_by_self(), "must hold Threads_lock");
_safepoint_counter ++;
// Record state
_state = _synchronized;
OrderAccess::fence();
if (wait_blocked_event.should_commit()) {
wait_blocked_event.set_safepointId(safepoint_counter());
wait_blocked_event.set_runningThreadCount(initial_waiting_to_block);
wait_blocked_event.commit();
}
} // EventSafepointWaitBlocked
#ifdef ASSERT
for (JavaThread *cur = Threads::first(); cur != NULL; cur = cur->next()) {
@@ -378,17 +407,32 @@ void SafepointSynchronize::begin() {
}
// Call stuff that needs to be run when a safepoint is just about to be completed
{
EventSafepointCleanup cleanup_event;
do_cleanup_tasks();
if (cleanup_event.should_commit()) {
cleanup_event.set_safepointId(safepoint_counter());
cleanup_event.commit();
}
}
if (PrintSafepointStatistics) {
// Record how much time was spent on the above cleanup tasks
update_statistics_on_cleanup_end(os::javaTimeNanos());
}
if (begin_event.should_commit()) {
begin_event.set_safepointId(safepoint_counter());
begin_event.set_totalThreadCount(nof_threads);
begin_event.set_jniCriticalThreadCount(_current_jni_active_count);
begin_event.commit();
}
}
// Wake up all threads, so they are ready to resume execution after the safepoint
// operation has been carried out
void SafepointSynchronize::end() {
EventSafepointEnd event;
int safepoint_id = safepoint_counter(); // Keep the odd counter as "id"
assert(Threads_lock->owned_by_self(), "must hold Threads_lock");
assert((_safepoint_counter & 0x1) == 1, "must be odd");
@@ -475,6 +519,11 @@ void SafepointSynchronize::end() {
// record this time so VMThread can keep track how much time has elapsed
// since last safepoint.
_end_of_last_safepoint = os::javaTimeMillis();
if (event.should_commit()) {
event.set_safepointId(safepoint_id);
event.commit();
}
}
bool SafepointSynchronize::is_cleanup_needed() {
@@ -483,44 +532,71 @@ bool SafepointSynchronize::is_cleanup_needed() {
return false;
}
static void event_safepoint_cleanup_task_commit(EventSafepointCleanupTask& event, const char* name) {
if (event.should_commit()) {
event.set_safepointId(SafepointSynchronize::safepoint_counter());
event.set_name(name);
event.commit();
}
}
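
The helper above exists so that each cleanup block below states its task name once and commits through a single path. As a usage sketch, a further task would follow the same pattern (the task name and body here are hypothetical, not part of this commit):

{
  const char* name = "hypothetical extra cleanup task";  // illustrative only
  EventSafepointCleanupTask event;
  TraceTime timer(name, TRACETIME_LOG(Info, safepointcleanup));
  // ... the task body would go here ...
  event_safepoint_cleanup_task_commit(event, name);
}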
// Various cleaning tasks that should be done periodically at safepoints
void SafepointSynchronize::do_cleanup_tasks() {
{
const char* name = "deflating idle monitors";
EventSafepointCleanupTask event;
TraceTime timer(name, TRACETIME_LOG(Info, safepointcleanup));
ObjectSynchronizer::deflate_idle_monitors();
event_safepoint_cleanup_task_commit(event, name);
}
{
const char* name = "updating inline caches";
EventSafepointCleanupTask event;
TraceTime timer(name, TRACETIME_LOG(Info, safepointcleanup));
InlineCacheBuffer::update_inline_caches();
event_safepoint_cleanup_task_commit(event, name);
}
{
const char* name = "compilation policy safepoint handler";
EventSafepointCleanupTask event;
TraceTime timer(name, TRACETIME_LOG(Info, safepointcleanup));
CompilationPolicy::policy()->do_safepoint_work();
event_safepoint_cleanup_task_commit(event, name);
}
{
const char* name = "mark nmethods";
EventSafepointCleanupTask event;
TraceTime timer(name, TRACETIME_LOG(Info, safepointcleanup));
NMethodSweeper::mark_active_nmethods();
event_safepoint_cleanup_task_commit(event, name);
}
if (SymbolTable::needs_rehashing()) {
const char* name = "rehashing symbol table";
EventSafepointCleanupTask event;
TraceTime timer(name, TRACETIME_LOG(Info, safepointcleanup));
SymbolTable::rehash_table();
event_safepoint_cleanup_task_commit(event, name);
}
if (StringTable::needs_rehashing()) {
const char* name = "rehashing string table";
EventSafepointCleanupTask event;
TraceTime timer(name, TRACETIME_LOG(Info, safepointcleanup));
StringTable::rehash_table();
event_safepoint_cleanup_task_commit(event, name);
}
{
// CMS delays purging the CLDG until the beginning of the next safepoint in order to
// make sure concurrent sweep is done
const char* name = "purging class loader data graph";
EventSafepointCleanupTask event;
TraceTime timer(name, TRACETIME_LOG(Info, safepointcleanup));
ClassLoaderDataGraph::purge_if_needed();
event_safepoint_cleanup_task_commit(event, name);
}
}
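
For reference, the events introduced across these hunks nest per safepoint as follows (reconstructed from the code above; each carries the same safepointId so the phases can be correlated):

// EventSafepointBegin                  covers all of SafepointSynchronize::begin()
//   EventSafepointStateSync            rolling threads forward to the safepoint (spin loop)
//   EventSafepointWaitBlocked          waiting for _waiting_to_block to reach zero
//   EventSafepointCleanup              do_cleanup_tasks()
//     EventSafepointCleanupTask (x N)  one per named cleanup task
// EventSafepointEnd                    covers SafepointSynchronize::end()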