mirror of
https://github.com/openjdk/jdk.git
synced 2025-08-28 07:14:30 +02:00
8166944: Hanging Error Reporting steps may lead to torn error logs
Interupt error reporting if reporting steps hang to enable subsequent reporting steps to run. Reviewed-by: cjplummer, dholmes
This commit is contained in:
parent
a842f84577
commit
8f9584260e
8 changed files with 327 additions and 25 deletions
|
@ -1,5 +1,5 @@
|
||||||
/*
|
/*
|
||||||
* Copyright (c) 2003, 2016, Oracle and/or its affiliates. All rights reserved.
|
* Copyright (c) 2003, 2017, Oracle and/or its affiliates. All rights reserved.
|
||||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||||
*
|
*
|
||||||
* This code is free software; you can redistribute it and/or modify it
|
* This code is free software; you can redistribute it and/or modify it
|
||||||
|
@ -59,6 +59,21 @@ static const int NUM_SIGNALS = sizeof(SIGNALS) / sizeof(int);
|
||||||
static int resettedSigflags[NUM_SIGNALS];
|
static int resettedSigflags[NUM_SIGNALS];
|
||||||
static address resettedSighandler[NUM_SIGNALS];
|
static address resettedSighandler[NUM_SIGNALS];
|
||||||
|
|
||||||
|
// Needed for cancelable steps.
|
||||||
|
static volatile pthread_t reporter_thread_id;
|
||||||
|
|
||||||
|
void VMError::reporting_started() {
|
||||||
|
// record pthread id of reporter thread.
|
||||||
|
reporter_thread_id = ::pthread_self();
|
||||||
|
}
|
||||||
|
|
||||||
|
void VMError::interrupt_reporting_thread() {
|
||||||
|
// We misuse SIGILL here, but it does not really matter. We need
|
||||||
|
// a signal which is handled by crash_handler and not likely to
|
||||||
|
// occurr during error reporting itself.
|
||||||
|
::pthread_kill(reporter_thread_id, SIGILL);
|
||||||
|
}
|
||||||
|
|
||||||
static void save_signal(int idx, int sig)
|
static void save_signal(int idx, int sig)
|
||||||
{
|
{
|
||||||
struct sigaction sa;
|
struct sigaction sa;
|
||||||
|
|
|
@ -1,5 +1,5 @@
|
||||||
/*
|
/*
|
||||||
* Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
|
* Copyright (c) 2003, 2017, Oracle and/or its affiliates. All rights reserved.
|
||||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||||
*
|
*
|
||||||
* This code is free software; you can redistribute it and/or modify it
|
* This code is free software; you can redistribute it and/or modify it
|
||||||
|
@ -66,3 +66,10 @@ void VMError::check_failing_cds_access(outputStream* st, const void* siginfo) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Error reporting cancellation: there is no easy way to implement this on Windows, because we do
|
||||||
|
// not have an easy way to send signals to threads (aka to cause a win32 Exception in another
|
||||||
|
// thread). We would need something like "RaiseException(HANDLE thread)"...
|
||||||
|
void VMError::reporting_started() {}
|
||||||
|
void VMError::interrupt_reporting_thread() {}
|
||||||
|
|
||||||
|
|
|
@ -935,6 +935,9 @@ public:
|
||||||
notproduct(bool, TestSafeFetchInErrorHandler, false, \
|
notproduct(bool, TestSafeFetchInErrorHandler, false, \
|
||||||
"If true, tests SafeFetch inside error handler.") \
|
"If true, tests SafeFetch inside error handler.") \
|
||||||
\
|
\
|
||||||
|
notproduct(bool, TestUnresponsiveErrorHandler, false, \
|
||||||
|
"If true, simulates an unresponsive error handler.") \
|
||||||
|
\
|
||||||
develop(bool, Verbose, false, \
|
develop(bool, Verbose, false, \
|
||||||
"Print additional debugging information from other modes") \
|
"Print additional debugging information from other modes") \
|
||||||
\
|
\
|
||||||
|
|
|
@ -1289,28 +1289,32 @@ void WatcherThread::run() {
|
||||||
if (is_error_reported()) {
|
if (is_error_reported()) {
|
||||||
// A fatal error has happened, the error handler(VMError::report_and_die)
|
// A fatal error has happened, the error handler(VMError::report_and_die)
|
||||||
// should abort JVM after creating an error log file. However in some
|
// should abort JVM after creating an error log file. However in some
|
||||||
// rare cases, the error handler itself might deadlock. Here we try to
|
// rare cases, the error handler itself might deadlock. Here periodically
|
||||||
// kill JVM if the fatal error handler fails to abort in 2 minutes.
|
// check for error reporting timeouts, and if it happens, just proceed to
|
||||||
//
|
// abort the VM.
|
||||||
|
|
||||||
// This code is in WatcherThread because WatcherThread wakes up
|
// This code is in WatcherThread because WatcherThread wakes up
|
||||||
// periodically so the fatal error handler doesn't need to do anything;
|
// periodically so the fatal error handler doesn't need to do anything;
|
||||||
// also because the WatcherThread is less likely to crash than other
|
// also because the WatcherThread is less likely to crash than other
|
||||||
// threads.
|
// threads.
|
||||||
|
|
||||||
for (;;) {
|
for (;;) {
|
||||||
if (!ShowMessageBoxOnError
|
// Note: we use naked sleep in this loop because we want to avoid using
|
||||||
&& (OnError == NULL || OnError[0] == '\0')
|
// any kind of VM infrastructure which may be broken at this point.
|
||||||
&& Arguments::abort_hook() == NULL) {
|
if (VMError::check_timeout()) {
|
||||||
os::sleep(this, (jlong)ErrorLogTimeout * 1000, false); // in seconds
|
// We hit error reporting timeout. Error reporting was interrupted and
|
||||||
|
// will be wrapping things up now (closing files etc). Give it some more
|
||||||
|
// time, then quit the VM.
|
||||||
|
os::naked_short_sleep(200);
|
||||||
|
// Print a message to stderr.
|
||||||
fdStream err(defaultStream::output_fd());
|
fdStream err(defaultStream::output_fd());
|
||||||
err.print_raw_cr("# [ timer expired, abort... ]");
|
err.print_raw_cr("# [ timer expired, abort... ]");
|
||||||
// skip atexit/vm_exit/vm_abort hooks
|
// skip atexit/vm_exit/vm_abort hooks
|
||||||
os::die();
|
os::die();
|
||||||
}
|
}
|
||||||
|
|
||||||
// Wake up 5 seconds later, the fatal handler may reset OnError or
|
// Wait a second, then recheck for timeout.
|
||||||
// ShowMessageBoxOnError when it is ready to abort.
|
os::naked_short_sleep(999);
|
||||||
os::sleep(this, 5 * 1000, false);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -1,5 +1,5 @@
|
||||||
/*
|
/*
|
||||||
* Copyright (c) 2003, 2016, Oracle and/or its affiliates. All rights reserved.
|
* Copyright (c) 2003, 2017, Oracle and/or its affiliates. All rights reserved.
|
||||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||||
*
|
*
|
||||||
* This code is free software; you can redistribute it and/or modify it
|
* This code is free software; you can redistribute it and/or modify it
|
||||||
|
@ -38,6 +38,7 @@
|
||||||
#include "runtime/thread.inline.hpp"
|
#include "runtime/thread.inline.hpp"
|
||||||
#include "runtime/vmThread.hpp"
|
#include "runtime/vmThread.hpp"
|
||||||
#include "runtime/vm_operations.hpp"
|
#include "runtime/vm_operations.hpp"
|
||||||
|
#include "runtime/vm_version.hpp"
|
||||||
#include "services/memTracker.hpp"
|
#include "services/memTracker.hpp"
|
||||||
#include "trace/traceMacros.hpp"
|
#include "trace/traceMacros.hpp"
|
||||||
#include "utilities/debug.hpp"
|
#include "utilities/debug.hpp"
|
||||||
|
@ -312,10 +313,41 @@ static void report_vm_version(outputStream* st, char* buf, int buflen) {
|
||||||
int VMError::_current_step;
|
int VMError::_current_step;
|
||||||
const char* VMError::_current_step_info;
|
const char* VMError::_current_step_info;
|
||||||
|
|
||||||
|
volatile jlong VMError::_reporting_start_time = -1;
|
||||||
|
volatile bool VMError::_reporting_did_timeout = false;
|
||||||
|
volatile jlong VMError::_step_start_time = -1;
|
||||||
|
volatile bool VMError::_step_did_timeout = false;
|
||||||
|
|
||||||
|
// Helper, return current timestamp for timeout handling.
|
||||||
|
jlong VMError::get_current_timestamp() {
|
||||||
|
return os::javaTimeNanos();
|
||||||
|
}
|
||||||
|
// Factor to translate the timestamp to seconds.
|
||||||
|
#define TIMESTAMP_TO_SECONDS_FACTOR (1000 * 1000 * 1000)
|
||||||
|
|
||||||
|
void VMError::record_reporting_start_time() {
|
||||||
|
const jlong now = get_current_timestamp();
|
||||||
|
Atomic::store(now, &_reporting_start_time);
|
||||||
|
}
|
||||||
|
|
||||||
|
jlong VMError::get_reporting_start_time() {
|
||||||
|
return Atomic::load(&_reporting_start_time);
|
||||||
|
}
|
||||||
|
|
||||||
|
void VMError::record_step_start_time() {
|
||||||
|
const jlong now = get_current_timestamp();
|
||||||
|
Atomic::store(now, &_step_start_time);
|
||||||
|
}
|
||||||
|
|
||||||
|
jlong VMError::get_step_start_time() {
|
||||||
|
return Atomic::load(&_step_start_time);
|
||||||
|
}
|
||||||
|
|
||||||
void VMError::report(outputStream* st, bool _verbose) {
|
void VMError::report(outputStream* st, bool _verbose) {
|
||||||
|
|
||||||
# define BEGIN if (_current_step == 0) { _current_step = __LINE__;
|
# define BEGIN if (_current_step == 0) { _current_step = __LINE__;
|
||||||
# define STEP(s) } if (_current_step < __LINE__) { _current_step = __LINE__; _current_step_info = s;
|
# define STEP(s) } if (_current_step < __LINE__) { _current_step = __LINE__; _current_step_info = s; \
|
||||||
|
record_step_start_time(); _step_did_timeout = false;
|
||||||
# define END }
|
# define END }
|
||||||
|
|
||||||
// don't allocate large buffer on stack
|
// don't allocate large buffer on stack
|
||||||
|
@ -352,6 +384,18 @@ void VMError::report(outputStream* st, bool _verbose) {
|
||||||
controlled_crash(TestCrashInErrorHandler);
|
controlled_crash(TestCrashInErrorHandler);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// TestUnresponsiveErrorHandler: We want to test both step timeouts and global timeout.
|
||||||
|
// Step to global timeout ratio is 4:1, so in order to be absolutely sure we hit the
|
||||||
|
// global timeout, let's execute the timeout step five times.
|
||||||
|
// See corresponding test in test/runtime/ErrorHandling/TimeoutInErrorHandlingTest.java
|
||||||
|
#define TIMEOUT_TEST_STEP STEP("test unresponsive error reporting step") \
|
||||||
|
if (_verbose && TestUnresponsiveErrorHandler) { os::infinite_sleep(); }
|
||||||
|
TIMEOUT_TEST_STEP
|
||||||
|
TIMEOUT_TEST_STEP
|
||||||
|
TIMEOUT_TEST_STEP
|
||||||
|
TIMEOUT_TEST_STEP
|
||||||
|
TIMEOUT_TEST_STEP
|
||||||
|
|
||||||
STEP("test safefetch in error handler")
|
STEP("test safefetch in error handler")
|
||||||
// test whether it is safe to use SafeFetch32 in Crash Handler. Test twice
|
// test whether it is safe to use SafeFetch32 in Crash Handler. Test twice
|
||||||
// to test that resetting the signal handler works correctly.
|
// to test that resetting the signal handler works correctly.
|
||||||
|
@ -1176,6 +1220,9 @@ void VMError::report_and_die(int id, const char* message, const char* detail_fmt
|
||||||
// first time
|
// first time
|
||||||
set_error_reported();
|
set_error_reported();
|
||||||
|
|
||||||
|
reporting_started();
|
||||||
|
record_reporting_start_time();
|
||||||
|
|
||||||
if (ShowMessageBoxOnError || PauseAtExit) {
|
if (ShowMessageBoxOnError || PauseAtExit) {
|
||||||
show_message_box(buffer, sizeof(buffer));
|
show_message_box(buffer, sizeof(buffer));
|
||||||
|
|
||||||
|
@ -1216,17 +1263,31 @@ void VMError::report_and_die(int id, const char* message, const char* detail_fmt
|
||||||
os::die();
|
os::die();
|
||||||
}
|
}
|
||||||
|
|
||||||
jio_snprintf(buffer, sizeof(buffer),
|
outputStream* const st = log.is_open() ? &log : &out;
|
||||||
"[error occurred during error reporting (%s), id 0x%x]",
|
st->cr();
|
||||||
_current_step_info, _id);
|
|
||||||
if (log.is_open()) {
|
// Timeout handling.
|
||||||
log.cr();
|
if (_step_did_timeout) {
|
||||||
log.print_raw_cr(buffer);
|
// The current step had a timeout. Lets continue reporting with the next step.
|
||||||
log.cr();
|
st->print_raw("[timeout occurred during error reporting in step \"");
|
||||||
|
st->print_raw(_current_step_info);
|
||||||
|
st->print_cr("\"] after " INT64_FORMAT " s.",
|
||||||
|
(get_current_timestamp() - _step_start_time) / TIMESTAMP_TO_SECONDS_FACTOR);
|
||||||
|
} else if (_reporting_did_timeout) {
|
||||||
|
// We hit ErrorLogTimeout. Reporting will stop altogether. Let's wrap things
|
||||||
|
// up, the process is about to be stopped by the WatcherThread.
|
||||||
|
st->print_cr("------ Timeout during error reporting after " INT64_FORMAT " s. ------",
|
||||||
|
(get_current_timestamp() - _reporting_start_time) / TIMESTAMP_TO_SECONDS_FACTOR);
|
||||||
|
st->flush();
|
||||||
|
// Watcherthread is about to call os::die. Lets just wait.
|
||||||
|
os::infinite_sleep();
|
||||||
} else {
|
} else {
|
||||||
out.cr();
|
// Crash or assert during error reporting. Lets continue reporting with the next step.
|
||||||
out.print_raw_cr(buffer);
|
jio_snprintf(buffer, sizeof(buffer),
|
||||||
out.cr();
|
"[error occurred during error reporting (%s), id 0x%x]",
|
||||||
|
_current_step_info, _id);
|
||||||
|
st->print_raw_cr(buffer);
|
||||||
|
st->cr();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -1421,3 +1482,50 @@ void VMError::show_message_box(char *buf, int buflen) {
|
||||||
yes = os::start_debugging(buf,buflen);
|
yes = os::start_debugging(buf,buflen);
|
||||||
} while (yes);
|
} while (yes);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Timeout handling: check if a timeout happened (either a single step did
|
||||||
|
// timeout or the whole of error reporting hit ErrorLogTimeout). Interrupt
|
||||||
|
// the reporting thread if that is the case.
|
||||||
|
bool VMError::check_timeout() {
|
||||||
|
|
||||||
|
if (ErrorLogTimeout == 0) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Do not check for timeouts if we still have a message box to show to the
|
||||||
|
// user or if there are OnError handlers to be run.
|
||||||
|
if (ShowMessageBoxOnError
|
||||||
|
|| (OnError != NULL && OnError[0] != '\0')
|
||||||
|
|| Arguments::abort_hook() != NULL) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
const jlong reporting_start_time_l = get_reporting_start_time();
|
||||||
|
const jlong now = get_current_timestamp();
|
||||||
|
// Timestamp is stored in nanos.
|
||||||
|
if (reporting_start_time_l > 0) {
|
||||||
|
const jlong end = reporting_start_time_l + (jlong)ErrorLogTimeout * TIMESTAMP_TO_SECONDS_FACTOR;
|
||||||
|
if (end <= now) {
|
||||||
|
_reporting_did_timeout = true;
|
||||||
|
interrupt_reporting_thread();
|
||||||
|
return true; // global timeout
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
const jlong step_start_time_l = get_step_start_time();
|
||||||
|
if (step_start_time_l > 0) {
|
||||||
|
// A step times out after a quarter of the total timeout. Steps are mostly fast unless they
|
||||||
|
// hang for some reason, so this simple rule allows for three hanging step and still
|
||||||
|
// hopefully leaves time enough for the rest of the steps to finish.
|
||||||
|
const jlong end = step_start_time_l + (jlong)ErrorLogTimeout * TIMESTAMP_TO_SECONDS_FACTOR / 4;
|
||||||
|
if (end <= now) {
|
||||||
|
_step_did_timeout = true;
|
||||||
|
interrupt_reporting_thread();
|
||||||
|
return false; // (Not a global timeout)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return false;
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
|
|
@ -1,5 +1,5 @@
|
||||||
/*
|
/*
|
||||||
* Copyright (c) 2003, 2015, Oracle and/or its affiliates. All rights reserved.
|
* Copyright (c) 2003, 2017, Oracle and/or its affiliates. All rights reserved.
|
||||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||||
*
|
*
|
||||||
* This code is free software; you can redistribute it and/or modify it
|
* This code is free software; you can redistribute it and/or modify it
|
||||||
|
@ -74,6 +74,16 @@ class VMError : public AllStatic {
|
||||||
// no core/minidump has been written to disk
|
// no core/minidump has been written to disk
|
||||||
static char coredump_message[O_BUFLEN];
|
static char coredump_message[O_BUFLEN];
|
||||||
|
|
||||||
|
// Timeout handling:
|
||||||
|
// Timestamp at which error reporting started; -1 if no error reporting in progress.
|
||||||
|
static volatile jlong _reporting_start_time;
|
||||||
|
// Whether or not error reporting did timeout.
|
||||||
|
static volatile bool _reporting_did_timeout;
|
||||||
|
// Timestamp at which the last error reporting step started; -1 if no error reporting
|
||||||
|
// in progress.
|
||||||
|
static volatile jlong _step_start_time;
|
||||||
|
// Whether or not the last error reporting step did timeout.
|
||||||
|
static volatile bool _step_did_timeout;
|
||||||
|
|
||||||
// set signal handlers on Solaris/Linux or the default exception filter
|
// set signal handlers on Solaris/Linux or the default exception filter
|
||||||
// on Windows, to handle recursive crashes.
|
// on Windows, to handle recursive crashes.
|
||||||
|
@ -104,6 +114,20 @@ class VMError : public AllStatic {
|
||||||
static fdStream out;
|
static fdStream out;
|
||||||
static fdStream log; // error log used by VMError::report_and_die()
|
static fdStream log; // error log used by VMError::report_and_die()
|
||||||
|
|
||||||
|
// Timeout handling.
|
||||||
|
// Hook functions for platform dependend functionality:
|
||||||
|
static void reporting_started();
|
||||||
|
static void interrupt_reporting_thread();
|
||||||
|
|
||||||
|
// Helper function to get the current timestamp.
|
||||||
|
static jlong get_current_timestamp();
|
||||||
|
|
||||||
|
// Accessors to get/set the start times for step and total timeout.
|
||||||
|
static void record_reporting_start_time();
|
||||||
|
static jlong get_reporting_start_time();
|
||||||
|
static void record_step_start_time();
|
||||||
|
static jlong get_step_start_time();
|
||||||
|
|
||||||
public:
|
public:
|
||||||
|
|
||||||
// return a string to describe the error
|
// return a string to describe the error
|
||||||
|
@ -147,6 +171,11 @@ public:
|
||||||
static bool fatal_error_in_progress() { return first_error_tid != -1; }
|
static bool fatal_error_in_progress() { return first_error_tid != -1; }
|
||||||
|
|
||||||
static intptr_t get_first_error_tid() { return first_error_tid; }
|
static intptr_t get_first_error_tid() { return first_error_tid; }
|
||||||
|
|
||||||
|
// Called by the WatcherThread to check if error reporting has timed-out.
|
||||||
|
// Returns true if error reporting has not completed within the ErrorLogTimeout limit.
|
||||||
|
static bool check_timeout();
|
||||||
|
|
||||||
};
|
};
|
||||||
|
|
||||||
#endif // SHARE_VM_UTILITIES_VMERROR_HPP
|
#endif // SHARE_VM_UTILITIES_VMERROR_HPP
|
||||||
|
|
|
@ -371,6 +371,7 @@ hotspot_fast_runtime = \
|
||||||
-runtime/ConstantPool/IntfMethod.java \
|
-runtime/ConstantPool/IntfMethod.java \
|
||||||
-runtime/ErrorHandling/CreateCoredumpOnCrash.java \
|
-runtime/ErrorHandling/CreateCoredumpOnCrash.java \
|
||||||
-runtime/ErrorHandling/ErrorHandler.java \
|
-runtime/ErrorHandling/ErrorHandler.java \
|
||||||
|
-runtime/ErrorHandling/TimeoutInErrorHandlingTest.java \
|
||||||
-runtime/logging/MonitorMismatchTest.java \
|
-runtime/logging/MonitorMismatchTest.java \
|
||||||
-runtime/memory/ReserveMemory.java \
|
-runtime/memory/ReserveMemory.java \
|
||||||
-runtime/memory/RunUnitTestsConcurrently.java \
|
-runtime/memory/RunUnitTestsConcurrently.java \
|
||||||
|
|
|
@ -0,0 +1,135 @@
|
||||||
|
/*
|
||||||
|
* Copyright (c) 2017, Oracle and/or its affiliates. All rights reserved.
|
||||||
|
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||||
|
*
|
||||||
|
* This code is free software; you can redistribute it and/or modify it
|
||||||
|
* under the terms of the GNU General Public License version 2 only, as
|
||||||
|
* published by the Free Software Foundation.
|
||||||
|
*
|
||||||
|
* This code is distributed in the hope that it will be useful, but WITHOUT
|
||||||
|
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||||
|
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
||||||
|
* version 2 for more details (a copy is included in the LICENSE file that
|
||||||
|
* accompanied this code).
|
||||||
|
*
|
||||||
|
* You should have received a copy of the GNU General Public License version
|
||||||
|
* 2 along with this work; if not, write to the Free Software Foundation,
|
||||||
|
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||||
|
*
|
||||||
|
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
|
||||||
|
* or visit www.oracle.com if you need additional information or have any
|
||||||
|
* questions.
|
||||||
|
*/
|
||||||
|
|
||||||
|
import java.io.BufferedReader;
|
||||||
|
import java.io.File;
|
||||||
|
import java.io.FileInputStream;
|
||||||
|
import java.io.InputStreamReader;
|
||||||
|
import java.util.regex.Pattern;
|
||||||
|
|
||||||
|
import jdk.test.lib.process.OutputAnalyzer;
|
||||||
|
import jdk.test.lib.Platform;
|
||||||
|
import jdk.test.lib.process.ProcessTools;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* @test
|
||||||
|
* @bug 8166944
|
||||||
|
* @summary Hanging Error Reporting steps may lead to torn error logs
|
||||||
|
* @modules java.base/jdk.internal.misc
|
||||||
|
* @library /test/lib
|
||||||
|
* @requires (vm.debug == true) & (os.family != "windows")
|
||||||
|
* @author Thomas Stuefe (SAP)
|
||||||
|
*/
|
||||||
|
|
||||||
|
public class TimeoutInErrorHandlingTest {
|
||||||
|
|
||||||
|
|
||||||
|
public static void main(String[] args) throws Exception {
|
||||||
|
|
||||||
|
/* Start the VM and let it crash. Specify TestUnresponsiveErrorHandler which will
|
||||||
|
* let five subsequent error reporting steps hang. The Timeout handling triggered
|
||||||
|
* by the WatcherThread should kick in and interrupt those steps. In theory, the
|
||||||
|
* text "timeout occurred during error reporting in step .." (the little timeouts)
|
||||||
|
* should occur in the error log up to four times, followed by the final big timeout
|
||||||
|
* "------ Timeout during error reporting after xx s. ------"
|
||||||
|
*
|
||||||
|
* Note that there are a number of uncertainties which make writing a 100% foolproof
|
||||||
|
* test challenging. The time the error reporting thread takes to react to the
|
||||||
|
* timeout triggers is unknown. So it is difficult to predict how many little timeouts
|
||||||
|
* will be visible before the big timeout kicks in. Also, once the big timeout hits,
|
||||||
|
* error reporting thread and Watcherthread will race. The former writes his last
|
||||||
|
* message to the error logs and flushes, the latter waits 200ms and then exits the
|
||||||
|
* process without further synchronization with the error reporting thread.
|
||||||
|
*
|
||||||
|
* Because of all this and the desire to write a bullet proof test which does
|
||||||
|
* not fail sporadically, we will not test for the final timeout message nor for all
|
||||||
|
* of the optimally expected little timeout messages. We just test for two of the
|
||||||
|
* little timeout messages to see that repeated timeout handling is basically working.
|
||||||
|
*/
|
||||||
|
|
||||||
|
ProcessBuilder pb = ProcessTools.createJavaProcessBuilder(
|
||||||
|
"-XX:+UnlockDiagnosticVMOptions",
|
||||||
|
"-Xmx100M",
|
||||||
|
"-XX:ErrorHandlerTest=14",
|
||||||
|
"-XX:+TestUnresponsiveErrorHandler",
|
||||||
|
"-XX:ErrorLogTimeout=16", // 16 seconds big timeout = 4 seconds per little timeout
|
||||||
|
"-XX:-CreateCoredumpOnCrash",
|
||||||
|
"-version");
|
||||||
|
|
||||||
|
OutputAnalyzer output_detail = new OutputAnalyzer(pb.start());
|
||||||
|
|
||||||
|
// we should have crashed with a SIGSEGV
|
||||||
|
output_detail.shouldMatch("# A fatal error has been detected by the Java Runtime Environment:.*");
|
||||||
|
output_detail.shouldMatch("# +(?:SIGSEGV|EXCEPTION_ACCESS_VIOLATION).*");
|
||||||
|
|
||||||
|
// VM should have been aborted by WatcherThread
|
||||||
|
output_detail.shouldMatch(".*timer expired, abort.*");
|
||||||
|
|
||||||
|
// extract hs-err file
|
||||||
|
String hs_err_file = output_detail.firstMatch("# *(\\S*hs_err_pid\\d+\\.log)", 1);
|
||||||
|
if (hs_err_file == null) {
|
||||||
|
throw new RuntimeException("Did not find hs-err file in output.\n");
|
||||||
|
}
|
||||||
|
|
||||||
|
File f = new File(hs_err_file);
|
||||||
|
if (!f.exists()) {
|
||||||
|
throw new RuntimeException("hs-err file missing at "
|
||||||
|
+ f.getAbsolutePath() + ".\n");
|
||||||
|
}
|
||||||
|
|
||||||
|
System.out.println("Found hs_err file. Scanning...");
|
||||||
|
|
||||||
|
FileInputStream fis = new FileInputStream(f);
|
||||||
|
BufferedReader br = new BufferedReader(new InputStreamReader(fis));
|
||||||
|
String line = null;
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
Pattern [] pattern = new Pattern[] {
|
||||||
|
Pattern.compile(".*timeout occurred during error reporting in step.*"),
|
||||||
|
Pattern.compile(".*timeout occurred during error reporting in step.*")
|
||||||
|
};
|
||||||
|
int currentPattern = 0;
|
||||||
|
|
||||||
|
String lastLine = null;
|
||||||
|
while ((line = br.readLine()) != null) {
|
||||||
|
if (currentPattern < pattern.length) {
|
||||||
|
if (pattern[currentPattern].matcher(line).matches()) {
|
||||||
|
System.out.println("Found: " + line + ".");
|
||||||
|
currentPattern ++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
lastLine = line;
|
||||||
|
}
|
||||||
|
br.close();
|
||||||
|
|
||||||
|
if (currentPattern < pattern.length) {
|
||||||
|
throw new RuntimeException("hs-err file incomplete (first missing pattern: " + currentPattern + ")");
|
||||||
|
}
|
||||||
|
|
||||||
|
System.out.println("OK.");
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue