8293711: Factor out size parsing functions from arguments.cpp

Reviewed-by: dholmes, jsjolen
This commit is contained in:
Thomas Stuefe 2022-10-17 12:44:51 +00:00
parent 5d273b9f04
commit ec2981b83b
6 changed files with 339 additions and 110 deletions

View file

@ -41,7 +41,6 @@
#include "logging/logStream.hpp" #include "logging/logStream.hpp"
#include "logging/logTag.hpp" #include "logging/logTag.hpp"
#include "memory/allocation.inline.hpp" #include "memory/allocation.inline.hpp"
#include "metaprogramming/enableIf.hpp"
#include "oops/instanceKlass.hpp" #include "oops/instanceKlass.hpp"
#include "oops/oop.inline.hpp" #include "oops/oop.inline.hpp"
#include "prims/jvmtiExport.hpp" #include "prims/jvmtiExport.hpp"
@ -62,6 +61,7 @@
#include "utilities/debug.hpp" #include "utilities/debug.hpp"
#include "utilities/defaultStream.hpp" #include "utilities/defaultStream.hpp"
#include "utilities/macros.hpp" #include "utilities/macros.hpp"
#include "utilities/parseInteger.hpp"
#include "utilities/powerOfTwo.hpp" #include "utilities/powerOfTwo.hpp"
#include "utilities/stringUtils.hpp" #include "utilities/stringUtils.hpp"
#if INCLUDE_JFR #if INCLUDE_JFR
@ -745,115 +745,6 @@ bool Arguments::verify_special_jvm_flags(bool check_globals) {
} }
#endif #endif
// Signed 32-bit flavour: converts the leading number in s (in the given base) and
// stores it in *result if it lies within [min_jint, max_jint]. *endptr is set by
// strtoll. Returns false, leaving *result untouched, on conversion error or if the
// value is out of range.
template <typename T, ENABLE_IF(std::is_signed<T>::value), ENABLE_IF(sizeof(T) == 4)> // signed 32-bit
static bool parse_integer_impl(const char *s, char **endptr, int base, T* result) {
  // strtol is deliberately avoided: on 64-bit builds "long" may be 32 or 64 bits
  // wide, so the range test below could become tautological and draw compiler
  // warnings. "long long" is always wide enough.
  STATIC_ASSERT(sizeof(long long) >= 8); // C++ specification
  errno = 0; // errno is thread safe
  const long long parsed = strtoll(s, endptr, base);
  const bool converted = (errno == 0);
  const bool fits = (parsed >= min_jint) && (parsed <= max_jint);
  if (converted && fits) {
    *result = static_cast<T>(parsed);
    return true;
  }
  return false;
}
// Unsigned 32-bit flavour: converts the leading number in s (in the given base) and
// stores it in *result if it does not exceed max_juint. *endptr is set by strtoull.
// Returns false, leaving *result untouched, for input starting with '-', on
// conversion error, or if the value is out of range.
template <typename T, ENABLE_IF(!std::is_signed<T>::value), ENABLE_IF(sizeof(T) == 4)> // unsigned 32-bit
static bool parse_integer_impl(const char *s, char **endptr, int base, T* result) {
  // Negative input is never valid for an unsigned target; reject it before
  // strtoull gets to see the sign.
  if (*s == '-') {
    return false;
  }
  // strtoul is avoided for the same reason strtol is avoided in the signed variant:
  // the width of "long" differs between 64-bit platforms.
  STATIC_ASSERT(sizeof(unsigned long long) >= 8); // C++ specification
  errno = 0; // errno is thread safe
  const unsigned long long parsed = strtoull(s, endptr, base);
  if (errno == 0 && parsed <= max_juint) {
    *result = static_cast<T>(parsed);
    return true;
  }
  return false;
}
// Signed 64-bit flavour: converts the leading number in s (in the given base) with
// strtoll. *result is always written with whatever strtoll returned; the return
// value tells whether the conversion completed without setting errno.
template <typename T, ENABLE_IF(std::is_signed<T>::value), ENABLE_IF(sizeof(T) == 8)> // signed 64-bit
static bool parse_integer_impl(const char *s, char **endptr, int base, T* result) {
  errno = 0; // errno is thread safe
  const long long parsed = strtoll(s, endptr, base);
  const bool converted = (errno == 0);
  *result = parsed;
  return converted;
}
// Unsigned 64-bit flavour: rejects input starting with '-', otherwise converts the
// leading number in s (in the given base) with strtoull. Once the sign check has
// passed, *result is always written with whatever strtoull returned; the return
// value tells whether the conversion completed without setting errno.
template <typename T, ENABLE_IF(!std::is_signed<T>::value), ENABLE_IF(sizeof(T) == 8)> // unsigned 64-bit
static bool parse_integer_impl(const char *s, char **endptr, int base, T* result) {
  // Negative input is never valid for an unsigned target.
  if (*s == '-') {
    return false;
  }
  errno = 0; // errno is thread safe
  const unsigned long long parsed = strtoull(s, endptr, base);
  const bool converted = (errno == 0);
  *result = parsed;
  return converted;
}
// Multiplies n by 1024 in place, but only if the product is representable in T.
// Returns true if n was scaled, false (n unchanged) if scaling would overflow.
template<typename T>
static bool multiply_by_1k(T& n) {
  const T lowest_scalable  = std::numeric_limits<T>::min() / 1024;
  const T highest_scalable = std::numeric_limits<T>::max() / 1024;
  if (n < lowest_scalable || n > highest_scalable) {
    return false;
  }
  n *= 1024;
  return true;
}
// All of the integral types that can be used for command line options:
// int, uint, intx, uintx, uint64_t, size_t
//
// In all supported platforms, these types can be mapped to only 4 native types:
// {signed, unsigned} x {32-bit, 64-bit}
//
// We use SFINAE to pick the correct parse_integer_impl() function
//
// Parses s as "<number>[<unit>]" where <number> is decimal or hexadecimal
// ("0x"/"0X" prefix, optionally preceded by '-') and <unit> is at most one of
// k/K, m/M, g/G, t/T (multiples of 1024). The whole string must be consumed.
// Returns true and stores the scaled value in *result on success; returns false
// and leaves *result untouched on any error, including overflow while scaling.
template<typename T>
static bool parse_integer(const char *s, T* result) {
  // Only a digit or a minus sign may start the input: strtoll/strtoull may allow
  // leading spaces, which is forbidden here. The cast keeps isdigit() well defined
  // for characters with a negative value.
  if (!isdigit(static_cast<unsigned char>(s[0])) && s[0] != '-') {
    return false;
  }

  // Look for the hex prefix right after an optional sign. Examining the prefix
  // relative to the first post-sign character guarantees that no byte behind the
  // string terminator is ever read.
  const char* prefix = (s[0] == '-') ? s + 1 : s;
  const bool is_hex = (prefix[0] == '0' && (prefix[1] == 'x' || prefix[1] == 'X'));

  T n = 0;
  char* remainder;
  if (!parse_integer_impl(s, &remainder, (is_hex ? 16 : 10), &n)) {
    return false;
  }

  // Fail if no number was read at all or if the remainder contains more than a single non-digit character.
  if (remainder == s || strlen(remainder) > 1) {
    return false;
  }

  // Each unit is 1024 times the next smaller one, so the cases fall through and
  // apply one multiplication per step.
  switch (*remainder) {
    case 'T': case 't':
      if (!multiply_by_1k(n)) return false;
      // fall-through
    case 'G': case 'g':
      if (!multiply_by_1k(n)) return false;
      // fall-through
    case 'M': case 'm':
      if (!multiply_by_1k(n)) return false;
      // fall-through
    case 'K': case 'k':
      if (!multiply_by_1k(n)) return false;
      break;
    case '\0':
      break;
    default:
      return false;
  }

  *result = n;
  return true;
}
bool Arguments::atojulong(const char *s, julong* result) { bool Arguments::atojulong(const char *s, julong* result) {
return parse_integer(s, result); return parse_integer(s, result);
} }

View file

@ -110,9 +110,11 @@ class oopDesc;
// Format 32-bit quantities. // Format 32-bit quantities.
#define INT32_FORMAT "%" PRId32 #define INT32_FORMAT "%" PRId32
#define INT32_FORMAT_X "0x%" PRIx32
#define INT32_FORMAT_X_0 "0x%08" PRIx32 #define INT32_FORMAT_X_0 "0x%08" PRIx32
#define INT32_FORMAT_W(width) "%" #width PRId32 #define INT32_FORMAT_W(width) "%" #width PRId32
#define UINT32_FORMAT "%" PRIu32 #define UINT32_FORMAT "%" PRIu32
#define UINT32_FORMAT_X "0x%" PRIx32
#define UINT32_FORMAT_X_0 "0x%08" PRIx32 #define UINT32_FORMAT_X_0 "0x%08" PRIx32
#define UINT32_FORMAT_W(width) "%" #width PRIu32 #define UINT32_FORMAT_W(width) "%" #width PRIu32

View file

@ -0,0 +1,173 @@
/*
* Copyright (c) 1997, 2022, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2022 SAP SE. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*
*/
#ifndef SHARE_UTILITIES_PARSE_INTEGER_HPP
#define SHARE_UTILITIES_PARSE_INTEGER_HPP
#include "metaprogramming/enableIf.hpp"
#include "utilities/debug.hpp"
#include "utilities/globalDefinitions.hpp"
#include "utilities/macros.hpp"
#include <errno.h>
#include <limits>
#include <stdlib.h>
// *************************************************************************
// ** Attention compatibility! **
// ** These functions are used to parse JVM arguments (-XX). Be careful **
// ** with behavioral changes here. **
// *************************************************************************
// Signed 32-bit flavour: converts the leading number in s (in the given base) and
// stores it in *result if it lies within [min_jint, max_jint]. *endptr is set by
// strtoll. Returns false, leaving *result untouched, on conversion error or if the
// value is out of range.
template <typename T, ENABLE_IF(std::is_signed<T>::value), ENABLE_IF(sizeof(T) == 4)> // signed 32-bit
inline bool parse_integer_impl(const char *s, char **endptr, int base, T* result) {
  // strtol is deliberately avoided: on 64-bit builds "long" may be 32 or 64 bits
  // wide, so the range test below could become tautological and draw compiler
  // warnings. "long long" is always wide enough.
  STATIC_ASSERT(sizeof(long long) >= 8); // C++ specification
  errno = 0; // errno is thread safe
  const long long parsed = strtoll(s, endptr, base);
  const bool converted = (errno == 0);
  const bool fits = (parsed >= min_jint) && (parsed <= max_jint);
  if (converted && fits) {
    *result = static_cast<T>(parsed);
    return true;
  }
  return false;
}
// Unsigned 32-bit flavour: converts the leading number in s (in the given base) and
// stores it in *result if it does not exceed max_juint. *endptr is set by strtoull.
// Returns false, leaving *result untouched, for input starting with '-', on
// conversion error, or if the value is out of range.
template <typename T, ENABLE_IF(!std::is_signed<T>::value), ENABLE_IF(sizeof(T) == 4)> // unsigned 32-bit
inline bool parse_integer_impl(const char *s, char **endptr, int base, T* result) {
  // Negative input is never valid for an unsigned target; reject it before
  // strtoull gets to see the sign.
  if (*s == '-') {
    return false;
  }
  // strtoul is avoided for the same reason strtol is avoided in the signed variant:
  // the width of "long" differs between 64-bit platforms.
  STATIC_ASSERT(sizeof(unsigned long long) >= 8); // C++ specification
  errno = 0; // errno is thread safe
  const unsigned long long parsed = strtoull(s, endptr, base);
  if (errno == 0 && parsed <= max_juint) {
    *result = static_cast<T>(parsed);
    return true;
  }
  return false;
}
// Signed 64-bit flavour: converts the leading number in s (in the given base) with
// strtoll. *result is always written with whatever strtoll returned; the return
// value tells whether the conversion completed without setting errno.
template <typename T, ENABLE_IF(std::is_signed<T>::value), ENABLE_IF(sizeof(T) == 8)> // signed 64-bit
inline bool parse_integer_impl(const char *s, char **endptr, int base, T* result) {
  errno = 0; // errno is thread safe
  const long long parsed = strtoll(s, endptr, base);
  const bool converted = (errno == 0);
  *result = parsed;
  return converted;
}
// Unsigned 64-bit flavour: rejects input starting with '-', otherwise converts the
// leading number in s (in the given base) with strtoull. Once the sign check has
// passed, *result is always written with whatever strtoull returned; the return
// value tells whether the conversion completed without setting errno.
template <typename T, ENABLE_IF(!std::is_signed<T>::value), ENABLE_IF(sizeof(T) == 8)> // unsigned 64-bit
inline bool parse_integer_impl(const char *s, char **endptr, int base, T* result) {
  // Negative input is never valid for an unsigned target.
  if (*s == '-') {
    return false;
  }
  errno = 0; // errno is thread safe
  const unsigned long long parsed = strtoull(s, endptr, base);
  const bool converted = (errno == 0);
  *result = parsed;
  return converted;
}
// Helper for parse_integer(): multiplies n by 1024 in place, but only if the
// product is representable in T. Returns true if n was scaled, false (n unchanged)
// if scaling would overflow.
template<typename T>
inline bool multiply_by_1k(T& n) {
  const T lowest_scalable  = std::numeric_limits<T>::min() / 1024;
  const T highest_scalable = std::numeric_limits<T>::max() / 1024;
  if (n < lowest_scalable || n > highest_scalable) {
    return false;
  }
  n *= 1024;
  return true;
}
// Parses a memory size in the form "<number>[<unit>]" with valid units being
// "k", "K", "m", "M", "g", "G", "t", "T" (each a multiple of 1024). Unit omitted
// means bytes. If unit is given, no space is allowed between number and unit.
// Number can be in either decimal form or in hexadecimal form, the latter must
// start with "0x" or "0X". A leading '-' is accepted for signed T and rejected for
// unsigned T. Leading whitespace is not accepted.
//
// Valid template arguments for T are signed/unsigned 32/64-bit values.
//
// This function will parse until it encounters unparseable parts, then
// stop. If it read no valid memory size, it will fail.
//
// On success, returns true, stores the scaled value in *result and sets *endptr to
// the first character that was not consumed. On failure (no number, value out of
// range, or overflow while applying the unit), returns false and leaves both
// *result and *endptr untouched.
//
// Example: "1024M:oom" will yield true, result=1G, endptr pointing to ":oom"
template<typename T>
static bool parse_integer(const char *s, char **endptr, T* result) {
  // Only a digit or a minus sign may start the input: strtoll/strtoull may allow
  // leading spaces, which is forbidden here. The cast keeps isdigit() well defined
  // for characters with a negative value.
  if (!isdigit(static_cast<unsigned char>(s[0])) && s[0] != '-') {
    return false;
  }

  // Look for the hex prefix right after an optional sign. Examining the prefix
  // relative to the first post-sign character guarantees that no byte behind the
  // string terminator is ever read.
  const char* prefix = (s[0] == '-') ? s + 1 : s;
  const bool is_hex = (prefix[0] == '0' && (prefix[1] == 'x' || prefix[1] == 'X'));

  T n = 0;
  char* remainder;
  if (!parse_integer_impl<T>(s, &remainder, (is_hex ? 16 : 10), &n)) {
    return false;
  }

  // Nothing parsed? That is an error too.
  if (remainder == s) {
    return false;
  }

  // Each unit is 1024 times the next smaller one, so the cases fall through and
  // apply one multiplication per step.
  switch (*remainder) {
    case 'T': case 't':
      if (!multiply_by_1k(n)) return false;
      // fall-through
    case 'G': case 'g':
      if (!multiply_by_1k(n)) return false;
      // fall-through
    case 'M': case 'm':
      if (!multiply_by_1k(n)) return false;
      // fall-through
    case 'K': case 'k':
      if (!multiply_by_1k(n)) return false;
      remainder ++; // shave off parsed unit char
      break;
    default:
      // nothing. Return remainder unparsed.
      break;
  }

  *result = n;
  *endptr = remainder;
  return true;
}
// Same as parse_integer(const char *s, char **endptr, T* result), but the whole
// string must be consumed: no unrecognized trailing characters are allowed.
// Example: "100m" -> okay, "100m:oom" -> not okay
template<typename T>
static bool parse_integer(const char *s, T* result) {
  char* tail;
  if (!parse_integer(s, &tail, result)) {
    return false;
  }
  // Success only if parsing stopped exactly at the string terminator.
  return *tail == '\0';
}
#endif // SHARE_UTILITIES_PARSE_INTEGER_HPP

View file

@ -56,4 +56,6 @@ public:
#define ASSERT_ALIGN(p, n) ASSERT_TRUE(is_aligned(p, n)) #define ASSERT_ALIGN(p, n) ASSERT_TRUE(is_aligned(p, n))
#define LOG_HERE(s, ...) { printf(s, __VA_ARGS__); printf("\n"); fflush(stdout); }
#endif // TESTUTILS_HPP #endif // TESTUTILS_HPP

View file

@ -240,10 +240,12 @@ TEST(globalDefinitions, format_specifiers) {
check_format(UINT16_FORMAT_X_0, (uint16_t)0x0123u, "0x0123"); check_format(UINT16_FORMAT_X_0, (uint16_t)0x0123u, "0x0123");
check_format(INT32_FORMAT, 123, "123"); check_format(INT32_FORMAT, 123, "123");
check_format(INT32_FORMAT_X, 0x123, "0x123");
check_format(INT32_FORMAT_X_0, 0x123, "0x00000123"); check_format(INT32_FORMAT_X_0, 0x123, "0x00000123");
check_format(INT32_FORMAT_W(5), 123, " 123"); check_format(INT32_FORMAT_W(5), 123, " 123");
check_format(INT32_FORMAT_W(-5), 123, "123 "); check_format(INT32_FORMAT_W(-5), 123, "123 ");
check_format(UINT32_FORMAT, 123u, "123"); check_format(UINT32_FORMAT, 123u, "123");
check_format(UINT32_FORMAT_X, 0x123u, "0x123");
check_format(UINT32_FORMAT_X_0, 0x123u, "0x00000123"); check_format(UINT32_FORMAT_X_0, 0x123u, "0x00000123");
check_format(UINT32_FORMAT_W(5), 123u, " 123"); check_format(UINT32_FORMAT_W(5), 123u, " 123");
check_format(UINT32_FORMAT_W(-5), 123u, "123 "); check_format(UINT32_FORMAT_W(-5), 123u, "123 ");

View file

@ -0,0 +1,159 @@
/*
* Copyright (c) 2022, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2022 SAP SE. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*/
#include "precompiled.hpp"
#include "jvm_io.h"
#include "utilities/globalDefinitions.hpp"
#include "utilities/macros.hpp"
#include "utilities/ostream.hpp"
#include "utilities/parseInteger.hpp"
#include "testutils.hpp"
#include "unittest.hpp"
// Maps each integer type exercised by these tests to a printable name.
// Only the four specializations below exist; other types will not link.
template <typename T> const char* type_name();

template <>
const char* type_name<int32_t>() {
  return "int32_t";
}

template <>
const char* type_name<int64_t>() {
  return "int64_t";
}

template <>
const char* type_name<uint32_t>() {
  return "uint32_t";
}

template <>
const char* type_name<uint64_t>() {
  return "uint64_t";
}
// Per-pattern trace output for debugging these tests. Disabled by default: LOG
// expands to nothing. To enable it, swap the two definitions below so that LOG
// forwards to LOG_HERE.
//#define LOG(s, ...) LOG_HERE(s, __VA_ARGS__)
#define LOG(s, ...)
// Verifies that `pattern` parses to `expected_value` as type T with both
// parse_integer() overloads, then appends unparseable text (":-)") and verifies
// that the three-argument overload stops in front of it while the two-argument
// overload rejects the input.
template <typename T>
static void do_test_valid(T expected_value, const char* pattern) {
LOG("%s: \"%s\", expect: " UINT64_FORMAT "(" UINT64_FORMAT_X ")", type_name<T>(), pattern,
(uint64_t)expected_value, (uint64_t)expected_value);
// Start from a sentinel so a parse that "succeeds" without writing is noticed.
T value = 17;
char* end = nullptr;
// Copy the pattern into a stream so trailing text can be appended further down.
stringStream ss;
ss.print_raw(pattern);
bool rc = parse_integer(ss.base(), &end, &value);
ASSERT_TRUE(rc);
ASSERT_EQ(value, expected_value);
rc = parse_integer(ss.base(), &value);
ASSERT_TRUE(rc);
ASSERT_EQ(value, expected_value);
// Now test with a trailing pattern.
// The three-argument parse_integer() should return a pointer to the remainder,
// the two-argument parse_integer() should flatly refuse to parse this.
ss.print(":-)");
rc = parse_integer(ss.base(), &end, &value);
ASSERT_TRUE(rc);
ASSERT_EQ(value, expected_value);
ASSERT_EQ(end, ss.base() + strlen(pattern));
ASSERT_EQ(strcmp(end, ":-)"), 0);
rc = parse_integer(ss.base(), &value);
ASSERT_FALSE(rc);
}
// Builds the textual form of `value` (hex or decimal) followed by `unit` and checks
// that it parses to value * scale. Combinations whose product would not fit into T
// are skipped silently.
template <typename T>
static void test_valid(T value, bool hex, T scale, const char* unit) {
  const T largest_scalable = std::numeric_limits<T>::max() / scale;
  if (largest_scalable < value) {
    return; // value * scale would overflow T; nothing to test
  }
  const T expected_result = value * scale;
  stringStream ss;
  if (!hex) {
    ss.print(UINT64_FORMAT "%s", (uint64_t)value, unit); // e.g. "65535"
  } else {
    ss.print(UINT64_FORMAT_X "%s", (uint64_t)value, unit); // e.g. "0xFFFF"
  }
  do_test_valid((T)expected_result, ss.base());
}
// Runs test_valid() for `value` without a unit and with every unit suffix in both
// lower and upper case. The tera suffix is only exercised for types wider than
// 32 bits.
template <typename T>
static void test_valid_all_units(T value, bool hex) {
  // No unit: plain bytes.
  test_valid(value, hex, (T)1, "");

  const T scales[] = { (T)K, (T)M, (T)G };
  const char* const lower_case[] = { "k", "m", "g" };
  const char* const upper_case[] = { "K", "M", "G" };
  for (int idx = 0; idx < 3; idx++) {
    test_valid(value, hex, scales[idx], lower_case[idx]);
    test_valid(value, hex, scales[idx], upper_case[idx]);
  }

  if (sizeof(T) > 4) {
    const T tera = (T)((uint64_t)G * 1024);
    test_valid(value, hex, tera, "t");
    test_valid(value, hex, tera, "T");
  }
}
template <typename T>
static void test_valid_all_power_of_twos() {
for (int hex = 0; hex < 3; hex ++) {
for (T i = 1; i != 0; i <<= 2) {
test_valid_all_units(i - 1, hex == 1);
test_valid_all_units(i, hex == 1);
test_valid_all_units(i + 1, hex == 1);
}
}
}
// Valid inputs: runs the power-of-two sweep for each of the four integer kinds
// (unsigned/signed, 64-/32-bit) that type_name() is specialized for.
TEST(ParseMemorySize, positives) {
test_valid_all_power_of_twos<uint64_t>();
test_valid_all_power_of_twos<uint32_t>();
test_valid_all_power_of_twos<int64_t>();
test_valid_all_power_of_twos<int32_t>();
}
// Test invalids.
// Note that the two-argument parse_integer() overload is more restrictive than the
// three-argument overload, because the latter accepts trailing content.

// Expects both parse_integer() overloads to reject `pattern` (parsed as uint64_t).
static void do_test_invalid_both(const char* pattern) {
uint64_t value = 4711;
char* end = nullptr;
LOG("%s\n", pattern);
bool rc = parse_integer(pattern, &end, &value);
EXPECT_FALSE(rc);
rc = parse_integer(pattern, &value);
EXPECT_FALSE(rc);
}
// Expects `pattern` (parsed as uint64_t) to be accepted by the three-argument
// parse_integer() overload but rejected by the two-argument overload, i.e. the
// pattern starts with a valid number and continues with unrecognized characters.
static void do_test_invalid_for_parse_arguments(const char* pattern) {
uint64_t value = 4711;
char* end = nullptr;
LOG("%s\n", pattern);
// The first overload parses until unrecognized chars are encountered, then
// returns pointer to string remainder.
bool rc = parse_integer(pattern, &end, &value);
ASSERT_TRUE(rc);
// The second overload parses everything; unrecognized chars will make it fail.
rc = parse_integer(pattern, &value);
ASSERT_FALSE(rc);
}
// Invalid inputs: patterns neither overload accepts, followed by patterns that only
// the stricter two-argument overload rejects.
TEST(ParseMemorySize, negatives_both) {
do_test_invalid_both("");
do_test_invalid_both("abc");
do_test_invalid_for_parse_arguments("100 M"); // three-argument overload stops after "100"; two-argument overload rejects the trailing " M"
do_test_invalid_for_parse_arguments("100X"); // three-argument overload stops after "100"; two-argument overload rejects the trailing "X"
}