8293711: Factor out size parsing functions from arguments.cpp

Reviewed-by: dholmes, jsjolen
2025-09-17 17:44:40 +02:00 · 2022-10-17 12:44:51 +00:00 · 2022-10-17 12:44:51 +00:00 · ec2981b83b
commit ec2981b83b
parent 5d273b9f04
6 changed files with 339 additions and 110 deletions
--- a/src/hotspot/share/runtime/arguments.cpp
+++ b/src/hotspot/share/runtime/arguments.cpp
@ -41,7 +41,6 @@
 #include "logging/logStream.hpp"
 #include "logging/logTag.hpp"
 #include "memory/allocation.inline.hpp"
 #include "metaprogramming/enableIf.hpp"
 #include "oops/instanceKlass.hpp"
 #include "oops/oop.inline.hpp"
 #include "prims/jvmtiExport.hpp"
@ -62,6 +61,7 @@
 #include "utilities/debug.hpp"
 #include "utilities/defaultStream.hpp"
 #include "utilities/macros.hpp"
 #include "utilities/parseInteger.hpp"
 #include "utilities/powerOfTwo.hpp"
 #include "utilities/stringUtils.hpp"
 #if INCLUDE_JFR
@ -745,115 +745,6 @@ bool Arguments::verify_special_jvm_flags(bool check_globals) {
 }
 #endif
 // Signed 32-bit overload: converts with strtoll() and range-checks against jint.
 //
 // strtol() is deliberately not used: on 64-bit builds "long" may be 32 or 64
 // bits wide, so range tests on its result could be tautological and provoke
 // compiler warnings.
 //
 // Returns false if strtoll() reported an error through errno or the value is
 // outside [min_jint, max_jint]. *result is written only on success; *endptr is
 // whatever strtoll() stored.
 template <typename T, ENABLE_IF(std::is_signed<T>::value), ENABLE_IF(sizeof(T) == 4)>
 static bool parse_integer_impl(const char *s, char **endptr, int base, T* result) {
  STATIC_ASSERT(sizeof(long long) >= 8); // C++ specification
  errno = 0; // errno is thread safe
  const long long parsed = strtoll(s, endptr, base);
  const bool converted = (errno == 0);
  const bool fits = (parsed >= min_jint) && (parsed <= max_jint);
  if (converted && fits) {
    *result = static_cast<T>(parsed);
    return true;
  }
  return false;
 }
 // Unsigned 32-bit overload: converts with strtoull() and range-checks against juint.
 //
 // A leading '-' is rejected up front, before strtoull() is called; in that
 // case *endptr is left untouched. strtoul() is not used for the same reason
 // strtol() is avoided in the signed variant ("long" has no fixed width).
 //
 // Returns false on a leading '-', on an errno error from strtoull(), or if the
 // value exceeds max_juint. *result is written only on success.
 template <typename T, ENABLE_IF(!std::is_signed<T>::value), ENABLE_IF(sizeof(T) == 4)>
 static bool parse_integer_impl(const char *s, char **endptr, int base, T* result) {
  STATIC_ASSERT(sizeof(unsigned long long) >= 8); // C++ specification
  const bool negative = (s[0] == '-');
  if (negative) {
    return false;
  }
  errno = 0; // errno is thread safe
  const unsigned long long parsed = strtoull(s, endptr, base);
  const bool usable = (errno == 0) && (parsed <= max_juint);
  if (!usable) {
    return false;
  }
  *result = static_cast<T>(parsed);
  return true;
 }
 // Signed 64-bit overload: a thin wrapper around strtoll().
 //
 // Note that *result receives strtoll()'s return value even when the
 // conversion failed; callers must consult the returned flag. Returns true iff
 // strtoll() left errno at 0.
 template <typename T, ENABLE_IF(std::is_signed<T>::value), ENABLE_IF(sizeof(T) == 8)>
 static bool parse_integer_impl(const char *s, char **endptr, int base, T* result) {
  errno = 0; // errno is thread safe
  const long long parsed = strtoll(s, endptr, base);
  *result = parsed;
  const bool converted = (errno == 0);
  return converted;
 }
 // Unsigned 64-bit overload: a thin wrapper around strtoull().
 //
 // A leading '-' is rejected before strtoull() is called (neither *endptr nor
 // *result is touched in that case). Otherwise *result receives strtoull()'s
 // return value even if the conversion failed. Returns true iff the input did
 // not start with '-' and strtoull() left errno at 0.
 template <typename T, ENABLE_IF(!std::is_signed<T>::value), ENABLE_IF(sizeof(T) == 8)>
 static bool parse_integer_impl(const char *s, char **endptr, int base, T* result) {
  const bool negative = (s[0] == '-');
  if (negative) {
    return false;
  }
  errno = 0; // errno is thread safe
  const unsigned long long parsed = strtoull(s, endptr, base);
  *result = parsed;
  const bool converted = (errno == 0);
  return converted;
 }
 // Multiplies n by 1024 in place, but only if the product is representable in T.
 // Returns true and updates n on success; returns false and leaves n unchanged
 // if the multiplication would overflow (in either direction for signed T).
 template<typename T>
 static bool multiply_by_1k(T& n) {
  const T lowest_safe  = std::numeric_limits<T>::min() / 1024;
  const T highest_safe = std::numeric_limits<T>::max() / 1024;
  if (n < lowest_safe || n > highest_safe) {
    return false;
  }
  n *= 1024;
  return true;
 }
 // All of the integral types that can be used for command line options:
 //   int, uint, intx, uintx, uint64_t, size_t
 //
 // In all supported platforms, these types can be mapped to only 4 native types:
 //    {signed, unsigned} x {32-bit, 64-bit}
 //
 // We use SFINAE to pick the correct parse_integer_impl() function
 //
 // Parses "<number>[<unit>]" where <number> is decimal or hexadecimal
 // ("0x"/"0X" prefix, optionally preceded by '-') and <unit> is one of
 // k/K, m/M, g/G, t/T (successive factors of 1024). The whole string must be
 // consumed. Leading whitespace is rejected.
 //
 // Returns true and stores the scaled value in *result on success. Returns
 // false, leaving *result untouched, if the string is malformed, the number is
 // out of range for T, or applying the unit would overflow T.
 template<typename T>
 static bool parse_integer(const char *s, T* result) {
  // isdigit() is only defined for values representable as unsigned char (or
  // EOF), so convert explicitly; plain char may be negative.
  if (!isdigit(static_cast<unsigned char>(s[0])) && s[0] != '-') {
    // strtoll/strtoull may allow leading spaces. Forbid it.
    return false;
  }

  T n = 0;
  // Every index is only read once all preceding characters are known to be
  // non-NUL, so this never reads past the terminator (note: s[2] in both
  // tests of the negative case).
  bool is_hex = (s[0] == '0' && (s[1] == 'x' || s[1] == 'X')) ||
                (s[0] == '-' && s[1] == '0' && (s[2] == 'x' || s[2] == 'X'));
  char* remainder;

  if (!parse_integer_impl(s, &remainder, (is_hex ? 16 : 10), &n)) {
    return false;
  }

  // Fail if no number was read at all or if the remainder contains more than a single non-digit character.
  if (remainder == s || strlen(remainder) > 1) {
    return false;
  }

  // Each unit deliberately falls through to the next smaller one, so 'T'
  // multiplies by 1024 four times, 'G' three times, and so on.
  switch (*remainder) {
    case 'T': case 't':
      if (!multiply_by_1k(n)) return false;
      // fall-through
    case 'G': case 'g':
      if (!multiply_by_1k(n)) return false;
      // fall-through
    case 'M': case 'm':
      if (!multiply_by_1k(n)) return false;
      // fall-through
    case 'K': case 'k':
      if (!multiply_by_1k(n)) return false;
      break;
    case '\0':
      break;
    default:
      return false;
  }

  *result = n;
  return true;
 }
 // Parses s as a julong by forwarding to parse_integer(); returns that
 // function's success flag unchanged.
 bool Arguments::atojulong(const char *s, julong* result) {
  const bool parsed_ok = parse_integer(s, result);
  return parsed_ok;
 }
--- a/src/hotspot/share/utilities/globalDefinitions.hpp
+++ b/src/hotspot/share/utilities/globalDefinitions.hpp
@ -110,9 +110,11 @@ class oopDesc;
 // Format 32-bit quantities.
 #define INT32_FORMAT             "%"          PRId32
 #define INT32_FORMAT_X           "0x%"        PRIx32
 #define INT32_FORMAT_X_0         "0x%08"      PRIx32
 #define INT32_FORMAT_W(width)    "%"   #width PRId32
 #define UINT32_FORMAT            "%"          PRIu32
 #define UINT32_FORMAT_X          "0x%"        PRIx32
 #define UINT32_FORMAT_X_0        "0x%08"      PRIx32
 #define UINT32_FORMAT_W(width)   "%"   #width PRIu32
--- a/src/hotspot/share/utilities/parseInteger.hpp
+++ b/src/hotspot/share/utilities/parseInteger.hpp
@ -0,0 +1,173 @@
 /*
 * Copyright (c) 1997, 2022, Oracle and/or its affiliates. All rights reserved.
 * Copyright (c) 2022 SAP SE. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */
 #ifndef SHARE_UTILITIES_PARSE_INTEGER_HPP
 #define SHARE_UTILITIES_PARSE_INTEGER_HPP
 #include "metaprogramming/enableIf.hpp"
 #include "utilities/debug.hpp"
 #include "utilities/globalDefinitions.hpp"
 #include "utilities/macros.hpp"
 #include <errno.h>
 #include <limits>
 #include <stdlib.h>
 // *************************************************************************
 // ** Attention compatibility!                                            **
 // ** These functions are used to parse JVM arguments (-XX). Be careful   **
 // ** with behavioral changes here.                                       **
 // *************************************************************************
 // Signed 32-bit overload: converts with strtoll() and range-checks against jint.
 //
 // strtol() is deliberately not used: on 64-bit builds "long" may be 32 or 64
 // bits wide, so range tests on its result could be tautological and provoke
 // compiler warnings.
 //
 // Returns false if strtoll() reported an error through errno or the value is
 // outside [min_jint, max_jint]. *result is written only on success; *endptr is
 // whatever strtoll() stored.
 template <typename T, ENABLE_IF(std::is_signed<T>::value), ENABLE_IF(sizeof(T) == 4)>
 inline bool parse_integer_impl(const char *s, char **endptr, int base, T* result) {
  STATIC_ASSERT(sizeof(long long) >= 8); // C++ specification
  errno = 0; // errno is thread safe
  const long long parsed = strtoll(s, endptr, base);
  const bool converted = (errno == 0);
  const bool fits = (parsed >= min_jint) && (parsed <= max_jint);
  if (converted && fits) {
    *result = static_cast<T>(parsed);
    return true;
  }
  return false;
 }
 // Unsigned 32-bit overload: converts with strtoull() and range-checks against juint.
 //
 // A leading '-' is rejected up front, before strtoull() is called; in that
 // case *endptr is left untouched. strtoul() is not used for the same reason
 // strtol() is avoided in the signed variant ("long" has no fixed width).
 //
 // Returns false on a leading '-', on an errno error from strtoull(), or if the
 // value exceeds max_juint. *result is written only on success.
 template <typename T, ENABLE_IF(!std::is_signed<T>::value), ENABLE_IF(sizeof(T) == 4)>
 inline bool parse_integer_impl(const char *s, char **endptr, int base, T* result) {
  STATIC_ASSERT(sizeof(unsigned long long) >= 8); // C++ specification
  const bool negative = (s[0] == '-');
  if (negative) {
    return false;
  }
  errno = 0; // errno is thread safe
  const unsigned long long parsed = strtoull(s, endptr, base);
  const bool usable = (errno == 0) && (parsed <= max_juint);
  if (!usable) {
    return false;
  }
  *result = static_cast<T>(parsed);
  return true;
 }
 // Signed 64-bit overload: a thin wrapper around strtoll().
 //
 // Note that *result receives strtoll()'s return value even when the
 // conversion failed; callers must consult the returned flag. Returns true iff
 // strtoll() left errno at 0.
 template <typename T, ENABLE_IF(std::is_signed<T>::value), ENABLE_IF(sizeof(T) == 8)>
 inline bool parse_integer_impl(const char *s, char **endptr, int base, T* result) {
  errno = 0; // errno is thread safe
  const long long parsed = strtoll(s, endptr, base);
  *result = parsed;
  const bool converted = (errno == 0);
  return converted;
 }
 // Unsigned 64-bit overload: a thin wrapper around strtoull().
 //
 // A leading '-' is rejected before strtoull() is called (neither *endptr nor
 // *result is touched in that case). Otherwise *result receives strtoull()'s
 // return value even if the conversion failed. Returns true iff the input did
 // not start with '-' and strtoull() left errno at 0.
 template <typename T, ENABLE_IF(!std::is_signed<T>::value), ENABLE_IF(sizeof(T) == 8)>
 inline bool parse_integer_impl(const char *s, char **endptr, int base, T* result) {
  const bool negative = (s[0] == '-');
  if (negative) {
    return false;
  }
  errno = 0; // errno is thread safe
  const unsigned long long parsed = strtoull(s, endptr, base);
  *result = parsed;
  const bool converted = (errno == 0);
  return converted;
 }
 // Helper for parse_integer(): multiplies n by 1024 in place, but only if the
 // product is representable in T. Returns true and updates n on success;
 // returns false and leaves n unchanged if the multiplication would overflow
 // (in either direction for signed T).
 template<typename T>
 inline bool multiply_by_1k(T& n) {
  const T lowest_safe  = std::numeric_limits<T>::min() / 1024;
  const T highest_safe = std::numeric_limits<T>::max() / 1024;
  if (n < lowest_safe || n > highest_safe) {
    return false;
  }
  n *= 1024;
  return true;
 }
 // Parses a memory size in the form "<number>[<unit>]" with valid units being
 // "k", "K", "m", "M", "g", "G", "t", "T". Unit omitted means bytes. If unit is given,
 // no space is allowed between number and unit. Number can be in either decimal form
 // or in hexadecimal form, the latter must start with "0x" or "0X" (optionally
 // preceded by '-'). Leading whitespace is not accepted.
 //
 // Valid template arguments for T are signed/unsigned 32/64-bit values.
 //
 // This function will parse until it encounters unparseable parts, then
 // stop. If it read no valid memory size, it will fail.
 //
 // On success, returns true, stores the scaled value in *result and sets
 // *endptr to the first character that was not consumed. On failure (malformed
 // input, number out of range for T, or unit scaling would overflow T) returns
 // false and leaves both *result and *endptr untouched.
 //
 // Example: "1024M:oom" will yield true, result=1G, endptr pointing to ":oom"
 template<typename T>
 static bool parse_integer(const char *s, char **endptr, T* result) {
  // isdigit() is only defined for values representable as unsigned char (or
  // EOF), so convert explicitly; plain char may be negative.
  if (!isdigit(static_cast<unsigned char>(s[0])) && s[0] != '-') {
    // strtoll/strtoull may allow leading spaces. Forbid it.
    return false;
  }

  T n = 0;
  // Every index is only read once all preceding characters are known to be
  // non-NUL, so this never reads past the terminator (note: s[2] in both
  // tests of the negative case).
  bool is_hex = (s[0] == '0' && (s[1] == 'x' || s[1] == 'X')) ||
                (s[0] == '-' && s[1] == '0' && (s[2] == 'x' || s[2] == 'X'));
  char* remainder;

  if (!parse_integer_impl<T>(s, &remainder, (is_hex ? 16 : 10), &n)) {
    return false;
  }

  // Nothing parsed? That is an error too.
  if (remainder == s) {
    return false;
  }

  // Each unit deliberately falls through to the next smaller one, so 'T'
  // multiplies by 1024 four times, 'G' three times, and so on.
  switch (*remainder) {
    case 'T': case 't':
      if (!multiply_by_1k(n)) return false;
      // fall-through
    case 'G': case 'g':
      if (!multiply_by_1k(n)) return false;
      // fall-through
    case 'M': case 'm':
      if (!multiply_by_1k(n)) return false;
      // fall-through
    case 'K': case 'k':
      if (!multiply_by_1k(n)) return false;
      remainder ++; // shave off parsed unit char
      break;
    default:
      // nothing. Return remainder unparsed.
      break;
  }

  *result = n;
  *endptr = remainder;
  return true;
 }
 // Same as parse_integer(const char *s, char **endptr, T* result), but does not
 // allow unrecognizable characters: the whole string must be consumed.
 // Example: "100m" - okay, "100m:oom" -> not okay
 //
 // Returns true and stores the value in *result on success. On failure returns
 // false and leaves *result untouched (the value is parsed into a local first,
 // so input such as "100X" does not clobber the caller's variable).
 template<typename T>
 static bool parse_integer(const char *s, T* result) {
  char* remainder;
  T parsed = 0;
  // remainder is only valid (and only inspected) if the lenient parse succeeded.
  if (!parse_integer(s, &remainder, &parsed) || *remainder != '\0') {
    return false;
  }
  *result = parsed;
  return true;
 }
 #endif // SHARE_UTILITIES_PARSE_INTEGER_HPP
--- a/test/hotspot/gtest/testutils.hpp
+++ b/test/hotspot/gtest/testutils.hpp
@ -56,4 +56,6 @@ public:
 #define ASSERT_ALIGN(p, n) ASSERT_TRUE(is_aligned(p, n))
 #define LOG_HERE(s, ...) { printf(s, __VA_ARGS__); printf("\n"); fflush(stdout); }
 #endif // TESTUTILS_HPP
--- a/test/hotspot/gtest/utilities/test_globalDefinitions.cpp
+++ b/test/hotspot/gtest/utilities/test_globalDefinitions.cpp
@ -240,10 +240,12 @@ TEST(globalDefinitions, format_specifiers) {
  check_format(UINT16_FORMAT_X_0,      (uint16_t)0x0123u, "0x0123");
  check_format(INT32_FORMAT,           123,               "123");
  check_format(INT32_FORMAT_X,         0x123,             "0x123");
  check_format(INT32_FORMAT_X_0,       0x123,             "0x00000123");
  check_format(INT32_FORMAT_W(5),      123,               "  123");
  check_format(INT32_FORMAT_W(-5),     123,               "123  ");
  check_format(UINT32_FORMAT,          123u,              "123");
  check_format(UINT32_FORMAT_X,        0x123u,            "0x123");
  check_format(UINT32_FORMAT_X_0,      0x123u,            "0x00000123");
  check_format(UINT32_FORMAT_W(5),     123u,              "  123");
  check_format(UINT32_FORMAT_W(-5),    123u,              "123  ");
--- a/test/hotspot/gtest/utilities/test_parse_memory_size.cpp
+++ b/test/hotspot/gtest/utilities/test_parse_memory_size.cpp
@ -0,0 +1,159 @@
 /*
 * Copyright (c) 2022, Oracle and/or its affiliates. All rights reserved.
 * Copyright (c) 2022 SAP SE. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 */
 #include "precompiled.hpp"
 #include "jvm_io.h"
 #include "utilities/globalDefinitions.hpp"
 #include "utilities/macros.hpp"
 #include "utilities/ostream.hpp"
 #include "utilities/parseInteger.hpp"
 #include "testutils.hpp"
 #include "unittest.hpp"
 // Returns the spelled-out name of the integer type T; only the four
 // specializations below are defined. Used for log output.
 template <typename T> const char* type_name();

 template <> const char* type_name<uint64_t>() {
  return "uint64_t";
 }

 template <> const char* type_name<uint32_t>() {
  return "uint32_t";
 }

 template <> const char* type_name<int64_t>() {
  return "int64_t";
 }

 template <> const char* type_name<int32_t>() {
  return "int32_t";
 }
 //#define LOG(s, ...) LOG_HERE(s, __VA_ARGS__)
 #define LOG(s, ...)
 template <typename T>
 static void do_test_valid(T expected_value, const char* pattern) {
  LOG("%s: \"%s\", expect: " UINT64_FORMAT "(" UINT64_FORMAT_X ")", type_name<T>(), pattern,
      (uint64_t)expected_value, (uint64_t)expected_value);
  T value = 17;
  char* end = nullptr;
  stringStream ss;
  ss.print_raw(pattern);
  bool rc = parse_integer(ss.base(), &end, &value);
  ASSERT_TRUE(rc);
  ASSERT_EQ(value, expected_value);
  rc = parse_integer(ss.base(), &value);
  ASSERT_TRUE(rc);
  ASSERT_EQ(value, expected_value);
  // Now test with a trailing pattern.
  // parse_memory_size() should return remainder pointer,
  // parse_argument_memory_size() should flatly refuse to parse this.
  ss.print(":-)");
  rc = parse_integer(ss.base(), &end, &value);
  ASSERT_TRUE(rc);
  ASSERT_EQ(value, expected_value);
  ASSERT_EQ(end, ss.base() + strlen(pattern));
  ASSERT_EQ(strcmp(end, ":-)"), 0);
  rc = parse_integer(ss.base(), &value);
  ASSERT_FALSE(rc);
 }
 template <typename T>
 static void test_valid(T value, bool hex, T scale, const char* unit) {
  if ((std::numeric_limits<T>::max() / scale) >= value) {
    T expected_result = value * scale;
    stringStream ss;
    if (hex) {
      ss.print(UINT64_FORMAT_X "%s", (uint64_t)value, unit);  // e.g. "0xFFFF"
    } else {
      ss.print(UINT64_FORMAT "%s", (uint64_t)value, unit);    // e.g. "65535"
    }
    do_test_valid((T)expected_result, ss.base());
  }
 }
 // Runs test_valid() for `value` with no unit and with every k/m/g unit in
 // both spellings; the t/T units are only exercised for types wider than 4 bytes.
 template <typename T>
 static void test_valid_all_units(T value, bool hex) {
  const struct {
    T scale;
    const char* unit;
  } common_units[] = {
    { (T)1, ""  },
    { (T)K, "k" },
    { (T)K, "K" },
    { (T)M, "m" },
    { (T)M, "M" },
    { (T)G, "g" },
    { (T)G, "G" }
  };
  for (const auto& entry : common_units) {
    test_valid(value, hex, entry.scale, entry.unit);
  }

  if (sizeof(T) > 4) {
    const T tera = (T)((uint64_t)G * 1024);
    test_valid(value, hex, tera, "t");
    test_valid(value, hex, tera, "T");
  }
 }
 // For every second power of two p that fits into T (1, 4, 16, ...), tests
 // p - 1, p and p + 1 with all units, once in decimal and once in hexadecimal
 // notation.
 //
 // The loop runs over the shift count rather than shifting a T until it wraps
 // to zero: for signed T that final shift would overflow, which is undefined
 // behavior before C++20.
 template <typename T>
 static void test_valid_all_power_of_twos() {
  // Number of value (non-sign) bits of T.
  const int value_bits = std::numeric_limits<T>::digits;
  for (int pass = 0; pass < 2; pass ++) {
    const bool hex = (pass == 1);
    for (int shift = 0; shift < value_bits; shift += 2) {
      const T i = (T)1 << shift;
      test_valid_all_units((T)(i - 1), hex);
      test_valid_all_units(i, hex);
      test_valid_all_units((T)(i + 1), hex);
    }
  }
 }
 // Valid inputs for each of the four supported integer flavours.
 TEST(ParseMemorySize, positives) {
  // 32-bit types
  test_valid_all_power_of_twos<uint32_t>();
  test_valid_all_power_of_twos<int32_t>();
  // 64-bit types
  test_valid_all_power_of_twos<uint64_t>();
  test_valid_all_power_of_twos<int64_t>();
 }
 // Test invalids.
 // Note that the parse_integer() overload without an end pointer is more
 // restrictive than the one with an end pointer, because the latter accepts
 // trailing content.

 // Expects `pattern` to be rejected by both parse_integer() overloads.
 static void do_test_invalid_both(const char* pattern) {
  LOG("%s\n", pattern);
  uint64_t parsed = 4711;
  char* rest = nullptr;

  const bool lenient_ok = parse_integer(pattern, &rest, &parsed);
  EXPECT_FALSE(lenient_ok);

  const bool strict_ok = parse_integer(pattern, &parsed);
  EXPECT_FALSE(strict_ok);
 }
 // Expects `pattern` to be accepted by the parse_integer() overload that
 // reports an end pointer, but rejected by the overload that requires the
 // whole string to be consumed.
 static void do_test_invalid_for_parse_arguments(const char* pattern) {
  LOG("%s\n", pattern);
  uint64_t parsed = 4711;
  char* rest = nullptr;

  // The first overload parses until unrecognized chars are encountered, then
  // returns pointer to string remainder.
  const bool lenient_ok = parse_integer(pattern, &rest, &parsed);
  ASSERT_TRUE(lenient_ok);

  // The second overload parses everything; unrecognized chars will make it fail.
  const bool strict_ok = parse_integer(pattern, &parsed);
  ASSERT_FALSE(strict_ok);
 }
 // Malformed inputs: some are rejected by both parse_integer() overloads,
 // others only by the overload that requires the whole string to be consumed.
 TEST(ParseMemorySize, negatives_both) {
  const char* const rejected_by_both[] = { "", "abc" };
  for (const char* pattern : rejected_by_both) {
    do_test_invalid_both(pattern);
  }

  // For these the overload with an end pointer would see "100" and stop; the
  // overload without an end pointer would reject the input.
  const char* const rejected_by_strict_only[] = { "100 M", "100X" };
  for (const char* pattern : rejected_by_strict_only) {
    do_test_invalid_for_parse_arguments(pattern);
  }
 }