8327640: Allow NumberFormat strict parsing

Reviewed-by: naoto
2025-08-27 14:54:52 +02:00 · 2024-04-16 16:18:09 +00:00 · 2024-04-16 16:18:09 +00:00 · 941bee197f
commit 941bee197f
parent 2ede14335a
12 changed files with 1569 additions and 103 deletions
--- a/src/java.base/share/classes/java/text/DecimalFormat.java
+++ b/src/java.base/share/classes/java/text/DecimalFormat.java
@ -50,6 +50,7 @@ import java.util.Currency;
 import java.util.Locale;
 import java.util.concurrent.atomic.AtomicInteger;
 import java.util.concurrent.atomic.AtomicLong;
+
 import sun.util.locale.provider.LocaleProviderAdapter;
 import sun.util.locale.provider.ResourceBundleBasedAdapter;

@ -2140,18 +2141,32 @@ public class DecimalFormat extends NumberFormat {
    }

    /**
-     * Parses text from a string to produce a {@code Number}.
+     * {@inheritDoc NumberFormat}
     * <p>
-     * The method attempts to parse text starting at the index given by
-     * {@code pos}.
-     * If parsing succeeds, then the index of {@code pos} is updated
-     * to the index after the last character used (parsing does not necessarily
-     * use all characters up to the end of the string), and the parsed
-     * number is returned. The updated {@code pos} can be used to
-     * indicate the starting point for the next call to this method.
-     * If an error occurs, then the index of {@code pos} is not
-     * changed, the error index of {@code pos} is set to the index of
-     * the character where the error occurred, and null is returned.
+     * Parsing can be done in either a strict or lenient manner, by default it is lenient.
+     * <p>
+     * Parsing fails when <b>lenient</b>, if the prefix and/or suffix are non-empty
+     * and cannot be found due to parsing ending early, or the first character
+     * after the prefix cannot be parsed.
+     * <p>
+     * Parsing fails when <b>strict</b>, if in {@code text},
+     * <ul>
+     *   <li> The prefix is not found. For example, a {@code Locale.US} currency
+     *   format prefix: "{@code $}"
+     *   <li> The suffix is not found. For example, a {@code Locale.US} percent
+     *   format suffix: "{@code %}"
+     *   <li> {@link #isGroupingUsed()} returns {@code true}, and {@link
+     *   #getGroupingSize()} is not adhered to
+     *   <li> {@link #isGroupingUsed()} returns {@code false}, and the grouping
+     *   symbol is found
+     *   <li> {@link #isParseIntegerOnly()} returns {@code true}, and the decimal
+     *   separator is found
+     *   <li> {@link #isGroupingUsed()} returns {@code true} and {@link
+     *   #isParseIntegerOnly()} returns {@code false}, and the grouping
+     *   symbol occurs after the decimal separator
+     *   <li> Any other characters are found, that are not the expected symbols,
+     *   and are not digits that occur within the numerical portion
+     * </ul>
     * <p>
     * The subclass returned depends on the value of {@link #isParseBigDecimal}
     * as well as on the string being parsed.
@ -2371,22 +2386,34 @@ public class DecimalFormat extends NumberFormat {
            return false;
        }

+        // position will serve as new index when success, otherwise it will
+        // serve as errorIndex when failure
        position = subparseNumber(text, position, digits, true, isExponent, status);
+
+        // First character after the prefix was un-parseable, should
+        // fail regardless if lenient or strict.
        if (position == -1) {
            parsePosition.index = oldStart;
            parsePosition.errorIndex = oldStart;
            return false;
        }

-        // Check for suffix
+        // When strict, text should end with the suffix.
+        // When lenient, text only needs to contain the suffix.
        if (!isExponent) {
            if (gotPositive) {
-                gotPositive = text.regionMatches(position,positiveSuffix,0,
-                        positiveSuffix.length());
+                boolean containsPosSuffix =
+                        text.regionMatches(position, positiveSuffix, 0, positiveSuffix.length());
+                boolean endsWithPosSuffix =
+                        containsPosSuffix && text.length() == position + positiveSuffix.length();
+                gotPositive = parseStrict ? endsWithPosSuffix : containsPosSuffix;
            }
            if (gotNegative) {
-                gotNegative = text.regionMatches(position,negativeSuffix,0,
-                        negativeSuffix.length());
+                boolean containsNegSuffix =
+                        text.regionMatches(position, negativeSuffix, 0, negativeSuffix.length());
+                boolean endsWithNegSuffix =
+                        containsNegSuffix && text.length() == position + negativeSuffix.length();
+                gotNegative = parseStrict ? endsWithNegSuffix : containsNegSuffix;
            }

            // If both match, take longest
@ -2404,8 +2431,9 @@ public class DecimalFormat extends NumberFormat {
                return false;
            }

+            // No failures, thus increment the index by the suffix
            parsePosition.index = position +
-                    (gotPositive ? positiveSuffix.length() : negativeSuffix.length()); // mark success!
+                    (gotPositive ? positiveSuffix.length() : negativeSuffix.length());
        } else {
            parsePosition.index = position;
        }
@ -2420,7 +2448,7 @@ public class DecimalFormat extends NumberFormat {

    /**
     * Parses a number from the given {@code text}. The text is parsed
-     * beginning at position, until an unparseable character is seen.
+     * beginning at {@code position}, until an unparseable character is seen.
     *
     * @param text the string to parse
     * @param position the position at which parsing begins
@ -2438,7 +2466,7 @@ public class DecimalFormat extends NumberFormat {
                       boolean isExponent, boolean[] status) {
        // process digits or Inf, find decimal position
        status[STATUS_INFINITE] = false;
-        if (!isExponent && text.regionMatches(position,symbols.getInfinity(),0,
+        if (!isExponent && text.regionMatches(position, symbols.getInfinity(), 0,
                symbols.getInfinity().length())) {
            position += symbols.getInfinity().length();
            status[STATUS_INFINITE] = true;
@ -2467,6 +2495,8 @@ public class DecimalFormat extends NumberFormat {
            // We have to track digitCount ourselves, because digits.count will
            // pin when the maximum allowable digits is reached.
            int digitCount = 0;
+            int prevSeparatorIndex = -groupingSize;
+            int startPos = position; // Rely on startPos as index after prefix

            int backup = -1;
            for (; position < text.length(); ++position) {
@ -2488,6 +2518,13 @@ public class DecimalFormat extends NumberFormat {
                    digit = Character.digit(ch, 10);
                }

+                // Enforce the grouping size on the first group
+                if (parseStrict && isGroupingUsed() && position == startPos + groupingSize
+                        && prevSeparatorIndex == -groupingSize && !sawDecimal
+                        && digit >= 0 && digit <= 9) {
+                    return position;
+                }
+
                if (digit == 0) {
                    // Cancel out backup setting (see grouping handler below)
                    backup = -1; // Do this BEFORE continue statement below!!!
@ -2517,6 +2554,10 @@ public class DecimalFormat extends NumberFormat {
                    // Cancel out backup setting (see grouping handler below)
                    backup = -1;
                } else if (!isExponent && ch == decimal) {
+                    // Check grouping size on decimal separator
+                    if (parseStrict && isGroupingViolation(position, prevSeparatorIndex)) {
+                        return groupingViolationIndex(position, prevSeparatorIndex);
+                    }
                    // If we're only parsing integers, or if we ALREADY saw the
                    // decimal, then don't parse this one.
                    if (isParseIntegerOnly() || sawDecimal) {
@ -2525,8 +2566,23 @@ public class DecimalFormat extends NumberFormat {
                    digits.decimalAt = digitCount; // Not digits.count!
                    sawDecimal = true;
                } else if (!isExponent && ch == grouping && isGroupingUsed()) {
-                    if (sawDecimal) {
-                        break;
+                    if (parseStrict) {
+                        // text should not start with grouping when strict
+                        if (position == startPos) {
+                            return startPos;
+                        }
+                        // when strict, fail if grouping occurs after decimal OR
+                        // current group violates grouping size
+                        if (sawDecimal || (isGroupingViolation(position, prevSeparatorIndex))) {
+                            return groupingViolationIndex(position, prevSeparatorIndex);
+                        }
+                        prevSeparatorIndex = position; // track previous
+                    } else {
+                        // when lenient, only exit if grouping occurs after decimal
+                        // subsequent grouping symbols are allowed when lenient
+                        if (sawDecimal) {
+                            break;
+                        }
                    }
                    // Ignore grouping characters, if we are using them, but
                    // require that they be followed by a digit.  Otherwise
@ -2554,6 +2610,23 @@ public class DecimalFormat extends NumberFormat {
                }
            }

+            // (When strict), within the loop we enforce grouping when encountering
+            // decimal/grouping symbols. Once outside loop, we need to check
+            // the final grouping, ex: "1,234". Only check the final grouping
+            // if we have not seen a decimal separator, to prevent a non needed check,
+            // for ex: "1,234.", "1,234.12"
+            if (parseStrict) {
+                if (!sawDecimal && isGroupingViolation(position, prevSeparatorIndex)) {
+                    // -1, since position is incremented by one too many when loop is finished
+                    // "1,234%" and "1,234" both end with pos = 5, since '%' breaks
+                    // the loop before incrementing position. In both cases, check
+                    // should be done at pos = 4
+                    return groupingViolationIndex(position - 1, prevSeparatorIndex);
+                }
+            }
+
+            // If a grouping symbol is not followed by a digit, it must be
+            // backed up to either exit early or fail depending on leniency
            if (backup != -1) {
                position = backup;
            }
@ -2575,7 +2648,30 @@ public class DecimalFormat extends NumberFormat {
            }
        }
        return position;
+    }

+    // Checks to make sure grouping size is not violated. Used when strict.
+    private boolean isGroupingViolation(int pos, int prevGroupingPos) {
+        assert parseStrict : "Grouping violations should only occur when strict";
+        return isGroupingUsed() && // Only violates if using grouping
+                // Checks if a previous grouping symbol was seen.
+                prevGroupingPos != -groupingSize &&
+                // The check itself, - 1 to account for grouping/decimal symbol
+                pos - 1 != prevGroupingPos + groupingSize;
+    }
+
+    // Calculates the index that violated the grouping size
+    // Violation can be over or under the grouping size
+    // under - Current group has a grouping size of less than the expected
+    // over - Current group has a grouping size of more than the expected
+    private int groupingViolationIndex(int pos, int prevGroupingPos) {
+        // Both examples assume grouping size of 3 and 0 indexed
+        // under ex: "1,23,4". (4) OR "1,,2". (2) When under, violating char is grouping symbol
+        // over ex: "1,2345,6. (5) When over, violating char is the excess digit
+        // This method is only evaluated when a grouping symbol is found, thus
+        // we can take the minimum of either the current pos, or where we expect
+        // the current group to have ended
+        return Math.min(pos, prevGroupingPos + groupingSize + 1);
    }

    /**
@ -2888,6 +2984,30 @@ public class DecimalFormat extends NumberFormat {
        fastPathCheckNeeded = true;
    }

+    /**
+     * {@inheritDoc NumberFormat}
+     *
+     * @see #setStrict(boolean)
+     * @see #parse(String, ParsePosition)
+     * @since 23
+     */
+    @Override
+    public boolean isStrict() {
+        return parseStrict;
+    }
+
+    /**
+     * {@inheritDoc NumberFormat}
+     *
+     * @see #isStrict()
+     * @see #parse(String, ParsePosition)
+     * @since 23
+     */
+    @Override
+    public void setStrict(boolean strict) {
+        parseStrict = strict;
+    }
+
    /**
     * Returns whether the {@link #parse(java.lang.String, java.text.ParsePosition)}
     * method returns {@code BigDecimal}. The default value is false.
@ -2991,7 +3111,8 @@ public class DecimalFormat extends NumberFormat {
            && maximumFractionDigits == other.maximumFractionDigits
            && minimumFractionDigits == other.minimumFractionDigits
            && roundingMode == other.roundingMode
-            && symbols.equals(other.symbols);
+            && symbols.equals(other.symbols)
+            && parseStrict == other.parseStrict;
    }

    /**
@ -4176,6 +4297,15 @@ public class DecimalFormat extends NumberFormat {
     */
    private boolean useExponentialNotation;  // Newly persistent in the Java 2 platform v.1.2

+    /**
+     * True if this {@code DecimalFormat} will parse numbers with strict
+     * leniency.
+     *
+     * @serial
+     * @since 23
+     */
+    private boolean parseStrict = false;
+
    /**
     * FieldPositions describing the positive prefix String. This is
     * lazily created. Use {@code getPositivePrefixFieldPositions}