mirror of
https://github.com/openjdk/jdk.git
synced 2025-08-27 14:54:52 +02:00
8327640: Allow NumberFormat strict parsing
Reviewed-by: naoto
This commit is contained in:
parent
2ede14335a
commit
941bee197f
12 changed files with 1569 additions and 103 deletions
|
@ -50,6 +50,7 @@ import java.util.Currency;
|
|||
import java.util.Locale;
|
||||
import java.util.concurrent.atomic.AtomicInteger;
|
||||
import java.util.concurrent.atomic.AtomicLong;
|
||||
|
||||
import sun.util.locale.provider.LocaleProviderAdapter;
|
||||
import sun.util.locale.provider.ResourceBundleBasedAdapter;
|
||||
|
||||
|
@ -2140,18 +2141,32 @@ public class DecimalFormat extends NumberFormat {
|
|||
}
|
||||
|
||||
/**
|
||||
* Parses text from a string to produce a {@code Number}.
|
||||
* {@inheritDoc NumberFormat}
|
||||
* <p>
|
||||
* The method attempts to parse text starting at the index given by
|
||||
* {@code pos}.
|
||||
* If parsing succeeds, then the index of {@code pos} is updated
|
||||
* to the index after the last character used (parsing does not necessarily
|
||||
* use all characters up to the end of the string), and the parsed
|
||||
* number is returned. The updated {@code pos} can be used to
|
||||
* indicate the starting point for the next call to this method.
|
||||
* If an error occurs, then the index of {@code pos} is not
|
||||
* changed, the error index of {@code pos} is set to the index of
|
||||
* the character where the error occurred, and null is returned.
|
||||
* Parsing can be done in either a strict or lenient manner; by default it is lenient.
|
||||
* <p>
|
||||
* Parsing fails when <b>lenient</b>, if the prefix and/or suffix are non-empty
|
||||
* and cannot be found due to parsing ending early, or the first character
|
||||
* after the prefix cannot be parsed.
|
||||
* <p>
|
||||
* Parsing fails when <b>strict</b>, if in {@code text},
|
||||
* <ul>
|
||||
* <li> The prefix is not found. For example, a {@code Locale.US} currency
|
||||
* format prefix: "{@code $}"
|
||||
* <li> The suffix is not found. For example, a {@code Locale.US} percent
|
||||
* format suffix: "{@code %}"
|
||||
* <li> {@link #isGroupingUsed()} returns {@code true}, and {@link
|
||||
* #getGroupingSize()} is not adhered to
|
||||
* <li> {@link #isGroupingUsed()} returns {@code false}, and the grouping
|
||||
* symbol is found
|
||||
* <li> {@link #isParseIntegerOnly()} returns {@code true}, and the decimal
|
||||
* separator is found
|
||||
* <li> {@link #isGroupingUsed()} returns {@code true} and {@link
|
||||
* #isParseIntegerOnly()} returns {@code false}, and the grouping
|
||||
* symbol occurs after the decimal separator
|
||||
* <li> Any other characters are found, that are not the expected symbols,
|
||||
* and are not digits that occur within the numerical portion
|
||||
* </ul>
|
||||
* <p>
|
||||
* The subclass returned depends on the value of {@link #isParseBigDecimal}
|
||||
* as well as on the string being parsed.
|
||||
|
@ -2371,22 +2386,34 @@ public class DecimalFormat extends NumberFormat {
|
|||
return false;
|
||||
}
|
||||
|
||||
// position will serve as new index when success, otherwise it will
|
||||
// serve as errorIndex when failure
|
||||
position = subparseNumber(text, position, digits, true, isExponent, status);
|
||||
|
||||
// First character after the prefix was un-parseable, should
|
||||
// fail regardless of whether parsing is lenient or strict.
|
||||
if (position == -1) {
|
||||
parsePosition.index = oldStart;
|
||||
parsePosition.errorIndex = oldStart;
|
||||
return false;
|
||||
}
|
||||
|
||||
// Check for suffix
|
||||
// When strict, text should end with the suffix.
|
||||
// When lenient, text only needs to contain the suffix.
|
||||
if (!isExponent) {
|
||||
if (gotPositive) {
|
||||
gotPositive = text.regionMatches(position,positiveSuffix,0,
|
||||
positiveSuffix.length());
|
||||
boolean containsPosSuffix =
|
||||
text.regionMatches(position, positiveSuffix, 0, positiveSuffix.length());
|
||||
boolean endsWithPosSuffix =
|
||||
containsPosSuffix && text.length() == position + positiveSuffix.length();
|
||||
gotPositive = parseStrict ? endsWithPosSuffix : containsPosSuffix;
|
||||
}
|
||||
if (gotNegative) {
|
||||
gotNegative = text.regionMatches(position,negativeSuffix,0,
|
||||
negativeSuffix.length());
|
||||
boolean containsNegSuffix =
|
||||
text.regionMatches(position, negativeSuffix, 0, negativeSuffix.length());
|
||||
boolean endsWithNegSuffix =
|
||||
containsNegSuffix && text.length() == position + negativeSuffix.length();
|
||||
gotNegative = parseStrict ? endsWithNegSuffix : containsNegSuffix;
|
||||
}
|
||||
|
||||
// If both match, take longest
|
||||
|
@ -2404,8 +2431,9 @@ public class DecimalFormat extends NumberFormat {
|
|||
return false;
|
||||
}
|
||||
|
||||
// No failures, thus increment the index by the suffix
|
||||
parsePosition.index = position +
|
||||
(gotPositive ? positiveSuffix.length() : negativeSuffix.length()); // mark success!
|
||||
(gotPositive ? positiveSuffix.length() : negativeSuffix.length());
|
||||
} else {
|
||||
parsePosition.index = position;
|
||||
}
|
||||
|
@ -2420,7 +2448,7 @@ public class DecimalFormat extends NumberFormat {
|
|||
|
||||
/**
|
||||
* Parses a number from the given {@code text}. The text is parsed
|
||||
* beginning at position, until an unparseable character is seen.
|
||||
* beginning at {@code position}, until an unparseable character is seen.
|
||||
*
|
||||
* @param text the string to parse
|
||||
* @param position the position at which parsing begins
|
||||
|
@ -2438,7 +2466,7 @@ public class DecimalFormat extends NumberFormat {
|
|||
boolean isExponent, boolean[] status) {
|
||||
// process digits or Inf, find decimal position
|
||||
status[STATUS_INFINITE] = false;
|
||||
if (!isExponent && text.regionMatches(position,symbols.getInfinity(),0,
|
||||
if (!isExponent && text.regionMatches(position, symbols.getInfinity(), 0,
|
||||
symbols.getInfinity().length())) {
|
||||
position += symbols.getInfinity().length();
|
||||
status[STATUS_INFINITE] = true;
|
||||
|
@ -2467,6 +2495,8 @@ public class DecimalFormat extends NumberFormat {
|
|||
// We have to track digitCount ourselves, because digits.count will
|
||||
// pin when the maximum allowable digits is reached.
|
||||
int digitCount = 0;
|
||||
int prevSeparatorIndex = -groupingSize;
|
||||
int startPos = position; // Rely on startPos as index after prefix
|
||||
|
||||
int backup = -1;
|
||||
for (; position < text.length(); ++position) {
|
||||
|
@ -2488,6 +2518,13 @@ public class DecimalFormat extends NumberFormat {
|
|||
digit = Character.digit(ch, 10);
|
||||
}
|
||||
|
||||
// Enforce the grouping size on the first group
|
||||
if (parseStrict && isGroupingUsed() && position == startPos + groupingSize
|
||||
&& prevSeparatorIndex == -groupingSize && !sawDecimal
|
||||
&& digit >= 0 && digit <= 9) {
|
||||
return position;
|
||||
}
|
||||
|
||||
if (digit == 0) {
|
||||
// Cancel out backup setting (see grouping handler below)
|
||||
backup = -1; // Do this BEFORE continue statement below!!!
|
||||
|
@ -2517,6 +2554,10 @@ public class DecimalFormat extends NumberFormat {
|
|||
// Cancel out backup setting (see grouping handler below)
|
||||
backup = -1;
|
||||
} else if (!isExponent && ch == decimal) {
|
||||
// Check grouping size on decimal separator
|
||||
if (parseStrict && isGroupingViolation(position, prevSeparatorIndex)) {
|
||||
return groupingViolationIndex(position, prevSeparatorIndex);
|
||||
}
|
||||
// If we're only parsing integers, or if we ALREADY saw the
|
||||
// decimal, then don't parse this one.
|
||||
if (isParseIntegerOnly() || sawDecimal) {
|
||||
|
@ -2525,8 +2566,23 @@ public class DecimalFormat extends NumberFormat {
|
|||
digits.decimalAt = digitCount; // Not digits.count!
|
||||
sawDecimal = true;
|
||||
} else if (!isExponent && ch == grouping && isGroupingUsed()) {
|
||||
if (sawDecimal) {
|
||||
break;
|
||||
if (parseStrict) {
|
||||
// text should not start with grouping when strict
|
||||
if (position == startPos) {
|
||||
return startPos;
|
||||
}
|
||||
// when strict, fail if grouping occurs after decimal OR
|
||||
// current group violates grouping size
|
||||
if (sawDecimal || (isGroupingViolation(position, prevSeparatorIndex))) {
|
||||
return groupingViolationIndex(position, prevSeparatorIndex);
|
||||
}
|
||||
prevSeparatorIndex = position; // track previous
|
||||
} else {
|
||||
// when lenient, only exit if grouping occurs after decimal
|
||||
// subsequent grouping symbols are allowed when lenient
|
||||
if (sawDecimal) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
// Ignore grouping characters, if we are using them, but
|
||||
// require that they be followed by a digit. Otherwise
|
||||
|
@ -2554,6 +2610,23 @@ public class DecimalFormat extends NumberFormat {
|
|||
}
|
||||
}
|
||||
|
||||
// (When strict), within the loop we enforce grouping when encountering
|
||||
// decimal/grouping symbols. Once outside loop, we need to check
|
||||
// the final grouping, ex: "1,234". Only check the final grouping
|
||||
// if we have not seen a decimal separator, to avoid an unnecessary check,
|
||||
// for ex: "1,234.", "1,234.12"
|
||||
if (parseStrict) {
|
||||
if (!sawDecimal && isGroupingViolation(position, prevSeparatorIndex)) {
|
||||
// -1, since position is incremented by one too many when loop is finished
|
||||
// "1,234%" and "1,234" both end with pos = 5, since '%' breaks
|
||||
// the loop before incrementing position. In both cases, check
|
||||
// should be done at pos = 4
|
||||
return groupingViolationIndex(position - 1, prevSeparatorIndex);
|
||||
}
|
||||
}
|
||||
|
||||
// If a grouping symbol is not followed by a digit, it must be
|
||||
// backed up to either exit early or fail depending on leniency
|
||||
if (backup != -1) {
|
||||
position = backup;
|
||||
}
|
||||
|
@ -2575,7 +2648,30 @@ public class DecimalFormat extends NumberFormat {
|
|||
}
|
||||
}
|
||||
return position;
|
||||
}
|
||||
|
||||
// Checks to make sure grouping size is not violated. Used when strict.
|
||||
private boolean isGroupingViolation(int pos, int prevGroupingPos) {
|
||||
assert parseStrict : "Grouping violations should only occur when strict";
|
||||
return isGroupingUsed() && // Only violates if using grouping
|
||||
// Checks if a previous grouping symbol was seen.
|
||||
prevGroupingPos != -groupingSize &&
|
||||
// The check itself, - 1 to account for grouping/decimal symbol
|
||||
pos - 1 != prevGroupingPos + groupingSize;
|
||||
}
|
||||
|
||||
// Calculates the index that violated the grouping size
|
||||
// Violation can be over or under the grouping size
|
||||
// under - Current group has a grouping size of less than the expected
|
||||
// over - Current group has a grouping size of more than the expected
|
||||
private int groupingViolationIndex(int pos, int prevGroupingPos) {
|
||||
// Both examples assume grouping size of 3 and 0 indexed
|
||||
// under ex: "1,23,4". (4) OR "1,,2". (2) When under, violating char is grouping symbol
|
||||
// over ex: "1,2345,6. (5) When over, violating char is the excess digit
|
||||
// This method is only evaluated when a grouping symbol is found, thus
|
||||
// we can take the minimum of either the current pos, or where we expect
|
||||
// the current group to have ended
|
||||
return Math.min(pos, prevGroupingPos + groupingSize + 1);
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -2888,6 +2984,30 @@ public class DecimalFormat extends NumberFormat {
|
|||
fastPathCheckNeeded = true;
|
||||
}
|
||||
|
||||
/**
|
||||
* {@inheritDoc NumberFormat}
|
||||
*
|
||||
* @see #setStrict(boolean)
|
||||
* @see #parse(String, ParsePosition)
|
||||
* @since 23
|
||||
*/
|
||||
@Override
|
||||
public boolean isStrict() {
|
||||
return parseStrict;
|
||||
}
|
||||
|
||||
/**
|
||||
* {@inheritDoc NumberFormat}
|
||||
*
|
||||
* @see #isStrict()
|
||||
* @see #parse(String, ParsePosition)
|
||||
* @since 23
|
||||
*/
|
||||
@Override
|
||||
public void setStrict(boolean strict) {
|
||||
parseStrict = strict;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns whether the {@link #parse(java.lang.String, java.text.ParsePosition)}
|
||||
* method returns {@code BigDecimal}. The default value is false.
|
||||
|
@ -2991,7 +3111,8 @@ public class DecimalFormat extends NumberFormat {
|
|||
&& maximumFractionDigits == other.maximumFractionDigits
|
||||
&& minimumFractionDigits == other.minimumFractionDigits
|
||||
&& roundingMode == other.roundingMode
|
||||
&& symbols.equals(other.symbols);
|
||||
&& symbols.equals(other.symbols)
|
||||
&& parseStrict == other.parseStrict;
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -4176,6 +4297,15 @@ public class DecimalFormat extends NumberFormat {
|
|||
*/
|
||||
private boolean useExponentialNotation; // Newly persistent in the Java 2 platform v.1.2
|
||||
|
||||
/**
|
||||
* True if this {@code DecimalFormat} parses numbers strictly rather than
|
||||
* leniently.
|
||||
*
|
||||
* @serial
|
||||
* @since 23
|
||||
*/
|
||||
private boolean parseStrict = false;
|
||||
|
||||
/**
|
||||
* FieldPositions describing the positive prefix String. This is
|
||||
* lazily created. Use {@code getPositivePrefixFieldPositions}
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue