8300207: Add a pre-check for the number of canonical equivalent permutations in j.u.r.Pattern

Reviewed-by: smarks
This commit is contained in:
Raffaello Giulietti 2023-01-22 12:45:52 +00:00
parent 7ced08d4ec
commit 030b071db1

View file

@ -1,5 +1,5 @@
/*
* Copyright (c) 1999, 2022, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 1999, 2023, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -916,7 +916,8 @@ public final class Pattern
* <p> There is no embedded flag character for enabling canonical
* equivalence.
*
* <p> Specifying this flag may impose a performance penalty. </p>
* <p> Specifying this flag may impose a performance penalty
* and a moderate risk of memory exhaustion.</p>
*/
public static final int CANON_EQ = 0x80;
@ -1095,6 +1096,9 @@ public final class Pattern
* Compiles the given regular expression into a pattern with the given
* flags.
*
* <p>Setting {@link #CANON_EQ} among the flags may impose a moderate risk
* of memory exhaustion.</p>
*
* @param regex
* The expression to be compiled
*
@ -1112,6 +1116,10 @@ public final class Pattern
*
* @throws PatternSyntaxException
* If the expression's syntax is invalid
*
* @implNote If {@link #CANON_EQ} is specified and the number of combining
* marks for any character is too large, an {@link java.lang.OutOfMemoryError}
* is thrown.
*/
public static Pattern compile(String regex, int flags) {
return new Pattern(regex, flags);
@ -1145,6 +1153,13 @@ public final class Pattern
* The character sequence to be matched
*
* @return A new matcher for this pattern
*
* @implNote When a {@link Pattern} is deserialized, compilation is deferred
* until a direct or indirect invocation of this method. Thus, if a
* deserialized pattern has {@link #CANON_EQ} among its flags and the number
* of combining marks for any character is too large, an
* {@link java.lang.OutOfMemoryError} is thrown,
* as in {@link #compile(String, int)}.
*/
public Matcher matcher(CharSequence input) {
if (!compiled) {
@ -1618,14 +1633,30 @@ public final class Pattern
return result;
}
int length = 1;
/*
* Since
* 12! = 479_001_600 < Integer.MAX_VALUE
* 13! = 6_227_020_800 > Integer.MAX_VALUE
* the computation of n! using int arithmetic will overflow iff
* n < 0 or n > 12
*
* Here, nCodePoints! is computed in the next for-loop below.
* As nCodePoints >= 0, the computation overflows iff nCodePoints > 12.
* In that case, throw OOME to simulate length > Integer.MAX_VALUE.
*/
int nCodePoints = countCodePoints(input);
for(int x=1; x<nCodePoints; x++)
length = length * (x+1);
if (nCodePoints > 12) {
throw new OutOfMemoryError("Pattern too complex");
}
/* Compute length = nCodePoints! */
int length = 1;
for (int x = 2; x <= nCodePoints; ++x) {
length *= x;
}
String[] temp = new String[length];
int combClass[] = new int[nCodePoints];
int[] combClass = new int[nCodePoints];
for(int x=0, i=0; x<nCodePoints; x++) {
int c = Character.codePointAt(input, i);
combClass[x] = getClass(c);