8065554: MatchResult should provide values of named-capturing groups

Reviewed-by: smarks
This commit is contained in:
Raffaello Giulietti 2022-09-29 09:16:21 +00:00
parent 1decdcee71
commit ce85cac947
4 changed files with 619 additions and 52 deletions

View file

@ -1,5 +1,5 @@
/*
* Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2003, 2022, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -25,6 +25,9 @@
package java.util.regex;
import java.util.Map;
import java.util.Objects;
/**
* The result of a match operation.
*
@ -33,6 +36,15 @@ package java.util.regex;
* groups and group boundaries can be seen but not modified through
* a {@code MatchResult}.
*
* @implNote
* Support for named groups is implemented by the default methods
* {@link #start(String)}, {@link #end(String)} and {@link #group(String)}.
* They all make use of the map returned by {@link #namedGroups()}, whose
* default implementation simply throws {@link UnsupportedOperationException}.
* It is thus sufficient to override {@link #namedGroups()} for these methods
* to work. However, overriding them directly might be preferable for
* performance or other reasons.
*
* @author Michael McCloskey
* @see Matcher
* @since 1.5
@ -48,7 +60,7 @@ public interface MatchResult {
* If no match has yet been attempted,
* or if the previous match operation failed
*/
public int start();
int start();
/**
* Returns the start index of the subsequence captured by the given group
@ -74,7 +86,38 @@ public interface MatchResult {
* If there is no capturing group in the pattern
* with the given index
*/
public int start(int group);
int start(int group);
/**
* Returns the start index of the subsequence captured by the given
* <a href="Pattern.html#groupname">named-capturing group</a> during the
* previous match operation.
*
* @param name
* The name of a named-capturing group in this matcher's pattern
*
* @return The index of the first character captured by the group,
* or {@code -1} if the match was successful but the group
* itself did not match anything
*
* @throws IllegalStateException
* If no match has yet been attempted,
* or if the previous match operation failed
*
* @throws IllegalArgumentException
* If there is no capturing group in the pattern
* with the given name
*
* @implSpec
* The default implementation of this method invokes {@link #namedGroups()}
* to obtain the group number from the {@code name} argument, and uses it
* as argument to an invocation of {@link #start(int)}.
*
* @since 20
*/
default int start(String name) {
return start(groupNumber(name));
}
/**
* Returns the offset after the last character matched.
@ -85,7 +128,7 @@ public interface MatchResult {
* If no match has yet been attempted,
* or if the previous match operation failed
*/
public int end();
int end();
/**
* Returns the offset after the last character of the subsequence
@ -111,7 +154,38 @@ public interface MatchResult {
* If there is no capturing group in the pattern
* with the given index
*/
public int end(int group);
int end(int group);
/**
* Returns the offset after the last character of the subsequence
* captured by the given <a href="Pattern.html#groupname">named-capturing
* group</a> during the previous match operation.
*
* @param name
* The name of a named-capturing group in this matcher's pattern
*
* @return The offset after the last character captured by the group,
* or {@code -1} if the match was successful
* but the group itself did not match anything
*
* @throws IllegalStateException
* If no match has yet been attempted,
* or if the previous match operation failed
*
* @throws IllegalArgumentException
* If there is no capturing group in the pattern
* with the given name
*
* @implSpec
* The default implementation of this method invokes {@link #namedGroups()}
* to obtain the group number from the {@code name} argument, and uses it
* as argument to an invocation of {@link #end(int)}.
*
* @since 20
*/
default int end(String name) {
return end(groupNumber(name));
}
/**
* Returns the input subsequence matched by the previous match.
@ -132,7 +206,7 @@ public interface MatchResult {
* If no match has yet been attempted,
* or if the previous match operation failed
*/
public String group();
String group();
/**
* Returns the input subsequence captured by the given group during the
@ -170,7 +244,44 @@ public interface MatchResult {
* If there is no capturing group in the pattern
* with the given index
*/
public String group(int group);
String group(int group);
/**
* Returns the input subsequence captured by the given
* <a href="Pattern.html#groupname">named-capturing group</a> during the
* previous match operation.
*
* <p> If the match was successful but the group specified failed to match
* any part of the input sequence, then {@code null} is returned. Note
* that some groups, for example {@code (a*)}, match the empty string.
* This method will return the empty string when such a group successfully
* matches the empty string in the input. </p>
*
* @param name
* The name of a named-capturing group in this matcher's pattern
*
* @return The (possibly empty) subsequence captured by the named group
* during the previous match, or {@code null} if the group
* failed to match part of the input
*
* @throws IllegalStateException
* If no match has yet been attempted,
* or if the previous match operation failed
*
* @throws IllegalArgumentException
* If there is no capturing group in the pattern
* with the given name
*
* @implSpec
* The default implementation of this method invokes {@link #namedGroups()}
* to obtain the group number from the {@code name} argument, and uses it
* as argument to an invocation of {@link #group(int)}.
*
* @since 20
*/
default String group(String name) {
return group(groupNumber(name));
}
/**
* Returns the number of capturing groups in this match result's pattern.
@ -184,6 +295,55 @@ public interface MatchResult {
*
* @return The number of capturing groups in this matcher's pattern
*/
public int groupCount();
int groupCount();
/**
* Returns an unmodifiable map from capturing group names to group numbers.
* If there are no named groups, returns an empty map.
*
* @return an unmodifiable map from capturing group names to group numbers
*
* @throws UnsupportedOperationException if the implementation does not
* support named groups.
*
* @implSpec The default implementation of this method always throws
* {@link UnsupportedOperationException}
*
* @apiNote
* This method must be overridden by an implementation that supports
* named groups.
*
* @since 20
*/
default Map<String,Integer> namedGroups() {
throw new UnsupportedOperationException("namedGroups()");
}
private int groupNumber(String name) {
Objects.requireNonNull(name, "Group name");
Integer number = namedGroups().get(name);
if (number != null) {
return number;
}
throw new IllegalArgumentException("No group with name <" + name + ">");
}
/**
* Returns whether {@code this} contains a valid match from
* a previous match or find operation.
*
* @return whether {@code this} contains a valid match
*
* @throws UnsupportedOperationException if the implementation cannot report
* whether it has a match
*
* @implSpec The default implementation of this method always throws
* {@link UnsupportedOperationException}
*
* @since 20
*/
default boolean hasMatch() {
throw new UnsupportedOperationException("hasMatch()");
}
}

View file

@ -1,5 +1,5 @@
/*
* Copyright (c) 1999, 2020, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 1999, 2022, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -27,6 +27,7 @@ package java.util.regex;
import java.util.ConcurrentModificationException;
import java.util.Iterator;
import java.util.Map;
import java.util.NoSuchElementException;
import java.util.Objects;
import java.util.Spliterator;
@ -229,6 +230,8 @@ public final class Matcher implements MatchResult {
*/
int modCount;
private Map<String, Integer> namedGroups;
/**
* No default constructor.
*/
@ -278,7 +281,8 @@ public final class Matcher implements MatchResult {
this.last,
groupCount(),
this.groups.clone(),
text);
text,
namedGroups());
}
private static class ImmutableMatchResult implements MatchResult {
@ -287,15 +291,18 @@ public final class Matcher implements MatchResult {
private final int[] groups;
private final int groupCount;
private final String text;
private final Map<String, Integer> namedGroups;
ImmutableMatchResult(int first, int last, int groupCount,
int[] groups, String text)
int[] groups, String text,
Map<String, Integer> namedGroups)
{
this.first = first;
this.last = last;
this.groupCount = groupCount;
this.groups = groups;
this.text = text;
this.namedGroups = namedGroups;
}
@Override
@ -307,8 +314,7 @@ public final class Matcher implements MatchResult {
@Override
public int start(int group) {
checkMatch();
if (group < 0 || group > groupCount)
throw new IndexOutOfBoundsException("No group " + group);
checkGroup(group);
return groups[group * 2];
}
@ -321,8 +327,7 @@ public final class Matcher implements MatchResult {
@Override
public int end(int group) {
checkMatch();
if (group < 0 || group > groupCount)
throw new IndexOutOfBoundsException("No group " + group);
checkGroup(group);
return groups[group * 2 + 1];
}
@ -340,18 +345,33 @@ public final class Matcher implements MatchResult {
@Override
public String group(int group) {
checkMatch();
if (group < 0 || group > groupCount)
throw new IndexOutOfBoundsException("No group " + group);
if ((groups[group*2] == -1) || (groups[group*2+1] == -1))
checkGroup(group);
if ((groups[group * 2] == -1) || (groups[group * 2 + 1] == -1))
return null;
return text.subSequence(groups[group * 2], groups[group * 2 + 1]).toString();
}
@Override
public Map<String, Integer> namedGroups() {
return namedGroups;
}
@Override
public boolean hasMatch() {
return first >= 0;
}
private void checkGroup(int group) {
if (group < 0 || group > groupCount)
throw new IndexOutOfBoundsException("No group " + group);
}
private void checkMatch() {
if (first < 0)
if (!hasMatch())
throw new IllegalStateException("No match found");
}
}
/**
@ -446,8 +466,7 @@ public final class Matcher implements MatchResult {
* or if the previous match operation failed
*/
public int start() {
if (first < 0)
throw new IllegalStateException("No match available");
checkMatch();
return first;
}
@ -476,10 +495,8 @@ public final class Matcher implements MatchResult {
* with the given index
*/
public int start(int group) {
if (first < 0)
throw new IllegalStateException("No match available");
if (group < 0 || group > groupCount())
throw new IndexOutOfBoundsException("No group " + group);
checkMatch();
checkGroup(group);
return groups[group * 2];
}
@ -518,8 +535,7 @@ public final class Matcher implements MatchResult {
* or if the previous match operation failed
*/
public int end() {
if (first < 0)
throw new IllegalStateException("No match available");
checkMatch();
return last;
}
@ -548,10 +564,8 @@ public final class Matcher implements MatchResult {
* with the given index
*/
public int end(int group) {
if (first < 0)
throw new IllegalStateException("No match available");
if (group < 0 || group > groupCount())
throw new IndexOutOfBoundsException("No group " + group);
checkMatch();
checkGroup(group);
return groups[group * 2 + 1];
}
@ -640,10 +654,8 @@ public final class Matcher implements MatchResult {
* with the given index
*/
public String group(int group) {
if (first < 0)
throw new IllegalStateException("No match found");
if (group < 0 || group > groupCount())
throw new IndexOutOfBoundsException("No group " + group);
checkMatch();
checkGroup(group);
if ((groups[group*2] == -1) || (groups[group*2+1] == -1))
return null;
return getSubSequence(groups[group * 2], groups[group * 2 + 1]).toString();
@ -900,9 +912,7 @@ public final class Matcher implements MatchResult {
* that does not exist in the pattern
*/
public Matcher appendReplacement(StringBuffer sb, String replacement) {
// If no match, return error
if (first < 0)
throw new IllegalStateException("No match available");
checkMatch();
StringBuilder result = new StringBuilder();
appendExpandedReplacement(replacement, result);
// Append the intervening text
@ -991,8 +1001,7 @@ public final class Matcher implements MatchResult {
*/
public Matcher appendReplacement(StringBuilder sb, String replacement) {
// If no match, return error
if (first < 0)
throw new IllegalStateException("No match available");
checkMatch();
StringBuilder result = new StringBuilder();
appendExpandedReplacement(replacement, result);
// Append the intervening text
@ -1055,10 +1064,10 @@ public final class Matcher implements MatchResult {
throw new IllegalArgumentException(
"capturing group name {" + gname +
"} starts with digit character");
if (!parentPattern.namedGroups().containsKey(gname))
if (!namedGroups().containsKey(gname))
throw new IllegalArgumentException(
"No group with name {" + gname + "}");
refNum = parentPattern.namedGroups().get(gname);
refNum = namedGroups().get(gname);
cursor++;
} else {
// The first number is always a group
@ -1796,10 +1805,47 @@ public final class Matcher implements MatchResult {
*/
int getMatchedGroupIndex(String name) {
Objects.requireNonNull(name, "Group name");
if (first < 0)
throw new IllegalStateException("No match found");
if (!parentPattern.namedGroups().containsKey(name))
checkMatch();
if (!namedGroups().containsKey(name))
throw new IllegalArgumentException("No group with name <" + name + ">");
return parentPattern.namedGroups().get(name);
return namedGroups().get(name);
}
private void checkGroup(int group) {
if (group < 0 || group > groupCount())
throw new IndexOutOfBoundsException("No group " + group);
}
private void checkMatch() {
if (!hasMatch())
throw new IllegalStateException("No match found");
}
/**
* {@inheritDoc}
*
* @return {@inheritDoc}
*
* @since {@inheritDoc}
*/
@Override
public Map<String, Integer> namedGroups() {
if (namedGroups == null) {
return namedGroups = parentPattern.namedGroups();
}
return namedGroups;
}
/**
* {@inheritDoc}
*
* @return {@inheritDoc}
*
* @since {@inheritDoc}
*/
@Override
public boolean hasMatch() {
return first >= 0;
}
}

View file

@ -1843,7 +1843,7 @@ loop: for(int x=0, offset=0; x<nCodePoints; x++, offset+=len) {
topClosureNodes = null;
}
Map<String, Integer> namedGroups() {
private Map<String, Integer> namedGroupsMap() {
Map<String, Integer> groups = namedGroups;
if (groups == null) {
namedGroups = groups = new HashMap<>(2);
@ -1851,6 +1851,18 @@ loop: for(int x=0, offset=0; x<nCodePoints; x++, offset+=len) {
return groups;
}
/**
* Returns an unmodifiable map from capturing group names to group numbers.
* If there are no named groups, returns an empty map.
*
* @return an unmodifiable map from capturing group names to group numbers
*
* @since 20
*/
public Map<String, Integer> namedGroups() {
return Map.copyOf(namedGroupsMap());
}
/**
* Used to accumulate information about a subtree of the object graph
* so that optimizations can be applied to the subtree.
@ -2554,14 +2566,14 @@ loop: for(int x=0, offset=0; x<nCodePoints; x++, offset+=len) {
if (read() != '<')
throw error("\\k is not followed by '<' for named capturing group");
String name = groupname(read());
if (!namedGroups().containsKey(name))
if (!namedGroupsMap().containsKey(name))
throw error("named capturing group <" + name + "> does not exist");
if (create) {
hasGroupRef = true;
if (has(CASE_INSENSITIVE))
root = new CIBackRef(namedGroups().get(name), has(UNICODE_CASE));
root = new CIBackRef(namedGroupsMap().get(name), has(UNICODE_CASE));
else
root = new BackRef(namedGroups().get(name));
root = new BackRef(namedGroupsMap().get(name));
}
return -1;
case 'l':
@ -3008,13 +3020,13 @@ loop: for(int x=0, offset=0; x<nCodePoints; x++, offset+=len) {
if (ch != '=' && ch != '!') {
// named captured group
String name = groupname(ch);
if (namedGroups().containsKey(name))
if (namedGroupsMap().containsKey(name))
throw error("Named capturing group <" + name
+ "> is already defined");
capturingGroup = true;
head = createGroup(false);
tail = root;
namedGroups().put(name, capturingGroupCount - 1);
namedGroupsMap().put(name, capturingGroupCount - 1);
head.next = expr(tail);
break;
}