8303018: Unicode Emoji Properties

Reviewed-by: prr, erikj, rriggs
This commit is contained in:
Naoto Sato 2023-03-20 20:20:19 +00:00
parent bc0ed730f2
commit f593a6b52e
19 changed files with 643 additions and 278 deletions

View file

@ -10781,6 +10781,113 @@ class Character implements java.io.Serializable, Comparable<Character>, Constabl
return CharacterData.of(codePoint).isIdentifierIgnorable(codePoint);
}
/**
 * Tests whether the given character (Unicode code point) is an Emoji.
 * <p>
 * The result is {@code true} exactly when the code point carries the
 * {@code Emoji} property, as defined in
 * <a href="https://unicode.org/reports/tr51/#Emoji_Properties_and_Data_Files">
 * Unicode Emoji (Technical Standard #51)</a>.
 *
 * @param   codePoint the character (Unicode code point) to be tested.
 * @return  {@code true} if the character is an Emoji;
 *          {@code false} otherwise.
 * @since   21
 */
public static boolean isEmoji(int codePoint) {
    final CharacterData data = CharacterData.of(codePoint);
    return data.isEmoji(codePoint);
}
/**
 * Tests whether the given character (Unicode code point) has the
 * Emoji Presentation property by default.
 * <p>
 * The result is {@code true} exactly when the code point carries the
 * {@code Emoji_Presentation} property, as defined in
 * <a href="https://unicode.org/reports/tr51/#Emoji_Properties_and_Data_Files">
 * Unicode Emoji (Technical Standard #51)</a>.
 *
 * @param   codePoint the character (Unicode code point) to be tested.
 * @return  {@code true} if the character has the Emoji Presentation
 *          property; {@code false} otherwise.
 * @since   21
 */
public static boolean isEmojiPresentation(int codePoint) {
    final CharacterData data = CharacterData.of(codePoint);
    return data.isEmojiPresentation(codePoint);
}
/**
 * Tests whether the given character (Unicode code point) is an
 * Emoji Modifier.
 * <p>
 * The result is {@code true} exactly when the code point carries the
 * {@code Emoji_Modifier} property, as defined in
 * <a href="https://unicode.org/reports/tr51/#Emoji_Properties_and_Data_Files">
 * Unicode Emoji (Technical Standard #51)</a>.
 *
 * @param   codePoint the character (Unicode code point) to be tested.
 * @return  {@code true} if the character is an Emoji Modifier;
 *          {@code false} otherwise.
 * @since   21
 */
public static boolean isEmojiModifier(int codePoint) {
    final CharacterData data = CharacterData.of(codePoint);
    return data.isEmojiModifier(codePoint);
}
/**
 * Tests whether the given character (Unicode code point) is an
 * Emoji Modifier Base.
 * <p>
 * The result is {@code true} exactly when the code point carries the
 * {@code Emoji_Modifier_Base} property, as defined in
 * <a href="https://unicode.org/reports/tr51/#Emoji_Properties_and_Data_Files">
 * Unicode Emoji (Technical Standard #51)</a>.
 *
 * @param   codePoint the character (Unicode code point) to be tested.
 * @return  {@code true} if the character is an Emoji Modifier Base;
 *          {@code false} otherwise.
 * @since   21
 */
public static boolean isEmojiModifierBase(int codePoint) {
    final CharacterData data = CharacterData.of(codePoint);
    return data.isEmojiModifierBase(codePoint);
}
/**
 * Tests whether the given character (Unicode code point) is an
 * Emoji Component.
 * <p>
 * The result is {@code true} exactly when the code point carries the
 * {@code Emoji_Component} property, as defined in
 * <a href="https://unicode.org/reports/tr51/#Emoji_Properties_and_Data_Files">
 * Unicode Emoji (Technical Standard #51)</a>.
 *
 * @param   codePoint the character (Unicode code point) to be tested.
 * @return  {@code true} if the character is an Emoji Component;
 *          {@code false} otherwise.
 * @since   21
 */
public static boolean isEmojiComponent(int codePoint) {
    final CharacterData data = CharacterData.of(codePoint);
    return data.isEmojiComponent(codePoint);
}
/**
 * Tests whether the given character (Unicode code point) is
 * an Extended Pictographic.
 * <p>
 * The result is {@code true} exactly when the code point carries the
 * {@code Extended_Pictographic} property, as defined in
 * <a href="https://unicode.org/reports/tr51/#Emoji_Properties_and_Data_Files">
 * Unicode Emoji (Technical Standard #51)</a>.
 *
 * @param   codePoint the character (Unicode code point) to be tested.
 * @return  {@code true} if the character is an Extended Pictographic;
 *          {@code false} otherwise.
 * @since   21
 */
public static boolean isExtendedPictographic(int codePoint) {
    final CharacterData data = CharacterData.of(codePoint);
    return data.isExtendedPictographic(codePoint);
}
/**
* Converts the character argument to lowercase using case
* mapping information from the UnicodeData file.

View file

@ -1,5 +1,5 @@
/*
* Copyright (c) 2006, 2020, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2006, 2023, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -38,6 +38,12 @@ abstract class CharacterData {
abstract boolean isUnicodeIdentifierStart(int ch);
abstract boolean isUnicodeIdentifierPart(int ch);
abstract boolean isIdentifierIgnorable(int ch);
// Emoji property test for code point ch; Character.isEmoji delegates here.
abstract boolean isEmoji(int ch);
// Emoji_Presentation property test for code point ch; Character.isEmojiPresentation delegates here.
abstract boolean isEmojiPresentation(int ch);
// Emoji_Modifier property test for code point ch; Character.isEmojiModifier delegates here.
abstract boolean isEmojiModifier(int ch);
// Emoji_Modifier_Base property test for code point ch; Character.isEmojiModifierBase delegates here.
abstract boolean isEmojiModifierBase(int ch);
// Emoji_Component property test for code point ch; Character.isEmojiComponent delegates here.
abstract boolean isEmojiComponent(int ch);
// Extended_Pictographic property test for code point ch; Character.isExtendedPictographic delegates here.
abstract boolean isExtendedPictographic(int ch);
abstract int toLowerCase(int ch);
abstract int toUpperCase(int ch);
abstract int toTitleCase(int ch);

View file

@ -1,5 +1,5 @@
/*
* Copyright (c) 2003, 2021, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2003, 2023, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -120,6 +120,30 @@ class CharacterData00 extends CharacterData {
return ((props & $$maskIdentifierInfo) == $$valueIgnorable);
}
// True when the getPropertiesEx value for ch has any Emoji mask bit set.
boolean isEmoji(int ch) {
    final int propsEx = getPropertiesEx(ch);
    return (propsEx & $$maskEmoji) != 0;
}
// True when the getPropertiesEx value for ch has any Emoji Presentation mask bit set.
boolean isEmojiPresentation(int ch) {
    final int propsEx = getPropertiesEx(ch);
    return (propsEx & $$maskEmojiPresentation) != 0;
}
// True when the getPropertiesEx value for ch has any Emoji Modifier mask bit set.
boolean isEmojiModifier(int ch) {
    final int propsEx = getPropertiesEx(ch);
    return (propsEx & $$maskEmojiModifier) != 0;
}
// True when the getPropertiesEx value for ch has any Emoji Modifier Base mask bit set.
boolean isEmojiModifierBase(int ch) {
    final int propsEx = getPropertiesEx(ch);
    return (propsEx & $$maskEmojiModifierBase) != 0;
}
// True when the getPropertiesEx value for ch has any Emoji Component mask bit set.
boolean isEmojiComponent(int ch) {
    final int propsEx = getPropertiesEx(ch);
    return (propsEx & $$maskEmojiComponent) != 0;
}
// True when the getPropertiesEx value for ch has any Extended Pictographic mask bit set.
boolean isExtendedPictographic(int ch) {
    final int propsEx = getPropertiesEx(ch);
    return (propsEx & $$maskExtendedPictographic) != 0;
}
int toLowerCase(int ch) {
int mapChar = ch;
int val = getProperties(ch);

View file

@ -1,5 +1,5 @@
/*
* Copyright (c) 2003, 2021, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2003, 2023, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -117,6 +117,30 @@ class CharacterData01 extends CharacterData {
return ((props & $$maskIdentifierInfo) == $$valueIgnorable);
}
// True when the getPropertiesEx value for ch has any Emoji mask bit set.
boolean isEmoji(int ch) {
    final int propsEx = getPropertiesEx(ch);
    return (propsEx & $$maskEmoji) != 0;
}
// True when the getPropertiesEx value for ch has any Emoji Presentation mask bit set.
boolean isEmojiPresentation(int ch) {
    final int propsEx = getPropertiesEx(ch);
    return (propsEx & $$maskEmojiPresentation) != 0;
}
// True when the getPropertiesEx value for ch has any Emoji Modifier mask bit set.
boolean isEmojiModifier(int ch) {
    final int propsEx = getPropertiesEx(ch);
    return (propsEx & $$maskEmojiModifier) != 0;
}
// True when the getPropertiesEx value for ch has any Emoji Modifier Base mask bit set.
boolean isEmojiModifierBase(int ch) {
    final int propsEx = getPropertiesEx(ch);
    return (propsEx & $$maskEmojiModifierBase) != 0;
}
// True when the getPropertiesEx value for ch has any Emoji Component mask bit set.
boolean isEmojiComponent(int ch) {
    final int propsEx = getPropertiesEx(ch);
    return (propsEx & $$maskEmojiComponent) != 0;
}
// True when the getPropertiesEx value for ch has any Extended Pictographic mask bit set.
boolean isExtendedPictographic(int ch) {
    final int propsEx = getPropertiesEx(ch);
    return (propsEx & $$maskExtendedPictographic) != 0;
}
int toLowerCase(int ch) {
int mapChar = ch;
int val = getProperties(ch);

View file

@ -1,5 +1,5 @@
/*
* Copyright (c) 2003, 2021, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2003, 2023, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -116,6 +116,30 @@ class CharacterData02 extends CharacterData {
return ((props & $$maskIdentifierInfo) == $$valueIgnorable);
}
// True when the getPropertiesEx value for ch has any Emoji mask bit set.
boolean isEmoji(int ch) {
    final int propsEx = getPropertiesEx(ch);
    return (propsEx & $$maskEmoji) != 0;
}
// True when the getPropertiesEx value for ch has any Emoji Presentation mask bit set.
boolean isEmojiPresentation(int ch) {
    final int propsEx = getPropertiesEx(ch);
    return (propsEx & $$maskEmojiPresentation) != 0;
}
// True when the getPropertiesEx value for ch has any Emoji Modifier mask bit set.
boolean isEmojiModifier(int ch) {
    final int propsEx = getPropertiesEx(ch);
    return (propsEx & $$maskEmojiModifier) != 0;
}
// True when the getPropertiesEx value for ch has any Emoji Modifier Base mask bit set.
boolean isEmojiModifierBase(int ch) {
    final int propsEx = getPropertiesEx(ch);
    return (propsEx & $$maskEmojiModifierBase) != 0;
}
// True when the getPropertiesEx value for ch has any Emoji Component mask bit set.
boolean isEmojiComponent(int ch) {
    final int propsEx = getPropertiesEx(ch);
    return (propsEx & $$maskEmojiComponent) != 0;
}
// True when the getPropertiesEx value for ch has any Extended Pictographic mask bit set.
boolean isExtendedPictographic(int ch) {
    final int propsEx = getPropertiesEx(ch);
    return (propsEx & $$maskExtendedPictographic) != 0;
}
int toLowerCase(int ch) {
int mapChar = ch;
int val = getProperties(ch);

View file

@ -1,5 +1,5 @@
/*
* Copyright (c) 2020, 2021, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2020, 2023, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -116,6 +116,30 @@ class CharacterData03 extends CharacterData {
return ((props & $$maskIdentifierInfo) == $$valueIgnorable);
}
// True when the getPropertiesEx value for ch has any Emoji mask bit set.
boolean isEmoji(int ch) {
    final int propsEx = getPropertiesEx(ch);
    return (propsEx & $$maskEmoji) != 0;
}
// True when the getPropertiesEx value for ch has any Emoji Presentation mask bit set.
boolean isEmojiPresentation(int ch) {
    final int propsEx = getPropertiesEx(ch);
    return (propsEx & $$maskEmojiPresentation) != 0;
}
// True when the getPropertiesEx value for ch has any Emoji Modifier mask bit set.
boolean isEmojiModifier(int ch) {
    final int propsEx = getPropertiesEx(ch);
    return (propsEx & $$maskEmojiModifier) != 0;
}
// True when the getPropertiesEx value for ch has any Emoji Modifier Base mask bit set.
boolean isEmojiModifierBase(int ch) {
    final int propsEx = getPropertiesEx(ch);
    return (propsEx & $$maskEmojiModifierBase) != 0;
}
// True when the getPropertiesEx value for ch has any Emoji Component mask bit set.
boolean isEmojiComponent(int ch) {
    final int propsEx = getPropertiesEx(ch);
    return (propsEx & $$maskEmojiComponent) != 0;
}
// True when the getPropertiesEx value for ch has any Extended Pictographic mask bit set.
boolean isExtendedPictographic(int ch) {
    final int propsEx = getPropertiesEx(ch);
    return (propsEx & $$maskExtendedPictographic) != 0;
}
int toLowerCase(int ch) {
int mapChar = ch;
int val = getProperties(ch);

View file

@ -1,5 +1,5 @@
/*
* Copyright (c) 2003, 2021, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2003, 2023, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -116,6 +116,30 @@ class CharacterData0E extends CharacterData {
return ((props & $$maskIdentifierInfo) == $$valueIgnorable);
}
// True when the getPropertiesEx value for ch has any Emoji mask bit set.
boolean isEmoji(int ch) {
    final int propsEx = getPropertiesEx(ch);
    return (propsEx & $$maskEmoji) != 0;
}
// True when the getPropertiesEx value for ch has any Emoji Presentation mask bit set.
boolean isEmojiPresentation(int ch) {
    final int propsEx = getPropertiesEx(ch);
    return (propsEx & $$maskEmojiPresentation) != 0;
}
// True when the getPropertiesEx value for ch has any Emoji Modifier mask bit set.
boolean isEmojiModifier(int ch) {
    final int propsEx = getPropertiesEx(ch);
    return (propsEx & $$maskEmojiModifier) != 0;
}
// True when the getPropertiesEx value for ch has any Emoji Modifier Base mask bit set.
boolean isEmojiModifierBase(int ch) {
    final int propsEx = getPropertiesEx(ch);
    return (propsEx & $$maskEmojiModifierBase) != 0;
}
// True when the getPropertiesEx value for ch has any Emoji Component mask bit set.
boolean isEmojiComponent(int ch) {
    final int propsEx = getPropertiesEx(ch);
    return (propsEx & $$maskEmojiComponent) != 0;
}
// True when the getPropertiesEx value for ch has any Extended Pictographic mask bit set.
boolean isExtendedPictographic(int ch) {
    final int propsEx = getPropertiesEx(ch);
    return (propsEx & $$maskExtendedPictographic) != 0;
}
int toLowerCase(int ch) {
int mapChar = ch;
int val = getProperties(ch);

View file

@ -134,6 +134,30 @@ class CharacterDataLatin1 extends CharacterData {
return ((props & $$maskIdentifierInfo) == $$valueIgnorable);
}
// True when the getPropertiesEx value for ch has any Emoji mask bit set.
boolean isEmoji(int ch) {
    final int propsEx = getPropertiesEx(ch);
    return (propsEx & $$maskEmoji) != 0;
}
// True when the getPropertiesEx value for ch has any Emoji Presentation mask bit set.
boolean isEmojiPresentation(int ch) {
    final int propsEx = getPropertiesEx(ch);
    return (propsEx & $$maskEmojiPresentation) != 0;
}
// True when the getPropertiesEx value for ch has any Emoji Modifier mask bit set.
boolean isEmojiModifier(int ch) {
    final int propsEx = getPropertiesEx(ch);
    return (propsEx & $$maskEmojiModifier) != 0;
}
// True when the getPropertiesEx value for ch has any Emoji Modifier Base mask bit set.
boolean isEmojiModifierBase(int ch) {
    final int propsEx = getPropertiesEx(ch);
    return (propsEx & $$maskEmojiModifierBase) != 0;
}
// True when the getPropertiesEx value for ch has any Emoji Component mask bit set.
boolean isEmojiComponent(int ch) {
    final int propsEx = getPropertiesEx(ch);
    return (propsEx & $$maskEmojiComponent) != 0;
}
// True when the getPropertiesEx value for ch has any Extended Pictographic mask bit set.
boolean isExtendedPictographic(int ch) {
    final int propsEx = getPropertiesEx(ch);
    return (propsEx & $$maskExtendedPictographic) != 0;
}
int toLowerCase(int ch) {
if (ch < 'A') { // Fast path for low code points
return ch;

View file

@ -1,5 +1,5 @@
/*
* Copyright (c) 2003, 2018, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2003, 2023, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -60,6 +60,30 @@ class CharacterDataPrivateUse extends CharacterData {
return false;
}
// Always false: this CharacterData implementation reports no code point as an Emoji.
boolean isEmoji(int ch) {
return false;
}
// Always false: this CharacterData implementation reports no code point as Emoji Presentation.
boolean isEmojiPresentation(int ch) {
return false;
}
// Always false: this CharacterData implementation reports no code point as an Emoji Modifier.
boolean isEmojiModifier(int ch) {
return false;
}
// Always false: this CharacterData implementation reports no code point as an Emoji Modifier Base.
boolean isEmojiModifierBase(int ch) {
return false;
}
// Always false: this CharacterData implementation reports no code point as an Emoji Component.
boolean isEmojiComponent(int ch) {
return false;
}
// Always false: this CharacterData implementation reports no code point as Extended Pictographic.
boolean isExtendedPictographic(int ch) {
return false;
}
int toLowerCase(int ch) {
return ch;
}

View file

@ -1,5 +1,5 @@
/*
* Copyright (c) 2003, 2018, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2003, 2023, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -58,6 +58,30 @@ class CharacterDataUndefined extends CharacterData {
return false;
}
// Always false: this CharacterData implementation reports no code point as an Emoji.
boolean isEmoji(int ch) {
return false;
}
// Always false: this CharacterData implementation reports no code point as Emoji Presentation.
boolean isEmojiPresentation(int ch) {
return false;
}
// Always false: this CharacterData implementation reports no code point as an Emoji Modifier.
boolean isEmojiModifier(int ch) {
return false;
}
// Always false: this CharacterData implementation reports no code point as an Emoji Modifier Base.
boolean isEmojiModifierBase(int ch) {
return false;
}
// Always false: this CharacterData implementation reports no code point as an Emoji Component.
boolean isEmojiComponent(int ch) {
return false;
}
// Always false: this CharacterData implementation reports no code point as Extended Pictographic.
boolean isExtendedPictographic(int ch) {
return false;
}
int toLowerCase(int ch) {
return ch;
}