ext/intl: SpoofChecker::setAllowedChars support.

Limits the range of acceptable Unicode characters, specified either as
individual characters or via a character set pattern.
This commit is contained in:
David Carlier 2024-06-02 12:47:53 +01:00
parent b537f01353
commit 21418b5bb5
No known key found for this signature in database
GPG key ID: CEF290BB40D2086B
6 changed files with 180 additions and 16 deletions

4
NEWS
View file

@ -20,6 +20,10 @@ PHP NEWS
. /dev/poll events.mechanism for Solaris/Illumos setting had been retired. . /dev/poll events.mechanism for Solaris/Illumos setting had been retired.
(David Carlier) (David Carlier)
- Intl:
. Added SpoofChecker::setAllowedChars to set unicode chars ranges.
(David Carlier)
- Opcache: - Opcache:
. Fixed bug GH-13775 (Memory leak possibly related to opcache SHM placement). . Fixed bug GH-13775 (Memory leak possibly related to opcache SHM placement).
(Arnaud) (Arnaud)

View file

@ -609,6 +609,8 @@ PHP 8.4 UPGRADE NOTES
Selectors. Selectors.
. Added IntlDateFormatter::parseToCalendar which behaves like . Added IntlDateFormatter::parseToCalendar which behaves like
IntlDateFormatter::parse except the time zone is updated. IntlDateFormatter::parse except the time zone is updated.
. Added SpoofChecker::setAllowedChars to limit the range of unicode
chars.
- MBString: - MBString:
. Added mb_trim, mb_ltrim and mb_rtrim functions. . Added mb_trim, mb_ltrim and mb_rtrim functions.

View file

@ -40,6 +40,17 @@ class Spoofchecker
public const int HIDDEN_OVERLAY = UNKNOWN; public const int HIDDEN_OVERLAY = UNKNOWN;
#endif #endif
/** @cvalue USET_IGNORE_SPACE */
public const int IGNORE_SPACE = UNKNOWN;
/** @cvalue USET_CASE_INSENSITIVE */
public const int CASE_INSENSITIVE = UNKNOWN;
/** @cvalue USET_ADD_CASE_MAPPINGS */
public const int ADD_CASE_MAPPINGS = UNKNOWN;
#if U_ICU_VERSION_MAJOR_NUM >= 73
/** @cvalue USET_SIMPLE_CASE_INSENSITIVE */
public const int SIMPLE_CASE_INSENSITIVE = UNKNOWN;
#endif
public function __construct() {} public function __construct() {}
/** /**
@ -64,4 +75,5 @@ class Spoofchecker
/** @tentative-return-type */ /** @tentative-return-type */
public function setRestrictionLevel(int $level): void {} public function setRestrictionLevel(int $level): void {}
#endif #endif
public function setAllowedChars(string $pattern, int $patternOptions = 0): void {}
} }

View file

@ -1,5 +1,5 @@
/* This is a generated file, edit the .stub.php file instead. /* This is a generated file, edit the .stub.php file instead.
* Stub hash: d74f65f808ec0f71ee0ae8c04e253c4412da885e */ * Stub hash: 4834be57a3f0cb74dbc4422e609846139f09f6cb */
ZEND_BEGIN_ARG_INFO_EX(arginfo_class_Spoofchecker___construct, 0, 0, 0) ZEND_BEGIN_ARG_INFO_EX(arginfo_class_Spoofchecker___construct, 0, 0, 0)
ZEND_END_ARG_INFO() ZEND_END_ARG_INFO()
@ -29,6 +29,11 @@ ZEND_BEGIN_ARG_WITH_TENTATIVE_RETURN_TYPE_INFO_EX(arginfo_class_Spoofchecker_set
ZEND_END_ARG_INFO() ZEND_END_ARG_INFO()
#endif #endif
ZEND_BEGIN_ARG_WITH_RETURN_TYPE_INFO_EX(arginfo_class_Spoofchecker_setAllowedChars, 0, 1, IS_VOID, 0)
ZEND_ARG_TYPE_INFO(0, pattern, IS_STRING, 0)
ZEND_ARG_TYPE_INFO_WITH_DEFAULT_VALUE(0, patternOptions, IS_LONG, 0, "0")
ZEND_END_ARG_INFO()
ZEND_METHOD(Spoofchecker, __construct); ZEND_METHOD(Spoofchecker, __construct);
ZEND_METHOD(Spoofchecker, isSuspicious); ZEND_METHOD(Spoofchecker, isSuspicious);
ZEND_METHOD(Spoofchecker, areConfusable); ZEND_METHOD(Spoofchecker, areConfusable);
@ -37,6 +42,7 @@ ZEND_METHOD(Spoofchecker, setChecks);
#if U_ICU_VERSION_MAJOR_NUM >= 58 #if U_ICU_VERSION_MAJOR_NUM >= 58
ZEND_METHOD(Spoofchecker, setRestrictionLevel); ZEND_METHOD(Spoofchecker, setRestrictionLevel);
#endif #endif
ZEND_METHOD(Spoofchecker, setAllowedChars);
static const zend_function_entry class_Spoofchecker_methods[] = { static const zend_function_entry class_Spoofchecker_methods[] = {
ZEND_ME(Spoofchecker, __construct, arginfo_class_Spoofchecker___construct, ZEND_ACC_PUBLIC) ZEND_ME(Spoofchecker, __construct, arginfo_class_Spoofchecker___construct, ZEND_ACC_PUBLIC)
@ -47,6 +53,7 @@ static const zend_function_entry class_Spoofchecker_methods[] = {
#if U_ICU_VERSION_MAJOR_NUM >= 58 #if U_ICU_VERSION_MAJOR_NUM >= 58
ZEND_ME(Spoofchecker, setRestrictionLevel, arginfo_class_Spoofchecker_setRestrictionLevel, ZEND_ACC_PUBLIC) ZEND_ME(Spoofchecker, setRestrictionLevel, arginfo_class_Spoofchecker_setRestrictionLevel, ZEND_ACC_PUBLIC)
#endif #endif
ZEND_ME(Spoofchecker, setAllowedChars, arginfo_class_Spoofchecker_setAllowedChars, ZEND_ACC_PUBLIC)
ZEND_FE_END ZEND_FE_END
}; };
@ -164,5 +171,31 @@ static zend_class_entry *register_class_Spoofchecker(void)
zend_string_release(const_HIDDEN_OVERLAY_name); zend_string_release(const_HIDDEN_OVERLAY_name);
#endif #endif
zval const_IGNORE_SPACE_value;
ZVAL_LONG(&const_IGNORE_SPACE_value, USET_IGNORE_SPACE);
zend_string *const_IGNORE_SPACE_name = zend_string_init_interned("IGNORE_SPACE", sizeof("IGNORE_SPACE") - 1, 1);
zend_declare_typed_class_constant(class_entry, const_IGNORE_SPACE_name, &const_IGNORE_SPACE_value, ZEND_ACC_PUBLIC, NULL, (zend_type) ZEND_TYPE_INIT_MASK(MAY_BE_LONG));
zend_string_release(const_IGNORE_SPACE_name);
zval const_CASE_INSENSITIVE_value;
ZVAL_LONG(&const_CASE_INSENSITIVE_value, USET_CASE_INSENSITIVE);
zend_string *const_CASE_INSENSITIVE_name = zend_string_init_interned("CASE_INSENSITIVE", sizeof("CASE_INSENSITIVE") - 1, 1);
zend_declare_typed_class_constant(class_entry, const_CASE_INSENSITIVE_name, &const_CASE_INSENSITIVE_value, ZEND_ACC_PUBLIC, NULL, (zend_type) ZEND_TYPE_INIT_MASK(MAY_BE_LONG));
zend_string_release(const_CASE_INSENSITIVE_name);
zval const_ADD_CASE_MAPPINGS_value;
ZVAL_LONG(&const_ADD_CASE_MAPPINGS_value, USET_ADD_CASE_MAPPINGS);
zend_string *const_ADD_CASE_MAPPINGS_name = zend_string_init_interned("ADD_CASE_MAPPINGS", sizeof("ADD_CASE_MAPPINGS") - 1, 1);
zend_declare_typed_class_constant(class_entry, const_ADD_CASE_MAPPINGS_name, &const_ADD_CASE_MAPPINGS_value, ZEND_ACC_PUBLIC, NULL, (zend_type) ZEND_TYPE_INIT_MASK(MAY_BE_LONG));
zend_string_release(const_ADD_CASE_MAPPINGS_name);
#if U_ICU_VERSION_MAJOR_NUM >= 73
zval const_SIMPLE_CASE_INSENSITIVE_value;
ZVAL_LONG(&const_SIMPLE_CASE_INSENSITIVE_value, USET_SIMPLE_CASE_INSENSITIVE);
zend_string *const_SIMPLE_CASE_INSENSITIVE_name = zend_string_init_interned("SIMPLE_CASE_INSENSITIVE", sizeof("SIMPLE_CASE_INSENSITIVE") - 1, 1);
zend_declare_typed_class_constant(class_entry, const_SIMPLE_CASE_INSENSITIVE_name, &const_SIMPLE_CASE_INSENSITIVE_value, ZEND_ACC_PUBLIC, NULL, (zend_type) ZEND_TYPE_INIT_MASK(MAY_BE_LONG));
zend_string_release(const_SIMPLE_CASE_INSENSITIVE_name);
#endif
return class_entry; return class_entry;
} }

View file

@ -17,19 +17,19 @@
#endif #endif
#include "php_intl.h" #include "php_intl.h"
#include "intl_convert.h"
#include "spoofchecker_class.h" #include "spoofchecker_class.h"
/* {{{ Checks if a given text contains any suspicious characters */ /* {{{ Checks if a given text contains any suspicious characters */
PHP_METHOD(Spoofchecker, isSuspicious) PHP_METHOD(Spoofchecker, isSuspicious)
{ {
int32_t ret, errmask; int32_t ret, errmask;
char *text; zend_string *text;
size_t text_len;
zval *error_code = NULL; zval *error_code = NULL;
SPOOFCHECKER_METHOD_INIT_VARS; SPOOFCHECKER_METHOD_INIT_VARS;
ZEND_PARSE_PARAMETERS_START(1, 2) ZEND_PARSE_PARAMETERS_START(1, 2)
Z_PARAM_STRING(text, text_len) Z_PARAM_STR(text)
Z_PARAM_OPTIONAL Z_PARAM_OPTIONAL
Z_PARAM_ZVAL(error_code) Z_PARAM_ZVAL(error_code)
ZEND_PARSE_PARAMETERS_END(); ZEND_PARSE_PARAMETERS_END();
@ -37,9 +37,9 @@ PHP_METHOD(Spoofchecker, isSuspicious)
SPOOFCHECKER_METHOD_FETCH_OBJECT; SPOOFCHECKER_METHOD_FETCH_OBJECT;
#if U_ICU_VERSION_MAJOR_NUM >= 58 #if U_ICU_VERSION_MAJOR_NUM >= 58
ret = uspoof_check2UTF8(co->uspoof, text, text_len, co->uspoofres, SPOOFCHECKER_ERROR_CODE_P(co)); ret = uspoof_check2UTF8(co->uspoof, ZSTR_VAL(text), ZSTR_LEN(text), co->uspoofres, SPOOFCHECKER_ERROR_CODE_P(co));
#else #else
ret = uspoof_checkUTF8(co->uspoof, text, text_len, NULL, SPOOFCHECKER_ERROR_CODE_P(co)); ret = uspoof_checkUTF8(co->uspoof, ZSTR_VAL(text), ZSTR_LEN(text), NULL, SPOOFCHECKER_ERROR_CODE_P(co));
#endif #endif
if (U_FAILURE(SPOOFCHECKER_ERROR_CODE(co))) { if (U_FAILURE(SPOOFCHECKER_ERROR_CODE(co))) {
@ -65,23 +65,22 @@ PHP_METHOD(Spoofchecker, isSuspicious)
PHP_METHOD(Spoofchecker, areConfusable) PHP_METHOD(Spoofchecker, areConfusable)
{ {
int ret; int ret;
char *s1, *s2; zend_string *s1, *s2;
size_t s1_len, s2_len;
zval *error_code = NULL; zval *error_code = NULL;
SPOOFCHECKER_METHOD_INIT_VARS; SPOOFCHECKER_METHOD_INIT_VARS;
ZEND_PARSE_PARAMETERS_START(2, 3) ZEND_PARSE_PARAMETERS_START(2, 3)
Z_PARAM_STRING(s1, s1_len) Z_PARAM_STR(s1)
Z_PARAM_STRING(s2, s2_len) Z_PARAM_STR(s2)
Z_PARAM_OPTIONAL Z_PARAM_OPTIONAL
Z_PARAM_ZVAL(error_code) Z_PARAM_ZVAL(error_code)
ZEND_PARSE_PARAMETERS_END(); ZEND_PARSE_PARAMETERS_END();
SPOOFCHECKER_METHOD_FETCH_OBJECT; SPOOFCHECKER_METHOD_FETCH_OBJECT;
if(s1_len > INT32_MAX || s2_len > INT32_MAX) { if(ZSTR_LEN(s1) > INT32_MAX || ZSTR_LEN(s2) > INT32_MAX) {
SPOOFCHECKER_ERROR_CODE(co) = U_BUFFER_OVERFLOW_ERROR; SPOOFCHECKER_ERROR_CODE(co) = U_BUFFER_OVERFLOW_ERROR;
} else { } else {
ret = uspoof_areConfusableUTF8(co->uspoof, s1, (int32_t)s1_len, s2, (int32_t)s2_len, SPOOFCHECKER_ERROR_CODE_P(co)); ret = uspoof_areConfusableUTF8(co->uspoof, ZSTR_VAL(s1), (int32_t)ZSTR_LEN(s1), ZSTR_VAL(s2), (int32_t)ZSTR_LEN(s2), SPOOFCHECKER_ERROR_CODE_P(co));
} }
if (U_FAILURE(SPOOFCHECKER_ERROR_CODE(co))) { if (U_FAILURE(SPOOFCHECKER_ERROR_CODE(co))) {
php_error_docref(NULL, E_WARNING, "(%d) %s", SPOOFCHECKER_ERROR_CODE(co), u_errorName(SPOOFCHECKER_ERROR_CODE(co))); php_error_docref(NULL, E_WARNING, "(%d) %s", SPOOFCHECKER_ERROR_CODE(co), u_errorName(SPOOFCHECKER_ERROR_CODE(co)));
@ -98,17 +97,16 @@ PHP_METHOD(Spoofchecker, areConfusable)
/* {{{ Locales to use when running checks */ /* {{{ Locales to use when running checks */
PHP_METHOD(Spoofchecker, setAllowedLocales) PHP_METHOD(Spoofchecker, setAllowedLocales)
{ {
char *locales; zend_string *locales;
size_t locales_len;
SPOOFCHECKER_METHOD_INIT_VARS; SPOOFCHECKER_METHOD_INIT_VARS;
ZEND_PARSE_PARAMETERS_START(1, 1) ZEND_PARSE_PARAMETERS_START(1, 1)
Z_PARAM_STRING(locales, locales_len) Z_PARAM_STR(locales)
ZEND_PARSE_PARAMETERS_END(); ZEND_PARSE_PARAMETERS_END();
SPOOFCHECKER_METHOD_FETCH_OBJECT; SPOOFCHECKER_METHOD_FETCH_OBJECT;
uspoof_setAllowedLocales(co->uspoof, locales, SPOOFCHECKER_ERROR_CODE_P(co)); uspoof_setAllowedLocales(co->uspoof, ZSTR_VAL(locales), SPOOFCHECKER_ERROR_CODE_P(co));
if (U_FAILURE(SPOOFCHECKER_ERROR_CODE(co))) { if (U_FAILURE(SPOOFCHECKER_ERROR_CODE(co))) {
php_error_docref(NULL, E_WARNING, "(%d) %s", SPOOFCHECKER_ERROR_CODE(co), u_errorName(SPOOFCHECKER_ERROR_CODE(co))); php_error_docref(NULL, E_WARNING, "(%d) %s", SPOOFCHECKER_ERROR_CODE(co), u_errorName(SPOOFCHECKER_ERROR_CODE(co)));
@ -167,3 +165,74 @@ PHP_METHOD(Spoofchecker, setRestrictionLevel)
} }
/* }}} */ /* }}} */
#endif #endif
/*
 * Spoofchecker::setAllowedChars(string $pattern, int $patternOptions = 0): void
 *
 * Builds a USet from $pattern (a bracketed character set such as "[a-z]")
 * and installs it on the checker through uspoof_setAllowedChars().
 *
 * $patternOptions must be 0, USET_IGNORE_SPACE, or USET_IGNORE_SPACE combined
 * with exactly one of USET_CASE_INSENSITIVE, USET_ADD_CASE_MAPPINGS or (with
 * ICU >= 73) USET_SIMPLE_CASE_INSENSITIVE.
 *
 * Throws ValueError when the pattern is too long, is not wrapped in '[' ... ']',
 * cannot be converted to UTF-16, is rejected by uset_applyPattern(), or when
 * the options are not one of the accepted combinations. A failure reported by
 * uspoof_setAllowedChars() itself is raised as an E_WARNING.
 */
PHP_METHOD(Spoofchecker, setAllowedChars)
{
	zend_string *pattern;
	UChar *upattern = NULL;
	int32_t upattern_len = 0;
	zend_long pattern_option = 0;
	USet *set;
	SPOOFCHECKER_METHOD_INIT_VARS;

	ZEND_PARSE_PARAMETERS_START(1, 2)
		Z_PARAM_STR(pattern)
		Z_PARAM_OPTIONAL
		Z_PARAM_LONG(pattern_option)
	ZEND_PARSE_PARAMETERS_END();

	SPOOFCHECKER_METHOD_FETCH_OBJECT;

	if (ZSTR_LEN(pattern) > INT32_MAX) {
		/* ZEND_LONG_FMT consumes a zend_long from the varargs, whereas INT32_MAX
		 * is a plain int: cast so the argument width matches the format. */
		zend_argument_value_error(1, "must be less than or equal to " ZEND_LONG_FMT " bytes long", (zend_long) INT32_MAX);
		RETURN_THROWS();
	}

	/* uset_applyPattern requires the pattern to start with a regex range char.
	 * The explicit length guard keeps the "len - 1" index below in bounds;
	 * anything shorter than "[]" cannot satisfy both bracket checks anyway. */
	if (ZSTR_LEN(pattern) < 2
		|| ZSTR_VAL(pattern)[0] != '['
		|| ZSTR_VAL(pattern)[ZSTR_LEN(pattern) - 1] != ']') {
		zend_argument_value_error(1, "must be a valid regular expression character set pattern");
		RETURN_THROWS();
	}

	intl_convert_utf8_to_utf16(&upattern, &upattern_len, ZSTR_VAL(pattern), ZSTR_LEN(pattern), SPOOFCHECKER_ERROR_CODE_P(co));
	if (U_FAILURE(SPOOFCHECKER_ERROR_CODE(co))) {
		zend_argument_value_error(1, "string conversion to unicode encoding failed (%d) %s", SPOOFCHECKER_ERROR_CODE(co), u_errorName(SPOOFCHECKER_ERROR_CODE(co)));
		RETURN_THROWS();
	}

	/* The option is either USET_IGNORE_SPACE alone or in conjunction with one of
	 * the following flags (which are mutually exclusive). Validate before the
	 * set is opened so this failure path only has the UTF-16 buffer to release. */
	if (pattern_option &&
		pattern_option != USET_IGNORE_SPACE &&
#if U_ICU_VERSION_MAJOR_NUM >= 73
		pattern_option != (USET_IGNORE_SPACE|USET_SIMPLE_CASE_INSENSITIVE) &&
#endif
		pattern_option != (USET_IGNORE_SPACE|USET_CASE_INSENSITIVE) &&
		pattern_option != (USET_IGNORE_SPACE|USET_ADD_CASE_MAPPINGS)) {
		/* NOTE(review): the message below spells the constants with a USET_
		 * prefix; the text is kept byte-for-byte because the accompanying
		 * phpt test matches on it. */
		zend_argument_value_error(2, "must be a valid pattern option, 0 or (SpoofChecker::IGNORE_SPACE|(<none> or SpoofChecker::USET_CASE_INSENSITIVE or SpoofChecker::USET_ADD_CASE_MAPPINGS"
#if U_ICU_VERSION_MAJOR_NUM >= 73
			" or SpoofChecker::USET_SIMPLE_CASE_INSENSITIVE"
#endif
			"))"
		);
		efree(upattern);
		RETURN_THROWS();
	}

	set = uset_openEmpty();

	uset_applyPattern(set, upattern, upattern_len, (uint32_t)pattern_option, SPOOFCHECKER_ERROR_CODE_P(co));
	if (U_FAILURE(SPOOFCHECKER_ERROR_CODE(co))) {
		zend_argument_value_error(1, "must be a valid regular expression character set pattern (%d) %s", SPOOFCHECKER_ERROR_CODE(co), u_errorName(SPOOFCHECKER_ERROR_CODE(co)));
		uset_close(set);
		efree(upattern);
		RETURN_THROWS();
	}

	uset_compact(set);
	uspoof_setAllowedChars(co->uspoof, set, SPOOFCHECKER_ERROR_CODE_P(co));

	/* The local set and the UTF-16 buffer are released here whether or not
	 * the call above succeeded. */
	uset_close(set);
	efree(upattern);

	if (U_FAILURE(SPOOFCHECKER_ERROR_CODE(co))) {
		php_error_docref(NULL, E_WARNING, "(%d) %s", SPOOFCHECKER_ERROR_CODE(co), u_errorName(SPOOFCHECKER_ERROR_CODE(co)));
	}
}

View file

@ -0,0 +1,44 @@
--TEST--
Spoofchecker::setAllowedChars() with pattern and option validation
--EXTENSIONS--
intl
--SKIPIF--
<?php if(!class_exists("Spoofchecker")) print 'skip'; ?>
--FILE--
<?php
// Exercises Spoofchecker::setAllowedChars(): first with valid patterns, then
// with an invalid option value and two malformed patterns.
$s = new Spoofchecker();
// Only lowercase a-z allowed: the digit string is reported as suspicious.
$s->setAllowedChars('[a-z]');
var_dump($s->isSuspicious("123"));
// Digits 1-3 allowed: the same string is no longer suspicious.
$s->setAllowedChars('[1-3]');
var_dump($s->isSuspicious("123"));
// With the case-insensitive option, uppercase input is accepted by [a-z].
$s->setAllowedChars('[a-z]', SpoofChecker::IGNORE_SPACE | SpoofChecker::CASE_INSENSITIVE);
var_dump($s->isSuspicious("ABC"));
// 1024 is not an accepted option combination: expect a ValueError on argument #2.
try {
$s->setAllowedChars('[a-z]', 1024);
} catch (\ValueError $e) {
echo $e->getMessage() . PHP_EOL;
}
// Pattern missing its opening '[': expect a ValueError on argument #1.
try {
$s->setAllowedChars("A-Z]");
} catch (\ValueError $e) {
echo $e->getMessage() . PHP_EOL;
}
// Pattern missing its closing ']': expect a ValueError on argument #1.
// No trailing PHP_EOL here; this is the last line of the expected output.
try {
$s->setAllowedChars("[A-Z");
} catch (\ValueError $e) {
echo $e->getMessage();
}
?>
--EXPECTF--
bool(true)
bool(false)
bool(false)
Spoofchecker::setAllowedChars(): Argument #2 ($patternOptions) must be a valid pattern option, 0 or (SpoofChecker::IGNORE_SPACE|(<none> or SpoofChecker::USET_CASE_INSENSITIVE%s))
Spoofchecker::setAllowedChars(): Argument #1 ($pattern) must be a valid regular expression character set pattern
Spoofchecker::setAllowedChars(): Argument #1 ($pattern) must be a valid regular expression character set pattern