diff --git a/NEWS b/NEWS index 104b3610400..c0f63de1bdf 100644 --- a/NEWS +++ b/NEWS @@ -20,6 +20,10 @@ PHP NEWS . /dev/poll events.mechanism for Solaris/Illumos setting had been retired. (David Carlier) +- Intl: + . Added SpoofChecker::setAllowedChars to set the allowed Unicode characters. + (David Carlier) + - Opcache: . Fixed bug GH-13775 (Memory leak possibly related to opcache SHM placement). (Arnaud) diff --git a/UPGRADING b/UPGRADING index 4450aeac8e7..beef5bbbcc7 100644 --- a/UPGRADING +++ b/UPGRADING @@ -609,6 +609,8 @@ PHP 8.4 UPGRADE NOTES Selectors. . Added IntlDateFormatter::parseToCalendar which behaves like IntlDateFormatter::parse except the time zone is updated. + . Added SpoofChecker::setAllowedChars to limit the range of allowed + Unicode characters. - MBString: . Added mb_trim, mb_ltrim and mb_rtrim functions. diff --git a/ext/intl/spoofchecker/spoofchecker.stub.php b/ext/intl/spoofchecker/spoofchecker.stub.php index b0afd4a796a..0141252d478 100644 --- a/ext/intl/spoofchecker/spoofchecker.stub.php +++ b/ext/intl/spoofchecker/spoofchecker.stub.php @@ -40,6 +40,17 @@ class Spoofchecker public const int HIDDEN_OVERLAY = UNKNOWN; #endif + /** @cvalue USET_IGNORE_SPACE */ + public const int IGNORE_SPACE = UNKNOWN; + /** @cvalue USET_CASE_INSENSITIVE */ + public const int CASE_INSENSITIVE = UNKNOWN; + /** @cvalue USET_ADD_CASE_MAPPINGS */ + public const int ADD_CASE_MAPPINGS = UNKNOWN; +#if U_ICU_VERSION_MAJOR_NUM >= 73 + /** @cvalue USET_SIMPLE_CASE_INSENSITIVE */ + public const int SIMPLE_CASE_INSENSITIVE = UNKNOWN; +#endif + public function __construct() {} /** @@ -64,4 +75,5 @@ class Spoofchecker /** @tentative-return-type */ public function setRestrictionLevel(int $level): void {} #endif + public function setAllowedChars(string $pattern, int $patternOptions = 0): void {} } diff --git a/ext/intl/spoofchecker/spoofchecker_arginfo.h b/ext/intl/spoofchecker/spoofchecker_arginfo.h index f76c3ca6fd0..d0ba891ea95 100644 --- a/ext/intl/spoofchecker/spoofchecker_arginfo.h +++
b/ext/intl/spoofchecker/spoofchecker_arginfo.h @@ -1,5 +1,5 @@ /* This is a generated file, edit the .stub.php file instead. - * Stub hash: d74f65f808ec0f71ee0ae8c04e253c4412da885e */ + * Stub hash: 4834be57a3f0cb74dbc4422e609846139f09f6cb */ ZEND_BEGIN_ARG_INFO_EX(arginfo_class_Spoofchecker___construct, 0, 0, 0) ZEND_END_ARG_INFO() @@ -29,6 +29,11 @@ ZEND_BEGIN_ARG_WITH_TENTATIVE_RETURN_TYPE_INFO_EX(arginfo_class_Spoofchecker_set ZEND_END_ARG_INFO() #endif +ZEND_BEGIN_ARG_WITH_RETURN_TYPE_INFO_EX(arginfo_class_Spoofchecker_setAllowedChars, 0, 1, IS_VOID, 0) + ZEND_ARG_TYPE_INFO(0, pattern, IS_STRING, 0) + ZEND_ARG_TYPE_INFO_WITH_DEFAULT_VALUE(0, patternOptions, IS_LONG, 0, "0") +ZEND_END_ARG_INFO() + ZEND_METHOD(Spoofchecker, __construct); ZEND_METHOD(Spoofchecker, isSuspicious); ZEND_METHOD(Spoofchecker, areConfusable); @@ -37,6 +42,7 @@ ZEND_METHOD(Spoofchecker, setChecks); #if U_ICU_VERSION_MAJOR_NUM >= 58 ZEND_METHOD(Spoofchecker, setRestrictionLevel); #endif +ZEND_METHOD(Spoofchecker, setAllowedChars); static const zend_function_entry class_Spoofchecker_methods[] = { ZEND_ME(Spoofchecker, __construct, arginfo_class_Spoofchecker___construct, ZEND_ACC_PUBLIC) @@ -47,6 +53,7 @@ static const zend_function_entry class_Spoofchecker_methods[] = { #if U_ICU_VERSION_MAJOR_NUM >= 58 ZEND_ME(Spoofchecker, setRestrictionLevel, arginfo_class_Spoofchecker_setRestrictionLevel, ZEND_ACC_PUBLIC) #endif + ZEND_ME(Spoofchecker, setAllowedChars, arginfo_class_Spoofchecker_setAllowedChars, ZEND_ACC_PUBLIC) ZEND_FE_END }; @@ -164,5 +171,31 @@ static zend_class_entry *register_class_Spoofchecker(void) zend_string_release(const_HIDDEN_OVERLAY_name); #endif + zval const_IGNORE_SPACE_value; + ZVAL_LONG(&const_IGNORE_SPACE_value, USET_IGNORE_SPACE); + zend_string *const_IGNORE_SPACE_name = zend_string_init_interned("IGNORE_SPACE", sizeof("IGNORE_SPACE") - 1, 1); + zend_declare_typed_class_constant(class_entry, const_IGNORE_SPACE_name, &const_IGNORE_SPACE_value, ZEND_ACC_PUBLIC, NULL, 
(zend_type) ZEND_TYPE_INIT_MASK(MAY_BE_LONG)); + zend_string_release(const_IGNORE_SPACE_name); + + zval const_CASE_INSENSITIVE_value; + ZVAL_LONG(&const_CASE_INSENSITIVE_value, USET_CASE_INSENSITIVE); + zend_string *const_CASE_INSENSITIVE_name = zend_string_init_interned("CASE_INSENSITIVE", sizeof("CASE_INSENSITIVE") - 1, 1); + zend_declare_typed_class_constant(class_entry, const_CASE_INSENSITIVE_name, &const_CASE_INSENSITIVE_value, ZEND_ACC_PUBLIC, NULL, (zend_type) ZEND_TYPE_INIT_MASK(MAY_BE_LONG)); + zend_string_release(const_CASE_INSENSITIVE_name); + + zval const_ADD_CASE_MAPPINGS_value; + ZVAL_LONG(&const_ADD_CASE_MAPPINGS_value, USET_ADD_CASE_MAPPINGS); + zend_string *const_ADD_CASE_MAPPINGS_name = zend_string_init_interned("ADD_CASE_MAPPINGS", sizeof("ADD_CASE_MAPPINGS") - 1, 1); + zend_declare_typed_class_constant(class_entry, const_ADD_CASE_MAPPINGS_name, &const_ADD_CASE_MAPPINGS_value, ZEND_ACC_PUBLIC, NULL, (zend_type) ZEND_TYPE_INIT_MASK(MAY_BE_LONG)); + zend_string_release(const_ADD_CASE_MAPPINGS_name); +#if U_ICU_VERSION_MAJOR_NUM >= 73 + + zval const_SIMPLE_CASE_INSENSITIVE_value; + ZVAL_LONG(&const_SIMPLE_CASE_INSENSITIVE_value, USET_SIMPLE_CASE_INSENSITIVE); + zend_string *const_SIMPLE_CASE_INSENSITIVE_name = zend_string_init_interned("SIMPLE_CASE_INSENSITIVE", sizeof("SIMPLE_CASE_INSENSITIVE") - 1, 1); + zend_declare_typed_class_constant(class_entry, const_SIMPLE_CASE_INSENSITIVE_name, &const_SIMPLE_CASE_INSENSITIVE_value, ZEND_ACC_PUBLIC, NULL, (zend_type) ZEND_TYPE_INIT_MASK(MAY_BE_LONG)); + zend_string_release(const_SIMPLE_CASE_INSENSITIVE_name); +#endif + return class_entry; } diff --git a/ext/intl/spoofchecker/spoofchecker_main.c b/ext/intl/spoofchecker/spoofchecker_main.c index 0d747d6c0f1..afea503bc0e 100644 --- a/ext/intl/spoofchecker/spoofchecker_main.c +++ b/ext/intl/spoofchecker/spoofchecker_main.c @@ -17,19 +17,19 @@ #endif #include "php_intl.h" +#include "intl_convert.h" #include "spoofchecker_class.h" /* {{{ Checks if a given text 
contains any suspicious characters */ PHP_METHOD(Spoofchecker, isSuspicious) { int32_t ret, errmask; - char *text; - size_t text_len; + zend_string *text; zval *error_code = NULL; SPOOFCHECKER_METHOD_INIT_VARS; ZEND_PARSE_PARAMETERS_START(1, 2) - Z_PARAM_STRING(text, text_len) + Z_PARAM_STR(text) Z_PARAM_OPTIONAL Z_PARAM_ZVAL(error_code) ZEND_PARSE_PARAMETERS_END(); @@ -37,9 +37,9 @@ PHP_METHOD(Spoofchecker, isSuspicious) SPOOFCHECKER_METHOD_FETCH_OBJECT; #if U_ICU_VERSION_MAJOR_NUM >= 58 - ret = uspoof_check2UTF8(co->uspoof, text, text_len, co->uspoofres, SPOOFCHECKER_ERROR_CODE_P(co)); + ret = uspoof_check2UTF8(co->uspoof, ZSTR_VAL(text), ZSTR_LEN(text), co->uspoofres, SPOOFCHECKER_ERROR_CODE_P(co)); #else - ret = uspoof_checkUTF8(co->uspoof, text, text_len, NULL, SPOOFCHECKER_ERROR_CODE_P(co)); + ret = uspoof_checkUTF8(co->uspoof, ZSTR_VAL(text), ZSTR_LEN(text), NULL, SPOOFCHECKER_ERROR_CODE_P(co)); #endif if (U_FAILURE(SPOOFCHECKER_ERROR_CODE(co))) { @@ -65,23 +65,22 @@ PHP_METHOD(Spoofchecker, isSuspicious) PHP_METHOD(Spoofchecker, areConfusable) { int ret; - char *s1, *s2; - size_t s1_len, s2_len; + zend_string *s1, *s2; zval *error_code = NULL; SPOOFCHECKER_METHOD_INIT_VARS; ZEND_PARSE_PARAMETERS_START(2, 3) - Z_PARAM_STRING(s1, s1_len) - Z_PARAM_STRING(s2, s2_len) + Z_PARAM_STR(s1) + Z_PARAM_STR(s2) Z_PARAM_OPTIONAL Z_PARAM_ZVAL(error_code) ZEND_PARSE_PARAMETERS_END(); SPOOFCHECKER_METHOD_FETCH_OBJECT; - if(s1_len > INT32_MAX || s2_len > INT32_MAX) { + if(ZSTR_LEN(s1) > INT32_MAX || ZSTR_LEN(s2) > INT32_MAX) { SPOOFCHECKER_ERROR_CODE(co) = U_BUFFER_OVERFLOW_ERROR; } else { - ret = uspoof_areConfusableUTF8(co->uspoof, s1, (int32_t)s1_len, s2, (int32_t)s2_len, SPOOFCHECKER_ERROR_CODE_P(co)); + ret = uspoof_areConfusableUTF8(co->uspoof, ZSTR_VAL(s1), (int32_t)ZSTR_LEN(s1), ZSTR_VAL(s2), (int32_t)ZSTR_LEN(s2), SPOOFCHECKER_ERROR_CODE_P(co)); } if (U_FAILURE(SPOOFCHECKER_ERROR_CODE(co))) { php_error_docref(NULL, E_WARNING, "(%d) %s", SPOOFCHECKER_ERROR_CODE(co), 
u_errorName(SPOOFCHECKER_ERROR_CODE(co))); @@ -98,17 +97,16 @@ PHP_METHOD(Spoofchecker, areConfusable) /* {{{ Locales to use when running checks */ PHP_METHOD(Spoofchecker, setAllowedLocales) { - char *locales; - size_t locales_len; + zend_string *locales; SPOOFCHECKER_METHOD_INIT_VARS; ZEND_PARSE_PARAMETERS_START(1, 1) - Z_PARAM_STRING(locales, locales_len) + Z_PARAM_STR(locales) ZEND_PARSE_PARAMETERS_END(); SPOOFCHECKER_METHOD_FETCH_OBJECT; - uspoof_setAllowedLocales(co->uspoof, locales, SPOOFCHECKER_ERROR_CODE_P(co)); + uspoof_setAllowedLocales(co->uspoof, ZSTR_VAL(locales), SPOOFCHECKER_ERROR_CODE_P(co)); if (U_FAILURE(SPOOFCHECKER_ERROR_CODE(co))) { php_error_docref(NULL, E_WARNING, "(%d) %s", SPOOFCHECKER_ERROR_CODE(co), u_errorName(SPOOFCHECKER_ERROR_CODE(co))); @@ -167,3 +165,76 @@ PHP_METHOD(Spoofchecker, setRestrictionLevel) } /* }}} */ #endif + +/* {{{ Limits the characters accepted by the checker to those matched by a UnicodeSet pattern */ +PHP_METHOD(Spoofchecker, setAllowedChars) +{ + zend_string *pattern; + UChar *upattern = NULL; + int32_t upattern_len = 0; + zend_long pattern_option = 0; + SPOOFCHECKER_METHOD_INIT_VARS; + + ZEND_PARSE_PARAMETERS_START(1, 2) + Z_PARAM_STR(pattern) + Z_PARAM_OPTIONAL + Z_PARAM_LONG(pattern_option) + ZEND_PARSE_PARAMETERS_END(); + SPOOFCHECKER_METHOD_FETCH_OBJECT; + + if (ZSTR_LEN(pattern) > INT32_MAX) { + zend_argument_value_error(1, "must be less than or equal to %d bytes long", INT32_MAX); + RETURN_THROWS(); + } + + /* uset_applyPattern requires the pattern to start with a regex range char */ + if (ZSTR_VAL(pattern)[0] != '[' || ZSTR_VAL(pattern)[ZSTR_LEN(pattern) - 1] != ']') { + zend_argument_value_error(1, "must be a valid regular expression character set pattern"); + RETURN_THROWS(); + } + + intl_convert_utf8_to_utf16(&upattern, &upattern_len, ZSTR_VAL(pattern), ZSTR_LEN(pattern), SPOOFCHECKER_ERROR_CODE_P(co)); + if (U_FAILURE(SPOOFCHECKER_ERROR_CODE(co))) { + zend_argument_value_error(1, "string conversion to unicode encoding failed (%d) %s", SPOOFCHECKER_ERROR_CODE(co),
u_errorName(SPOOFCHECKER_ERROR_CODE(co))); + RETURN_THROWS(); + } + + USet *set = uset_openEmpty(); + + /* pattern option is either USET_IGNORE_SPACE alone or in conjunction with one of the following (mutually exclusive) flags */ + if (pattern_option && + pattern_option != USET_IGNORE_SPACE && +#if U_ICU_VERSION_MAJOR_NUM >= 73 + pattern_option != (USET_IGNORE_SPACE|USET_SIMPLE_CASE_INSENSITIVE) && +#endif + pattern_option != (USET_IGNORE_SPACE|USET_CASE_INSENSITIVE) && + pattern_option != (USET_IGNORE_SPACE|USET_ADD_CASE_MAPPINGS)) { + zend_argument_value_error(2, "must be a valid pattern option, 0 or (SpoofChecker::IGNORE_SPACE|( or SpoofChecker::USET_CASE_INSENSITIVE or SpoofChecker::USET_ADD_CASE_MAPPINGS" +#if U_ICU_VERSION_MAJOR_NUM >= 73 + " or SpoofChecker::USET_SIMPLE_CASE_INSENSITIVE" +#endif + "))" + ); + uset_close(set); + efree(upattern); + RETURN_THROWS(); + } + + uset_applyPattern(set, upattern, upattern_len, (uint32_t)pattern_option, SPOOFCHECKER_ERROR_CODE_P(co)); + if (U_FAILURE(SPOOFCHECKER_ERROR_CODE(co))) { + zend_argument_value_error(1, "must be a valid regular expression character set pattern (%d) %s", SPOOFCHECKER_ERROR_CODE(co), u_errorName(SPOOFCHECKER_ERROR_CODE(co))); + uset_close(set); + efree(upattern); + RETURN_THROWS(); + } + + uset_compact(set); + uspoof_setAllowedChars(co->uspoof, set, SPOOFCHECKER_ERROR_CODE_P(co)); + uset_close(set); + efree(upattern); + + if (U_FAILURE(SPOOFCHECKER_ERROR_CODE(co))) { + php_error_docref(NULL, E_WARNING, "(%d) %s", SPOOFCHECKER_ERROR_CODE(co), u_errorName(SPOOFCHECKER_ERROR_CODE(co))); + } +} +/* }}} */ diff --git a/ext/intl/tests/spoofchecker_008.phpt b/ext/intl/tests/spoofchecker_008.phpt new file mode 100644 index 00000000000..4eb78711c2c --- /dev/null +++ b/ext/intl/tests/spoofchecker_008.phpt @@ -0,0 +1,44 @@ +--TEST-- +spoofchecker with allowed chars settings +--EXTENSIONS-- +intl +--SKIPIF-- + +--FILE-- +setAllowedChars('[a-z]'); +var_dump($s->isSuspicious("123")); +$s->setAllowedChars('[1-3]');
+var_dump($s->isSuspicious("123")); +$s->setAllowedChars('[a-z]', SpoofChecker::IGNORE_SPACE | SpoofChecker::CASE_INSENSITIVE); +var_dump($s->isSuspicious("ABC")); + +try { + $s->setAllowedChars('[a-z]', 1024); +} catch (\ValueError $e) { + echo $e->getMessage() . PHP_EOL; +} + +try { + $s->setAllowedChars("A-Z]"); +} catch (\ValueError $e) { + echo $e->getMessage() . PHP_EOL; +} + +try { + $s->setAllowedChars("[A-Z"); +} catch (\ValueError $e) { + echo $e->getMessage(); +} + +?> +--EXPECTF-- +bool(true) +bool(false) +bool(false) +Spoofchecker::setAllowedChars(): Argument #2 ($patternOptions) must be a valid pattern option, 0 or (SpoofChecker::IGNORE_SPACE|( or SpoofChecker::USET_CASE_INSENSITIVE%s)) +Spoofchecker::setAllowedChars(): Argument #1 ($pattern) must be a valid regular expression character set pattern +Spoofchecker::setAllowedChars(): Argument #1 ($pattern) must be a valid regular expression character set pattern