From 2198d38cbc12beeaf00be688cf33a5a6697ea20c Mon Sep 17 00:00:00 2001 From: Anatol Belski Date: Fri, 20 Oct 2017 19:02:42 +0200 Subject: [PATCH 1/2] Add test for newer ICU version --- .../tests/formatter_get_locale_variant3.phpt | 50 +++++++++++++++++++ 1 file changed, 50 insertions(+) create mode 100644 ext/intl/tests/formatter_get_locale_variant3.phpt diff --git a/ext/intl/tests/formatter_get_locale_variant3.phpt b/ext/intl/tests/formatter_get_locale_variant3.phpt new file mode 100644 index 00000000000..901e2353b4e --- /dev/null +++ b/ext/intl/tests/formatter_get_locale_variant3.phpt @@ -0,0 +1,50 @@ +--TEST-- +numfmt_get_locale() +--SKIPIF-- + += 58.1'); ?> +--FILE-- + 'actual', + Locale::VALID_LOCALE => 'valid', + ); + + $res_str = ''; + + foreach( $locales as $locale ) + { + $fmt = ut_nfmt_create( $locale, NumberFormatter::DECIMAL ); + $res_str .= "$locale: "; + foreach( $loc_types as $loc_type => $loc_type_name ) + $res_str .= sprintf( " %s=%s", + $loc_type_name, + dump( ut_nfmt_get_locale( $fmt, $loc_type ) ) ); + $res_str .= "\n"; + } + + return $res_str; +} + +include_once( 'ut_common.inc' ); + +// Run the test +ut_run(); +?> +--EXPECT-- +en_UK: actual='en' valid='en' +en_US: actual='en' valid='en_US' +fr_CA: actual='fr' valid='fr_CA' From f95063647ccebfde5e8e39bc3349e63f19662395 Mon Sep 17 00:00:00 2001 From: Anatol Belski Date: Fri, 20 Oct 2017 19:14:22 +0200 Subject: [PATCH 2/2] Fixed bug #73655 Spoofchecker::isSuspicious behavior change due to upstream changes There are significant changes in the spoof checking reflecting http://www.unicode.org/reports/tr39/tr39-15.html and relying on the restriction levels. ICU 58+ removes WSC and MSC handling and otherwise undergoes big changes in both code and data areas. Keep up with the basic points for now, as we need to move forward and provide an acceptable experience to PHP users linking to a newer ICU. The most distros ATM don't provide ICU > 57.1, though. 
We will certainly need to keep up with the BC break in ICU 58+ in possible further aspects. --- ext/intl/spoofchecker/spoofchecker_class.h | 2 ++ ext/intl/spoofchecker/spoofchecker_create.c | 13 +++++++++++ ext/intl/tests/spoofchecker_006.phpt | 26 +++++++++++++++++++++ 3 files changed, 41 insertions(+) create mode 100644 ext/intl/tests/spoofchecker_006.phpt diff --git a/ext/intl/spoofchecker/spoofchecker_class.h b/ext/intl/spoofchecker/spoofchecker_class.h index 7c5864b82ff..7a95f315dc5 100644 --- a/ext/intl/spoofchecker/spoofchecker_class.h +++ b/ext/intl/spoofchecker/spoofchecker_class.h @@ -78,4 +78,6 @@ extern zend_class_entry *Spoofchecker_ce_ptr; RETURN_FALSE; \ } \ +#define SPOOFCHECKER_DEFAULT_RESTRICTION_LEVEL USPOOF_MODERATELY_RESTRICTIVE + #endif // #ifndef SPOOFCHECKER_CLASS_H diff --git a/ext/intl/spoofchecker/spoofchecker_create.c b/ext/intl/spoofchecker/spoofchecker_create.c index fbe7cbae1d1..1333a0f2050 100644 --- a/ext/intl/spoofchecker/spoofchecker_create.c +++ b/ext/intl/spoofchecker/spoofchecker_create.c @@ -43,12 +43,25 @@ PHP_METHOD(Spoofchecker, __construct) co->uspoof = uspoof_open(SPOOFCHECKER_ERROR_CODE_P(co)); INTL_METHOD_CHECK_STATUS(co, "spoofchecker: unable to open ICU Spoof Checker"); +#if U_ICU_VERSION_MAJOR_NUM >= 58 + /* TODO save it into the object for further suspicion check comparison. */ + /* ICU 58 removes WSC and MSC handling. However there are restriction + levels as defined in + http://www.unicode.org/reports/tr39/tr39-15.html#Restriction_Level_Detection + and the default is highly restrictive. However the moderately restrictive + level is what seems to correspond to the setting below applicable to + ICU < 58. In the future, we might want to utilize uspoof_check2 APIs once + they become stable, to use extended check result APIs. 
Subsequent changes + in the unicode security algos are to be watched.*/ + uspoof_setRestrictionLevel(co->uspoof, SPOOFCHECKER_DEFAULT_RESTRICTION_LEVEL); +#else /* Single-script enforcement is on by default. This fails for languages like Japanese that legally use multiple scripts within a single word, so we turn it off. */ checks = uspoof_getChecks(co->uspoof, SPOOFCHECKER_ERROR_CODE_P(co)); uspoof_setChecks(co->uspoof, checks & ~USPOOF_SINGLE_SCRIPT, SPOOFCHECKER_ERROR_CODE_P(co)); +#endif zend_restore_error_handling(&error_handling); } /* }}} */ diff --git a/ext/intl/tests/spoofchecker_006.phpt b/ext/intl/tests/spoofchecker_006.phpt new file mode 100644 index 00000000000..038a2554195 --- /dev/null +++ b/ext/intl/tests/spoofchecker_006.phpt @@ -0,0 +1,26 @@ +--TEST-- +spoofchecker suspicious character checker +--SKIPIF-- + += 58.1'); ?> +--FILE-- +isSuspicious("http://www.payp\u{0430}l.com")); +var_dump($x->isSuspicious("\u{041F}aypal.com")); + +echo "certain all-uppercase Latin sequences can be spoof of Greek\n"; +$x = new Spoofchecker(); +$x->setAllowedLocales("gr_GR"); +var_dump($x->isSuspicious("NAPKIN PEZ")); +var_dump($x->isSuspicious("napkin pez")); +?> +--EXPECTF-- +paypal with Cyrillic spoof characters +bool(true) +bool(true) +certain all-uppercase Latin sequences can be spoof of Greek +bool(true) +bool(true)