Merge branch 'PHP-8.2'

* PHP-8.2: Fix GH-11300: license issue: restricted unicode license headers
2025-08-16 05:58:45 +02:00 · 2023-07-01 22:03:08 +02:00 · 2023-07-01 22:03:08 +02:00 · b2a54bc6af
commit b2a54bc6af
parent 775bc49700 297fec099e
5 changed files with 153 additions and 117 deletions
--- a/ext/mbstring/tests/data/BIG5.txt
+++ b/ext/mbstring/tests/data/BIG5.txt
@ -1,32 +1,23 @@
 # BIG5.TXT
 # Date: 2015-12-02 23:52:00 GMT [KW]
 # © 2015 Unicode®, Inc.
 # For terms of use, see http://www.unicode.org/terms_of_use.html
 #
 #	Name:             BIG5 to Unicode table (complete)
 #	Unicode version:  1.1
-#	Table version:    0.0d3
+#	Table version:    2.0
 #	Table format:     Format A
-#	Date:             11 February 1994
+#	Date:             2011 October 14 (header updated: 2015 December 02)
 #	Authors:          Glenn Adams <glenn@metis.com>
 #                     John H. Jenkins <John_Jenkins@taligent.com>
 #
 #	Copyright (c) 1991-1994 Unicode, Inc.  All Rights reserved.
 #
 #	This file is provided as-is by Unicode, Inc. (The Unicode Consortium).
 #	No claims are made as to fitness for any particular purpose.  No
 #	warranties of any kind are expressed or implied.  The recipient
 #	agrees to determine applicability of information provided.  If this
 #	file has been provided on magnetic media by Unicode, Inc., the sole
 #	remedy for any claim will be exchange of defective media within 90
 #	days of receipt.
 #
 #	Recipient is granted the right to make copies in any form for
 #	internal distribution and to freely use the information supplied
 #	in the creation of products supporting Unicode.  Unicode, Inc.
 #	specifically excludes the right to re-distribute this file directly
 #	to third parties or other organizations whether for profit or not.
 #
 #	General notes:
 #
-#	This table contains the data Metis and Taligent currently have on how
+#
-#       BIG5 characters map into Unicode.
+# This table contains one set of mappings from BIG5 into Unicode.
 # Note that these data are *possible* mappings only and may not be the
 # same as those used by actual products, nor may they be the best suited
 # for all uses.  For more information on the mappings between various code
 # pages incorporating the repertoire of BIG5 and Unicode, consult the
 # VENDORS mapping data.
 #
 #	WARNING!  It is currently impossible to provide round-trip compatibility
 #		between BIG5 and Unicode.  
@ -63,10 +54,8 @@
 #	MACRON (Mandarin Chinese first tone) to reflect this semantic.  
 #	However, because bopomofo uses the absence of a tone mark to indicate
 #	the first Mandarin tone, most implementations of Big Five represent
-#		this character with a blank space, and so a mapping such as U+2003 EM SPACE
+#	this character with a blank space, and so a mapping such as U+2003 EM
-#		might be preferred.
+#	SPACE might be preferred.  
 #
 #
 #
 #	Format:  Three tab-separated columns
 #		 Column #1 is the BIG5 code (in hex as 0xXXXX)
@ -82,9 +71,24 @@
 #
 #	The entries are in BIG5 order
 #
-#	Any comments or problems, contact <John_Jenkins@taligent.com>
+#  Revision History:
 #
 #    [v2.0, 2015 December 02]
 #    updates to copyright notice and terms of use
 #    no changes to character mappings
 #
 #    [v1.0, 2011 October 14]
 #    Updated terms of use to current wording.
 #    Updated contact information.
 #    No changes to the mapping data.
 #
 #    [v0.0d3, 11 February 1994]
 #    First release.
 #
 #  Use the Unicode reporting form <http://www.unicode.org/reporting.html>
 #    for any questions or comments or to report errors in the data.
 #
 # Manually added mapping of lower ASCII characters
 0x0	0x0
 0x1	0x1
 0x2	0x2
@ -239,6 +243,7 @@
 0xA157	0xFE31	# PRESENTATION FORM FOR VERTICAL EM DASH
 0xA158	0x2014	# EM DASH
 0xA159	0xFE33	# PRESENTATION FORM FOR VERTICAL LOW LINE
 0xA15A	0xFFFD	# *** NO MAPPING ***
 0xA15B	0xFE34	# PRESENTATION FORM FOR VERTICAL WAVY LOW LINE
 0xA15C	0xFE4F	# WAVY LOW LINE
 0xA15D	0xFF08	# FULLWIDTH LEFT PARENTHESIS
@ -309,7 +314,9 @@
 0xA1C0	0x32A3	# CIRCLED IDEOGRAPH CORRECT
 0xA1C1	0x2105	# CARE OF
 0xA1C2	0x203E	# OVERLINE
 0xA1C3	0xFFFD	# *** NO MAPPING ***
 0xA1C4	0xFF3F	# FULLWIDTH LOW LINE
 0xA1C5	0xFFFD	# *** NO MAPPING ***
 0xA1C6	0xFE49	# DASHED OVERLINE
 0xA1C7	0xFE4A	# CENTRELINE OVERLINE
 0xA1C8	0xFE4D	# DASHED LOW LINE
@ -366,6 +373,8 @@
 0xA1FB	0x2198	# SOUTH EAST ARROW
 0xA1FC	0x2225	# PARALLEL TO
 0xA1FD	0x2223	# DIVIDES
 0xA1FE	0xFFFD	# *** NO MAPPING ***
 0xA240	0xFFFD	# *** NO MAPPING ***
 0xA241	0xFF0F	# FULLWIDTH SOLIDUS
 0xA242	0xFF3C	# FULLWIDTH REVERSE SOLIDUS
 0xA243	0xFF04	# FULLWIDTH DOLLAR SIGN
@ -471,7 +480,9 @@
 0xA2C9	0x3027	# HANGZHOU NUMERAL SEVEN
 0xA2CA	0x3028	# HANGZHOU NUMERAL EIGHT
 0xA2CB	0x3029	# HANGZHOU NUMERAL NINE
 0xA2CC	0xFFFD	# *** NO MAPPING ***
 0xA2CD	0x5344	# <CJK>
 0xA2CE	0xFFFD	# *** NO MAPPING ***
 0xA2CF	0xFF21	# FULLWIDTH LATIN CAPITAL LETTER A
 0xA2D0	0xFF22	# FULLWIDTH LATIN CAPITAL LETTER B
 0xA2D1	0xFF23	# FULLWIDTH LATIN CAPITAL LETTER C
@ -13916,7 +13927,7 @@
 0xF9D3	0x9F7E	# <CJK>
 0xF9D4	0x9F49	# <CJK>
 0xF9D5	0x9F98	# <CJK>
-# The following ETEN extensions are copied from CP950.txt:
+# The following ETEN extensions are copied from CP950.txt (https://unicode.org/Public/MAPPINGS/VENDORS/MICSFT/WINDOWS/CP950.TXT):
 0xF9D6	0x7881	#CJK UNIFIED IDEOGRAPH
 0xF9D7	0x92B9	#CJK UNIFIED IDEOGRAPH
 0xF9D8	0x88CF	#CJK UNIFIED IDEOGRAPH
--- a/ext/mbstring/tests/data/JISX0201.txt
+++ b/ext/mbstring/tests/data/JISX0201.txt
@ -1,33 +1,24 @@
 # JIS0201.TXT
 # Date: 2015-12-02 23:49:00 GMT [KW]
 # © 2015 Unicode®, Inc.
 # For terms of use, see http://www.unicode.org/terms_of_use.html
 #
 #	Name:             JIS X 0201 (1976) to Unicode 1.1 Table
 #	Unicode version:  1.1
-#	Table version:    0.9
+#	Table version:    2.0
 #	Table format:     Format A
-#	Date:             8 March 1994
+#	Date:             2011 October 14 (header updated: 2015 December 02)
 #	Authors:          Glenn Adams <glenn@metis.com>
 #                     John H. Jenkins <John_Jenkins@taligent.com>
 #
 #	Copyright (c) 1991-1994 Unicode, Inc.  All Rights reserved.
 #
 #	This file is provided as-is by Unicode, Inc. (The Unicode Consortium).
 #	No claims are made as to fitness for any particular purpose.  No
 #	warranties of any kind are expressed or implied.  The recipient
 #	agrees to determine applicability of information provided.  If this
 #	file has been provided on magnetic media by Unicode, Inc., the sole
 #	remedy for any claim will be exchange of defective media within 90
 #	days of receipt.
 #
 #	Recipient is granted the right to make copies in any form for
 #	internal distribution and to freely use the information supplied
 #	in the creation of products supporting Unicode.  Unicode, Inc.
 #	specifically excludes the right to re-distribute this file directly
 #	to third parties or other organizations whether for profit or not.
 #
 #	General notes:
 #
-#	This table contains the data the Unicode Consortium has on how
+#
-#	single-byte JIS X 0201 characters map into Unicode 1.1
+# This table contains one set of mappings from JIS X 0201 into Unicode.
-#	(ISO/IEC 10646:1-1993 UCS-2).
+# Note that these data are *possible* mappings only and may not be the
 # same as those used by actual products, nor may they be the best suited
 # for all uses.  For more information on the mappings between various code
 # pages incorporating the repertoire of JIS X 0201 and Unicode, consult the
 # VENDORS mapping data.
 #
 #
 #	Format:  Three tab-separated columns
 #		Column #1 is the shift JIS code (in hex as 0xXX)
@ -36,11 +27,22 @@
 #
 #	The entries are in JIS order
 #
-#   These mappings are provisional, pending definition of
+#  Revision History:
 #       official mappings by Japanese standards bodies.
 #
-#	Any comments or problems, contact <John_Jenkins@taligent.com>
+#    [v2.0, 2015 December 02]
 #    updates to copyright notice and terms of use
 #    no changes to character mappings
 #
 #    [v1.0, 2011 October 14]
 #    Updated terms of use to current wording.
 #    Updated contact information.
 #    No changes to the mapping data.
 #
 #    [v0.9, 8 March 1994]
 #    First release.
 #
 #  Use the Unicode reporting form <http://www.unicode.org/reporting.html>
 #    for any questions or comments or to report errors in the data.
 #
 0x20	0x0020	# SPACE
 0x21	0x0021	# EXCLAMATION MARK
--- a/ext/mbstring/tests/data/JISX0212.txt
+++ b/ext/mbstring/tests/data/JISX0212.txt
@ -1,32 +1,24 @@
 # JIS0212.TXT
 # Date: 2015-12-02 23:51:00 GMT [KW]
 # © 2015 Unicode®, Inc.
 # For terms of use, see http://www.unicode.org/terms_of_use.html
 #
 #	Name:             JIS X 0212 (1990) to Unicode
 #	Unicode version:  1.1
-#	Table version:    0.9
+#	Table version:    2.0
 #	Table format:     Format A
-#	Date:             8 March 1994
+#	Date:             2011 October 14 (header updated: 2015 December 02)
 #	Authors:          Glenn Adams <glenn@metis.com>
 #                     John H. Jenkins <John_Jenkins@taligent.com>
 #
 #	Copyright (c) 1991-1994 Unicode, Inc.  All Rights reserved.
 #
 #	This file is provided as-is by Unicode, Inc. (The Unicode Consortium).
 #	No claims are made as to fitness for any particular purpose.  No
 #	warranties of any kind are expressed or implied.  The recipient
 #	agrees to determine applicability of information provided.  If this
 #	file has been provided on magnetic media by Unicode, Inc., the sole
 #	remedy for any claim will be exchange of defective media within 90
 #	days of receipt.
 #
 #	Recipient is granted the right to make copies in any form for
 #	internal distribution and to freely use the information supplied
 #	in the creation of products supporting Unicode.  Unicode, Inc.
 #	specifically excludes the right to re-distribute this file directly
 #	to third parties or other organizations whether for profit or not.
 #
 #	General notes:
 #
-#	This table contains the data the Unicode Consortium has on how
+#
-#       JIS X 0212 (1983) characters map into Unicode.
+# This table contains one set of mappings from JIS X 0212 into Unicode.
 # Note that these data are *possible* mappings only and may not be the
 # same as those used by actual products, nor may they be the best suited
 # for all uses.  For more information on the mappings between various code
 # pages incorporating the repertoire of JIS X 0212 and Unicode, consult the
 # VENDORS mapping data.
 #
 #
 #	Format:  Three tab-separated columns
 #		 Column #1 is the JIS X 0212 code (in hex as 0xXXXX)
@ -51,12 +43,6 @@
 #			the kuten form.  For example, 0x2121 -> 0x0101 -> 0101;
 #			0x6D63 -> 0x4D43 -> 7767
 #
 #   The kanji mappings are a normative part of ISO/IEC 10646.  The
 #       non-kanji mappings are provisional, pending definition of
 #       official mappings by Japanese standards bodies
 #
 #	Any comments or problems, contact <John_Jenkins@taligent.com>
 #
 #	Notes:
 #
 #	1. JIS X 0212 apparently unified the following two symbols
@ -72,6 +58,23 @@
 #	   Consequently, in the Unicode mapping, 0x2922 is treated as
 #	   LATIN CAPITAL LETTER D WITH STROKE.
 #
 #  Revision History:
 #
 #    [v2.0, 2015 December 02]
 #    updates to copyright notice and terms of use
 #    no changes to character mappings
 #
 #    [v1.0, 2011 October 14]
 #    Updated terms of use to current wording.
 #    Updated contact information.
 #    No changes to the mapping data.
 #
 #    [v0.9, 8 March 1994]
 #    First release.
 #
 #  Use the Unicode reporting form <http://www.unicode.org/reporting.html>
 #    for any questions or comments or to report errors in the data.
 #	  
 0x222F	0x02D8	# BREVE
 0x2230	0x02C7	# CARON (Mandarin Chinese third tone)
 0x2231	0x00B8	# CEDILLA
--- a/ext/mbstring/tests/data/KSX1001.txt
+++ b/ext/mbstring/tests/data/KSX1001.txt
@ -1,11 +1,12 @@
 #
 #    Name:     Unified Hangul (KS X 1001) to Unicode table
 #    Unicode version: 2.0
-#    Table version: 1.0
+#    Table version: 1.1
 #    Table format:  Format A
-#    Date:          08/16/99
+#    Date:             2011 October 14
 #    Authors:       Jungshik Shin at jshin@pantheon.yale.edu
-#    General notes: none
+#
 #	Copyright (c) 1999-2011 Unicode, Inc.  All Rights reserved.
 #
 #       This file is provided as-is by Unicode, Inc. (The Unicode Consortium).
 #       No claims are made as to fitness for any particular purpose.  No
@ -15,11 +16,13 @@
 #       remedy for any claim will be exchange of defective media within 90
 #       days of receipt.
 #
-#       Recipient is granted the right to make copies in any form for
+#  Unicode, Inc. hereby grants the right to freely use the information
-#       internal distribution and to freely use the information supplied
+#  supplied in this file in the creation of products supporting the
-#       in the creation of products supporting Unicode.  Unicode, Inc.
+#  Unicode Standard, and to make copies of this file in any form for
-#       specifically excludes the right to re-distribute this file directly
+#  internal or external distribution as long as this notice remains
-#       to third parties or other organizations whether for profit or not.
+#  attached.
 #
 #	General notes:
 #
 # What is enclosed below is the mapping between KS X 1001(KS C 5601-1987)
 # and Unicode 2.0.   It's automatically generated from KSC5601.TXT
@ -64,6 +67,19 @@
 #      the high and low bytes correspond to the row(Hang) and the column(Yol),
 #      respectively
 #
 #  Revision History:
 #
 #    [v1.1, 2011 October 14]
 #    Updated terms of use to current wording.
 #    Updated contact information.
 #    No changes to the mapping data.
 #
 #    [v1.0, 08/16/99]
 #    First release.
 #
 #  Use the Unicode reporting form <http://www.unicode.org/reporting.html>
 #    for any questions or comments or to report errors in the data.
 # 
 0x2121  0x3000  # IDEOGRAPHIC SPACE
 0x2122  0x3001  # IDEOGRAPHIC COMMA
 0x2123  0x3002  # IDEOGRAPHIC FULL STOP
--- a/ext/mbstring/tests/encoding_tests.inc
+++ b/ext/mbstring/tests/encoding_tests.inc
@ -34,6 +34,10 @@ function readConversionTable($path, &$from, &$to, $utf32 = false) {
        if ($line[0] == '#')
            continue;
        if (sscanf($line, "0x%x\t0x%x", $char, $codepoint) == 2) {
            // Skip codepoints that do not have a mapping (e.g. in BIG5.txt)
            if ($codepoint === 0xFFFD) {
                continue;
            }
            $codepoint = $utf32 ? pack('N', $codepoint) : pack('n', $codepoint);
            if ($char == PHP_INT_MAX) {
                // We may be on a 32-bit machine and testing a text encoding with 4-byte codes