ext/mbstring: update UCD parser to accept characters with multiple properties

This commit is contained in:
Ayesh Karunaratne 2024-06-27 01:55:37 +07:00 committed by Alex Dowad
parent 998997b6a1
commit 23f99f08c9

View file

@ -367,14 +367,22 @@ function parseSpecialCasing(UnicodeData $data, string $input) : void {
function parseDerivedCoreProperties(UnicodeData $data, string $input) : void { function parseDerivedCoreProperties(UnicodeData $data, string $input) : void {
foreach (parseDataFile($input) as $fields) { foreach (parseDataFile($input) as $fields) {
if (count($fields) != 2) { $fieldCount = count($fields);
throw new Exception("Line does not contain 2 fields"); if ($fieldCount != 2 && $fieldCount !== 3) {
throw new Exception("Line does not contain 2 or 3 fields");
} }
$property = $fields[1]; $usedProperties = ['Cased', 'Case_Ignorable'];
if ($property != 'Cased' && $property != 'Case_Ignorable') { if (isset($fields[2]) && in_array($fields[2], $usedProperties, true)) {
$property = $fields[2];
}
elseif (!in_array($fields[1], $usedProperties, true)) {
continue; continue;
} }
else{
$property = $fields[1];
}
$range = explode('..', $fields[0]); $range = explode('..', $fields[0]);
if (count($range) == 2) { if (count($range) == 2) {