Use different mblen_table for different SJIS variants

This commit is contained in:
Alex Dowad 2023-01-05 11:34:33 +02:00
parent d104481af8
commit 3152b7b26f
5 changed files with 48 additions and 10 deletions

3
NEWS
View file

@ -21,6 +21,9 @@ PHP NEWS
. Fixed bug GH-10112 (LDAP\Connection::__construct() refers to ldap_create()).
(cmb)
- MBString:
. Fixed: mb_strlen (and a couple of other mbstring functions) would wrongly treat 0x80, 0xFD, 0xFE, 0xFF, and certain other byte values as the first byte of a 2-byte SJIS character. (Alex Dowad)
- Opcache:
. Fix inverted bailout value in zend_runtime_jit() (Max Kellermann).
. Fix access to uninitialized variable in accel_preload(). (nielsdos)

View file

@ -38,7 +38,7 @@
static int mbfl_filt_conv_sjis_wchar_flush(mbfl_convert_filter *filter);
const unsigned char mblen_table_sjis[] = { /* 0x81-0x9F,0xE0-0xFC */
const unsigned char mblen_table_sjis[] = { /* 0x81-0x9F,0xE0-0xEF */
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
@ -54,7 +54,7 @@ const unsigned char mblen_table_sjis[] = { /* 0x81-0x9F,0xE0-0xFC */
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
};
static const char *mbfl_encoding_sjis_aliases[] = {"x-sjis", "SHIFT-JIS", NULL};

View file

@ -39,7 +39,7 @@
#include "unicode_table_jis2004.h"
#include "unicode_table_jis.h"
extern const unsigned char mblen_table_sjis[];
extern const unsigned char mblen_table_sjis_mobile[];
extern int mbfl_bisec_srch(int w, const unsigned short *tbl, int n);
extern int mbfl_bisec_srch2(int w, const unsigned short tbl[], int n);
@ -51,7 +51,7 @@ const mbfl_encoding mbfl_encoding_sjis2004 = {
"SJIS-2004",
"Shift_JIS",
mbfl_encoding_sjis2004_aliases,
mblen_table_sjis,
mblen_table_sjis_mobile, /* Leading byte values used for SJIS-2004 are the same as mobile SJIS variants */
MBFL_ENCTYPE_GL_UNSAFE,
&vtbl_sjis2004_wchar,
&vtbl_wchar_sjis2004

View file

@ -35,7 +35,24 @@
#include "sjis_mac2uni.h"
extern const unsigned char mblen_table_sjis[];
/*
 * Byte-length lookup table for SJIS-mac, indexed by the first byte of a
 * character. An entry of 2 marks a lead byte of a double-byte character;
 * every other byte value counts as a single byte.
 *
 * Lead bytes: 0x81-0x9F and 0xE0-0xED.
 */
const unsigned char mblen_table_sjismac[] = {
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x00-0x0F */
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x10-0x1F */
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x20-0x2F */
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x30-0x3F */
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x40-0x4F */
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x50-0x5F */
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x60-0x6F */
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x70-0x7F */
	1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, /* 0x80-0x8F: 0x80 is not a lead byte */
	2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, /* 0x90-0x9F */
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0xA0-0xAF */
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0xB0-0xBF */
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0xC0-0xCF */
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0xD0-0xDF */
	2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, /* 0xE0-0xEF: lead bytes end at 0xED */
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1  /* 0xF0-0xFF */
};
static int mbfl_filt_conv_wchar_sjis_mac_flush(mbfl_convert_filter *filter);
static int mbfl_filt_conv_sjis_mac_wchar_flush(mbfl_convert_filter *filter);
@ -47,7 +64,7 @@ const mbfl_encoding mbfl_encoding_sjis_mac = {
"SJIS-mac",
"Shift_JIS",
mbfl_encoding_sjis_mac_aliases,
mblen_table_sjis,
mblen_table_sjismac,
MBFL_ENCTYPE_GL_UNSAFE,
&vtbl_sjis_mac_wchar,
&vtbl_wchar_sjis_mac

View file

@ -35,8 +35,26 @@
#include "emoji2uni.h"
/*
 * Byte-length lookup table for the mobile SJIS variants, indexed by the
 * first byte of a character. An entry of 2 marks a lead byte of a
 * double-byte character; every other byte value counts as a single byte.
 *
 * Lead bytes: 0x81-0x9F and 0xE0-0xFC.
 */
const unsigned char mblen_table_sjis_mobile[] = {
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x00-0x0F */
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x10-0x1F */
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x20-0x2F */
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x30-0x3F */
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x40-0x4F */
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x50-0x5F */
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x60-0x6F */
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x70-0x7F */
	1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, /* 0x80-0x8F: 0x80 is not a lead byte */
	2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, /* 0x90-0x9F */
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0xA0-0xAF */
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0xB0-0xBF */
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0xC0-0xCF */
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0xD0-0xDF */
	2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, /* 0xE0-0xEF */
	2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1  /* 0xF0-0xFF: lead bytes end at 0xFC */
};
extern int mbfl_bisec_srch2(int w, const unsigned short tbl[], int n);
extern const unsigned char mblen_table_sjis[];
static int mbfl_filt_conv_sjis_wchar_flush(mbfl_convert_filter *filter);
@ -49,7 +67,7 @@ const mbfl_encoding mbfl_encoding_sjis_docomo = {
"SJIS-Mobile#DOCOMO",
"Shift_JIS",
mbfl_encoding_sjis_docomo_aliases,
mblen_table_sjis,
mblen_table_sjis_mobile,
MBFL_ENCTYPE_GL_UNSAFE,
&vtbl_sjis_docomo_wchar,
&vtbl_wchar_sjis_docomo
@ -60,7 +78,7 @@ const mbfl_encoding mbfl_encoding_sjis_kddi = {
"SJIS-Mobile#KDDI",
"Shift_JIS",
mbfl_encoding_sjis_kddi_aliases,
mblen_table_sjis,
mblen_table_sjis_mobile,
MBFL_ENCTYPE_GL_UNSAFE,
&vtbl_sjis_kddi_wchar,
&vtbl_wchar_sjis_kddi
@ -71,7 +89,7 @@ const mbfl_encoding mbfl_encoding_sjis_sb = {
"SJIS-Mobile#SOFTBANK",
"Shift_JIS",
mbfl_encoding_sjis_sb_aliases,
mblen_table_sjis,
mblen_table_sjis_mobile,
MBFL_ENCTYPE_GL_UNSAFE,
&vtbl_sjis_sb_wchar,
&vtbl_wchar_sjis_sb