mirror of
https://github.com/php/php-src.git
synced 2025-08-15 21:48:51 +02:00
Remove duplicate implementation of CP932 from mbstring
Sigh. Double sigh. After fruitlessly searching the Internet for information on this mysterious text encoding called "SJIS-open", I wrote a script to try converting every Unicode codepoint from 0-0xFFFF and compare the results from different variants of Shift-JIS, to see which one "SJIS-open" would be most similar to. The result? It's just CP932. There is no difference at all. So why do we have two implementations of CP932 in mbstring? In case somebody, somewhere is using "SJIS-open" (or its aliases "SJIS-win" or "SJIS-ms"), add these as aliases to CP932 so existing code will continue to work.
This commit is contained in:
parent
7502c86342
commit
e2459857af
9 changed files with 4 additions and 360 deletions
|
@ -117,7 +117,6 @@ AC_DEFUN([PHP_MBSTRING_SETUP_LIBMBFL], [
|
|||
libmbfl/filters/mbfilter_qprint.c
|
||||
libmbfl/filters/mbfilter_singlebyte.c
|
||||
libmbfl/filters/mbfilter_sjis.c
|
||||
libmbfl/filters/mbfilter_sjis_open.c
|
||||
libmbfl/filters/mbfilter_sjis_mobile.c
|
||||
libmbfl/filters/mbfilter_sjis_mac.c
|
||||
libmbfl/filters/mbfilter_sjis_2004.c
|
||||
|
|
|
@ -26,7 +26,7 @@ if (PHP_MBSTRING != "no") {
|
|||
mbfilter_ucs4.c mbfilter_uhc.c mbfilter_utf16.c mbfilter_utf32.c \
|
||||
mbfilter_utf7.c mbfilter_utf7imap.c mbfilter_utf8.c \
|
||||
mbfilter_utf8_mobile.c mbfilter_euc_jp_2004.c mbfilter_uuencode.c \
|
||||
mbfilter_cp5022x.c mbfilter_sjis_open.c mbfilter_sjis_mobile.c \
|
||||
mbfilter_cp5022x.c mbfilter_sjis_mobile.c \
|
||||
mbfilter_sjis_mac.c mbfilter_iso2022jp_2004.c \
|
||||
mbfilter_iso2022jp_mobile.c mbfilter_singlebyte.c \
|
||||
mbfilter_tl_jisx0201_jisx0208.c", "mbstring");
|
||||
|
|
|
@ -54,7 +54,7 @@ static const unsigned char mblen_table_sjis[] = { /* 0x80-0x9f,0xE0-0xFF */
|
|||
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
|
||||
};
|
||||
|
||||
static const char *mbfl_encoding_cp932_aliases[] = {"MS932", "Windows-31J", "MS_Kanji", NULL};
|
||||
static const char *mbfl_encoding_cp932_aliases[] = {"MS932", "Windows-31J", "MS_Kanji", "SJIS-win", "SJIS-ms", "SJIS-open", NULL};
|
||||
|
||||
const mbfl_encoding mbfl_encoding_cp932 = {
|
||||
mbfl_no_encoding_cp932,
|
||||
|
|
|
@ -1,308 +0,0 @@
|
|||
/*
|
||||
* "streamable kanji code filter and converter"
|
||||
* Copyright (c) 1998-2002 HappySize, Inc. All rights reserved.
|
||||
*
|
||||
* LICENSE NOTICES
|
||||
*
|
||||
* This file is part of "streamable kanji code filter and converter",
|
||||
* which is distributed under the terms of GNU Lesser General Public
|
||||
* License (version 2) as published by the Free Software Foundation.
|
||||
*
|
||||
* This software is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public
|
||||
* License along with "streamable kanji code filter and converter";
|
||||
* if not, write to the Free Software Foundation, Inc., 59 Temple Place,
|
||||
* Suite 330, Boston, MA 02111-1307 USA
|
||||
*
|
||||
* The author of this file:
|
||||
*
|
||||
*/
|
||||
/*
|
||||
* the source code included in this file was separated from mbfilter_ja.c
|
||||
* by moriyoshi koizumi <moriyoshi@php.net> on 4 dec 2002.
|
||||
*
|
||||
*/
|
||||
|
||||
#include "mbfilter.h"
|
||||
#include "mbfilter_sjis_open.h"
|
||||
|
||||
#include "unicode_table_cp932_ext.h"
|
||||
#include "unicode_table_jis.h"
|
||||
|
||||
static const unsigned char mblen_table_sjis[] = { /* 0x80-0x9f,0xE0-0xFF */
|
||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
||||
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
|
||||
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
|
||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
||||
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
|
||||
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
|
||||
};
|
||||
|
||||
static const char *mbfl_encoding_sjis_open_aliases[] = {"SJIS-open", "SJIS-ms", NULL};
|
||||
|
||||
const mbfl_encoding mbfl_encoding_sjis_open = {
|
||||
mbfl_no_encoding_sjis_open,
|
||||
"SJIS-win",
|
||||
"Shift_JIS",
|
||||
mbfl_encoding_sjis_open_aliases,
|
||||
mblen_table_sjis,
|
||||
MBFL_ENCTYPE_GL_UNSAFE,
|
||||
&vtbl_sjis_open_wchar,
|
||||
&vtbl_wchar_sjis_open
|
||||
};
|
||||
|
||||
const struct mbfl_convert_vtbl vtbl_sjis_open_wchar = {
|
||||
mbfl_no_encoding_sjis_open,
|
||||
mbfl_no_encoding_wchar,
|
||||
mbfl_filt_conv_common_ctor,
|
||||
NULL,
|
||||
mbfl_filt_conv_sjis_open_wchar,
|
||||
mbfl_filt_conv_common_flush,
|
||||
NULL,
|
||||
};
|
||||
|
||||
const struct mbfl_convert_vtbl vtbl_wchar_sjis_open = {
|
||||
mbfl_no_encoding_wchar,
|
||||
mbfl_no_encoding_sjis_open,
|
||||
mbfl_filt_conv_common_ctor,
|
||||
NULL,
|
||||
mbfl_filt_conv_wchar_sjis_open,
|
||||
mbfl_filt_conv_common_flush,
|
||||
NULL,
|
||||
};
|
||||
|
||||
#define CK(statement) do { if ((statement) < 0) return (-1); } while (0)
|
||||
|
||||
#define SJIS_ENCODE(c1,c2,s1,s2) \
|
||||
do { \
|
||||
s1 = c1; \
|
||||
s1--; \
|
||||
s1 >>= 1; \
|
||||
if ((c1) < 0x5f) { \
|
||||
s1 += 0x71; \
|
||||
} else { \
|
||||
s1 += 0xb1; \
|
||||
} \
|
||||
s2 = c2; \
|
||||
if ((c1) & 1) { \
|
||||
if ((c2) < 0x60) { \
|
||||
s2--; \
|
||||
} \
|
||||
s2 += 0x20; \
|
||||
} else { \
|
||||
s2 += 0x7e; \
|
||||
} \
|
||||
} while (0)
|
||||
|
||||
#define SJIS_DECODE(c1,c2,s1,s2) \
|
||||
do { \
|
||||
s1 = c1; \
|
||||
if (s1 < 0xa0) { \
|
||||
s1 -= 0x81; \
|
||||
} else { \
|
||||
s1 -= 0xc1; \
|
||||
} \
|
||||
s1 <<= 1; \
|
||||
s1 += 0x21; \
|
||||
s2 = c2; \
|
||||
if (s2 < 0x9f) { \
|
||||
if (s2 < 0x7f) { \
|
||||
s2++; \
|
||||
} \
|
||||
s2 -= 0x20; \
|
||||
} else { \
|
||||
s1++; \
|
||||
s2 -= 0x7e; \
|
||||
} \
|
||||
} while (0)
|
||||
|
||||
/*
|
||||
* SJIS-win => wchar
|
||||
*/
|
||||
int
|
||||
mbfl_filt_conv_sjis_open_wchar(int c, mbfl_convert_filter *filter)
|
||||
{
|
||||
int c1, s, s1, s2, w;
|
||||
|
||||
switch (filter->status) {
|
||||
case 0:
|
||||
if (c >= 0 && c < 0x80) { /* latin */
|
||||
CK((*filter->output_function)(c, filter->data));
|
||||
} else if (c > 0xa0 && c < 0xe0) { /* kana */
|
||||
CK((*filter->output_function)(0xfec0 + c, filter->data));
|
||||
} else if (c > 0x80 && c < 0xfd && c != 0xa0) { /* kanji first char */
|
||||
filter->status = 1;
|
||||
filter->cache = c;
|
||||
} else {
|
||||
w = c & MBFL_WCSGROUP_MASK;
|
||||
w |= MBFL_WCSGROUP_THROUGH;
|
||||
CK((*filter->output_function)(w, filter->data));
|
||||
}
|
||||
break;
|
||||
|
||||
case 1: /* kanji second char */
|
||||
filter->status = 0;
|
||||
c1 = filter->cache;
|
||||
if (c >= 0x40 && c <= 0xfc && c != 0x7f) {
|
||||
w = 0;
|
||||
SJIS_DECODE(c1, c, s1, s2);
|
||||
s = (s1 - 0x21)*94 + s2 - 0x21;
|
||||
if (s <= 137) {
|
||||
if (s == 31) {
|
||||
w = 0xff3c; /* FULLWIDTH REVERSE SOLIDUS */
|
||||
} else if (s == 32) {
|
||||
w = 0xff5e; /* FULLWIDTH TILDE */
|
||||
} else if (s == 33) {
|
||||
w = 0x2225; /* PARALLEL TO */
|
||||
} else if (s == 60) {
|
||||
w = 0xff0d; /* FULLWIDTH HYPHEN-MINUS */
|
||||
} else if (s == 80) {
|
||||
w = 0xffe0; /* FULLWIDTH CENT SIGN */
|
||||
} else if (s == 81) {
|
||||
w = 0xffe1; /* FULLWIDTH POUND SIGN */
|
||||
} else if (s == 137) {
|
||||
w = 0xffe2; /* FULLWIDTH NOT SIGN */
|
||||
}
|
||||
}
|
||||
if (w == 0) {
|
||||
if (s >= cp932ext1_ucs_table_min && s < cp932ext1_ucs_table_max) { /* vendor ext1 (13ku) */
|
||||
w = cp932ext1_ucs_table[s - cp932ext1_ucs_table_min];
|
||||
} else if (s >= 0 && s < jisx0208_ucs_table_size) { /* X 0208 */
|
||||
w = jisx0208_ucs_table[s];
|
||||
} else if (s >= cp932ext2_ucs_table_min && s < cp932ext2_ucs_table_max) { /* vendor ext2 (89ku - 92ku) */
|
||||
w = cp932ext2_ucs_table[s - cp932ext2_ucs_table_min];
|
||||
} else if (s >= cp932ext3_ucs_table_min && s < cp932ext3_ucs_table_max) { /* vendor ext3 (115ku - 119ku) */
|
||||
w = cp932ext3_ucs_table[s - cp932ext3_ucs_table_min];
|
||||
} else if (s >= (94*94) && s < (114*94)) { /* user (95ku - 114ku) */
|
||||
w = s - (94*94) + 0xe000;
|
||||
}
|
||||
}
|
||||
if (w <= 0) {
|
||||
w = (s1 << 8) | s2;
|
||||
w &= MBFL_WCSPLANE_MASK;
|
||||
w |= MBFL_WCSPLANE_WINCP932;
|
||||
}
|
||||
CK((*filter->output_function)(w, filter->data));
|
||||
} else if ((c >= 0 && c < 0x21) || c == 0x7f) { /* CTLs */
|
||||
CK((*filter->output_function)(c, filter->data));
|
||||
} else {
|
||||
w = (c1 << 8) | c;
|
||||
w &= MBFL_WCSGROUP_MASK;
|
||||
w |= MBFL_WCSGROUP_THROUGH;
|
||||
CK((*filter->output_function)(w, filter->data));
|
||||
}
|
||||
break;
|
||||
|
||||
default:
|
||||
filter->status = 0;
|
||||
break;
|
||||
}
|
||||
|
||||
return c;
|
||||
}
|
||||
|
||||
/*
|
||||
* wchar => SJIS-win
|
||||
*/
|
||||
int
|
||||
mbfl_filt_conv_wchar_sjis_open(int c, mbfl_convert_filter *filter)
|
||||
{
|
||||
int c1, c2, s1, s2;
|
||||
|
||||
s1 = 0;
|
||||
s2 = 0;
|
||||
if (c >= ucs_a1_jis_table_min && c < ucs_a1_jis_table_max) {
|
||||
s1 = ucs_a1_jis_table[c - ucs_a1_jis_table_min];
|
||||
} else if (c >= ucs_a2_jis_table_min && c < ucs_a2_jis_table_max) {
|
||||
s1 = ucs_a2_jis_table[c - ucs_a2_jis_table_min];
|
||||
} else if (c >= ucs_i_jis_table_min && c < ucs_i_jis_table_max) {
|
||||
s1 = ucs_i_jis_table[c - ucs_i_jis_table_min];
|
||||
} else if (c >= ucs_r_jis_table_min && c < ucs_r_jis_table_max) {
|
||||
s1 = ucs_r_jis_table[c - ucs_r_jis_table_min];
|
||||
} else if (c >= 0xe000 && c < (0xe000 + 20*94)) { /* user (95ku - 114ku) */
|
||||
s1 = c - 0xe000;
|
||||
c1 = s1/94 + 0x7f;
|
||||
c2 = s1%94 + 0x21;
|
||||
s1 = (c1 << 8) | c2;
|
||||
s2 = 1;
|
||||
}
|
||||
if (s1 <= 0) {
|
||||
if (c == 0xa5) { /* YEN SIGN */
|
||||
s1 = 0x216f; /* FULLWIDTH YEN SIGN */
|
||||
} else if (c == 0x203e) { /* OVER LINE */
|
||||
s1 = 0x2131; /* FULLWIDTH MACRON */
|
||||
} else if (c == 0xff3c) { /* FULLWIDTH REVERSE SOLIDUS */
|
||||
s1 = 0x2140;
|
||||
} else if (c == 0xff5e) { /* FULLWIDTH TILDE */
|
||||
s1 = 0x2141;
|
||||
} else if (c == 0x2225) { /* PARALLEL TO */
|
||||
s1 = 0x2142;
|
||||
} else if (c == 0xff0d) { /* FULLWIDTH HYPHEN-MINUS */
|
||||
s1 = 0x215d;
|
||||
} else if (c == 0xffe0) { /* FULLWIDTH CENT SIGN */
|
||||
s1 = 0x2171;
|
||||
} else if (c == 0xffe1) { /* FULLWIDTH POUND SIGN */
|
||||
s1 = 0x2172;
|
||||
} else if (c == 0xffe2) { /* FULLWIDTH NOT SIGN */
|
||||
s1 = 0x224c;
|
||||
}
|
||||
}
|
||||
if ((s1 <= 0) || (s1 >= 0x8080 && s2 == 0)) { /* not found or X 0212 */
|
||||
s1 = -1;
|
||||
c1 = 0;
|
||||
c2 = cp932ext1_ucs_table_max - cp932ext1_ucs_table_min;
|
||||
while (c1 < c2) { /* CP932 vendor ext1 (13ku) */
|
||||
if (c == cp932ext1_ucs_table[c1]) {
|
||||
s1 = ((c1/94 + 0x2d) << 8) + (c1%94 + 0x21);
|
||||
break;
|
||||
}
|
||||
c1++;
|
||||
}
|
||||
if (s1 <= 0) {
|
||||
c1 = 0;
|
||||
c2 = cp932ext3_ucs_table_max - cp932ext3_ucs_table_min;
|
||||
while (c1 < c2) { /* CP932 vendor ext3 (115ku - 119ku) */
|
||||
if (c == cp932ext3_ucs_table[c1]) {
|
||||
s1 = ((c1/94 + 0x93) << 8) + (c1%94 + 0x21);
|
||||
break;
|
||||
}
|
||||
c1++;
|
||||
}
|
||||
}
|
||||
if (c == 0) {
|
||||
s1 = 0;
|
||||
} else if (s1 <= 0) {
|
||||
s1 = -1;
|
||||
}
|
||||
}
|
||||
|
||||
if (s1 >= 0) {
|
||||
if (s1 < 0x100) { /* latin or kana */
|
||||
CK((*filter->output_function)(s1, filter->data));
|
||||
} else { /* kanji */
|
||||
c1 = (s1 >> 8) & 0xff;
|
||||
c2 = s1 & 0xff;
|
||||
SJIS_ENCODE(c1, c2, s1, s2);
|
||||
CK((*filter->output_function)(s1, filter->data));
|
||||
CK((*filter->output_function)(s2, filter->data));
|
||||
}
|
||||
} else {
|
||||
CK(mbfl_filt_conv_illegal_output(c, filter));
|
||||
}
|
||||
|
||||
return c;
|
||||
}
|
|
@ -1,43 +0,0 @@
|
|||
/*
|
||||
* "streamable kanji code filter and converter"
|
||||
* Copyright (c) 1998-2002 HappySize, Inc. All rights reserved.
|
||||
*
|
||||
* LICENSE NOTICES
|
||||
*
|
||||
* This file is part of "streamable kanji code filter and converter",
|
||||
* which is distributed under the terms of GNU Lesser General Public
|
||||
* License (version 2) as published by the Free Software Foundation.
|
||||
*
|
||||
* This software is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public
|
||||
* License along with "streamable kanji code filter and converter";
|
||||
* if not, write to the Free Software Foundation, Inc., 59 Temple Place,
|
||||
* Suite 330, Boston, MA 02111-1307 USA
|
||||
*
|
||||
* The author of this file:
|
||||
*
|
||||
*/
|
||||
/*
|
||||
* the source code included in this file was separated from mbfilter_ja.c
|
||||
* by moriyoshi koizumi <moriyoshi@php.net> on 4 dec 2002.
|
||||
*
|
||||
*/
|
||||
|
||||
#ifndef MBFL_MBFILTER_SJIS_OPEN_H
|
||||
#define MBFL_MBFILTER_SJIS_OPEN_H
|
||||
|
||||
#include "mbfilter.h"
|
||||
|
||||
extern const mbfl_encoding mbfl_encoding_sjis_open;
|
||||
|
||||
extern const struct mbfl_convert_vtbl vtbl_sjis_open_wchar;
|
||||
extern const struct mbfl_convert_vtbl vtbl_wchar_sjis_open;
|
||||
|
||||
int mbfl_filt_conv_sjis_open_wchar(int c, mbfl_convert_filter *filter);
|
||||
int mbfl_filt_conv_wchar_sjis_open(int c, mbfl_convert_filter *filter);
|
||||
|
||||
#endif /* MBFL_MBFILTER_SJIS_OPEN_H */
|
|
@ -44,7 +44,6 @@
|
|||
#include "filters/mbfilter_euc_kr.h"
|
||||
#include "filters/mbfilter_iso2022_kr.h"
|
||||
#include "filters/mbfilter_sjis.h"
|
||||
#include "filters/mbfilter_sjis_open.h"
|
||||
#include "filters/mbfilter_sjis_2004.h"
|
||||
#include "filters/mbfilter_sjis_mobile.h"
|
||||
#include "filters/mbfilter_sjis_mac.h"
|
||||
|
|
|
@ -48,7 +48,6 @@
|
|||
#include "filters/mbfilter_euc_kr.h"
|
||||
#include "filters/mbfilter_iso2022_kr.h"
|
||||
#include "filters/mbfilter_sjis.h"
|
||||
#include "filters/mbfilter_sjis_open.h"
|
||||
#include "filters/mbfilter_sjis_mobile.h"
|
||||
#include "filters/mbfilter_sjis_mac.h"
|
||||
#include "filters/mbfilter_sjis_2004.h"
|
||||
|
@ -114,7 +113,6 @@ static const mbfl_encoding *mbfl_encoding_ptr_list[] = {
|
|||
&mbfl_encoding_sjis,
|
||||
&mbfl_encoding_eucjp_win,
|
||||
&mbfl_encoding_eucjp2004,
|
||||
&mbfl_encoding_sjis_open,
|
||||
&mbfl_encoding_sjis_docomo,
|
||||
&mbfl_encoding_sjis_kddi,
|
||||
&mbfl_encoding_sjis_sb,
|
||||
|
|
|
@ -68,7 +68,6 @@ enum mbfl_no_encoding {
|
|||
mbfl_no_encoding_eucjp2004,
|
||||
mbfl_no_encoding_sjis,
|
||||
mbfl_no_encoding_eucjp_win,
|
||||
mbfl_no_encoding_sjis_open,
|
||||
mbfl_no_encoding_sjis_docomo,
|
||||
mbfl_no_encoding_sjis_kddi,
|
||||
mbfl_no_encoding_sjis_sb,
|
||||
|
|
|
@ -176,10 +176,10 @@ string(9) "eucJP-win"
|
|||
-- Iteration 20 --
|
||||
string(9) "eucJP-win"
|
||||
bool(true)
|
||||
string(8) "SJIS-win"
|
||||
string(5) "CP932"
|
||||
|
||||
-- Iteration 21 --
|
||||
string(8) "SJIS-win"
|
||||
string(5) "CP932"
|
||||
bool(true)
|
||||
string(11) "ISO-2022-JP"
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue