Combine CJK encoding conversion code in a single source file

This will make it easier to combine duplicated code between all the
CJK text encodings (a significant amount is already combined in this
commit, such as the repeated definitions of SJIS_DECODE and
SJIS_ENCODE), but I hope to remove even more redundancy in the future.

The table used to implement mb_strlen for CP932 has been changed to
the same table as "SJIS-win".
This commit is contained in:
Alex Dowad 2023-04-14 09:00:11 +02:00
parent 548e0615cd
commit c717c79a09
52 changed files with 12667 additions and 14854 deletions

View file

@ -95,30 +95,12 @@ AC_DEFUN([PHP_MBSTRING_SETUP_LIBMBFL], [
libmbfl/filters/html_entities.c
libmbfl/filters/mbfilter_7bit.c
libmbfl/filters/mbfilter_base64.c
libmbfl/filters/mbfilter_big5.c
libmbfl/filters/mbfilter_cp5022x.c
libmbfl/filters/mbfilter_cp51932.c
libmbfl/filters/mbfilter_cp932.c
libmbfl/filters/mbfilter_cp936.c
libmbfl/filters/mbfilter_gb18030.c
libmbfl/filters/mbfilter_euc_cn.c
libmbfl/filters/mbfilter_euc_jp.c
libmbfl/filters/mbfilter_euc_jp_win.c
libmbfl/filters/mbfilter_euc_kr.c
libmbfl/filters/mbfilter_euc_tw.c
libmbfl/filters/mbfilter_cjk.c
libmbfl/filters/mbfilter_htmlent.c
libmbfl/filters/mbfilter_hz.c
libmbfl/filters/mbfilter_iso2022_jp_ms.c
libmbfl/filters/mbfilter_iso2022jp_mobile.c
libmbfl/filters/mbfilter_iso2022_kr.c
libmbfl/filters/mbfilter_jis.c
libmbfl/filters/mbfilter_qprint.c
libmbfl/filters/mbfilter_singlebyte.c
libmbfl/filters/mbfilter_sjis.c
libmbfl/filters/mbfilter_sjis_2004.c
libmbfl/filters/mbfilter_ucs2.c
libmbfl/filters/mbfilter_ucs4.c
libmbfl/filters/mbfilter_uhc.c
libmbfl/filters/mbfilter_utf16.c
libmbfl/filters/mbfilter_utf32.c
libmbfl/filters/mbfilter_utf7.c

View file

@ -17,17 +17,13 @@ if (PHP_MBSTRING != "no") {
"ext\\mbstring\\libmbfl\\config.h", true);
ADD_SOURCES("ext/mbstring/libmbfl/filters", "html_entities.c \
mbfilter_7bit.c mbfilter_base64.c mbfilter_big5.c mbfilter_cp932.c \
mbfilter_cp936.c mbfilter_cp51932.c mbfilter_euc_cn.c \
mbfilter_euc_jp.c mbfilter_euc_jp_win.c mbfilter_euc_kr.c \
mbfilter_euc_tw.c mbfilter_htmlent.c mbfilter_hz.c mbfilter_iso2022_kr.c \
mbfilter_jis.c mbfilter_iso2022_jp_ms.c mbfilter_gb18030.c \
mbfilter_sjis_2004.c mbfilter_qprint.c mbfilter_sjis.c mbfilter_ucs2.c \
mbfilter_ucs4.c mbfilter_uhc.c mbfilter_utf16.c mbfilter_utf32.c \
mbfilter_7bit.c mbfilter_base64.c \
mbfilter_cjk.c mbfilter_htmlent.c \
mbfilter_qprint.c mbfilter_ucs2.c \
mbfilter_ucs4.c mbfilter_utf16.c mbfilter_utf32.c \
mbfilter_utf7.c mbfilter_utf7imap.c mbfilter_utf8.c \
mbfilter_utf8_mobile.c mbfilter_uuencode.c \
mbfilter_cp5022x.c \
mbfilter_iso2022jp_mobile.c mbfilter_singlebyte.c", "mbstring");
mbfilter_singlebyte.c", "mbstring");
ADD_SOURCES("ext/mbstring/libmbfl/mbfl", "mbfilter.c mbfilter_8bit.c \
mbfilter_pass.c mbfilter_wchar.c mbfl_convert.c mbfl_encoding.c \

View file

@ -1,660 +0,0 @@
/*
* "streamable kanji code filter and converter"
* Copyright (c) 1998-2002 HappySize, Inc. All rights reserved.
*
* LICENSE NOTICES
*
* This file is part of "streamable kanji code filter and converter",
* which is distributed under the terms of GNU Lesser General Public
* License (version 2) as published by the Free Software Foundation.
*
* This software is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with "streamable kanji code filter and converter";
* if not, write to the Free Software Foundation, Inc., 59 Temple Place,
* Suite 330, Boston, MA 02111-1307 USA
*
* The author of this file: Rui Hirokawa <hirokawa@php.net>
*
*/
/*
* The source code included in this file was separated from mbfilter_tw.c
* by moriyoshi koizumi <moriyoshi@php.net> on 4 dec 2002.
*
*/
#include "mbfilter.h"
#include "mbfilter_big5.h"
#include "unicode_table_big5.h"
/* Forward declarations of file-local functions which are referenced by the
 * encoding descriptors and filter vtables before their definitions appear. */
/* Flush handler for the byte-at-a-time Big5/CP950 -> wchar filter */
static int mbfl_filt_conv_big5_wchar_flush(mbfl_convert_filter *filter);
/* Buffer-based converters: Big5 bytes <-> array of uint32_t code points */
static size_t mb_big5_to_wchar(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state);
static void mb_wchar_to_big5(uint32_t *in, size_t len, mb_convert_buf *buf, bool end);
/* Buffer-based converters: CP950 bytes <-> array of uint32_t code points */
static size_t mb_cp950_to_wchar(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state);
static void mb_wchar_to_cp950(uint32_t *in, size_t len, mb_convert_buf *buf, bool end);
/*
 * 256-entry byte-length table shared by the Big5 and CP950 encoding descriptors.
 * Entries 0x81-0xFE hold 2; every other entry (0x00-0x80 and 0xFF) holds 1.
 * Presumably indexed by the first byte of a character to obtain the character's
 * length in bytes -- confirm against the users of mbfl_encoding's length table.
 */
static const unsigned char mblen_table_big5[] = { /* 0x81-0xFE */
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1
};
/* NULL-terminated list of alternative names accepted for Big5 */
static const char *mbfl_encoding_big5_aliases[] = {"CN-BIG5", "BIG-FIVE", "BIGFIVE", NULL};
/*
 * Encoding descriptor for Big5. It ties together the encoding id, the names
 * "BIG-5" / "BIG5", the alias list, the byte-length table, a flags value, the
 * two byte-at-a-time filter vtables and the two buffer-based converters.
 * NOTE(review): the initializer is positional; the exact field names and the
 * meaning of the trailing NULL are defined by mbfl_encoding, which is not
 * visible here.
 */
const mbfl_encoding mbfl_encoding_big5 = {
mbfl_no_encoding_big5,
"BIG-5",
"BIG5",
mbfl_encoding_big5_aliases,
mblen_table_big5,
MBFL_ENCTYPE_GL_UNSAFE,
&vtbl_big5_wchar,
&vtbl_wchar_big5,
mb_big5_to_wchar,
mb_wchar_to_big5,
NULL
};
/*
 * Encoding descriptor for CP950. Same layout as the Big5 descriptor: it reuses
 * the Big5 byte-length table and the "BIG5" second name, has no alias list
 * (NULL), and points at the CP950-specific vtables and buffer converters.
 * NOTE(review): the initializer is positional; field names are defined by
 * mbfl_encoding, which is not visible here.
 */
const mbfl_encoding mbfl_encoding_cp950 = {
mbfl_no_encoding_cp950,
"CP950",
"BIG5",
NULL,
mblen_table_big5,
MBFL_ENCTYPE_GL_UNSAFE,
&vtbl_cp950_wchar,
&vtbl_wchar_cp950,
mb_cp950_to_wchar,
mb_wchar_to_cp950,
NULL
};
/*
 * Filter vtable for Big5 -> wchar, using the byte-at-a-time filter function
 * and its dedicated flush function (which reports a truncated character).
 * NOTE(review): presumably the fields are: source encoding id, destination
 * encoding id, constructor, destructor, filter function, flush function, copy
 * function -- confirm against struct mbfl_convert_vtbl.
 */
const struct mbfl_convert_vtbl vtbl_big5_wchar = {
mbfl_no_encoding_big5,
mbfl_no_encoding_wchar,
mbfl_filt_conv_common_ctor,
NULL,
mbfl_filt_conv_big5_wchar,
mbfl_filt_conv_big5_wchar_flush,
NULL,
};
/* Filter vtable for wchar -> Big5; uses the common flush function */
const struct mbfl_convert_vtbl vtbl_wchar_big5 = {
mbfl_no_encoding_wchar,
mbfl_no_encoding_big5,
mbfl_filt_conv_common_ctor,
NULL,
mbfl_filt_conv_wchar_big5,
mbfl_filt_conv_common_flush,
NULL
};
/*
 * Filter vtable for CP950 -> wchar. It deliberately points at the same filter
 * and flush functions as the Big5 vtable; the filter function checks
 * filter->from->no_encoding at run time to enable the CP950-specific paths.
 */
const struct mbfl_convert_vtbl vtbl_cp950_wchar = {
mbfl_no_encoding_cp950,
mbfl_no_encoding_wchar,
mbfl_filt_conv_common_ctor,
NULL,
mbfl_filt_conv_big5_wchar,
mbfl_filt_conv_big5_wchar_flush,
NULL,
};
/*
 * Filter vtable for wchar -> CP950. Shares mbfl_filt_conv_wchar_big5 with
 * Big5; that function checks filter->to->no_encoding to apply CP950 overrides.
 */
const struct mbfl_convert_vtbl vtbl_wchar_cp950 = {
mbfl_no_encoding_wchar,
mbfl_no_encoding_cp950,
mbfl_filt_conv_common_ctor,
NULL,
mbfl_filt_conv_wchar_big5,
mbfl_filt_conv_common_flush,
NULL,
};
/* Evaluate `statement`; if its value is negative, return -1 from the enclosing
 * function. Wrapped in do/while(0) so it behaves as a single statement. */
#define CK(statement) do { if ((statement) < 0) return (-1); } while (0)
/*
 * Ranges which CP950 maps to the Unicode Private Use Area.
 *
 * Each row is:
 *   { first code point, last code point, first CP950 byte pair, last CP950 byte pair }
 * where a byte pair is stored as (lead byte << 8) | trail byte.
 *
 * Rows whose first byte pair has a trail byte of 0x40 use both trail byte
 * ranges (0x40-0x7E and 0xA1-0xFE); the remaining row (starting at 0xC6A1) is
 * treated by the converters as a plain linear offset.
 *
 * The table is only ever read, so it is declared const.
 */
/* 63 + 94 = 157 or 94 */
static const unsigned short cp950_pua_tbl[][4] = {
	{0xe000, 0xe310, 0xfa40, 0xfefe},
	{0xe311, 0xeeb7, 0x8e40, 0xa0fe},
	{0xeeb8, 0xf6b0, 0x8140, 0x8dfe},
	{0xf6b1, 0xf70e, 0xc6a1, 0xc6fe},
	{0xf70f, 0xf848, 0xc740, 0xc8fe},
};
/*
 * Tell whether the byte pair (c1 = first byte, c = second byte) falls inside
 * one of the CP950 ranges which are mapped to the Private Use Area.
 *
 * Returns 1 if it does, 0 otherwise.
 */
static inline int is_in_cp950_pua(int c1, int c)
{
	const int upper_trail = (c >= 0xa1 && c <= 0xfe);

	if (c1 == 0xc6) {
		/* For lead byte 0xC6, only the upper trail byte range qualifies */
		return upper_trail;
	}

	if ((c1 >= 0x81 && c1 <= 0xa0) || c1 == 0xc7 || c1 == 0xc8 || (c1 >= 0xfa && c1 <= 0xfe)) {
		/* Any valid trail byte (lower or upper range) qualifies */
		return (c >= 0x40 && c <= 0x7e) || upper_trail;
	}

	return 0;
}
/*
 * Byte-at-a-time decoder shared by Big5 and CP950 (selected at run time via
 * filter->from->no_encoding).
 *
 * `c` is the next input byte. filter->status is 0 when a new character is
 * expected and 1 when a lead byte has been stored in filter->cache and the
 * trail byte is awaited. Decoded code points (or MBFL_BAD_INPUT) are passed to
 * filter->output_function.
 *
 * Returns 0, or -1 if the output function reported failure.
 */
int mbfl_filt_conv_big5_wchar(int c, mbfl_convert_filter *filter)
{
	int lead, w;

	switch (filter->status) {
	case 0: {
		int starts_dbcs;

		if (c >= 0 && c < 0x80) { /* latin */
			CK((*filter->output_function)(c, filter->data));
			break;
		}

		/* The set of acceptable lead bytes depends on the encoding */
		if (filter->from->no_encoding == mbfl_no_encoding_cp950) {
			starts_dbcs = (c > 0x80 && c <= 0xFE);
		} else {
			starts_dbcs = (c > 0xA0 && c <= 0xF9 && c != 0xC8);
		}

		if (starts_dbcs) {
			filter->cache = c;
			filter->status = 1;
		} else {
			CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data));
		}
		break;
	}

	case 1: /* dbcs second byte */
		filter->status = 0;
		lead = filter->cache;

		if (!((c > 0x3f && c < 0x7f) || (c > 0xa0 && c < 0xff))) {
			/* Not a valid trail byte */
			CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data));
			break;
		}

		/* 157 table positions per lead byte: lower trail range first, then upper */
		w = (lead - 0xa1) * 157 + ((c < 0x7f) ? (c - 0x40) : (c - 0xa1 + 0x3f));
		w = (w >= 0 && w < big5_ucs_table_size) ? big5_ucs_table[w] : 0;

		if (filter->from->no_encoding == mbfl_no_encoding_cp950) {
			if (is_in_cp950_pua(lead, c)) {
				/* PUA for CP950 */
				const int pair = (lead << 8) | c;
				int row = 0;

				while (row < sizeof(cp950_pua_tbl) / (sizeof(unsigned short)*4)
						&& !(pair >= cp950_pua_tbl[row][2] && pair <= cp950_pua_tbl[row][3])) {
					row++;
				}

				if ((cp950_pua_tbl[row][2] & 0xff) == 0x40) {
					w = 157*(lead - (cp950_pua_tbl[row][2]>>8)) + c - (c >= 0xa1 ? 0x62 : 0x40) + cp950_pua_tbl[row][0];
				} else {
					w = pair - cp950_pua_tbl[row][2] + cp950_pua_tbl[row][0];
				}
			} else if (lead == 0xA1) {
				/* Byte pairs which CP950 maps differently from the Big5 table */
				switch (c) {
				case 0x45: w = 0x2027; break;
				case 0x4E: w = 0xFE51; break;
				case 0x5A: w = 0x2574; break;
				case 0xC2: w = 0x00AF; break;
				case 0xC3: w = 0xFFE3; break;
				case 0xC5: w = 0x02CD; break;
				case 0xE3: w = 0xFF5E; break;
				case 0xF2: w = 0x2295; break;
				case 0xF3: w = 0x2299; break;
				case 0xFE: w = 0xFF0F; break;
				default: break;
				}
			} else if (lead == 0xA2) {
				switch (c) {
				case 0x40: w = 0xFF3C; break;
				case 0x41: w = 0x2215; break;
				case 0x42: w = 0xFE68; break;
				case 0x46: w = 0xFFE0; break;
				case 0x47: w = 0xFFE1; break;
				case 0xCC: w = 0x5341; break;
				case 0xCE: w = 0x5345; break;
				default: break;
				}
			}
		}

		if (w <= 0) {
			w = MBFL_BAD_INPUT;
		}
		CK((*filter->output_function)(w, filter->data));
		break;

	EMPTY_SWITCH_DEFAULT_CASE();
	}

	return 0;
}
/*
 * Flush handler for the Big5/CP950 -> wchar filter. If a lead byte is still
 * pending (status 1), the input ended in the middle of a 2-byte character, so
 * MBFL_BAD_INPUT is emitted. Afterwards the filter's flush function, if any,
 * is invoked.
 *
 * Returns 0, or -1 if the output function reported failure.
 */
static int mbfl_filt_conv_big5_wchar_flush(mbfl_convert_filter *filter)
{
	const int lead_pending = (filter->status == 1);

	if (lead_pending) {
		filter->status = 0;
		CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data));
	}

	if (!filter->flush_function) {
		return 0;
	}
	(*filter->flush_function)(filter->data);
	return 0;
}
/*
 * Byte-at-a-time encoder shared by Big5 and CP950 (selected at run time via
 * filter->to->no_encoding).
 *
 * `c` is the code point to encode. The resulting one or two bytes are passed
 * to filter->output_function; a code point with no mapping is handed to
 * mbfl_filt_conv_illegal_output.
 *
 * Returns 0, or -1 if an output call reported failure.
 */
int mbfl_filt_conv_wchar_big5(int c, mbfl_convert_filter *filter)
{
	int s = 0;

	/* Look the code point up in whichever conversion table covers it */
	if (c >= ucs_a1_big5_table_min && c < ucs_a1_big5_table_max) {
		s = ucs_a1_big5_table[c - ucs_a1_big5_table_min];
	} else if (c >= ucs_a2_big5_table_min && c < ucs_a2_big5_table_max) {
		s = ucs_a2_big5_table[c - ucs_a2_big5_table_min];
	} else if (c >= ucs_a3_big5_table_min && c < ucs_a3_big5_table_max) {
		s = ucs_a3_big5_table[c - ucs_a3_big5_table_min];
	} else if (c >= ucs_i_big5_table_min && c < ucs_i_big5_table_max) {
		s = ucs_i_big5_table[c - ucs_i_big5_table_min];
	} else if (c >= ucs_r1_big5_table_min && c < ucs_r1_big5_table_max) {
		s = ucs_r1_big5_table[c - ucs_r1_big5_table_min];
	} else if (c >= ucs_r2_big5_table_min && c < ucs_r2_big5_table_max) {
		s = ucs_r2_big5_table[c - ucs_r2_big5_table_min];
	}

	if (filter->to->no_encoding == mbfl_no_encoding_cp950) {
		if (c >= 0xe000 && c <= 0xf848) { /* PUA for CP950 */
			int row = 0;

			/* Find the first row whose last code point is not below c */
			while (row < sizeof(cp950_pua_tbl) / (sizeof(unsigned short)*4) && c > cp950_pua_tbl[row][1]) {
				row++;
			}

			int offset = c - cp950_pua_tbl[row][0];
			if ((cp950_pua_tbl[row][2] & 0xff) == 0x40) {
				/* Row spans both trail byte ranges: 157 positions per lead byte */
				int first_lead = cp950_pua_tbl[row][2] >> 8;
				int trail = offset % 157;
				s = ((offset / 157) + first_lead) << 8;
				s |= trail + (trail >= 0x3f ? 0x62 : 0x40);
			} else {
				s = offset + cp950_pua_tbl[row][2];
			}
		} else if (c == 0x00A2 || c == 0x00A3 || c == 0x0401
				|| (c >= 0x0414 && c <= 0x041C) || (c >= 0x0423 && c <= 0x044F)
				|| c == 0x0451 || c == 0x2022 || c == 0x203E || c == 0x223C
				|| (c >= 0x2460 && c <= 0x247D) || c == 0x2609 || c == 0x2641
				|| c == 0x3005 || (c >= 0x302A && c <= 0x30FF) || c == 0xFF64) {
			/* Code points present in the Big5 tables but treated as unmapped for CP950 */
			s = 0;
		} else {
			/* Code points which CP950 encodes differently from the Big5 tables */
			switch (c) {
			case 0x00AF: s = 0xA1C2; break;
			case 0x02CD: s = 0xA1C5; break;
			case 0x2027: s = 0xA145; break;
			case 0x2215: s = 0xA241; break;
			case 0x2295: s = 0xA1F2; break;
			case 0x2299: s = 0xA1F3; break;
			case 0x2574: s = 0xA15A; break;
			case 0xFE51: s = 0xA14E; break;
			case 0xFE68: s = 0xA242; break;
			case 0xFF3C: s = 0xA240; break;
			case 0xFF5E: s = 0xA1E3; break;
			case 0xFFE0: s = 0xA246; break;
			case 0xFFE1: s = 0xA247; break;
			case 0xFFE3: s = 0xA1C3; break;
			case 0xFF0F: s = 0xA1FE; break;
			default: break;
			}
		}
	}

	if (s <= 0) {
		if (c != 0) {
			/* No mapping found */
			CK(mbfl_filt_conv_illegal_output(c, filter));
			return 0;
		}
		/* Code point zero is passed through as a zero byte */
		s = 0;
	}

	if (s <= 0x80) { /* latin */
		CK((*filter->output_function)(s, filter->data));
	} else {
		CK((*filter->output_function)((s >> 8) & 0xff, filter->data));
		CK((*filter->output_function)(s & 0xff, filter->data));
	}

	return 0;
}
/*
 * Decode Big5 bytes into code points.
 *
 * in / in_len : pointer to, and remaining length of, the input; both are
 *               advanced past the bytes which were consumed
 * buf/bufsize : output array and its capacity in code points
 * state       : not used by this function
 *
 * Invalid or truncated sequences produce MBFL_BAD_INPUT.
 * Returns the number of code points written to buf.
 */
static size_t mb_big5_to_wchar(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state)
{
	if (*in_len == 0) {
		/* Nothing to decode. Returning here avoids the `e--` below forming a
		 * pointer before the start of the input buffer, which is undefined
		 * pointer arithmetic; *in and *in_len keep the values the code below
		 * would have stored. */
		return 0;
	}

	unsigned char *p = *in, *e = p + *in_len;
	uint32_t *out = buf, *limit = buf + bufsize;

	e--; /* Stop the main loop 1 byte short of the end of the input */

	while (p < e && out < limit) {
		unsigned char c = *p++;

		if (c <= 0x7F) {
			*out++ = c;
		} else if (c > 0xA0 && c <= 0xF9) {
			/* We don't need to check p < e here; it's not possible that this pointer dereference
			 * will be outside the input string, because of e-- above */
			unsigned char c2 = *p++;

			if ((c2 >= 0x40 && c2 <= 0x7E) || (c2 >= 0xA1 && c2 <= 0xFE)) {
				/* 157 table positions per lead byte: lower trail range first, then upper */
				unsigned int w = (c - 0xA1)*157 + c2 - ((c2 <= 0x7E) ? 0x40 : 0xA1 - 0x3F);
				ZEND_ASSERT(w < big5_ucs_table_size);
				w = big5_ucs_table[w];
				if (!w) {
					if (c == 0xC8) {
						/* Step back so that the second byte is decoded again on its own */
						p--;
					}
					w = MBFL_BAD_INPUT;
				}
				*out++ = w;
			} else {
				*out++ = MBFL_BAD_INPUT;
			}
		} else {
			*out++ = MBFL_BAD_INPUT;
		}
	}

	/* Finish up last byte of input string if there is one */
	if (p == e && out < limit) {
		unsigned char c = *p++;
		*out++ = (c <= 0x7F) ? c : MBFL_BAD_INPUT;
	}

	/* e was moved back by one above, hence the +1 */
	*in_len = e - p + 1;
	*in = p;
	return out - buf;
}
/*
 * Encode `len` code points from `in` as Big5 and append them to `buf`.
 *
 * Code points without a mapping are reported through MB_CONVERT_ERROR; a zero
 * code point is written as a single zero byte. `end` is not used here.
 */
static void mb_wchar_to_big5(uint32_t *in, size_t len, mb_convert_buf *buf, bool end)
{
	unsigned char *out, *limit;
	MB_CONVERT_BUF_LOAD(buf, out, limit);
	/* Reserve one byte per code point up front; 2-byte outputs reserve more below */
	MB_CONVERT_BUF_ENSURE(buf, out, limit, len);

	while (len--) {
		uint32_t w = *in++;
		unsigned int s = 0;

		if (w >= ucs_a1_big5_table_min && w < ucs_a1_big5_table_max) {
			s = ucs_a1_big5_table[w - ucs_a1_big5_table_min];
		} else if (w >= ucs_a2_big5_table_min && w < ucs_a2_big5_table_max) {
			s = ucs_a2_big5_table[w - ucs_a2_big5_table_min];
		} else if (w >= ucs_a3_big5_table_min && w < ucs_a3_big5_table_max) {
			s = ucs_a3_big5_table[w - ucs_a3_big5_table_min];
		} else if (w >= ucs_i_big5_table_min && w < ucs_i_big5_table_max) {
			s = ucs_i_big5_table[w - ucs_i_big5_table_min];
		} else if (w >= ucs_r1_big5_table_min && w < ucs_r1_big5_table_max) {
			s = ucs_r1_big5_table[w - ucs_r1_big5_table_min];
		} else if (w >= ucs_r2_big5_table_min && w < ucs_r2_big5_table_max) {
			s = ucs_r2_big5_table[w - ucs_r2_big5_table_min];
		}

		if (s > 0x80) {
			/* Double-byte output: this character plus the `len` still to come */
			MB_CONVERT_BUF_ENSURE(buf, out, limit, len + 2);
			out = mb_convert_buf_add2(out, (s >> 8) & 0xFF, s & 0xFF);
		} else if (s) {
			out = mb_convert_buf_add(out, s);
		} else if (w == 0) {
			out = mb_convert_buf_add(out, 0);
		} else {
			MB_CONVERT_ERROR(buf, out, limit, w, mb_wchar_to_big5);
			MB_CONVERT_BUF_ENSURE(buf, out, limit, len);
		}
	}

	MB_CONVERT_BUF_STORE(buf, out, limit);
}
/*
 * Decode CP950 bytes into code points.
 *
 * in / in_len : pointer to, and remaining length of, the input; both are
 *               advanced past the bytes which were consumed
 * buf/bufsize : output array and its capacity in code points
 * state       : not used by this function
 *
 * Invalid or truncated sequences produce MBFL_BAD_INPUT.
 * Returns the number of code points written to buf.
 */
static size_t mb_cp950_to_wchar(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state)
{
	unsigned char *p = *in, *e = p + *in_len;
	uint32_t *out = buf, *limit = buf + bufsize;

	while (p < e && out < limit) {
		unsigned char c = *p++;

		if (c <= 0x7F) {
			*out++ = c;
			continue;
		}

		if (!(c > 0x80 && c <= 0xFE && p < e)) {
			/* Not a lead byte, or a lead byte with nothing after it */
			*out++ = MBFL_BAD_INPUT;
			continue;
		}

		unsigned char c2 = *p++;
		if (!((c2 >= 0x40 && c2 <= 0x7E) || (c2 >= 0xA1 && c2 <= 0xFE))) {
			*out++ = MBFL_BAD_INPUT;
			continue;
		}

		/* Start from the Big5 table, then apply the CP950-specific differences */
		unsigned int w = ((c - 0xA1)*157) + c2 - ((c2 <= 0x7E) ? 0x40 : 0xA1 - 0x3F);
		w = (w < big5_ucs_table_size) ? big5_ucs_table[w] : 0;

		if (is_in_cp950_pua(c, c2)) {
			/* PUA for CP950 */
			unsigned int s = (c << 8) | c2;
			int k = 0;

			while (k < sizeof(cp950_pua_tbl) / (sizeof(unsigned short)*4)
					&& !(s >= cp950_pua_tbl[k][2] && s <= cp950_pua_tbl[k][3])) {
				k++;
			}

			if ((cp950_pua_tbl[k][2] & 0xFF) == 0x40) {
				w = 157*(c - (cp950_pua_tbl[k][2] >> 8)) + c2 - (c2 >= 0xA1 ? 0x62 : 0x40) + cp950_pua_tbl[k][0];
			} else {
				w = s - cp950_pua_tbl[k][2] + cp950_pua_tbl[k][0];
			}
		} else if (c == 0xA1) {
			switch (c2) {
			case 0x45: w = 0x2027; break;
			case 0x4E: w = 0xFE51; break;
			case 0x5A: w = 0x2574; break;
			case 0xC2: w = 0x00AF; break;
			case 0xC3: w = 0xFFE3; break;
			case 0xC5: w = 0x02CD; break;
			case 0xE3: w = 0xFF5E; break;
			case 0xF2: w = 0x2295; break;
			case 0xF3: w = 0x2299; break;
			case 0xFE: w = 0xFF0F; break;
			default: break;
			}
		} else if (c == 0xA2) {
			switch (c2) {
			case 0x40: w = 0xFF3C; break;
			case 0x41: w = 0x2215; break;
			case 0x42: w = 0xFE68; break;
			case 0x46: w = 0xFFE0; break;
			case 0x47: w = 0xFFE1; break;
			case 0xCC: w = 0x5341; break;
			case 0xCE: w = 0x5345; break;
			default: break;
			}
		}

		if (!w) {
			w = MBFL_BAD_INPUT;
		}
		*out++ = w;
	}

	*in_len = e - p;
	*in = p;
	return out - buf;
}
/*
 * Encode `len` code points from `in` as CP950 and append them to `buf`.
 *
 * The Big5 tables are consulted first; the result is then overridden for the
 * Private Use Area ranges and for the code points which CP950 treats
 * differently from those tables. Code points without a mapping are reported
 * through MB_CONVERT_ERROR; a zero code point is written as a single zero
 * byte. `end` is not used here.
 */
static void mb_wchar_to_cp950(uint32_t *in, size_t len, mb_convert_buf *buf, bool end)
{
	unsigned char *out, *limit;
	MB_CONVERT_BUF_LOAD(buf, out, limit);
	/* Reserve one byte per code point up front; 2-byte outputs reserve more below */
	MB_CONVERT_BUF_ENSURE(buf, out, limit, len);

	while (len--) {
		uint32_t w = *in++;
		unsigned int s = 0;

		if (w >= ucs_a1_big5_table_min && w < ucs_a1_big5_table_max) {
			s = ucs_a1_big5_table[w - ucs_a1_big5_table_min];
		} else if (w >= ucs_a2_big5_table_min && w < ucs_a2_big5_table_max) {
			s = ucs_a2_big5_table[w - ucs_a2_big5_table_min];
		} else if (w >= ucs_a3_big5_table_min && w < ucs_a3_big5_table_max) {
			s = ucs_a3_big5_table[w - ucs_a3_big5_table_min];
		} else if (w >= ucs_i_big5_table_min && w < ucs_i_big5_table_max) {
			s = ucs_i_big5_table[w - ucs_i_big5_table_min];
		} else if (w >= ucs_r1_big5_table_min && w < ucs_r1_big5_table_max) {
			s = ucs_r1_big5_table[w - ucs_r1_big5_table_min];
		} else if (w >= ucs_r2_big5_table_min && w < ucs_r2_big5_table_max) {
			s = ucs_r2_big5_table[w - ucs_r2_big5_table_min];
		}

		if (w >= 0xE000 && w <= 0xF848) {
			/* PUA for CP950: find the first row whose last code point is not below w */
			int k;
			for (k = 0; k < sizeof(cp950_pua_tbl) / (sizeof(unsigned short)*4); k++) {
				if (w <= cp950_pua_tbl[k][1]) {
					break;
				}
			}

			int c1 = w - cp950_pua_tbl[k][0];
			if ((cp950_pua_tbl[k][2] & 0xFF) == 0x40) {
				/* Row spans both trail byte ranges: 157 positions per lead byte */
				int c2 = cp950_pua_tbl[k][2] >> 8;
				s = ((c1 / 157) + c2) << 8;
				c1 %= 157;
				s |= c1 + (c1 >= 0x3F ? 0x62 : 0x40);
			} else {
				s = c1 + cp950_pua_tbl[k][2];
			}
		} else if (w == 0xA2 || w == 0xA3 || w == 0x401 || (w >= 0x414 && w <= 0x41C) || (w >= 0x423 && w <= 0x44F) || w == 0x451 || w == 0x2022 || w == 0x203E || w == 0x223C || (w >= 0x2460 && w <= 0x247D) || w == 0x2609 || w == 0x2641 || w == 0x3005 || (w >= 0x302A && w <= 0x30FF) || w == 0xFF64) {
			/* Present in the Big5 tables but treated as unmapped for CP950 */
			s = 0;
		} else if (w == 0xAF) {
			s = 0xA1C2;
		} else if (w == 0x2CD) {
			s = 0xA1C5;
		} else if (w == 0x2027) {
			s = 0xA145;
		} else if (w == 0x2215) {
			s = 0xA241;
		} else if (w == 0x2295) {
			s = 0xA1F2;
		} else if (w == 0x2299) {
			s = 0xA1F3;
		} else if (w == 0x2574) {
			s = 0xA15A;
		} else if (w == 0xFE51) {
			s = 0xA14E;
		} else if (w == 0xFE68) {
			s = 0xA242;
		} else if (w == 0xFF3C) {
			s = 0xA240;
		} else if (w == 0xFF5E) {
			s = 0xA1E3;
		} else if (w == 0xFFE0) {
			s = 0xA246;
		} else if (w == 0xFFE1) {
			s = 0xA247;
		} else if (w == 0xFFE3) {
			s = 0xA1C3;
		} else if (w == 0xFF0F) {
			s = 0xA1FE;
		}

		if (!s) {
			if (w == 0) {
				out = mb_convert_buf_add(out, 0);
			} else {
				/* Pass this function (not the plain Big5 encoder) as the
				 * converter, so that whatever the error handler emits is
				 * encoded with the CP950 rules */
				MB_CONVERT_ERROR(buf, out, limit, w, mb_wchar_to_cp950);
				MB_CONVERT_BUF_ENSURE(buf, out, limit, len);
			}
		} else if (s <= 0x80) {
			out = mb_convert_buf_add(out, s);
		} else {
			/* Double-byte output: this character plus the `len` still to come */
			MB_CONVERT_BUF_ENSURE(buf, out, limit, len + 2);
			out = mb_convert_buf_add2(out, (s >> 8) & 0xFF, s & 0xFF);
		}
	}

	MB_CONVERT_BUF_STORE(buf, out, limit);
}

View file

@ -1,46 +0,0 @@
/*
* "streamable kanji code filter and converter"
* Copyright (c) 1998-2002 HappySize, Inc. All rights reserved.
*
* LICENSE NOTICES
*
* This file is part of "streamable kanji code filter and converter",
* which is distributed under the terms of GNU Lesser General Public
* License (version 2) as published by the Free Software Foundation.
*
* This software is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with "streamable kanji code filter and converter";
* if not, write to the Free Software Foundation, Inc., 59 Temple Place,
* Suite 330, Boston, MA 02111-1307 USA
*
* The author of this file: Rui Hirokawa <hirokawa@php.net>
*
*/
/*
* The source code included in this file was separated from mbfilter_tw.h
* by moriyoshi koizumi <moriyoshi@php.net> on 4 dec 2002.
*
*/
/* Public declarations for the Big5 and CP950 encodings */
#ifndef MBFL_MBFILTER_BIG5_H
#define MBFL_MBFILTER_BIG5_H
#include "mbfilter.h"
/* Big5: encoding descriptor and the two byte-at-a-time filter vtables */
extern const mbfl_encoding mbfl_encoding_big5;
extern const struct mbfl_convert_vtbl vtbl_big5_wchar;
extern const struct mbfl_convert_vtbl vtbl_wchar_big5;
/* CP950: encoding descriptor and the two byte-at-a-time filter vtables */
extern const mbfl_encoding mbfl_encoding_cp950;
extern const struct mbfl_convert_vtbl vtbl_cp950_wchar;
extern const struct mbfl_convert_vtbl vtbl_wchar_cp950;
/* Filter functions; each processes one input unit `c` per call */
int mbfl_filt_conv_big5_wchar(int c, mbfl_convert_filter *filter);
int mbfl_filt_conv_wchar_big5(int c, mbfl_convert_filter *filter);
#endif /* MBFL_MBFILTER_BIG5_H */

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,48 @@
/*
 * Declarations for the CJK text encodings. Only the encoding descriptors and
 * the emoji helper functions are declared here; no filter vtables or filter
 * functions are exposed by this header.
 */
#ifndef MBFL_MBFILTER_CJK_H
#define MBFL_MBFILTER_CJK_H
#include "mbfilter.h"
/* NOTE(review): the groupings below are inferred from the identifier names only */
/* JIS / ISO-2022-JP variants and CP5022x */
extern const mbfl_encoding mbfl_encoding_jis;
extern const mbfl_encoding mbfl_encoding_2022jp;
extern const mbfl_encoding mbfl_encoding_2022jp_kddi;
extern const mbfl_encoding mbfl_encoding_2022jpms;
extern const mbfl_encoding mbfl_encoding_2022jp_2004;
extern const mbfl_encoding mbfl_encoding_cp50220;
extern const mbfl_encoding mbfl_encoding_cp50221;
extern const mbfl_encoding mbfl_encoding_cp50222;
extern const mbfl_encoding mbfl_encoding_2022kr;
/* Shift-JIS variants */
extern const mbfl_encoding mbfl_encoding_sjis;
extern const mbfl_encoding mbfl_encoding_sjis_mac;
extern const mbfl_encoding mbfl_encoding_sjis_docomo;
extern const mbfl_encoding mbfl_encoding_sjis_kddi;
extern const mbfl_encoding mbfl_encoding_sjis_sb;
extern const mbfl_encoding mbfl_encoding_sjis2004;
extern const mbfl_encoding mbfl_encoding_cp932;
extern const mbfl_encoding mbfl_encoding_sjiswin;
/* EUC variants */
extern const mbfl_encoding mbfl_encoding_euc_jp;
extern const mbfl_encoding mbfl_encoding_eucjp_win;
extern const mbfl_encoding mbfl_encoding_eucjp2004;
extern const mbfl_encoding mbfl_encoding_cp51932;
extern const mbfl_encoding mbfl_encoding_euc_cn;
extern const mbfl_encoding mbfl_encoding_euc_tw;
extern const mbfl_encoding mbfl_encoding_euc_kr;
/* Remaining encodings */
extern const mbfl_encoding mbfl_encoding_uhc;
extern const mbfl_encoding mbfl_encoding_gb18030;
extern const mbfl_encoding mbfl_encoding_cp936;
extern const mbfl_encoding mbfl_encoding_big5;
extern const mbfl_encoding mbfl_encoding_cp950;
extern const mbfl_encoding mbfl_encoding_hz;
/*
 * Emoji conversion helpers for the docomo / kddi / sb Shift-JIS variants.
 * NOTE(review): their definitions are not visible here; the meaning of the
 * `snd` and `s1` output parameters should be confirmed against them.
 */
int mbfilter_sjis_emoji_docomo2unicode(int s, int *snd);
int mbfilter_sjis_emoji_kddi2unicode(int s, int *snd);
int mbfilter_sjis_emoji_sb2unicode(int s, int *snd);
int mbfilter_unicode2sjis_emoji_docomo(int c, int *s1, mbfl_convert_filter *filter);
int mbfilter_unicode2sjis_emoji_kddi_sjis(int c, int *s1, mbfl_convert_filter *filter);
int mbfilter_unicode2sjis_emoji_sb(int c, int *s1, mbfl_convert_filter *filter);
#endif /* MBFL_MBFILTER_CJK_H */

File diff suppressed because it is too large Load diff

View file

@ -1,50 +0,0 @@
/*
* "streamable kanji code filter and converter"
* Copyright (c) 1998-2002 HappySize, Inc. All rights reserved.
*
* LICENSE NOTICES
*
* This file is part of "streamable kanji code filter and converter",
* which is distributed under the terms of GNU Lesser General Public
* License (version 2) as published by the Free Software Foundation.
*
* This software is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with "streamable kanji code filter and converter";
* if not, write to the Free Software Foundation, Inc., 59 Temple Place,
* Suite 330, Boston, MA 02111-1307 USA
*
* The author of this file:
*
*/
/*
* The source code included in this file was separated from mbfilter_ja.c
* by moriyoshi koizumi <moriyoshi@php.net> on 4 dec 2002.
*
*/
/* Public declarations for the CP50220, CP50221 and CP50222 encodings.
 * Note that the include guard is named after CP50221 and ends in a lowercase `_h`. */
#ifndef MBFL_MBFILTER_CP50221_h
#define MBFL_MBFILTER_CP50221_h
#include "mbfilter.h"
/* Encoding descriptors */
extern const mbfl_encoding mbfl_encoding_cp50220;
extern const mbfl_encoding mbfl_encoding_cp50221;
extern const mbfl_encoding mbfl_encoding_cp50222;
/* Byte-at-a-time filter vtables, one pair (to wchar / from wchar) per encoding */
extern const struct mbfl_convert_vtbl vtbl_cp50220_wchar;
extern const struct mbfl_convert_vtbl vtbl_wchar_cp50220;
extern const struct mbfl_convert_vtbl vtbl_cp50221_wchar;
extern const struct mbfl_convert_vtbl vtbl_wchar_cp50221;
extern const struct mbfl_convert_vtbl vtbl_cp50222_wchar;
extern const struct mbfl_convert_vtbl vtbl_wchar_cp50222;
/* Filter functions: a single decoder is declared for the whole CP5022x family,
 * and encoders are declared for CP50221 and CP50222 only */
int mbfl_filt_conv_cp5022x_wchar(int c, mbfl_convert_filter *filter);
int mbfl_filt_conv_wchar_cp50221(int c, mbfl_convert_filter *filter);
int mbfl_filt_conv_wchar_cp50222(int c, mbfl_convert_filter *filter);
#endif /* MBFL_MBFILTER_CP50221_h */

View file

@ -1,412 +0,0 @@
/*
* "streamable kanji code filter and converter"
* Copyright (c) 1998-2002 HappySize, Inc. All rights reserved.
*
* LICENSE NOTICES
*
* This file is part of "streamable kanji code filter and converter",
* which is distributed under the terms of GNU Lesser General Public
* License (version 2) as published by the Free Software Foundation.
*
* This software is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with "streamable kanji code filter and converter";
* if not, write to the Free Software Foundation, Inc., 59 Temple Place,
* Suite 330, Boston, MA 02111-1307 USA
*
* The author of this file:
*
*/
/*
* The source code included in this file was separated from mbfilter_ja.c
* by moriyoshi koizumi <moriyoshi@php.net> on 4 dec 2002.
*
*/
#include "mbfilter.h"
#include "mbfilter_cp51932.h"
#include "unicode_table_cp932_ext.h"
#include "unicode_table_jis.h"
#include "cp932_table.h"
/* Forward declarations of file-local functions which are referenced by the
 * encoding descriptor and filter vtables before their definitions appear. */
/* Flush handler for the byte-at-a-time CP51932 -> wchar filter */
static int mbfl_filt_conv_cp51932_wchar_flush(mbfl_convert_filter *filter);
/* Buffer-based converters: CP51932 bytes <-> array of uint32_t code points */
static size_t mb_cp51932_to_wchar(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state);
static void mb_wchar_to_cp51932(uint32_t *in, size_t len, mb_convert_buf *buf, bool end);
/*
 * 256-entry byte-length table used by the CP51932 encoding descriptor.
 * Entries 0x8E and 0xA1-0xFE hold 2; every other entry holds 1.
 * Presumably indexed by the first byte of a character to obtain the character's
 * length in bytes -- confirm against the users of mbfl_encoding's length table.
 */
static const unsigned char mblen_table_eucjp[] = { /* 0x8E, 0xA1-0xFE */
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1
};
/* NULL-terminated list of alternative names accepted for CP51932 */
static const char *mbfl_encoding_cp51932_aliases[] = {"cp51932", NULL};
/*
 * Encoding descriptor for CP51932. It ties together the encoding id, the name
 * "CP51932" (given twice), the alias list, the byte-length table, a flags
 * value of 0, the two byte-at-a-time filter vtables and the two buffer-based
 * converters.
 * NOTE(review): the initializer is positional; the exact field names and the
 * meaning of the trailing NULL are defined by mbfl_encoding, which is not
 * visible here.
 */
const mbfl_encoding mbfl_encoding_cp51932 = {
mbfl_no_encoding_cp51932,
"CP51932",
"CP51932",
mbfl_encoding_cp51932_aliases,
mblen_table_eucjp,
0,
&vtbl_cp51932_wchar,
&vtbl_wchar_cp51932,
mb_cp51932_to_wchar,
mb_wchar_to_cp51932,
NULL
};
/*
 * Filter vtable for CP51932 -> wchar, using the byte-at-a-time filter function
 * and its dedicated flush function (which reports a truncated character).
 * NOTE(review): presumably the fields are: source encoding id, destination
 * encoding id, constructor, destructor, filter function, flush function, copy
 * function -- confirm against struct mbfl_convert_vtbl.
 */
const struct mbfl_convert_vtbl vtbl_cp51932_wchar = {
mbfl_no_encoding_cp51932,
mbfl_no_encoding_wchar,
mbfl_filt_conv_common_ctor,
NULL,
mbfl_filt_conv_cp51932_wchar,
mbfl_filt_conv_cp51932_wchar_flush,
NULL,
};
/* Filter vtable for wchar -> CP51932; uses the common flush function */
const struct mbfl_convert_vtbl vtbl_wchar_cp51932 = {
mbfl_no_encoding_wchar,
mbfl_no_encoding_cp51932,
mbfl_filt_conv_common_ctor,
NULL,
mbfl_filt_conv_wchar_cp51932,
mbfl_filt_conv_common_flush,
NULL,
};
/* Evaluate `statement`; if its value is negative, return -1 from the enclosing
 * function. Wrapped in do/while(0) so it behaves as a single statement. */
#define CK(statement) do { if ((statement) < 0) return (-1); } while (0)
/*
* cp51932 => wchar
*/
int
mbfl_filt_conv_cp51932_wchar(int c, mbfl_convert_filter *filter)
{
	/*
	 * Byte-at-a-time decoder. filter->status is:
	 *   0 - expecting the first byte of a character
	 *   1 - first byte of a double-byte character is held in filter->cache
	 *   2 - 0x8E was seen; a kana byte is expected
	 * Decoded code points (or MBFL_BAD_INPUT) go to filter->output_function.
	 * Returns 0, or -1 if the output function reported failure.
	 */
	int lead, pos, w;

	switch (filter->status) {
	case 0:
		if (c >= 0 && c < 0x80) { /* latin */
			CK((*filter->output_function)(c, filter->data));
		} else if (c == 0x8e) { /* kana first char */
			filter->status = 2;
		} else if (c >= 0xA1 && c <= 0xFE) { /* CP932, first byte */
			filter->cache = c;
			filter->status = 1;
		} else {
			CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data));
		}
		break;

	case 1: /* got first half */
		filter->status = 0;
		lead = filter->cache;

		if (!(c > 0xa0 && c < 0xff)) {
			CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data));
			break;
		}

		/* Linear position in a 94 x 94 grid */
		pos = (lead - 0xa1)*94 + c - 0xa1;

		/* A few positions are mapped explicitly instead of via the tables */
		switch (pos) {
		case 31:  w = 0xff3c; break; /* FULLWIDTH REVERSE SOLIDUS */
		case 32:  w = 0xff5e; break; /* FULLWIDTH TILDE */
		case 33:  w = 0x2225; break; /* PARALLEL TO */
		case 60:  w = 0xff0d; break; /* FULLWIDTH HYPHEN-MINUS */
		case 80:  w = 0xffe0; break; /* FULLWIDTH CENT SIGN */
		case 81:  w = 0xffe1; break; /* FULLWIDTH POUND SIGN */
		case 137: w = 0xffe2; break; /* FULLWIDTH NOT SIGN */
		default:  w = 0; break;
		}

		if (w == 0) {
			if (pos >= cp932ext1_ucs_table_min && pos < cp932ext1_ucs_table_max) { /* vendor ext1 (13ku) */
				w = cp932ext1_ucs_table[pos - cp932ext1_ucs_table_min];
			} else if (pos >= 0 && pos < jisx0208_ucs_table_size) { /* X 0208 */
				w = jisx0208_ucs_table[pos];
			} else if (pos >= cp932ext2_ucs_table_min && pos < cp932ext2_ucs_table_max) { /* vendor ext2 (89ku - 92ku) */
				w = cp932ext2_ucs_table[pos - cp932ext2_ucs_table_min];
			}
		}

		if (w <= 0) {
			w = MBFL_BAD_INPUT;
		}
		CK((*filter->output_function)(w, filter->data));
		break;

	case 2: /* got 0x8e, X0201 kana */
		filter->status = 0;
		if (c > 0xa0 && c < 0xe0) {
			CK((*filter->output_function)(0xfec0 + c, filter->data));
		} else {
			CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data));
		}
		break;

	EMPTY_SWITCH_DEFAULT_CASE();
	}

	return 0;
}
/*
 * Flush handler for the CP51932 -> wchar filter. A non-zero status means the
 * input ended part-way through a character, so MBFL_BAD_INPUT is emitted (its
 * result is not checked). Afterwards the filter's flush function, if any, is
 * invoked. Always returns 0.
 */
static int mbfl_filt_conv_cp51932_wchar_flush(mbfl_convert_filter *filter)
{
	if (filter->status != 0) {
		/* Input string was truncated */
		(*filter->output_function)(MBFL_BAD_INPUT, filter->data);
		filter->status = 0;
	}

	if (!filter->flush_function) {
		return 0;
	}
	(*filter->flush_function)(filter->data);
	return 0;
}
/*
* wchar => cp51932
*/
int
mbfl_filt_conv_wchar_cp51932(int c, mbfl_convert_filter *filter)
{
	/*
	 * Byte-at-a-time encoder. `c` is the code point to encode; the resulting
	 * bytes go to filter->output_function and a code point with no mapping is
	 * handed to mbfl_filt_conv_illegal_output.
	 * Returns 0, or -1 if an output call reported failure.
	 */
	int s1 = 0;

	if (c >= ucs_a1_jis_table_min && c < ucs_a1_jis_table_max) {
		s1 = ucs_a1_jis_table[c - ucs_a1_jis_table_min];
	} else if (c >= ucs_a2_jis_table_min && c < ucs_a2_jis_table_max) {
		s1 = ucs_a2_jis_table[c - ucs_a2_jis_table_min];
	} else if (c >= ucs_i_jis_table_min && c < ucs_i_jis_table_max) {
		s1 = ucs_i_jis_table[c - ucs_i_jis_table_min];
	} else if (c >= ucs_r_jis_table_min && c < ucs_r_jis_table_max) {
		s1 = ucs_r_jis_table[c - ucs_r_jis_table_min];
	}

	if (s1 >= 0x8080) { /* we don't support JIS X0213 */
		s1 = -1;
	}

	if (s1 <= 0) {
		switch (c) {
		case 0xa5:   s1 = 0x216F; break; /* YEN SIGN -> FULLWIDTH YEN SIGN */
		case 0xff3c: s1 = 0x2140; break; /* FULLWIDTH REVERSE SOLIDUS */
		case 0x2225: s1 = 0x2142; break; /* PARALLEL TO */
		case 0xff0d: s1 = 0x215d; break; /* FULLWIDTH HYPHEN-MINUS */
		case 0xffe0: s1 = 0x2171; break; /* FULLWIDTH CENT SIGN */
		case 0xffe1: s1 = 0x2172; break; /* FULLWIDTH POUND SIGN */
		case 0xffe2: s1 = 0x224c; break; /* FULLWIDTH NOT SIGN */
		default: {
			int i, count;

			s1 = -1;

			/* Reverse lookup by linear search: CP932 vendor ext1 (13ku) */
			count = cp932ext1_ucs_table_max - cp932ext1_ucs_table_min;
			for (i = 0; i < count; i++) {
				if (c == cp932ext1_ucs_table[i]) {
					s1 = ((i/94 + 0x2d) << 8) + (i%94 + 0x21);
					break;
				}
			}

			if (s1 < 0) {
				/* Same again for the cp932ext2 table; rows start at 0x79 */
				count = cp932ext2_ucs_table_max - cp932ext2_ucs_table_min;
				for (i = 0; i < count; i++) {
					if (c == cp932ext2_ucs_table[i]) {
						s1 = ((i/94 + 0x79) << 8) +(i%94 + 0x21);
						break;
					}
				}
			}
			break;
		}
		}

		/* Code point zero always encodes as a zero byte, whatever the search found */
		if (c == 0) {
			s1 = 0;
		} else if (s1 <= 0) {
			s1 = -1;
		}
	}

	if (s1 < 0 || s1 >= 0x8080) {
		CK(mbfl_filt_conv_illegal_output(c, filter));
	} else if (s1 < 0x80) { /* latin */
		CK((*filter->output_function)(s1, filter->data));
	} else if (s1 < 0x100) { /* kana */
		CK((*filter->output_function)(0x8e, filter->data));
		CK((*filter->output_function)(s1, filter->data));
	} else { /* X 0208 */
		CK((*filter->output_function)(((s1 >> 8) & 0xff) | 0x80, filter->data));
		CK((*filter->output_function)((s1 & 0xff) | 0x80, filter->data));
	}

	return 0;
}
/*
 * Decode CP51932 bytes into codepoints.
 *
 * in, in_len  - input cursor and remaining byte count; both are updated on return
 * buf,bufsize - output array and its capacity in codepoints
 * state       - not used by this decoder
 *
 * Returns the number of codepoints written to buf. Invalid or truncated
 * sequences produce MBFL_BAD_INPUT.
 */
static size_t mb_cp51932_to_wchar(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state)
{
	unsigned char *src = *in, *src_end = src + *in_len;
	uint32_t *dst = buf, *dst_end = buf + bufsize;

	/* Each pass through the loop stores exactly one codepoint in *dst */
	for (; src < src_end && dst < dst_end; dst++) {
		unsigned char lead = *src++;

		if (lead < 0x80) {
			*dst = lead;
			continue;
		}

		/* Only 0xA1-0xFE and 0x8E introduce a two-byte sequence, and a second
		 * byte must actually be available */
		if ((lead != 0x8E && (lead < 0xA1 || lead > 0xFE)) || src >= src_end) {
			*dst = MBFL_BAD_INPUT;
			continue;
		}

		unsigned char trail = *src++;

		if (lead == 0x8E) {
			/* 0x8E prefix: second byte 0xA1-0xDF is offset by 0xFEC0 */
			if (trail >= 0xA1 && trail <= 0xDF) {
				*dst = 0xFEC0 + trail;
			} else {
				*dst = MBFL_BAD_INPUT;
			}
			continue;
		}

		if (trail < 0xA1 || trail > 0xFE) {
			*dst = MBFL_BAD_INPUT;
			continue;
		}

		/* Linear index into the 94x94 grid */
		unsigned int idx = (lead - 0xA1)*94 + trail - 0xA1, cp = 0;

		/* These indices get fixed codepoints and bypass the lookup tables */
		switch (idx) {
		case 31:
			cp = 0xFF3C; /* FULLWIDTH REVERSE SOLIDUS */
			break;
		case 32:
			cp = 0xFF5E; /* FULLWIDTH TILDE */
			break;
		case 33:
			cp = 0x2225; /* PARALLEL TO */
			break;
		case 60:
			cp = 0xFF0D; /* FULLWIDTH HYPHEN-MINUS */
			break;
		case 80:
			cp = 0xFFE0; /* FULLWIDTH CENT SIGN */
			break;
		case 81:
			cp = 0xFFE1; /* FULLWIDTH POUND SIGN */
			break;
		case 137:
			cp = 0xFFE2; /* FULLWIDTH NOT SIGN */
			break;
		default:
			break;
		}

		if (cp == 0) {
			if (idx >= cp932ext1_ucs_table_min && idx < cp932ext1_ucs_table_max) {
				cp = cp932ext1_ucs_table[idx - cp932ext1_ucs_table_min];
			} else if (idx < jisx0208_ucs_table_size) {
				cp = jisx0208_ucs_table[idx];
			} else if (idx >= cp932ext2_ucs_table_min && idx < cp932ext2_ucs_table_max) {
				cp = cp932ext2_ucs_table[idx - cp932ext2_ucs_table_min];
			}
		}

		/* A zero table entry means the position is unassigned */
		if (cp == 0) {
			cp = MBFL_BAD_INPUT;
		}
		*dst = cp;
	}

	*in_len = src_end - src;
	*in = src;
	return dst - buf;
}
/*
 * Encode `len` codepoints from `in` as CP51932 and append them to `buf`.
 *
 * Codepoints with no mapping are handed to MB_CONVERT_ERROR. The `end`
 * argument is not used by this encoder.
 */
static void mb_wchar_to_cp51932(uint32_t *in, size_t len, mb_convert_buf *buf, bool end)
{
	unsigned char *out, *limit;
	MB_CONVERT_BUF_LOAD(buf, out, limit);
	/* Reserve two bytes per pending codepoint */
	MB_CONVERT_BUF_ENSURE(buf, out, limit, len * 2);

	while (len--) {
		uint32_t cp = *in++;
		unsigned int jis = 0;

		if (cp == 0) {
			out = mb_convert_buf_add(out, 0);
			continue;
		}

		if (cp >= ucs_a1_jis_table_min && cp < ucs_a1_jis_table_max) {
			jis = ucs_a1_jis_table[cp - ucs_a1_jis_table_min];
		} else if (cp >= ucs_a2_jis_table_min && cp < ucs_a2_jis_table_max) {
			jis = ucs_a2_jis_table[cp - ucs_a2_jis_table_min];
		} else if (cp >= ucs_i_jis_table_min && cp < ucs_i_jis_table_max) {
			jis = ucs_i_jis_table[cp - ucs_i_jis_table_min];
		} else if (cp >= ucs_r_jis_table_min && cp < ucs_r_jis_table_max) {
			jis = ucs_r_jis_table[cp - ucs_r_jis_table_min];
		}

		if (jis >= 0x8080) {
			jis = 0; /* We don't support JIS X0213 */
		}

		if (jis == 0) {
			switch (cp) {
			case 0xA5: /* YEN SIGN */
				jis = 0x216F; /* FULLWIDTH YEN SIGN */
				break;
			case 0xFF3C: /* FULLWIDTH REVERSE SOLIDUS */
				jis = 0x2140;
				break;
			case 0x2225: /* PARALLEL TO */
				jis = 0x2142;
				break;
			case 0xFF0D: /* FULLWIDTH HYPHEN-MINUS */
				jis = 0x215D;
				break;
			case 0xFFE0: /* FULLWIDTH CENT SIGN */
				jis = 0x2171;
				break;
			case 0xFFE1: /* FULLWIDTH POUND SIGN */
				jis = 0x2172;
				break;
			case 0xFFE2: /* FULLWIDTH NOT SIGN */
				jis = 0x224C;
				break;
			default:
				/* Reverse lookup by linear scan of the first extension table... */
				for (int i = 0; i < cp932ext1_ucs_table_max - cp932ext1_ucs_table_min; i++) {
					if (cp932ext1_ucs_table[i] == cp) {
						jis = ((i/94 + 0x2D) << 8) + (i%94) + 0x21;
						break;
					}
				}
				/* ...and of the second one only if the first had no match
				 * (a match always yields a non-zero code) */
				if (jis == 0) {
					for (int i = 0; i < cp932ext2_ucs_table_max - cp932ext2_ucs_table_min; i++) {
						if (cp932ext2_ucs_table[i] == cp) {
							jis = ((i/94 + 0x79) << 8) + (i%94) + 0x21;
							break;
						}
					}
				}
				break;
			}
		}

		if (jis == 0 || jis >= 0x8080) {
			MB_CONVERT_ERROR(buf, out, limit, cp, mb_wchar_to_cp51932);
			MB_CONVERT_BUF_ENSURE(buf, out, limit, len * 2);
		} else if (jis < 0x80) {
			/* Single byte */
			out = mb_convert_buf_add(out, jis);
		} else if (jis < 0x100) {
			/* One-byte code emitted behind the 0x8E prefix */
			out = mb_convert_buf_add2(out, 0x8E, jis);
		} else {
			/* Two-byte code: set the high bit of both bytes */
			out = mb_convert_buf_add2(out, ((jis >> 8) & 0xFF) | 0x80, (jis & 0xFF) | 0x80);
		}
	}

	MB_CONVERT_BUF_STORE(buf, out, limit);
}

View file

@ -1,618 +0,0 @@
/*
* "streamable kanji code filter and converter"
* Copyright (c) 1998-2002 HappySize, Inc. All rights reserved.
*
* LICENSE NOTICES
*
* This file is part of "streamable kanji code filter and converter",
* which is distributed under the terms of GNU Lesser General Public
* License (version 2) as published by the Free Software Foundation.
*
* This software is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with "streamable kanji code filter and converter";
* if not, write to the Free Software Foundation, Inc., 59 Temple Place,
* Suite 330, Boston, MA 02111-1307 USA
*
* The author of this file:
*
*/
/*
* the source code included in this file was separated from mbfilter_ja.c
* by moriyoshi koizumi <moriyoshi@php.net> on 4 dec 2002.
*
*/
/* CP932 is Microsoft's version of Shift-JIS.
*
* What we call "SJIS-win" is a variant of CP932 which maps U+00A5
* and U+203E the same way as eucJP-win; namely, instead of mapping
* U+00A5 (YEN SIGN) to 0x5C and U+203E (OVERLINE) to 0x7E,
* these codepoints are mapped to appropriate JIS X 0208 characters.
*
* When converting from Shift-JIS to Unicode, there is no difference
* between CP932 and "SJIS-win".
*
* Additional facts:
*
* In the libmbfl library which formed the base for mbstring, "CP932" and
* "SJIS-win" were originally aliases. The differing mappings were added in
* December 2002. The libmbfl author later stated that this was done so that
* "CP932" would comply with a certain specification, while "SJIS-win" would
* maintain the existing mappings. He does not remember which specification
* it was.
* The WHATWG specification for "Shift_JIS" (followed by web browsers)
* agrees with our mappings for "CP932".
* Microsoft Windows' "best-fit" mappings for CP932 (via the
* WideCharToMultiByte API) convert U+00A5 to 0x5C, which also agrees with
* our mappings for "CP932".
* glibc's iconv converts U+203E to CP932 0x7E, which again agrees with
* our mappings for "CP932".
* When converting Shift-JIS to CP932, the conversion goes through Unicode.
* Shift-JIS 0x7E converts to U+203E, so mapping U+203E to 0x7E means that
* 0x7E will go to 0x7E when converting Shift-JIS to CP932.
*/
#include "mbfilter.h"
#include "mbfilter_cp932.h"
#include "unicode_table_cp932_ext.h"
#include "unicode_table_jis.h"
/* Forward declarations of the file-local functions which the encoding and
 * vtbl initializers below refer to before their definitions appear */
static int mbfl_filt_conv_cp932_wchar_flush(mbfl_convert_filter *filter);
static size_t mb_cp932_to_wchar(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state);
static void mb_wchar_to_cp932(uint32_t *in, size_t len, mb_convert_buf *buf, bool end);
static void mb_wchar_to_sjiswin(uint32_t *in, size_t len, mb_convert_buf *buf, bool end);
/* 256-entry table with one entry per possible byte value (16 per row).
 * Entries for 0x80-0x9F and 0xE0-0xFF are 2, all others are 1; presumably
 * the length in bytes of a character starting with that byte (confirm
 * against the users of the mblen table).
 * Shared by mbfl_encoding_cp932 and mbfl_encoding_sjiswin below.
 * NOTE(review): the decoders in this file reject 0x80 and 0xFD-0xFF as lead
 * bytes, while this table lists them as 2 - confirm this is intended. */
static const unsigned char mblen_table_sjis[] = { /* 0x80-0x9f,0xE0-0xFF */
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
};
/* NULL-terminated lists of alternative names, referenced from the CP932 and
 * SJIS-win encoding descriptors respectively */
static const char *mbfl_encoding_cp932_aliases[] = {"MS932", "Windows-31J", "MS_Kanji", NULL};
static const char *mbfl_encoding_sjiswin_aliases[] = {"SJIS-ms", "SJIS-open", NULL};
/* Encoding descriptor for CP932.
 * The initializer is positional; the per-field notes are inferred from the
 * values and should be confirmed against the mbfl_encoding definition. */
const mbfl_encoding mbfl_encoding_cp932 = {
mbfl_no_encoding_cp932,
"CP932", /* presumably the primary name */
"Shift_JIS", /* presumably the MIME name - confirm */
mbfl_encoding_cp932_aliases,
mblen_table_sjis,
MBFL_ENCTYPE_GL_UNSAFE,
&vtbl_cp932_wchar, /* byte-at-a-time filter: CP932 -> wchar */
&vtbl_wchar_cp932, /* byte-at-a-time filter: wchar -> CP932 */
mb_cp932_to_wchar, /* buffer-based decoder */
mb_wchar_to_cp932, /* buffer-based encoder */
NULL
};
/* Filter vtable for decoding CP932 to wchar: source and destination encoding
 * ids, the common constructor, the per-byte filter function and the flush
 * function which reports a truncated trailing character. The meaning of the
 * two NULL slots is not visible here - see struct mbfl_convert_vtbl. */
const struct mbfl_convert_vtbl vtbl_cp932_wchar = {
mbfl_no_encoding_cp932,
mbfl_no_encoding_wchar,
mbfl_filt_conv_common_ctor,
NULL,
mbfl_filt_conv_cp932_wchar,
mbfl_filt_conv_cp932_wchar_flush,
NULL,
};
/* Filter vtable for encoding wchar to CP932: source and destination encoding
 * ids, the common constructor, the per-codepoint filter function and the
 * common flush function. The meaning of the two NULL slots is not visible
 * here - see struct mbfl_convert_vtbl. */
const struct mbfl_convert_vtbl vtbl_wchar_cp932 = {
mbfl_no_encoding_wchar,
mbfl_no_encoding_cp932,
mbfl_filt_conv_common_ctor,
NULL,
mbfl_filt_conv_wchar_cp932,
mbfl_filt_conv_common_flush,
NULL,
};
/* Encoding descriptor for "SJIS-win".
 * It shares the length table and the buffer-based decoder with CP932; only
 * the encoder side (vtbl_wchar_sjiswin / mb_wchar_to_sjiswin) differs.
 * The initializer is positional; confirm field meanings against the
 * mbfl_encoding definition. */
const mbfl_encoding mbfl_encoding_sjiswin = {
mbfl_no_encoding_sjiswin,
"SJIS-win",
"Shift_JIS",
mbfl_encoding_sjiswin_aliases,
mblen_table_sjis,
MBFL_ENCTYPE_GL_UNSAFE,
&vtbl_sjiswin_wchar,
&vtbl_wchar_sjiswin,
mb_cp932_to_wchar, /* same decoder as CP932 */
mb_wchar_to_sjiswin,
NULL
};
/* Filter vtable for decoding SJIS-win to wchar. Apart from the source
 * encoding id it uses the same filter and flush functions as
 * vtbl_cp932_wchar. */
const struct mbfl_convert_vtbl vtbl_sjiswin_wchar = {
mbfl_no_encoding_sjiswin,
mbfl_no_encoding_wchar,
mbfl_filt_conv_common_ctor,
NULL,
mbfl_filt_conv_cp932_wchar,
mbfl_filt_conv_cp932_wchar_flush,
NULL,
};
/* Filter vtable for encoding wchar to SJIS-win. It uses
 * mbfl_filt_conv_wchar_sjiswin, which special-cases U+00A5 and U+203E and
 * otherwise defers to the CP932 filter. */
const struct mbfl_convert_vtbl vtbl_wchar_sjiswin = {
mbfl_no_encoding_wchar,
mbfl_no_encoding_sjiswin,
mbfl_filt_conv_common_ctor,
NULL,
mbfl_filt_conv_wchar_sjiswin,
mbfl_filt_conv_common_flush,
NULL,
};
/* Evaluate `statement` and return -1 from the enclosing function if the
 * result is negative. Only usable inside functions that return int. */
#define CK(statement) do { if ((statement) < 0) return (-1); } while (0)
/* Convert a two-byte code (c1 = first byte, c2 = second byte; the callers in
 * this file pass the high and low byte of a 16-bit table value) into the two
 * bytes s1, s2 that are written out for Shift-JIS.
 *
 *   s1 = ((c1 - 1) >> 1) + (c1 < 0x5f ? 0x71 : 0xb1)
 *   s2 = c2 + 0x1f or c2 + 0x20 when c1 is odd (0x1f if c2 < 0x60),
 *        c2 + 0x7e when c1 is even
 *
 * s1 and s2 are assigned to, and c1/c2 are evaluated more than once, so the
 * outputs must be distinct lvalues from the inputs and the inputs must be
 * free of side effects. */
#define SJIS_ENCODE(c1,c2,s1,s2) \
do { \
s1 = c1; \
s1--; \
s1 >>= 1; \
if ((c1) < 0x5f) { \
s1 += 0x71; \
} else { \
s1 += 0xb1; \
} \
s2 = c2; \
if ((c1) & 1) { \
if ((c2) < 0x60) { \
s2--; \
} \
s2 += 0x20; \
} else { \
s2 += 0x7e; \
} \
} while (0)
/* Convert a Shift-JIS byte pair (c1 = first byte, c2 = second byte) into a
 * two-byte code s1, s2; the callers in this file then form the linear index
 * (s1 - 0x21)*94 + (s2 - 0x21).
 *
 *   s1 = ((c1 - 0x81 or c1 - 0xc1) << 1) + 0x21   (0x81 when c1 < 0xa0)
 *   c2 <  0x9f: s2 = c2 - 0x1f or c2 - 0x20       (0x1f when c2 < 0x7f)
 *   c2 >= 0x9f: s1 is incremented and s2 = c2 - 0x7e
 *
 * s1 and s2 are assigned to, so they must be lvalues distinct from c1/c2. */
#define SJIS_DECODE(c1,c2,s1,s2) \
do { \
s1 = c1; \
if (s1 < 0xa0) { \
s1 -= 0x81; \
} else { \
s1 -= 0xc1; \
} \
s1 <<= 1; \
s1 += 0x21; \
s2 = c2; \
if (s2 < 0x9f) { \
if (s2 < 0x7f) { \
s2++; \
} \
s2 -= 0x20; \
} else { \
s1++; \
s2 -= 0x7e; \
} \
} while (0)
/*
 * Byte-at-a-time decoder step for CP932 (also installed for SJIS-win).
 *
 * c      - the next input byte
 * filter - conversion filter; filter->status is 0 between characters and 1
 *          while the first byte of a two-byte character is held in
 *          filter->cache
 *
 * Every decoded codepoint, or MBFL_BAD_INPUT for an invalid byte or pair, is
 * passed to filter->output_function. Returns 0, or -1 (via CK) when the
 * output function returns a negative value.
 */
int mbfl_filt_conv_cp932_wchar(int c, mbfl_convert_filter *filter)
{
int c1, s, s1, s2, w;
switch (filter->status) {
case 0:
if (c >= 0 && c < 0x80) { /* latin */
CK((*filter->output_function)(c, filter->data));
} else if (c > 0xa0 && c < 0xe0) { /* kana */
CK((*filter->output_function)(0xfec0 + c, filter->data));
} else if (c > 0x80 && c < 0xfd && c != 0xa0) { /* kanji first char */
/* Remember the lead byte and wait for the second byte */
filter->status = 1;
filter->cache = c;
} else {
CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data));
}
break;
case 1: /* kanji second char */
/* The state is reset first, so it is already clean if CK returns early */
filter->status = 0;
c1 = filter->cache;
if (c >= 0x40 && c <= 0xfc && c != 0x7f) {
w = 0;
SJIS_DECODE(c1, c, s1, s2);
/* Linear index into the 94-column grid */
s = (s1 - 0x21)*94 + s2 - 0x21;
/* These seven indices get fixed codepoints and bypass the table lookups below */
if (s <= 137) {
if (s == 31) {
w = 0xff3c; /* FULLWIDTH REVERSE SOLIDUS */
} else if (s == 32) {
w = 0xff5e; /* FULLWIDTH TILDE */
} else if (s == 33) {
w = 0x2225; /* PARALLEL TO */
} else if (s == 60) {
w = 0xff0d; /* FULLWIDTH HYPHEN-MINUS */
} else if (s == 80) {
w = 0xffe0; /* FULLWIDTH CENT SIGN */
} else if (s == 81) {
w = 0xffe1; /* FULLWIDTH POUND SIGN */
} else if (s == 137) {
w = 0xffe2; /* FULLWIDTH NOT SIGN */
}
}
if (w == 0) {
/* The ext1 range is tested before the general JIS X 0208 table, so it takes precedence where the two overlap */
if (s >= cp932ext1_ucs_table_min && s < cp932ext1_ucs_table_max) { /* vendor ext1 (13ku) */
w = cp932ext1_ucs_table[s - cp932ext1_ucs_table_min];
} else if (s >= 0 && s < jisx0208_ucs_table_size) { /* X 0208 */
w = jisx0208_ucs_table[s];
} else if (s >= cp932ext2_ucs_table_min && s < cp932ext2_ucs_table_max) { /* vendor ext2 (89ku - 92ku) */
w = cp932ext2_ucs_table[s - cp932ext2_ucs_table_min];
} else if (s >= cp932ext3_ucs_table_min && s < cp932ext3_ucs_table_max) { /* vendor ext3 (115ku - 119ku) */
w = cp932ext3_ucs_table[s - cp932ext3_ucs_table_min];
} else if (s >= (94*94) && s < (114*94)) { /* user (95ku - 114ku) */
/* User-defined rows map linearly onto U+E000 and up */
w = s - (94*94) + 0xe000;
}
}
/* No table matched, or the matching entry is empty */
if (w <= 0) {
w = MBFL_BAD_INPUT;
}
CK((*filter->output_function)(w, filter->data));
} else {
CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data));
}
break;
EMPTY_SWITCH_DEFAULT_CASE();
}
return 0;
}
/*
 * End-of-input handler for the CP932 decoding filter.
 *
 * If a lead byte is still pending, MBFL_BAD_INPUT is emitted for it and the
 * state is cleared (the output function's result is not checked). The
 * downstream flush function is then called if one is set. Always returns 0.
 */
static int mbfl_filt_conv_cp932_wchar_flush(mbfl_convert_filter *filter)
{
	if (filter->status != 0) {
		/* Input stopped in the middle of a two-byte character */
		filter->output_function(MBFL_BAD_INPUT, filter->data);
		filter->status = 0;
	}

	if (filter->flush_function) {
		filter->flush_function(filter->data);
	}

	return 0;
}
/*
 * Per-codepoint encoder step: convert codepoint `c` to CP932 and pass the
 * resulting byte(s) to filter->output_function.
 *
 * Codepoints without a mapping go to mbfl_filt_conv_illegal_output().
 * Returns 0, or -1 (via CK) when a callee returns a negative value.
 */
int mbfl_filt_conv_wchar_cp932(int c, mbfl_convert_filter *filter)
{
int c1, c2, s1, s2;
/* s1 receives the table code; s2 is set to 1 only for user-defined-area codes */
s1 = 0;
s2 = 0;
if (c >= ucs_a1_jis_table_min && c < ucs_a1_jis_table_max) {
s1 = ucs_a1_jis_table[c - ucs_a1_jis_table_min];
} else if (c == 0x203E) {
s1 = 0x7E;
} else if (c >= ucs_a2_jis_table_min && c < ucs_a2_jis_table_max) {
s1 = ucs_a2_jis_table[c - ucs_a2_jis_table_min];
} else if (c >= ucs_i_jis_table_min && c < ucs_i_jis_table_max) {
s1 = ucs_i_jis_table[c - ucs_i_jis_table_min];
} else if (c >= ucs_r_jis_table_min && c < ucs_r_jis_table_max) {
s1 = ucs_r_jis_table[c - ucs_r_jis_table_min];
} else if (c >= 0xe000 && c < (0xe000 + 20*94)) { /* user (95ku - 114ku) */
s1 = c - 0xe000;
c1 = s1/94 + 0x7f;
c2 = s1%94 + 0x21;
s1 = (c1 << 8) | c2;
/* Mark as user area so the ">= 0x8080" rejection below does not apply */
s2 = 1;
}
/* Fixed mappings, applied only when the tables gave no result */
if (s1 <= 0) {
if (c == 0xa5) { /* YEN SIGN */
s1 = 0x5C;
} else if (c == 0xff3c) { /* FULLWIDTH REVERSE SOLIDUS */
s1 = 0x2140;
} else if (c == 0x2225) { /* PARALLEL TO */
s1 = 0x2142;
} else if (c == 0xff0d) { /* FULLWIDTH HYPHEN-MINUS */
s1 = 0x215d;
} else if (c == 0xffe0) { /* FULLWIDTH CENT SIGN */
s1 = 0x2171;
} else if (c == 0xffe1) { /* FULLWIDTH POUND SIGN */
s1 = 0x2172;
} else if (c == 0xffe2) { /* FULLWIDTH NOT SIGN */
s1 = 0x224c;
}
}
if ((s1 <= 0) || (s1 >= 0x8080 && s2 == 0)) { /* not found or X 0212 */
/* Reverse lookup by linear scan of the vendor extension tables */
s1 = -1;
c1 = 0;
c2 = cp932ext1_ucs_table_max - cp932ext1_ucs_table_min;
while (c1 < c2) { /* CP932 vendor ext1 (13ku) */
if (c == cp932ext1_ucs_table[c1]) {
s1 = ((c1/94 + 0x2d) << 8) + (c1%94 + 0x21);
break;
}
c1++;
}
if (s1 <= 0) {
c1 = 0;
c2 = cp932ext3_ucs_table_max - cp932ext3_ucs_table_min;
while (c1 < c2) { /* CP932 vendor ext3 (115ku - 119ku) */
if (c == cp932ext3_ucs_table[c1]) {
s1 = ((c1/94 + 0x93) << 8) + (c1%94 + 0x21);
break;
}
c1++;
}
}
/* U+0000 is emitted as a zero byte; everything else still unmapped is an error */
if (c == 0) {
s1 = 0;
} else if (s1 <= 0) {
s1 = -1;
}
}
if (s1 >= 0) {
if (s1 < 0x100) { /* latin or kana */
CK((*filter->output_function)(s1, filter->data));
} else { /* kanji */
c1 = (s1 >> 8) & 0xff;
c2 = s1 & 0xff;
/* SJIS_ENCODE overwrites s1 and s2 with the two output bytes */
SJIS_ENCODE(c1, c2, s1, s2);
CK((*filter->output_function)(s1, filter->data));
CK((*filter->output_function)(s2, filter->data));
}
} else {
CK(mbfl_filt_conv_illegal_output(c, filter));
}
return 0;
}
/*
 * Per-codepoint encoder step for "SJIS-win".
 *
 * U+00A5 is written as 0x81 0x8F and U+203E as 0x81 0x50; every other
 * codepoint is handled by mbfl_filt_conv_wchar_cp932(). Returns 0, or -1
 * when an output call reports a negative value.
 */
int mbfl_filt_conv_wchar_sjiswin(int c, mbfl_convert_filter *filter)
{
	int second_byte;

	switch (c) {
	case 0xA5:
		second_byte = 0x8F;
		break;
	case 0x203E:
		second_byte = 0x50;
		break;
	default:
		return mbfl_filt_conv_wchar_cp932(c, filter);
	}

	/* Both special cases share the first byte 0x81 */
	CK((*filter->output_function)(0x81, filter->data));
	CK((*filter->output_function)(second_byte, filter->data));
	return 0;
}
/*
 * Decode CP932 / SJIS-win bytes into codepoints.
 *
 * in, in_len  - input cursor and remaining byte count; both are updated on return
 * buf,bufsize - output array and its capacity in codepoints
 * state       - not used by this decoder
 *
 * Returns the number of codepoints written to buf. Invalid or truncated
 * sequences produce MBFL_BAD_INPUT.
 */
static size_t mb_cp932_to_wchar(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state)
{
	unsigned char *src = *in, *src_end = src + *in_len;
	uint32_t *dst = buf, *dst_end = buf + bufsize;

	/* Each pass through the loop stores exactly one codepoint in *dst */
	for (; src < src_end && dst < dst_end; dst++) {
		unsigned char lead = *src++;

		if (lead < 0x80) {
			*dst = lead;
			continue;
		}
		if (lead > 0xA0 && lead < 0xE0) {
			/* Kana */
			*dst = 0xFEC0 + lead;
			continue;
		}

		/* 0x80, 0xA0 and 0xFD-0xFF do not start a character; any other lead
		 * byte needs a second byte to follow */
		if (lead == 0x80 || lead == 0xA0 || lead >= 0xFD || src >= src_end) {
			*dst = MBFL_BAD_INPUT;
			continue;
		}

		unsigned char trail = *src++;
		if (trail < 0x40 || trail > 0xFC || trail == 0x7F) {
			*dst = MBFL_BAD_INPUT;
			continue;
		}

		unsigned int row, cell, cp = 0;
		SJIS_DECODE(lead, trail, row, cell);
		/* Linear index into the 94-column grid */
		unsigned int idx = (row - 0x21)*94 + cell - 0x21;

		/* These indices get fixed codepoints and bypass the lookup tables */
		switch (idx) {
		case 31:
			cp = 0xFF3C; /* FULLWIDTH REVERSE SOLIDUS */
			break;
		case 32:
			cp = 0xFF5E; /* FULLWIDTH TILDE */
			break;
		case 33:
			cp = 0x2225; /* PARALLEL TO */
			break;
		case 60:
			cp = 0xFF0D; /* FULLWIDTH HYPHEN-MINUS */
			break;
		case 80:
			cp = 0xFFE0; /* FULLWIDTH CENT SIGN */
			break;
		case 81:
			cp = 0xFFE1; /* FULLWIDTH POUND SIGN */
			break;
		case 137:
			cp = 0xFFE2; /* FULLWIDTH NOT SIGN */
			break;
		default:
			break;
		}

		if (cp == 0) {
			if (idx >= cp932ext1_ucs_table_min && idx < cp932ext1_ucs_table_max) {
				cp = cp932ext1_ucs_table[idx - cp932ext1_ucs_table_min];
			} else if (idx < jisx0208_ucs_table_size) {
				cp = jisx0208_ucs_table[idx];
			} else if (idx >= cp932ext2_ucs_table_min && idx < cp932ext2_ucs_table_max) {
				cp = cp932ext2_ucs_table[idx - cp932ext2_ucs_table_min];
			} else if (idx >= cp932ext3_ucs_table_min && idx < cp932ext3_ucs_table_max) {
				cp = cp932ext3_ucs_table[idx - cp932ext3_ucs_table_min];
			} else if (idx >= (94*94) && idx < (114*94)) {
				/* User-defined rows map linearly onto U+E000 and up */
				cp = idx - (94*94) + 0xE000;
			}
		}

		/* A zero result means the position is unassigned */
		if (cp == 0) {
			cp = MBFL_BAD_INPUT;
		}
		*dst = cp;
	}

	*in_len = src_end - src;
	*in = src;
	return dst - buf;
}
/*
 * Encode `len` codepoints from `in` as CP932 and append them to `buf`.
 *
 * Codepoints with no mapping are handed to MB_CONVERT_ERROR. The `end`
 * argument is not used by this encoder.
 */
static void mb_wchar_to_cp932(uint32_t *in, size_t len, mb_convert_buf *buf, bool end)
{
unsigned char *out, *limit;
MB_CONVERT_BUF_LOAD(buf, out, limit);
/* Reserve two bytes per pending codepoint */
MB_CONVERT_BUF_ENSURE(buf, out, limit, len * 2);
while (len--) {
uint32_t w = *in++;
/* s1 receives the table code; s2 is set to 1 only for user-defined-area codes */
unsigned int s1 = 0, s2 = 0, c1, c2;
if (w >= ucs_a1_jis_table_min && w < ucs_a1_jis_table_max) {
s1 = ucs_a1_jis_table[w - ucs_a1_jis_table_min];
} else if (w == 0x203E) {
s1 = 0x7E;
} else if (w >= ucs_a2_jis_table_min && w < ucs_a2_jis_table_max) {
s1 = ucs_a2_jis_table[w - ucs_a2_jis_table_min];
} else if (w >= ucs_i_jis_table_min && w < ucs_i_jis_table_max) {
s1 = ucs_i_jis_table[w - ucs_i_jis_table_min];
} else if (w >= ucs_r_jis_table_min && w < ucs_r_jis_table_max) {
s1 = ucs_r_jis_table[w - ucs_r_jis_table_min];
} else if (w >= 0xE000 && w < (0xE000 + 20*94)) {
s1 = w - 0xE000;
c1 = s1/94 + 0x7F;
c2 = s1%94 + 0x21;
s1 = (c1 << 8) | c2;
/* Mark as user area so the ">= 0x8080" rejection below does not apply */
s2 = 1;
}
/* Fixed mappings. Unlike mbfl_filt_conv_wchar_cp932, these are applied
 * whether or not the tables above already produced a value. */
if (w == 0xA5) { /* YEN SIGN */
s1 = 0x5C;
} else if (w == 0xFF3C) { /* FULLWIDTH REVERSE SOLIDUS */
s1 = 0x2140;
} else if (w == 0x2225) { /* PARALLEL TO */
s1 = 0x2142;
} else if (w == 0xFF0D) { /* FULLWIDTH HYPHEN-MINUS */
s1 = 0x215D;
} else if (w == 0xFFE0) { /* FULLWIDTH CENT SIGN */
s1 = 0x2171;
} else if (w == 0xFFE1) { /* FULLWIDTH POUND SIGN */
s1 = 0x2172;
} else if (w == 0xFFE2) { /* FULLWIDTH NOT SIGN */
s1 = 0x224C;
} else if (w == 0) {
/* U+0000 is emitted as a single zero byte */
out = mb_convert_buf_add(out, 0);
continue;
}
if (!s1 || (s1 >= 0x8080 && !s2)) { /* not found or X 0212 */
/* Reverse lookup by linear scan of the vendor extension tables; a hit
 * jumps straight to the output code after this block */
for (unsigned int i = 0; i < cp932ext1_ucs_table_max - cp932ext1_ucs_table_min; i++) {
if (cp932ext1_ucs_table[i] == w) {
s1 = ((i/94 + 0x2D) << 8) + (i%94 + 0x21);
goto emit_output;
}
}
for (unsigned int i = 0; i < cp932ext3_ucs_table_max - cp932ext3_ucs_table_min; i++) {
if (cp932ext3_ucs_table[i] == w) {
s1 = ((i/94 + 0x93) << 8) + (i%94 + 0x21);
goto emit_output;
}
}
/* No mapping: report the codepoint, then re-reserve space for the rest of the input */
MB_CONVERT_ERROR(buf, out, limit, w, mb_wchar_to_cp932);
MB_CONVERT_BUF_ENSURE(buf, out, limit, len * 2);
continue;
}
emit_output:
if (s1 < 0x100) {
out = mb_convert_buf_add(out, s1);
} else {
c1 = (s1 >> 8) & 0xFF;
c2 = s1 & 0xFF;
/* SJIS_ENCODE overwrites s1 and s2 with the two output bytes */
SJIS_ENCODE(c1, c2, s1, s2);
out = mb_convert_buf_add2(out, s1, s2);
}
}
MB_CONVERT_BUF_STORE(buf, out, limit);
}
/*
 * Encode `len` codepoints from `in` as "SJIS-win" and append them to `buf`.
 *
 * This follows mb_wchar_to_cp932 except that U+203E has no dedicated
 * single-byte mapping here and U+00A5 is mapped to 0x216F (FULLWIDTH YEN
 * SIGN) rather than 0x5C.
 *
 * Codepoints with no mapping are handed to MB_CONVERT_ERROR together with
 * this function itself, so that whatever the error handler emits is encoded
 * with the SJIS-win rules and not with the CP932 ones. The `end` argument is
 * not used by this encoder.
 */
static void mb_wchar_to_sjiswin(uint32_t *in, size_t len, mb_convert_buf *buf, bool end)
{
	unsigned char *out, *limit;
	MB_CONVERT_BUF_LOAD(buf, out, limit);
	/* Reserve two bytes per pending codepoint */
	MB_CONVERT_BUF_ENSURE(buf, out, limit, len * 2);

	while (len--) {
		uint32_t w = *in++;
		/* s1 receives the table code; s2 is set to 1 only for user-defined-area codes */
		unsigned int s1 = 0, s2 = 0, c1, c2;

		if (w >= ucs_a1_jis_table_min && w < ucs_a1_jis_table_max) {
			s1 = ucs_a1_jis_table[w - ucs_a1_jis_table_min];
		} else if (w >= ucs_a2_jis_table_min && w < ucs_a2_jis_table_max) {
			s1 = ucs_a2_jis_table[w - ucs_a2_jis_table_min];
		} else if (w >= ucs_i_jis_table_min && w < ucs_i_jis_table_max) {
			s1 = ucs_i_jis_table[w - ucs_i_jis_table_min];
		} else if (w >= ucs_r_jis_table_min && w < ucs_r_jis_table_max) {
			s1 = ucs_r_jis_table[w - ucs_r_jis_table_min];
		} else if (w >= 0xE000 && w < (0xE000 + 20*94)) {
			s1 = w - 0xE000;
			c1 = s1/94 + 0x7F;
			c2 = s1%94 + 0x21;
			s1 = (c1 << 8) | c2;
			/* Mark as user area so the ">= 0x8080" rejection below does not apply */
			s2 = 1;
		}

		/* Fixed mappings, applied regardless of the table result */
		if (w == 0xA5) { /* YEN SIGN */
			s1 = 0x216F; /* FULLWIDTH YEN SIGN */
		} else if (w == 0xFF3C) { /* FULLWIDTH REVERSE SOLIDUS */
			s1 = 0x2140;
		} else if (w == 0x2225) { /* PARALLEL TO */
			s1 = 0x2142;
		} else if (w == 0xFF0D) { /* FULLWIDTH HYPHEN-MINUS */
			s1 = 0x215D;
		} else if (w == 0xFFE0) { /* FULLWIDTH CENT SIGN */
			s1 = 0x2171;
		} else if (w == 0xFFE1) { /* FULLWIDTH POUND SIGN */
			s1 = 0x2172;
		} else if (w == 0xFFE2) { /* FULLWIDTH NOT SIGN */
			s1 = 0x224C;
		} else if (w == 0) {
			/* U+0000 is emitted as a single zero byte */
			out = mb_convert_buf_add(out, 0);
			continue;
		}

		if (!s1 || (s1 >= 0x8080 && !s2)) { /* not found or X 0212 */
			/* Reverse lookup by linear scan of the vendor extension tables */
			for (unsigned int i = 0; i < cp932ext1_ucs_table_max - cp932ext1_ucs_table_min; i++) {
				if (cp932ext1_ucs_table[i] == w) {
					s1 = ((i/94 + 0x2D) << 8) + (i%94 + 0x21);
					goto emit_output;
				}
			}
			for (unsigned int i = 0; i < cp932ext3_ucs_table_max - cp932ext3_ucs_table_min; i++) {
				if (cp932ext3_ucs_table[i] == w) {
					s1 = ((i/94 + 0x93) << 8) + (i%94 + 0x21);
					goto emit_output;
				}
			}

			/* Pass this encoder (not the CP932 one) as the callback */
			MB_CONVERT_ERROR(buf, out, limit, w, mb_wchar_to_sjiswin);
			MB_CONVERT_BUF_ENSURE(buf, out, limit, len * 2);
			continue;
		}

emit_output:
		if (s1 < 0x100) {
			out = mb_convert_buf_add(out, s1);
		} else {
			c1 = (s1 >> 8) & 0xFF;
			c2 = s1 & 0xFF;
			/* SJIS_ENCODE overwrites s1 and s2 with the two output bytes */
			SJIS_ENCODE(c1, c2, s1, s2);
			out = mb_convert_buf_add2(out, s1, s2);
		}
	}

	MB_CONVERT_BUF_STORE(buf, out, limit);
}

View file

@ -1,47 +0,0 @@
/*
* "streamable kanji code filter and converter"
* Copyright (c) 1998-2002 HappySize, Inc. All rights reserved.
*
* LICENSE NOTICES
*
* This file is part of "streamable kanji code filter and converter",
* which is distributed under the terms of GNU Lesser General Public
* License (version 2) as published by the Free Software Foundation.
*
* This software is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with "streamable kanji code filter and converter";
* if not, write to the Free Software Foundation, Inc., 59 Temple Place,
* Suite 330, Boston, MA 02111-1307 USA
*
* The author of this file:
*
*/
/*
* the source code included in this file was separated from mbfilter_ja.c
* by moriyoshi koizumi <moriyoshi@php.net> on 4 dec 2002.
*
*/
/* Include guard */
#ifndef MBFL_MBFILTER_CP932_H
#define MBFL_MBFILTER_CP932_H
#include "mbfilter.h"
/* CP932: encoding descriptor and the filter vtables for both directions */
extern const mbfl_encoding mbfl_encoding_cp932;
extern const struct mbfl_convert_vtbl vtbl_cp932_wchar;
extern const struct mbfl_convert_vtbl vtbl_wchar_cp932;
/* "SJIS-win": encoding descriptor and the filter vtables for both directions */
extern const mbfl_encoding mbfl_encoding_sjiswin;
extern const struct mbfl_convert_vtbl vtbl_sjiswin_wchar;
extern const struct mbfl_convert_vtbl vtbl_wchar_sjiswin;
/* Filter step functions: each takes one input unit `c` (a byte when decoding,
 * a codepoint when encoding) plus the filter it belongs to */
int mbfl_filt_conv_cp932_wchar(int c, mbfl_convert_filter *filter);
int mbfl_filt_conv_wchar_cp932(int c, mbfl_convert_filter *filter);
int mbfl_filt_conv_wchar_sjiswin(int c, mbfl_convert_filter *filter);
#endif /* MBFL_MBFILTER_CP932_H */

View file

@ -1,439 +0,0 @@
/*
* "streamable kanji code filter and converter"
* Copyright (c) 1998-2002 HappySize, Inc. All rights reserved.
*
* LICENSE NOTICES
*
* This file is part of "streamable kanji code filter and converter",
* which is distributed under the terms of GNU Lesser General Public
* License (version 2) as published by the Free Software Foundation.
*
* This software is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with "streamable kanji code filter and converter";
* if not, write to the Free Software Foundation, Inc., 59 Temple Place,
* Suite 330, Boston, MA 02111-1307 USA
*
* The author of this file:
*
*/
/*
* the source code included in this file was separated from mbfilter_cn.c
* by moriyoshi koizumi <moriyoshi@php.net> on 4 dec 2002.
*
*/
#include "mbfilter.h"
#include "mbfilter_cp936.h"
#define UNICODE_TABLE_CP936_DEF
#include "unicode_table_cp936.h"
/* Forward declarations of the file-local functions which the encoding and
 * vtbl initializers below refer to before their definitions appear */
static int mbfl_filt_conv_cp936_wchar_flush(mbfl_convert_filter *filter);
static size_t mb_cp936_to_wchar(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state);
static void mb_wchar_to_cp936(uint32_t *in, size_t len, mb_convert_buf *buf, bool end);
/* 256-entry table with one entry per possible byte value (16 per row).
 * Entries for 0x81-0xFE are 2; 0x00-0x80 and 0xFF are 1. This matches the
 * decoders below, which treat 0x80 and 0xFF as single-byte characters.
 * Presumably the length in bytes of a character starting with that byte -
 * confirm against the users of the mblen table. */
static const unsigned char mblen_table_cp936[] = { /* 0x81-0xFE */
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1
};
/* NULL-terminated list of alternative names, referenced from the CP936 encoding descriptor */
static const char *mbfl_encoding_cp936_aliases[] = {"CP-936", "GBK", NULL};
/* Encoding descriptor for CP936.
 * The initializer is positional; the per-field notes are inferred from the
 * values and should be confirmed against the mbfl_encoding definition. */
const mbfl_encoding mbfl_encoding_cp936 = {
mbfl_no_encoding_cp936,
"CP936", /* presumably the primary name */
"CP936", /* presumably the MIME name - confirm */
mbfl_encoding_cp936_aliases,
mblen_table_cp936,
MBFL_ENCTYPE_GL_UNSAFE,
&vtbl_cp936_wchar, /* byte-at-a-time filter: CP936 -> wchar */
&vtbl_wchar_cp936, /* byte-at-a-time filter: wchar -> CP936 */
mb_cp936_to_wchar, /* buffer-based decoder */
mb_wchar_to_cp936, /* buffer-based encoder */
NULL
};
/* Filter vtable for decoding CP936 to wchar: source and destination encoding
 * ids, the common constructor, the per-byte filter function and the flush
 * function which reports a truncated trailing character. The meaning of the
 * two NULL slots is not visible here - see struct mbfl_convert_vtbl. */
const struct mbfl_convert_vtbl vtbl_cp936_wchar = {
mbfl_no_encoding_cp936,
mbfl_no_encoding_wchar,
mbfl_filt_conv_common_ctor,
NULL,
mbfl_filt_conv_cp936_wchar,
mbfl_filt_conv_cp936_wchar_flush,
NULL,
};
/* Filter vtable for encoding wchar to CP936: source and destination encoding
 * ids, the common constructor, the per-codepoint filter function and the
 * common flush function. The meaning of the two NULL slots is not visible
 * here - see struct mbfl_convert_vtbl. */
const struct mbfl_convert_vtbl vtbl_wchar_cp936 = {
mbfl_no_encoding_wchar,
mbfl_no_encoding_cp936,
mbfl_filt_conv_common_ctor,
NULL,
mbfl_filt_conv_wchar_cp936,
mbfl_filt_conv_common_flush,
NULL,
};
/* Evaluate `statement` and return -1 from the enclosing function if the
 * result is negative. Only usable inside functions that return int. */
#define CK(statement) do { if ((statement) < 0) return (-1); } while (0)
/*
 * Byte-at-a-time decoder step for CP936.
 *
 * c      - the next input byte
 * filter - conversion filter; filter->status is 0 between characters and 1
 *          while the lead byte of a double-byte character is held in
 *          filter->cache
 *
 * Every decoded codepoint, or MBFL_BAD_INPUT for an invalid pair, is passed
 * to filter->output_function. Returns 0, or -1 (via CK) when the output
 * function returns a negative value.
 */
int mbfl_filt_conv_cp936_wchar(int c, mbfl_convert_filter *filter)
{
int k;
/* w stays <= 0 until one of the stages in case 1 has produced (and emitted) a codepoint */
int c1, c2, w = -1;
switch (filter->status) {
case 0:
if (c >= 0 && c < 0x80) { /* latin */
CK((*filter->output_function)(c, filter->data));
} else if (c == 0x80) { /* euro sign */
CK((*filter->output_function)(0x20ac, filter->data));
} else if (c < 0xff) { /* dbcs lead byte */
/* Remember the lead byte and wait for the second byte */
filter->status = 1;
filter->cache = c;
} else { /* 0xff */
CK((*filter->output_function)(0xf8f5, filter->data));
}
break;
case 1: /* dbcs second byte */
/* The state is reset first, so it is already clean if CK returns early */
filter->status = 0;
c1 = filter->cache;
/* Stage 1: user-defined areas, computed arithmetically */
if (((c1 >= 0xaa && c1 <= 0xaf) || (c1 >= 0xf8 && c1 <= 0xfe)) &&
(c >= 0xa1 && c <= 0xfe)) {
/* UDA part1,2: U+E000-U+E4C5 */
w = 94*(c1 >= 0xf8 ? c1 - 0xf2 : c1 - 0xaa) + (c - 0xa1) + 0xe000;
CK((*filter->output_function)(w, filter->data));
} else if (c1 >= 0xa1 && c1 <= 0xa7 && c >= 0x40 && c < 0xa1 && c != 0x7f) {
/* UDA part3 : U+E4C6-U+E765*/
w = 96*(c1 - 0xa1) + c - (c >= 0x80 ? 0x41 : 0x40) + 0xe4c6;
CK((*filter->output_function)(w, filter->data));
}
/* Stage 2: byte pairs listed in mbfl_cp936_pua_tbl. The three coarse range
 * tests only decide whether the linear scan of the table is attempted. */
c2 = (c1 << 8) | c;
if (w <= 0 &&
((c2 >= 0xa2ab && c2 <= 0xa9f0 + (0xe80f-0xe801)) ||
(c2 >= 0xd7fa && c2 <= 0xd7fa + (0xe814-0xe810)) ||
(c2 >= 0xfe50 && c2 <= 0xfe80 + (0xe864-0xe844)))) {
for (k = 0; k < mbfl_cp936_pua_tbl_max; k++) {
/* As used here: column 0 = first codepoint, column 1 = last codepoint,
 * column 2 = first byte pair of the run */
if (c2 >= mbfl_cp936_pua_tbl[k][2] &&
c2 <= mbfl_cp936_pua_tbl[k][2] +
mbfl_cp936_pua_tbl[k][1] - mbfl_cp936_pua_tbl[k][0]) {
w = c2 - mbfl_cp936_pua_tbl[k][2] + mbfl_cp936_pua_tbl[k][0];
CK((*filter->output_function)(w, filter->data));
break;
}
}
}
/* Stage 3: general lookup table, 192 entries per lead byte starting at 0x81 */
if (w <= 0) {
if (c1 < 0xff && c1 > 0x80 && c >= 0x40 && c < 0xff && c != 0x7f) {
w = (c1 - 0x81)*192 + c - 0x40;
ZEND_ASSERT(w < cp936_ucs_table_size);
CK((*filter->output_function)(cp936_ucs_table[w], filter->data));
} else {
CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data));
}
}
break;
EMPTY_SWITCH_DEFAULT_CASE();
}
return 0;
}
/*
 * End-of-input handler for the CP936 decoding filter.
 *
 * If a lead byte is still pending, the state is cleared and MBFL_BAD_INPUT
 * is emitted for it; a negative result from the output function makes this
 * function return -1 at once. Otherwise the downstream flush function is
 * called if one is set, and 0 is returned.
 */
static int mbfl_filt_conv_cp936_wchar_flush(mbfl_convert_filter *filter)
{
	if (filter->status != 0) {
		/* 2-byte character was truncated */
		filter->status = 0;
		if (filter->output_function(MBFL_BAD_INPUT, filter->data) < 0) {
			return -1;
		}
	}

	if (filter->flush_function) {
		filter->flush_function(filter->data);
	}

	return 0;
}
/*
 * Per-codepoint encoder step: convert codepoint `c` to CP936 and pass the
 * resulting byte(s) to filter->output_function.
 *
 * Codepoints without a mapping go to mbfl_filt_conv_illegal_output().
 * Returns 0, or -1 (via CK) when a callee returns a negative value.
 */
int mbfl_filt_conv_wchar_cp936(int c, mbfl_convert_filter *filter)
{
	int code = 0;

	if (c >= ucs_a1_cp936_table_min && c < ucs_a1_cp936_table_max) {
		/* U+0000 - U+0451 */
		code = ucs_a1_cp936_table[c - ucs_a1_cp936_table_min];
	} else if (c >= ucs_a2_cp936_table_min && c < ucs_a2_cp936_table_max) {
		/* U+2000 - U+26FF; three codepoints are hard-wired, the rest come from the table */
		switch (c) {
		case 0x203e:
			code = 0xa3fe;
			break;
		case 0x2218:
			code = 0xa1e3;
			break;
		case 0x223c:
			code = 0xa1ab;
			break;
		default:
			code = ucs_a2_cp936_table[c - ucs_a2_cp936_table_min];
			break;
		}
	} else if (c >= ucs_a3_cp936_table_min && c < ucs_a3_cp936_table_max) {
		/* U+2F00 - U+33FF */
		code = ucs_a3_cp936_table[c - ucs_a3_cp936_table_min];
	} else if (c >= ucs_i_cp936_table_min && c < ucs_i_cp936_table_max) {
		/* U+4D00-9FFF CJK Unified Ideographs (+ Extension A) */
		code = ucs_i_cp936_table[c - ucs_i_cp936_table_min];
	} else if (c >= 0xe000 && c <= 0xe864) {
		/* PUA */
		if (c < 0xe4c6) {
			/* U+E000..U+E4C5: 94 codepoints per lead byte */
			int offset = c - 0xe000;
			int row = offset / 94;
			code = (offset % 94) + 0xa1;
			code |= (row < 0x06 ? row + 0xaa : row + 0xf2) << 8;
		} else if (c < 0xe766) {
			/* U+E4C6..U+E765: 96 codepoints per lead byte, skipping trail byte 0x7f */
			int offset = c - 0xe4c6;
			int cell = offset % 96;
			code = ((offset / 96) + 0xa1) << 8;
			code |= cell + (cell >= 0x3f ? 0x41 : 0x40);
		} else {
			/* U+E766..U+E864: binary search over the codepoint ranges of mbfl_cp936_pua_tbl */
			int lo = 0, hi = mbfl_cp936_pua_tbl_max;
			while (lo < hi) {
				int mid = (lo + hi) >> 1;
				if (c < mbfl_cp936_pua_tbl[mid][0]) {
					hi = mid;
				} else if (c > mbfl_cp936_pua_tbl[mid][1]) {
					lo = mid + 1;
				} else {
					code = c - mbfl_cp936_pua_tbl[mid][0] + mbfl_cp936_pua_tbl[mid][2];
					break;
				}
			}
		}
	} else if (c == 0xf8f5) {
		code = 0xff;
	} else if (c >= ucs_ci_cp936_table_min && c < ucs_ci_cp936_table_max) {
		/* U+F900-FA2F CJK Compatibility Ideographs */
		code = ucs_ci_cp936_table[c - ucs_ci_cp936_table_min];
	} else if (c >= ucs_cf_cp936_table_min && c < ucs_cf_cp936_table_max) {
		code = ucs_cf_cp936_table[c - ucs_cf_cp936_table_min];
	} else if (c >= ucs_sfv_cp936_table_min && c < ucs_sfv_cp936_table_max) {
		/* U+FE50-FE6F Small Form Variants */
		code = ucs_sfv_cp936_table[c - ucs_sfv_cp936_table_min];
	} else if (c >= ucs_hff_cp936_table_min && c < ucs_hff_cp936_table_max) {
		/* U+FF00-FFFF HW/FW Forms */
		if (c == 0xff04) {
			code = 0xa1e7;
		} else if (c == 0xff5e) {
			code = 0xa1ab;
		} else if (c >= 0xff01 && c <= 0xff5d) {
			code = c - 0xff01 + 0xa3a1;
		} else if (c >= 0xffe0 && c <= 0xffe5) {
			code = ucs_hff_s_cp936_table[c-0xffe0];
		}
	}

	if (code <= 0) {
		if (c != 0) {
			/* Nothing matched and this is not U+0000: no mapping exists */
			CK(mbfl_filt_conv_illegal_output(c, filter));
			return 0;
		}
		/* U+0000 is emitted as a zero byte */
		code = 0;
	}

	if (code <= 0x80 || code == 0xff) { /* latin */
		CK((*filter->output_function)(code, filter->data));
	} else {
		/* Two bytes, high byte first */
		CK((*filter->output_function)((code >> 8) & 0xff, filter->data));
		CK((*filter->output_function)(code & 0xff, filter->data));
	}
	return 0;
}
/*
 * Decode CP936 bytes into codepoints.
 *
 * in, in_len  - input cursor and remaining byte count; both are updated on return
 * buf,bufsize - output array and its capacity in codepoints
 * state       - not used by this decoder
 *
 * Returns the number of codepoints written to buf. Invalid or truncated
 * sequences produce MBFL_BAD_INPUT.
 */
static size_t mb_cp936_to_wchar(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state)
{
	unsigned char *src = *in, *src_end = src + *in_len;
	uint32_t *dst = buf, *dst_end = buf + bufsize;

	/* Each pass through the loop stores exactly one codepoint in *dst */
	for (; src < src_end && dst < dst_end; dst++) {
		unsigned char lead = *src++;

		/* Single-byte cases */
		if (lead < 0x80) {
			*dst = lead;
			continue;
		}
		if (lead == 0x80) {
			*dst = 0x20AC; /* Euro sign */
			continue;
		}
		if (lead == 0xFF) {
			*dst = 0xF8F5;
			continue;
		}

		/* 0x81-0xFE: a second byte is required */
		if (src >= src_end) {
			*dst = MBFL_BAD_INPUT;
			continue;
		}
		unsigned char trail = *src++;
		if (trail < 0x40 || trail == 0x7F || trail == 0xFF) {
			*dst = MBFL_BAD_INPUT;
			continue;
		}

		if (((lead >= 0xAA && lead <= 0xAF) || (lead >= 0xF8 && lead <= 0xFE)) && trail >= 0xA1) {
			/* UDA part 1, 2: U+E000-U+E4C5 */
			*dst = 94*(lead >= 0xF8 ? lead - 0xF2 : lead - 0xAA) + (trail - 0xA1) + 0xE000;
		} else if (lead >= 0xA1 && lead <= 0xA7 && trail < 0xA1) {
			/* UDA part 3: U+E4C6-U+E765*/
			*dst = 96*(lead - 0xA1) + trail - (trail >= 0x80 ? 0x41 : 0x40) + 0xE4C6;
		} else {
			/* Convert lead, trail into GB 2312 table lookup index */
			unsigned int idx = (lead - 0x81)*192 + trail - 0x40;

			/* For CP936 and GB18030, certain GB 2312 byte combinations are mapped to PUA codepoints,
			 * whereas the same combinations aren't mapped to any codepoint for HZ and EUC-CN
			 * To avoid duplicating the entire GB 2312 -> Unicode lookup table, we have three
			 * auxiliary tables which are consulted instead for specific ranges of lookup indices */
			if (idx >= 0x192B && idx <= 0x1EBE) {
				*dst = cp936_pua_tbl1[idx - 0x192B];
			} else if (idx >= 0x413A && idx <= 0x413E) {
				*dst = cp936_pua_tbl2[idx - 0x413A];
			} else if (idx >= 0x5DD0 && idx <= 0x5E20) {
				*dst = cp936_pua_tbl3[idx - 0x5DD0];
			} else {
				ZEND_ASSERT(idx < cp936_ucs_table_size);
				*dst = cp936_ucs_table[idx];
			}
		}
	}

	*in_len = src_end - src;
	*in = src;
	return dst - buf;
}
/* Encode `len` Unicode codepoints from `in` as CP936, appending the bytes to `buf`.
 *
 * Each codepoint is translated to a CP936 code via the ucs_*_cp936 lookup tables
 * (plus arithmetic for the Private Use Area). Codepoint 0 is emitted as a zero
 * byte; any other codepoint without a mapping is reported via MB_CONVERT_ERROR.
 * `end` is not used by this function. */
static void mb_wchar_to_cp936(uint32_t *in, size_t len, mb_convert_buf *buf, bool end)
{
	unsigned char *out, *limit;
	MB_CONVERT_BUF_LOAD(buf, out, limit);
	/* Each codepoint yields at most 2 bytes */
	MB_CONVERT_BUF_ENSURE(buf, out, limit, len * 2);

	while (len--) {
		uint32_t cp = *in++;
		unsigned int code = 0; /* 0 means 'no mapping found' */

		if (cp >= ucs_a1_cp936_table_min && cp < ucs_a1_cp936_table_max) {
			/* U+0000-U+0451 */
			code = ucs_a1_cp936_table[cp - ucs_a1_cp936_table_min];
		} else if (cp >= ucs_a2_cp936_table_min && cp < ucs_a2_cp936_table_max) {
			/* U+2000-U+26FF; three codepoints are special-cased ahead of the table */
			switch (cp) {
			case 0x203E:
				code = 0xA3FE;
				break;
			case 0x2218:
				code = 0xA1E3;
				break;
			case 0x223C:
				code = 0xA1AB;
				break;
			default:
				code = ucs_a2_cp936_table[cp - ucs_a2_cp936_table_min];
				break;
			}
		} else if (cp >= ucs_a3_cp936_table_min && cp < ucs_a3_cp936_table_max) {
			/* U+2F00-U+33FF */
			code = ucs_a3_cp936_table[cp - ucs_a3_cp936_table_min];
		} else if (cp >= ucs_i_cp936_table_min && cp < ucs_i_cp936_table_max) {
			/* U+4D00-9FFF CJK Unified Ideographs (+ Extension A) */
			code = ucs_i_cp936_table[cp - ucs_i_cp936_table_min];
		} else if (cp >= 0xE000 && cp <= 0xE864) {
			/* PUA */
			if (cp < 0xE4C6) {
				/* U+E000-U+E4C5: rows of 94 cells */
				unsigned int offset = cp - 0xE000;
				unsigned int row = offset / 94;
				code = (offset % 94) + 0xA1;
				code |= (row < 0x6 ? row + 0xAA : row + 0xF2) << 8;
			} else if (cp < 0xE766) {
				/* U+E4C6-U+E765: rows of 96 cells; trail byte 0x7F is skipped */
				unsigned int offset = cp - 0xE4C6;
				unsigned int col = offset % 96;
				code = ((offset / 96) + 0xA1) << 8;
				code |= col + (col >= 0x3F ? 0x41 : 0x40);
			} else {
				/* U+E766-U+E864: binary search over ranges in mbfl_cp936_pua_tbl;
				 * each entry is used as {first codepoint, last codepoint, first code} */
				unsigned int lo = 0;
				unsigned int hi = mbfl_cp936_pua_tbl_max;
				while (lo < hi) {
					int mid = (lo + hi) >> 1;
					if (cp < mbfl_cp936_pua_tbl[mid][0]) {
						hi = mid;
					} else if (cp > mbfl_cp936_pua_tbl[mid][1]) {
						lo = mid + 1;
					} else {
						code = cp - mbfl_cp936_pua_tbl[mid][0] + mbfl_cp936_pua_tbl[mid][2];
						break;
					}
				}
			}
		} else if (cp == 0xF8F5) {
			code = 0xFF;
		} else if (cp >= ucs_ci_cp936_table_min && cp < ucs_ci_cp936_table_max) {
			/* U+F900-U+FA2F CJK Compatibility Ideographs */
			code = ucs_ci_cp936_table[cp - ucs_ci_cp936_table_min];
		} else if (cp >= ucs_cf_cp936_table_min && cp < ucs_cf_cp936_table_max) {
			code = ucs_cf_cp936_table[cp - ucs_cf_cp936_table_min];
		} else if (cp >= ucs_sfv_cp936_table_min && cp < ucs_sfv_cp936_table_max) {
			/* U+FE50-U+FE6F Small Form Variants */
			code = ucs_sfv_cp936_table[cp - ucs_sfv_cp936_table_min];
		} else if (cp >= ucs_hff_cp936_table_min && cp < ucs_hff_cp936_table_max) {
			/* U+FF00-U+FFFF HW/FW Forms */
			if (cp == 0xFF04) {
				code = 0xA1E7;
			} else if (cp == 0xFF5E) {
				code = 0xA1AB;
			} else if (cp >= 0xFF01 && cp <= 0xFF5D) {
				code = cp - 0xFF01 + 0xA3A1;
			} else if (cp >= 0xFFE0 && cp <= 0xFFE5) {
				code = ucs_hff_s_cp936_table[cp - 0xFFE0];
			}
		}

		if (code > 0x80 && code != 0xFF) {
			/* Double-byte code */
			out = mb_convert_buf_add2(out, (code >> 8) & 0xFF, code & 0xFF);
		} else if (code) {
			/* Single-byte code (0x01-0x80 or 0xFF) */
			out = mb_convert_buf_add(out, code);
		} else if (cp == 0) {
			out = mb_convert_buf_add(out, 0);
		} else {
			MB_CONVERT_ERROR(buf, out, limit, cp, mb_wchar_to_cp936);
			/* Re-check there is room for the remaining codepoints after error handling */
			MB_CONVERT_BUF_ENSURE(buf, out, limit, len * 2);
		}
	}

	MB_CONVERT_BUF_STORE(buf, out, limit);
}

View file

@ -1,42 +0,0 @@
/*
* "streamable kanji code filter and converter"
* Copyright (c) 1998-2002 HappySize, Inc. All rights reserved.
*
* LICENSE NOTICES
*
* This file is part of "streamable kanji code filter and converter",
* which is distributed under the terms of GNU Lesser General Public
* License (version 2) as published by the Free Software Foundation.
*
* This software is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with "streamable kanji code filter and converter";
* if not, write to the Free Software Foundation, Inc., 59 Temple Place,
* Suite 330, Boston, MA 02111-1307 USA
*
* The author of this file:
*
*/
/*
* the source code included in this file was separated from mbfilter_cn.h
* by moriyoshi koizumi <moriyoshi@php.net> on 4 dec 2002.
*
*/
/* Include guard for the CP936 filter declarations */
#ifndef MBFL_MBFILTER_CP936_H
#define MBFL_MBFILTER_CP936_H
#include "mbfilter.h"
/* Encoding descriptor object for CP936 */
extern const mbfl_encoding mbfl_encoding_cp936;
/* Conversion filter vtables, one per direction (CP936 -> wchar, wchar -> CP936) */
extern const struct mbfl_convert_vtbl vtbl_cp936_wchar;
extern const struct mbfl_convert_vtbl vtbl_wchar_cp936;
/* Filter functions taking one input value `c` and the filter it belongs to */
int mbfl_filt_conv_cp936_wchar(int c, mbfl_convert_filter *filter);
int mbfl_filt_conv_wchar_cp936(int c, mbfl_convert_filter *filter);
#endif /* MBFL_MBFILTER_CP936_H */

View file

@ -1,326 +0,0 @@
/*
* "streamable kanji code filter and converter"
* Copyright (c) 1998-2002 HappySize, Inc. All rights reserved.
*
* LICENSE NOTICES
*
* This file is part of "streamable kanji code filter and converter",
* which is distributed under the terms of GNU Lesser General Public
* License (version 2) as published by the Free Software Foundation.
*
* This software is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with "streamable kanji code filter and converter";
* if not, write to the Free Software Foundation, Inc., 59 Temple Place,
* Suite 330, Boston, MA 02111-1307 USA
*
* The author of this file:
*
*/
/*
* The source code included in this file was separated from mbfilter_cn.c
* by Moriyoshi Koizumi <moriyoshi@php.net> on 4 Dec 2002.
*
*/
#include "mbfilter.h"
#include "mbfilter_euc_cn.h"
#include "unicode_table_cp936.h"
/* Forward declarations of file-local functions which are referenced by the
 * encoding descriptor and vtables below, before their definitions appear */
static int mbfl_filt_conv_euccn_wchar_flush(mbfl_convert_filter *filter);
static size_t mb_euccn_to_wchar(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state);
static void mb_wchar_to_euccn(uint32_t *in, size_t len, mb_convert_buf *buf, bool end);
/* 256-entry table (16 rows of 16) with one entry per possible byte value.
 * Entries 0xA1-0xFE hold 2; every other entry holds 1.
 * NOTE(review): presumably this is the byte length of a character which starts
 * with the given byte -- confirm against the users of mbfl_encoding's mblen table */
static const unsigned char mblen_table_euccn[] = { /* 0xA1-0xFE */
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1
};
/* NULL-terminated list of alternative names for EUC-CN */
static const char *mbfl_encoding_euc_cn_aliases[] = {"CN-GB", "EUC_CN", "eucCN", "x-euc-cn", "gb2312", NULL};
/* Encoding descriptor for EUC-CN. The initializer is positional; the meaning of
 * each slot is defined by the mbfl_encoding struct (declared elsewhere), so the
 * per-line notes below only describe the values which are visible here. */
const mbfl_encoding mbfl_encoding_euc_cn = {
mbfl_no_encoding_euc_cn,
"EUC-CN",
"CN-GB",
mbfl_encoding_euc_cn_aliases,
mblen_table_euccn,
0,
&vtbl_euccn_wchar, /* filter vtable: EUC-CN -> wchar */
&vtbl_wchar_euccn, /* filter vtable: wchar -> EUC-CN */
mb_euccn_to_wchar, /* buffer-based decoder */
mb_wchar_to_euccn, /* buffer-based encoder */
NULL
};
/* Conversion filter vtable for EUC-CN -> wchar. Uses the common constructor,
 * mbfl_filt_conv_euccn_wchar as the filter function, and a dedicated flush
 * function (which reports a truncated 2-byte character). */
const struct mbfl_convert_vtbl vtbl_euccn_wchar = {
mbfl_no_encoding_euc_cn,
mbfl_no_encoding_wchar,
mbfl_filt_conv_common_ctor,
NULL,
mbfl_filt_conv_euccn_wchar,
mbfl_filt_conv_euccn_wchar_flush,
NULL,
};
/* Conversion filter vtable for wchar -> EUC-CN. Uses the common constructor and
 * common flush function, with mbfl_filt_conv_wchar_euccn as the filter function. */
const struct mbfl_convert_vtbl vtbl_wchar_euccn = {
mbfl_no_encoding_wchar,
mbfl_no_encoding_euc_cn,
mbfl_filt_conv_common_ctor,
NULL,
mbfl_filt_conv_wchar_euccn,
mbfl_filt_conv_common_flush,
NULL,
};
/* Evaluate `statement`; if its value is negative, make the enclosing function return -1 */
#define CK(statement) do { if ((statement) < 0) return (-1); } while (0)
/* Streaming EUC-CN -> wchar filter function; receives one input byte `c` per call.
 *
 * filter->status is 0 when the start of a character is expected, and 1 when a
 * lead byte has been saved in filter->cache and its trail byte is expected.
 * Decoded codepoints (or MBFL_BAD_INPUT) are passed to filter->output_function.
 * Returns 0, or -1 if the output function returned a negative value. */
int mbfl_filt_conv_euccn_wchar(int c, mbfl_convert_filter *filter)
{
	int lead, w;

	switch (filter->status) {
	case 0:
		if ((c >= 0xA1 && c <= 0xA9) || (c >= 0xB0 && c <= 0xF7)) {
			/* DBCS lead byte; remember it until the trail byte arrives */
			filter->cache = c;
			filter->status = 1;
		} else if (c >= 0 && c < 0x80) {
			/* Latin */
			CK((*filter->output_function)(c, filter->data));
		} else {
			CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data));
		}
		break;

	case 1: /* DBCS trail byte */
		lead = filter->cache;
		filter->status = 0;

		if (c <= 0xA0 || c >= 0xFF) {
			CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data));
			break;
		}

		/* Index into the (shared) CP936 lookup table */
		w = (lead - 0x81)*192 + c - 0x40;
		ZEND_ASSERT(w < cp936_ucs_table_size);

		if (w == 0x1864) {
			w = 0x30FB;
		} else if (w == 0x186A) {
			w = 0x2015;
		} else if ((w >= 0x1921 && w <= 0x192A) || w == 0x1963 || (w >= 0x1C59 && w <= 0x1C7E) || (w >= 0x1DBB && w <= 0x1DC4)) {
			/* These table indices are treated as unmapped for EUC-CN */
			w = 0;
		} else {
			w = cp936_ucs_table[w];
		}

		if (w <= 0) {
			w = MBFL_BAD_INPUT;
		}
		CK((*filter->output_function)(w, filter->data));
		break;

	EMPTY_SWITCH_DEFAULT_CASE();
	}

	return 0;
}
/* Streaming wchar -> EUC-CN filter function; receives one codepoint `c` per call.
 *
 * The codepoint is looked up in the CP936 tables; any result whose bytes are not
 * both >= 0xA1 is discarded. Codepoints below 0x80 are then emitted unchanged as
 * one byte, other mapped codepoints as two bytes, and everything else is passed to
 * mbfl_filt_conv_illegal_output. Returns 0, or -1 if an output call returned a
 * negative value. */
int mbfl_filt_conv_wchar_euccn(int c, mbfl_convert_filter *filter)
{
	int code = 0;

	if (c >= ucs_a1_cp936_table_min && c < ucs_a1_cp936_table_max) {
		if (c != 0xB7 && c != 0x144 && c != 0x148 && c != 0x251 && c != 0x261) {
			code = ucs_a1_cp936_table[c - ucs_a1_cp936_table_min];
		}
	} else if (c >= ucs_a2_cp936_table_min && c < ucs_a2_cp936_table_max) {
		if (c == 0x2015) {
			code = 0xA1AA;
		} else if (c != 0x2014 && (c < 0x2170 || c > 0x2179)) {
			code = ucs_a2_cp936_table[c - ucs_a2_cp936_table_min];
		}
	} else if (c >= ucs_a3_cp936_table_min && c < ucs_a3_cp936_table_max) {
		code = (c == 0x30FB) ? 0xA1A4 : ucs_a3_cp936_table[c - ucs_a3_cp936_table_min];
	} else if (c >= ucs_i_cp936_table_min && c < ucs_i_cp936_table_max) {
		code = ucs_i_cp936_table[c - ucs_i_cp936_table_min];
	} else if (c >= ucs_hff_cp936_table_min && c < ucs_hff_cp936_table_max) {
		if (c == 0xFF04) {
			code = 0xA1E7;
		} else if (c == 0xFF5E) {
			code = 0xA1AB;
		} else if (c >= 0xFF01 && c <= 0xFF5D) {
			code = c - 0xFF01 + 0xA3A1;
		} else if (c >= 0xFFE0 && c <= 0xFFE5) {
			code = ucs_hff_s_cp936_table[c - 0xFFE0];
		}
	}

	/* Exclude CP936 extensions: both bytes must be 0xA1 or higher */
	if (((code >> 8) & 0xFF) < 0xA1 || (code & 0xFF) < 0xA1) {
		code = 0;
	}

	if (code <= 0) {
		/* No double-byte mapping; values below 0x80 pass through as-is */
		code = (c < 0x80) ? c : -1;
	}

	if (code < 0) {
		CK(mbfl_filt_conv_illegal_output(c, filter));
	} else if (code < 0x80) {
		/* Latin */
		CK((*filter->output_function)(code, filter->data));
	} else {
		CK((*filter->output_function)((code >> 8) & 0xFF, filter->data));
		CK((*filter->output_function)(code & 0xFF, filter->data));
	}

	return 0;
}
/* Flush handler for the EUC-CN -> wchar filter.
 * If a lead byte is still pending (status 1), the character was truncated:
 * reset the state and emit MBFL_BAD_INPUT. Then invoke the flush callback,
 * if one is set. Returns 0, or -1 if the output function returned a negative value. */
static int mbfl_filt_conv_euccn_wchar_flush(mbfl_convert_filter *filter)
{
	if (filter->status == 1) {
		filter->status = 0;
		if ((*filter->output_function)(MBFL_BAD_INPUT, filter->data) < 0) {
			return -1;
		}
	}

	if (filter->flush_function != NULL) {
		(*filter->flush_function)(filter->data);
	}

	return 0;
}
/* Decode EUC-CN bytes into Unicode codepoints.
 *
 * Reads from *in (which holds *in_len bytes) and writes at most `bufsize`
 * codepoints to `buf`. On return, *in and *in_len describe the input which has
 * not been consumed yet; the return value is the number of codepoints written.
 * Invalid or unmapped input produces MBFL_BAD_INPUT. `state` is not used. */
static size_t mb_euccn_to_wchar(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state)
{
	unsigned char *src = *in, *src_end = src + *in_len;
	uint32_t *dst = buf, *dst_end = buf + bufsize;

	while (src < src_end && dst < dst_end) {
		unsigned char lead = *src++;

		if (lead < 0x80) {
			*dst++ = lead;
			continue;
		}

		int valid_lead = (lead >= 0xA1 && lead <= 0xA9) || (lead >= 0xB0 && lead <= 0xF7);
		if (!valid_lead || src >= src_end) {
			/* Not a lead byte, or no trail byte is available */
			*dst++ = MBFL_BAD_INPUT;
			continue;
		}

		unsigned char trail = *src++;
		if (trail < 0xA1 || trail > 0xFE) {
			*dst++ = MBFL_BAD_INPUT;
			continue;
		}

		/* Index into the (shared) CP936 lookup table */
		unsigned int w = (lead - 0x81)*192 + trail - 0x40;
		ZEND_ASSERT(w < cp936_ucs_table_size);

		if (w == 0x1864) {
			w = 0x30FB;
		} else if (w == 0x186A) {
			w = 0x2015;
		} else if ((w >= 0x1921 && w <= 0x192A) || w == 0x1963 || (w >= 0x1C59 && w <= 0x1C7E) || (w >= 0x1DBB && w <= 0x1DC4)) {
			/* These table indices are treated as unmapped for EUC-CN */
			w = 0;
		} else {
			w = cp936_ucs_table[w];
		}

		if (!w) {
			w = MBFL_BAD_INPUT;
		}
		*dst++ = w;
	}

	*in_len = src_end - src;
	*in = src;
	return dst - buf;
}
/* Encode `len` Unicode codepoints from `in` as EUC-CN, appending the bytes to `buf`.
 *
 * The CP936 lookup tables are used, but any result whose bytes are not both
 * >= 0xA1 is discarded. Codepoints below 0x80 without a double-byte mapping are
 * emitted as one byte; other unmapped codepoints are reported via
 * MB_CONVERT_ERROR. `end` is not used by this function. */
static void mb_wchar_to_euccn(uint32_t *in, size_t len, mb_convert_buf *buf, bool end)
{
	unsigned char *out, *limit;
	MB_CONVERT_BUF_LOAD(buf, out, limit);
	/* Each codepoint yields at most 2 bytes */
	MB_CONVERT_BUF_ENSURE(buf, out, limit, len * 2);

	while (len--) {
		uint32_t cp = *in++;
		unsigned int code = 0;

		if (cp >= ucs_a1_cp936_table_min && cp < ucs_a1_cp936_table_max) {
			int excluded = (cp == 0xB7 || cp == 0x144 || cp == 0x148 || cp == 0x251 || cp == 0x261);
			if (!excluded) {
				code = ucs_a1_cp936_table[cp - ucs_a1_cp936_table_min];
			}
		} else if (cp >= ucs_a2_cp936_table_min && cp < ucs_a2_cp936_table_max) {
			if (cp == 0x2015) {
				code = 0xA1AA;
			} else if (cp != 0x2014 && (cp < 0x2170 || cp > 0x2179)) {
				code = ucs_a2_cp936_table[cp - ucs_a2_cp936_table_min];
			}
		} else if (cp >= ucs_a3_cp936_table_min && cp < ucs_a3_cp936_table_max) {
			if (cp == 0x30FB) {
				code = 0xA1A4;
			} else {
				code = ucs_a3_cp936_table[cp - ucs_a3_cp936_table_min];
			}
		} else if (cp >= ucs_i_cp936_table_min && cp < ucs_i_cp936_table_max) {
			code = ucs_i_cp936_table[cp - ucs_i_cp936_table_min];
		} else if (cp >= ucs_hff_cp936_table_min && cp < ucs_hff_cp936_table_max) {
			if (cp == 0xFF04) {
				code = 0xA1E7;
			} else if (cp == 0xFF5E) {
				code = 0xA1AB;
			} else if (cp >= 0xFF01 && cp <= 0xFF5D) {
				code = cp - 0xFF01 + 0xA3A1;
			} else if (cp >= 0xFFE0 && cp <= 0xFFE5) {
				code = ucs_hff_s_cp936_table[cp - 0xFFE0];
			}
		}

		/* Exclude CP936 extensions: both bytes must be 0xA1 or higher */
		if (((code >> 8) & 0xFF) < 0xA1 || (code & 0xFF) < 0xA1) {
			code = 0;
		}

		if (code >= 0x80) {
			out = mb_convert_buf_add2(out, (code >> 8) & 0xFF, code & 0xFF);
		} else if (code) {
			out = mb_convert_buf_add(out, code);
		} else if (cp < 0x80) {
			/* No double-byte mapping; emit the codepoint itself as one byte */
			out = mb_convert_buf_add(out, cp);
		} else {
			MB_CONVERT_ERROR(buf, out, limit, cp, mb_wchar_to_euccn);
			/* Re-check there is room for the remaining codepoints after error handling */
			MB_CONVERT_BUF_ENSURE(buf, out, limit, len * 2);
		}
	}

	MB_CONVERT_BUF_STORE(buf, out, limit);
}

View file

@ -1,42 +0,0 @@
/*
* "streamable kanji code filter and converter"
* Copyright (c) 1998-2002 HappySize, Inc. All rights reserved.
*
* LICENSE NOTICES
*
* This file is part of "streamable kanji code filter and converter",
* which is distributed under the terms of GNU Lesser General Public
* License (version 2) as published by the Free Software Foundation.
*
* This software is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with "streamable kanji code filter and converter";
* if not, write to the Free Software Foundation, Inc., 59 Temple Place,
* Suite 330, Boston, MA 02111-1307 USA
*
* The author of this file:
*
*/
/*
* The source code included in this file was separated from mbfilter_euc_cn.h
* by Moriyoshi Koizumi <moriyoshi@php.net> on 4 Dec 2002.
*
*/
/* Include guard for the EUC-CN filter declarations */
#ifndef MBFL_MBFILTER_EUC_CN_H
#define MBFL_MBFILTER_EUC_CN_H
#include "mbfilter.h"
/* Encoding descriptor object for EUC-CN */
extern const mbfl_encoding mbfl_encoding_euc_cn;
/* Conversion filter vtables, one per direction (EUC-CN -> wchar, wchar -> EUC-CN) */
extern const struct mbfl_convert_vtbl vtbl_euccn_wchar;
extern const struct mbfl_convert_vtbl vtbl_wchar_euccn;
/* Filter functions taking one input value `c` and the filter it belongs to */
int mbfl_filt_conv_euccn_wchar(int c, mbfl_convert_filter *filter);
int mbfl_filt_conv_wchar_euccn(int c, mbfl_convert_filter *filter);
#endif /* MBFL_MBFILTER_EUC_CN_H */

View file

@ -1,373 +0,0 @@
/*
* "streamable kanji code filter and converter"
* Copyright (c) 1998-2002 HappySize, Inc. All rights reserved.
*
* LICENSE NOTICES
*
* This file is part of "streamable kanji code filter and converter",
* which is distributed under the terms of GNU Lesser General Public
* License (version 2) as published by the Free Software Foundation.
*
* This software is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with "streamable kanji code filter and converter";
* if not, write to the Free Software Foundation, Inc., 59 Temple Place,
* Suite 330, Boston, MA 02111-1307 USA
*
* The author of this file:
*
*/
/*
* The source code included in this file was separated from mbfilter_ja.c
* by moriyoshi koizumi <moriyoshi@php.net> on 4 dec 2002.
*
*/
#include "mbfilter.h"
#include "mbfilter_euc_jp.h"
#include "unicode_table_cp932_ext.h"
#include "unicode_table_jis.h"
/* Forward declarations of file-local functions which are referenced by the
 * encoding descriptor and vtables below, before their definitions appear */
static int mbfl_filt_conv_eucjp_wchar_flush(mbfl_convert_filter *filter);
static size_t mb_eucjp_to_wchar(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state);
static void mb_wchar_to_eucjp(uint32_t *in, size_t len, mb_convert_buf *buf, bool end);
/* 256-entry table (16 rows of 16) with one entry per possible byte value.
 * Entry 0x8E holds 2, entry 0x8F holds 3, entries 0xA1-0xFE hold 2, and every
 * other entry holds 1.
 * NOTE(review): presumably this is the byte length of a character which starts
 * with the given byte -- confirm against the users of mbfl_encoding's mblen table.
 * Unlike the other mblen tables visible nearby, this one is not `static`. */
const unsigned char mblen_table_eucjp[] = { /* 0xA1-0xFE */
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 3,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1
};
/* NULL-terminated list of alternative names for EUC-JP */
static const char *mbfl_encoding_euc_jp_aliases[] = {"EUC", "EUC_JP", "eucJP", "x-euc-jp", NULL};
/* Encoding descriptor for EUC-JP. The initializer is positional; the meaning of
 * each slot is defined by the mbfl_encoding struct (declared elsewhere), so the
 * per-line notes below only describe the values which are visible here. */
const mbfl_encoding mbfl_encoding_euc_jp = {
mbfl_no_encoding_euc_jp,
"EUC-JP",
"EUC-JP",
mbfl_encoding_euc_jp_aliases,
mblen_table_eucjp,
0,
&vtbl_eucjp_wchar, /* filter vtable: EUC-JP -> wchar */
&vtbl_wchar_eucjp, /* filter vtable: wchar -> EUC-JP */
mb_eucjp_to_wchar, /* buffer-based decoder */
mb_wchar_to_eucjp, /* buffer-based encoder */
NULL
};
/* Conversion filter vtable for EUC-JP -> wchar. Uses the common constructor,
 * mbfl_filt_conv_eucjp_wchar as the filter function, and a dedicated flush
 * function (which reports a truncated multi-byte character). */
const struct mbfl_convert_vtbl vtbl_eucjp_wchar = {
mbfl_no_encoding_euc_jp,
mbfl_no_encoding_wchar,
mbfl_filt_conv_common_ctor,
NULL,
mbfl_filt_conv_eucjp_wchar,
mbfl_filt_conv_eucjp_wchar_flush,
NULL,
};
/* Conversion filter vtable for wchar -> EUC-JP. Uses the common constructor and
 * common flush function, with mbfl_filt_conv_wchar_eucjp as the filter function. */
const struct mbfl_convert_vtbl vtbl_wchar_eucjp = {
mbfl_no_encoding_wchar,
mbfl_no_encoding_euc_jp,
mbfl_filt_conv_common_ctor,
NULL,
mbfl_filt_conv_wchar_eucjp,
mbfl_filt_conv_common_flush,
NULL,
};
/* Evaluate `statement`; if its value is negative, make the enclosing function return -1 */
#define CK(statement) do { if ((statement) < 0) return (-1); } while (0)
/*
* EUC-JP => wchar
*/
/* Streaming EUC-JP -> wchar filter function; receives one input byte `c` per call.
 *
 * filter->status values:
 *   0 - start of a character expected
 *   1 - JIS X 0208 lead byte saved in filter->cache, trail byte expected
 *   2 - 0x8E seen, kana byte expected
 *   3 - 0x8F seen, JIS X 0212 first byte expected
 *   4 - JIS X 0212 first byte saved in filter->cache, second byte expected
 * Decoded codepoints (or MBFL_BAD_INPUT) are passed to filter->output_function.
 * Returns 0, or -1 if the output function returned a negative value. */
int mbfl_filt_conv_eucjp_wchar(int c, mbfl_convert_filter *filter)
{
	int first, idx, w = 0;

	switch (filter->status) {
	case 0:
		if (c == 0x8e) {
			/* Kana first char */
			filter->status = 2;
		} else if (c == 0x8f) {
			/* X 0212 first char */
			filter->status = 3;
		} else if (c > 0xa0 && c < 0xff) {
			/* X 0208 first char */
			filter->cache = c;
			filter->status = 1;
		} else if (c >= 0 && c < 0x80) {
			/* Latin */
			CK((*filter->output_function)(c, filter->data));
		} else {
			CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data));
		}
		break;

	case 1: /* Got first half */
		first = filter->cache;
		filter->status = 0;
		w = MBFL_BAD_INPUT;
		if (c > 0xa0 && c < 0xff) {
			idx = (first - 0xa1)*94 + c - 0xa1;
			/* A zero table entry means 'unmapped'; keep MBFL_BAD_INPUT then */
			if (idx >= 0 && idx < jisx0208_ucs_table_size && jisx0208_ucs_table[idx]) {
				w = jisx0208_ucs_table[idx];
			}
		}
		CK((*filter->output_function)(w, filter->data));
		break;

	case 2: /* Got 0x8e */
		filter->status = 0;
		w = (c > 0xa0 && c < 0xe0) ? 0xfec0 + c : MBFL_BAD_INPUT;
		CK((*filter->output_function)(w, filter->data));
		break;

	case 3: /* Got 0x8f, JIS X 0212 first byte; it is validated once the second byte arrives */
		filter->cache = c;
		filter->status = 4;
		break;

	case 4: /* Got 0x8f, JIS X 0212 second byte */
		first = filter->cache;
		filter->status = 0;
		w = MBFL_BAD_INPUT;
		if (c > 0xA0 && c < 0xFF && first > 0xA0 && first < 0xFF) {
			idx = (first - 0xa1)*94 + c - 0xa1;
			if (idx >= 0 && idx < jisx0212_ucs_table_size && jisx0212_ucs_table[idx]) {
				w = jisx0212_ucs_table[idx];
			}
		}
		CK((*filter->output_function)(w, filter->data));
		break;

	EMPTY_SWITCH_DEFAULT_CASE();
	}

	return 0;
}
/* Flush handler for the EUC-JP -> wchar filter.
 * If the filter is in the middle of a character (non-zero status), emit
 * MBFL_BAD_INPUT and reset the state; the output function's return value is
 * not checked. Then invoke the flush callback, if one is set. Always returns 0. */
static int mbfl_filt_conv_eucjp_wchar_flush(mbfl_convert_filter *filter)
{
	if (filter->status != 0) {
		/* Input ended part-way through a multi-byte character */
		(*filter->output_function)(MBFL_BAD_INPUT, filter->data);
		filter->status = 0;
	}

	if (filter->flush_function != NULL) {
		(*filter->flush_function)(filter->data);
	}

	return 0;
}
/*
* wchar => EUC-JP
*/
/* Streaming wchar -> EUC-JP filter function; receives one codepoint `c` per call.
 *
 * The codepoint is translated to a JIS code via the ucs_*_jis tables, with a few
 * fallbacks for codepoints which the tables leave unmapped. The code is then
 * emitted as 1 byte (below 0x80), 0x8E + 1 byte (kana, below 0x100), 2 bytes
 * (X 0208, below 0x8080) or 0x8F + 2 bytes (X 0212). Unmapped codepoints are
 * passed to mbfl_filt_conv_illegal_output. Returns 0, or -1 if an output call
 * returned a negative value. */
int mbfl_filt_conv_wchar_eucjp(int c, mbfl_convert_filter *filter)
{
	int code = 0;

	if (c == 0xAF) {
		/* U+00AF is MACRON; use JIS X 0212 overline */
		code = 0xA2B4;
	} else if (c >= ucs_a1_jis_table_min && c < ucs_a1_jis_table_max) {
		code = ucs_a1_jis_table[c - ucs_a1_jis_table_min];
	} else if (c >= ucs_a2_jis_table_min && c < ucs_a2_jis_table_max) {
		code = ucs_a2_jis_table[c - ucs_a2_jis_table_min];
	} else if (c >= ucs_i_jis_table_min && c < ucs_i_jis_table_max) {
		code = ucs_i_jis_table[c - ucs_i_jis_table_min];
	} else if (c >= ucs_r_jis_table_min && c < ucs_r_jis_table_max) {
		code = ucs_r_jis_table[c - ucs_r_jis_table_min];
	}

	if (code <= 0) {
		switch (c) {
		case 0xff3c: /* FULLWIDTH REVERSE SOLIDUS */
			code = 0x2140;
			break;
		case 0x2225: /* PARALLEL TO */
			code = 0x2142;
			break;
		case 0xff0d: /* FULLWIDTH HYPHEN-MINUS */
			code = 0x215d;
			break;
		case 0xffe0: /* FULLWIDTH CENT SIGN */
			code = 0x2171;
			break;
		case 0xffe1: /* FULLWIDTH POUND SIGN */
			code = 0x2172;
			break;
		case 0xffe2: /* FULLWIDTH NOT SIGN */
			code = 0x224c;
			break;
		case 0:
			code = 0;
			break;
		default:
			code = -1;
			break;
		}
	}

	if (code < 0) {
		CK(mbfl_filt_conv_illegal_output(c, filter));
	} else if (code < 0x80) {
		/* Latin */
		CK((*filter->output_function)(code, filter->data));
	} else if (code < 0x100) {
		/* Kana */
		CK((*filter->output_function)(0x8e, filter->data));
		CK((*filter->output_function)(code, filter->data));
	} else if (code < 0x8080) {
		/* X 0208 */
		CK((*filter->output_function)(((code >> 8) & 0xff) | 0x80, filter->data));
		CK((*filter->output_function)((code & 0xff) | 0x80, filter->data));
	} else {
		/* X 0212 */
		CK((*filter->output_function)(0x8f, filter->data));
		CK((*filter->output_function)(((code >> 8) & 0xff) | 0x80, filter->data));
		CK((*filter->output_function)((code & 0xff) | 0x80, filter->data));
	}

	return 0;
}
/* Decode EUC-JP bytes into Unicode codepoints.
 *
 * Reads from *in (which holds *in_len bytes) and writes at most `bufsize`
 * codepoints to `buf`. On return, *in and *in_len describe the input which has
 * not been consumed yet; the return value is the number of codepoints written.
 * Invalid, truncated or unmapped input produces MBFL_BAD_INPUT.
 * `state` is not used by this function. */
static size_t mb_eucjp_to_wchar(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state)
{
	unsigned char *src = *in, *src_end = src + *in_len;
	uint32_t *dst = buf, *dst_end = buf + bufsize;

	while (src < src_end && dst < dst_end) {
		unsigned char c = *src++;

		if (c < 0x80) {
			*dst++ = c;
			continue;
		}

		/* Assume failure; overwritten below when a character decodes successfully */
		uint32_t w = MBFL_BAD_INPUT;

		if (c >= 0xA1 && c <= 0xFE) {
			/* JISX 0208 */
			if (src < src_end) {
				unsigned char c2 = *src++;
				if (c2 >= 0xA1 && c2 <= 0xFE) {
					unsigned int idx = (c - 0xA1)*94 + c2 - 0xA1;
					if (idx < jisx0208_ucs_table_size) {
						uint32_t mapped = jisx0208_ucs_table[idx];
						if (mapped) {
							w = mapped;
						}
					}
				}
			}
		} else if (c == 0x8E) {
			/* Kana */
			if (src < src_end) {
				unsigned char c2 = *src++;
				if (c2 >= 0xA1 && c2 <= 0xDF) {
					w = 0xFEC0 + c2;
				}
			}
		} else if (c == 0x8F) {
			/* JISX 0212 */
			if ((src_end - src) >= 2) {
				unsigned char c2 = *src++;
				unsigned char c3 = *src++;
				if (c3 >= 0xA1 && c3 <= 0xFE && c2 >= 0xA1 && c2 <= 0xFE) {
					unsigned int idx = (c2 - 0xA1)*94 + c3 - 0xA1;
					if (idx < jisx0212_ucs_table_size) {
						uint32_t mapped = jisx0212_ucs_table[idx];
						if (mapped) {
							w = mapped;
						}
					}
				}
			} else {
				/* Truncated 3-byte character; jump to end of string */
				src = src_end;
			}
		}

		*dst++ = w;
	}

	*in_len = src_end - src;
	*in = src;
	return dst - buf;
}
/* Encode `len` Unicode codepoints from `in` as EUC-JP, appending the bytes to `buf`.
 *
 * Each codepoint is translated to a JIS code via the ucs_*_jis tables, with a few
 * fallbacks for codepoints which the tables leave unmapped. Codepoint 0 is
 * emitted as a zero byte; other unmapped codepoints are reported via
 * MB_CONVERT_ERROR. `end` is not used by this function. */
static void mb_wchar_to_eucjp(uint32_t *in, size_t len, mb_convert_buf *buf, bool end)
{
	unsigned char *out, *limit;
	MB_CONVERT_BUF_LOAD(buf, out, limit);
	/* Most characters need at most 2 bytes; 3-byte ones reserve extra space below */
	MB_CONVERT_BUF_ENSURE(buf, out, limit, len * 2);

	while (len--) {
		uint32_t cp = *in++;
		unsigned int code = 0;

		if (cp == 0xAF) {
			/* U+00AF is MACRON; use JIS X 0212 overline */
			code = 0xA2B4;
		} else if (cp >= ucs_a1_jis_table_min && cp < ucs_a1_jis_table_max) {
			code = ucs_a1_jis_table[cp - ucs_a1_jis_table_min];
		} else if (cp >= ucs_a2_jis_table_min && cp < ucs_a2_jis_table_max) {
			code = ucs_a2_jis_table[cp - ucs_a2_jis_table_min];
		} else if (cp >= ucs_i_jis_table_min && cp < ucs_i_jis_table_max) {
			code = ucs_i_jis_table[cp - ucs_i_jis_table_min];
		} else if (cp >= ucs_r_jis_table_min && cp < ucs_r_jis_table_max) {
			code = ucs_r_jis_table[cp - ucs_r_jis_table_min];
		}

		if (!code) {
			switch (cp) {
			case 0xFF3C: /* FULLWIDTH REVERSE SOLIDUS */
				code = 0x2140;
				break;
			case 0x2225: /* PARALLEL TO */
				code = 0x2142;
				break;
			case 0xFF0D: /* FULLWIDTH HYPHEN-MINUS */
				code = 0x215D;
				break;
			case 0xFFE0: /* FULLWIDTH CENT SIGN */
				code = 0x2171;
				break;
			case 0xFFE1: /* FULLWIDTH POUND SIGN */
				code = 0x2172;
				break;
			case 0xFFE2: /* FULLWIDTH NOT SIGN */
				code = 0x224C;
				break;
			case 0:
				out = mb_convert_buf_add(out, 0);
				continue;
			default: {
				MB_CONVERT_ERROR(buf, out, limit, cp, mb_wchar_to_eucjp);
				/* Re-check there is room for the remaining codepoints after error handling */
				MB_CONVERT_BUF_ENSURE(buf, out, limit, len * 2);
				continue;
			}
			}
		}

		if (code >= 0x8080) {
			/* JIS X 0212: 0x8F prefix plus two bytes, so reserve 3 bytes for this one */
			MB_CONVERT_BUF_ENSURE(buf, out, limit, (len * 2) + 3);
			out = mb_convert_buf_add3(out, 0x8F, ((code >> 8) & 0xFF) | 0x80, (code & 0xFF) | 0x80);
		} else if (code >= 0x100) {
			/* JIS X 0208 */
			out = mb_convert_buf_add2(out, ((code >> 8) & 0xFF) | 0x80, (code & 0xFF) | 0x80);
		} else if (code >= 0x80) {
			/* Kana: 0x8E prefix plus one byte */
			out = mb_convert_buf_add2(out, 0x8E, code);
		} else {
			out = mb_convert_buf_add(out, code);
		}
	}

	MB_CONVERT_BUF_STORE(buf, out, limit);
}

View file

@ -1,42 +0,0 @@
/*
* "streamable kanji code filter and converter"
* Copyright (c) 1998-2002 HappySize, Inc. All rights reserved.
*
* LICENSE NOTICES
*
* This file is part of "streamable kanji code filter and converter",
* which is distributed under the terms of GNU Lesser General Public
* License (version 2) as published by the Free Software Foundation.
*
* This software is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with "streamable kanji code filter and converter";
* if not, write to the Free Software Foundation, Inc., 59 Temple Place,
* Suite 330, Boston, MA 02111-1307 USA
*
* The author of this file:
*
*/
/*
* The source code included in this file was separated from mbfilter_ja.h
* by moriyoshi koizumi <moriyoshi@php.net> on 4 dec 2002.
*
*/
/* Include guard for the EUC-JP filter declarations */
#ifndef MBFL_MBFILTER_EUC_JP_H
#define MBFL_MBFILTER_EUC_JP_H
#include "mbfilter.h"
/* Encoding descriptor object for EUC-JP */
extern const mbfl_encoding mbfl_encoding_euc_jp;
/* Conversion filter vtables, one per direction (EUC-JP -> wchar, wchar -> EUC-JP) */
extern const struct mbfl_convert_vtbl vtbl_eucjp_wchar;
extern const struct mbfl_convert_vtbl vtbl_wchar_eucjp;
/* Filter functions taking one input value `c` and the filter it belongs to */
int mbfl_filt_conv_eucjp_wchar(int c, mbfl_convert_filter *filter);
int mbfl_filt_conv_wchar_eucjp(int c, mbfl_convert_filter *filter);
#endif /* MBFL_MBFILTER_EUC_JP_H */

View file

@ -1,39 +0,0 @@
/*
* "streamable kanji code filter and converter"
* Copyright (c) 1998-2002 HappySize, Inc. All rights reserved.
*
* LICENSE NOTICES
*
* This file is part of "streamable kanji code filter and converter",
* which is distributed under the terms of GNU Lesser General Public
* License (version 2) as published by the Free Software Foundation.
*
* This software is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with "streamable kanji code filter and converter";
* if not, write to the Free Software Foundation, Inc., 59 Temple Place,
* Suite 330, Boston, MA 02111-1307 USA
*
* The author of this file:
*
*/
/*
* The source code included in this file was separated from mbfilter_ja.h
* by rui hirokawa <hirokawa@php.net> on 15 aug 2011.
*
*/
/* Include guard for the EUC-JP-2004 declarations */
#ifndef MBFL_MBFILTER_EUC_JP_2004_H
#define MBFL_MBFILTER_EUC_JP_2004_H
#include "mbfilter.h"
/* Encoding descriptor object for EUC-JP-2004 */
extern const mbfl_encoding mbfl_encoding_eucjp2004;
/* Conversion filter vtables, one per direction; no filter functions are declared in this header */
extern const struct mbfl_convert_vtbl vtbl_eucjp2004_wchar;
extern const struct mbfl_convert_vtbl vtbl_wchar_eucjp2004;
#endif /* MBFL_MBFILTER_EUC_JP_2004_H */

View file

@ -1,536 +0,0 @@
/*
* "streamable kanji code filter and converter"
* Copyright (c) 1998-2002 HappySize, Inc. All rights reserved.
*
* LICENSE NOTICES
*
* This file is part of "streamable kanji code filter and converter",
* which is distributed under the terms of GNU Lesser General Public
* License (version 2) as published by the Free Software Foundation.
*
* This software is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with "streamable kanji code filter and converter";
* if not, write to the Free Software Foundation, Inc., 59 Temple Place,
* Suite 330, Boston, MA 02111-1307 USA
*
* The author of this file:
*
*/
/*
* The source code included in this file was separated from mbfilter_ja.c
* by moriyoshi koizumi <moriyoshi@php.net> on 4 dec 2002.
*
*/
#include "mbfilter.h"
#include "mbfilter_euc_jp_win.h"
#include "unicode_table_cp932_ext.h"
#include "unicode_table_jis.h"
#include "cp932_table.h"
/* Forward declarations of file-local functions which are referenced by the
 * encoding descriptor and vtables below, before their definitions appear */
static int mbfl_filt_conv_eucjpwin_wchar_flush(mbfl_convert_filter *filter);
static size_t mb_eucjpwin_to_wchar(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state);
static void mb_wchar_to_eucjpwin(uint32_t *in, size_t len, mb_convert_buf *buf, bool end);
/* 256-entry table (16 rows of 16) with one entry per possible byte value.
 * Entry 0x8E holds 2, entry 0x8F holds 3, entries 0xA1-0xFE hold 2, and every
 * other entry holds 1.
 * NOTE(review): presumably this is the byte length of a character which starts
 * with the given byte -- confirm against the users of mbfl_encoding's mblen table */
static const unsigned char mblen_table_eucjp[] = { /* 0xA1-0xFE */
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 3,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1
};
/* NULL-terminated list of alternative names for eucJP-win */
static const char *mbfl_encoding_eucjp_win_aliases[] = {"eucJP-open", "eucJP-ms", NULL};
/* Encoding descriptor for eucJP-win. The initializer is positional; the meaning of
 * each slot is defined by the mbfl_encoding struct (declared elsewhere), so the
 * per-line notes below only describe the values which are visible here. */
const mbfl_encoding mbfl_encoding_eucjp_win = {
mbfl_no_encoding_eucjp_win,
"eucJP-win",
"EUC-JP",
mbfl_encoding_eucjp_win_aliases,
mblen_table_eucjp,
0,
&vtbl_eucjpwin_wchar, /* filter vtable: eucJP-win -> wchar */
&vtbl_wchar_eucjpwin, /* filter vtable: wchar -> eucJP-win */
mb_eucjpwin_to_wchar, /* buffer-based decoder */
mb_wchar_to_eucjpwin, /* buffer-based encoder */
NULL
};
/* Conversion filter vtable for eucJP-win -> wchar. Uses the common constructor,
 * mbfl_filt_conv_eucjpwin_wchar as the filter function, and a dedicated flush
 * function (which reports a truncated multi-byte character). */
const struct mbfl_convert_vtbl vtbl_eucjpwin_wchar = {
mbfl_no_encoding_eucjp_win,
mbfl_no_encoding_wchar,
mbfl_filt_conv_common_ctor,
NULL,
mbfl_filt_conv_eucjpwin_wchar,
mbfl_filt_conv_eucjpwin_wchar_flush,
NULL,
};
/* Conversion filter vtable for wchar -> eucJP-win. Uses the common constructor and
 * common flush function, with mbfl_filt_conv_wchar_eucjpwin as the filter function. */
const struct mbfl_convert_vtbl vtbl_wchar_eucjpwin = {
mbfl_no_encoding_wchar,
mbfl_no_encoding_eucjp_win,
mbfl_filt_conv_common_ctor,
NULL,
mbfl_filt_conv_wchar_eucjpwin,
mbfl_filt_conv_common_flush,
NULL,
};
/* Evaluate `statement`; if its value is negative, make the enclosing function return -1 */
#define CK(statement) do { if ((statement) < 0) return (-1); } while (0)
/* Streaming eucJP-win -> wchar filter function; receives one input byte `c` per call.
 *
 * filter->status values:
 *   0 - start of a character expected
 *   1 - 2-byte lead byte saved in filter->cache, trail byte expected
 *   2 - 0x8E seen, X 0201 kana byte expected
 *   3 - 0x8F seen, X 0212 first byte expected
 *   4 - X 0212 first byte saved in filter->cache, second byte expected
 * Decoded codepoints (or MBFL_BAD_INPUT) are passed to filter->output_function.
 * Returns 0, or -1 if the output function returned a negative value. */
int mbfl_filt_conv_eucjpwin_wchar(int c, mbfl_convert_filter *filter)
{
	int first, idx, w, n;

	switch (filter->status) {
	case 0:
		if (c == 0x8e) {
			/* Kana first char */
			filter->status = 2;
		} else if (c == 0x8f) {
			/* X 0212 first char */
			filter->status = 3;
		} else if (c >= 0xa1 && c <= 0xfe) {
			/* CP932 first char */
			filter->cache = c;
			filter->status = 1;
		} else if (c >= 0 && c < 0x80) {
			/* Latin */
			CK((*filter->output_function)(c, filter->data));
		} else {
			CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data));
		}
		break;

	case 1: /* Got first half */
		first = filter->cache;
		filter->status = 0;

		if (c <= 0xa0 || c >= 0xff) {
			CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data));
			break;
		}

		idx = (first - 0xa1)*94 + c - 0xa1;

		/* A handful of cells are mapped directly, ahead of any table lookup */
		switch (idx) {
		case 31:
			w = 0xff3c; /* FULLWIDTH REVERSE SOLIDUS */
			break;
		case 32:
			w = 0xff5e; /* FULLWIDTH TILDE */
			break;
		case 33:
			w = 0x2225; /* PARALLEL TO */
			break;
		case 60:
			w = 0xff0d; /* FULLWIDTH HYPHEN-MINUS */
			break;
		case 80:
			w = 0xffe0; /* FULLWIDTH CENT SIGN */
			break;
		case 81:
			w = 0xffe1; /* FULLWIDTH POUND SIGN */
			break;
		case 137:
			w = 0xffe2; /* FULLWIDTH NOT SIGN */
			break;
		default:
			w = 0;
			break;
		}

		if (w == 0) {
			if (idx >= cp932ext1_ucs_table_min && idx < cp932ext1_ucs_table_max) {
				/* Vendor ext1 (13ku) */
				w = cp932ext1_ucs_table[idx - cp932ext1_ucs_table_min];
			} else if (idx >= 0 && idx < jisx0208_ucs_table_size) {
				/* X 0208 */
				w = jisx0208_ucs_table[idx];
			} else if (idx >= (84 * 94)) {
				/* User (85ku - 94ku) */
				w = idx - (84 * 94) + 0xe000;
			}
		}

		if (w <= 0) {
			w = MBFL_BAD_INPUT;
		}
		CK((*filter->output_function)(w, filter->data));
		break;

	case 2: /* Got 0x8e, X0201 kana */
		filter->status = 0;
		w = (c > 0xa0 && c < 0xe0) ? 0xfec0 + c : MBFL_BAD_INPUT;
		CK((*filter->output_function)(w, filter->data));
		break;

	case 3: /* Got 0x8f, X 0212 first char; it is validated once the second byte arrives */
		filter->cache = c;
		filter->status = 4;
		break;

	case 4: /* Got 0x8f, X 0212 second char */
		first = filter->cache;
		filter->status = 0;

		if (!(first > 0xa0 && first < 0xff && c > 0xa0 && c < 0xff)) {
			CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data));
			break;
		}

		w = 0;
		idx = (first - 0xa1)*94 + c - 0xa1;

		if (idx >= 0 && idx < jisx0212_ucs_table_size) {
			w = jisx0212_ucs_table[idx];
			if (w == 0x007e) {
				w = 0xff5e; /* FULLWIDTH TILDE */
			}
		} else if (idx >= (82*94) && idx < (84*94)) {
			/* Vendor ext3 (83ku - 84ku) <-> CP932 (115ku - 120ku):
			 * linear search for the 2-byte code in cp932ext3_eucjp_table */
			idx = (first << 8) | c;
			for (n = 0; n < cp932ext3_eucjp_table_size; n++) {
				if (idx == cp932ext3_eucjp_table[n]) {
					if (n < (cp932ext3_ucs_table_max - cp932ext3_ucs_table_min)) {
						w = cp932ext3_ucs_table[n];
					}
					break;
				}
			}
		} else if (idx >= (84*94)) {
			/* User (85ku - 94ku) */
			w = idx - (84*94) + (0xe000 + (94*10));
		}

		if (w == 0x00A6) {
			w = 0xFFE4; /* FULLWIDTH BROKEN BAR */
		}
		if (w <= 0) {
			w = MBFL_BAD_INPUT;
		}
		CK((*filter->output_function)(w, filter->data));
		break;

	EMPTY_SWITCH_DEFAULT_CASE();
	}

	return 0;
}
/*
 * Flush handler for the EUC-JP-win -> wchar filter.
 *
 * A non-zero filter->status at flush time means a multi-byte sequence was
 * started but never completed, so a single MBFL_BAD_INPUT marker is emitted
 * for it. Afterwards the downstream flush function, if one is set, is called.
 *
 * Returns 0 on the normal path. The output call is wrapped in CK, like the
 * output calls in the conversion functions of this filter, so that a failure
 * reported by the output function is no longer silently discarded.
 */
static int mbfl_filt_conv_eucjpwin_wchar_flush(mbfl_convert_filter *filter)
{
	if (filter->status) {
		/* Reset before emitting so the filter is back in its initial state
		 * even if the output call fails */
		filter->status = 0;
		CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data));
	}

	if (filter->flush_function) {
		(*filter->flush_function)(filter->data);
	}

	return 0;
}
/*
 * Legacy per-codepoint filter: encodes one Unicode codepoint `c` as
 * EUC-JP-win and passes the resulting bytes to filter->output_function.
 *
 * The codepoint is first mapped to an intermediate code `s1`; the final
 * section serializes it according to its magnitude:
 *   s1 < 0x80     one byte
 *   s1 < 0x100    0x8E followed by s1
 *   s1 < 0x8080   two bytes, each OR-ed with 0x80
 *   otherwise     0x8F followed by two bytes, each OR-ed with 0x80
 * A codepoint for which no mapping is found is handed to
 * mbfl_filt_conv_illegal_output().
 *
 * Returns 0 on the normal path; every output call is wrapped in CK.
 */
int mbfl_filt_conv_wchar_eucjpwin(int c, mbfl_convert_filter *filter)
{
int c1, c2, s1 = 0;
/* Step 1: two fixed mappings, then the range-indexed tables, then the two
 * blocks of 10*94 codepoints starting at U+E000 which are computed
 * arithmetically */
if (c == 0xAF) { /* U+00AF is MACRON */
s1 = 0xA2B4; /* Use JIS X 0212 overline */
} else if (c == 0x203E) {
s1 = 0x7E;
} else if (c >= ucs_a1_jis_table_min && c < ucs_a1_jis_table_max) {
s1 = ucs_a1_jis_table[c - ucs_a1_jis_table_min];
} else if (c >= ucs_a2_jis_table_min && c < ucs_a2_jis_table_max) {
s1 = ucs_a2_jis_table[c - ucs_a2_jis_table_min];
} else if (c >= ucs_i_jis_table_min && c < ucs_i_jis_table_max) {
s1 = ucs_i_jis_table[c - ucs_i_jis_table_min];
} else if (c >= ucs_r_jis_table_min && c < ucs_r_jis_table_max) {
s1 = ucs_r_jis_table[c - ucs_r_jis_table_min];
} else if (c >= 0xe000 && c < (0xe000 + 10*94)) { /* user (X0208 85ku - 94ku) */
s1 = c - 0xe000;
c1 = s1/94 + 0x75;
c2 = s1%94 + 0x21;
s1 = (c1 << 8) | c2;
} else if (c >= (0xe000 + 10*94) && c < (0xe000 + 20*94)) { /* user (X0212 85ku - 94ku) */
s1 = c - (0xe000 + 10*94);
c1 = s1/94 + 0xf5;
c2 = s1%94 + 0xa1;
s1 = (c1 << 8) | c2;
}
/* A result of 0xA2F1 from step 1 is replaced by 0x2D62 */
if (s1 == 0xa2f1) {
s1 = 0x2d62; /* NUMERO SIGN */
}
/* Step 2: nothing found so far; try the hard-coded mappings, then linear
 * searches of the two CP932 extension tables */
if (s1 <= 0) {
if (c == 0xa5) { /* YEN SIGN */
s1 = 0x5C;
} else if (c == 0x2014) {
s1 = 0x213D;
} else if (c == 0xff3c) { /* FULLWIDTH REVERSE SOLIDUS */
s1 = 0x2140;
} else if (c == 0x2225) { /* PARALLEL TO */
s1 = 0x2142;
} else if (c == 0xff0d) { /* FULLWIDTH HYPHEN-MINUS */
s1 = 0x215d;
} else if (c == 0xffe0) { /* FULLWIDTH CENT SIGN */
s1 = 0x2171;
} else if (c == 0xffe1) { /* FULLWIDTH POUND SIGN */
s1 = 0x2172;
} else if (c == 0xffe2) { /* FULLWIDTH NOT SIGN */
s1 = 0x224c;
} else {
s1 = -1;
c1 = 0;
c2 = cp932ext1_ucs_table_max - cp932ext1_ucs_table_min;
while (c1 < c2) { /* CP932 vendor ext1 (13ku) */
const int oh = cp932ext1_ucs_table_min / 94;
if (c == cp932ext1_ucs_table[c1]) {
/* Turn the table index back into a row/cell pair, each offset by 0x21 */
s1 = ((c1 / 94 + oh + 0x21) << 8) + (c1 % 94 + 0x21);
break;
}
c1++;
}
if (s1 < 0) {
c1 = 0;
c2 = cp932ext3_ucs_table_max - cp932ext3_ucs_table_min;
while (c1 < c2) { /* CP932 vendor ext3 (115ku - 119ku) */
if (c == cp932ext3_ucs_table[c1]) {
/* The matching index is only used if it also lies within the EUC-JP table */
if (c1 < cp932ext3_eucjp_table_size) {
s1 = cp932ext3_eucjp_table[c1];
}
break;
}
c1++;
}
}
}
/* U+0000 maps to a zero byte; any other codepoint still without a positive
 * code is marked as unconvertible */
if (c == 0) {
s1 = 0;
} else if (s1 <= 0) {
s1 = -1;
}
}
/* Step 3: serialize s1 (see the function comment for the four forms) */
if (s1 >= 0) {
if (s1 < 0x80) { /* latin */
CK((*filter->output_function)(s1, filter->data));
} else if (s1 < 0x100) { /* kana */
CK((*filter->output_function)(0x8e, filter->data));
CK((*filter->output_function)(s1, filter->data));
} else if (s1 < 0x8080) { /* X 0208 */
CK((*filter->output_function)(((s1 >> 8) & 0xff) | 0x80, filter->data));
CK((*filter->output_function)((s1 & 0xff) | 0x80, filter->data));
} else { /* X 0212 */
CK((*filter->output_function)(0x8f, filter->data));
CK((*filter->output_function)(((s1 >> 8) & 0xff) | 0x80, filter->data));
CK((*filter->output_function)((s1 & 0xff) | 0x80, filter->data));
}
} else {
CK(mbfl_filt_conv_illegal_output(c, filter));
}
return 0;
}
static size_t mb_eucjpwin_to_wchar(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state)
{
unsigned char *p = *in, *e = p + *in_len;
uint32_t *out = buf, *limit = buf + bufsize;
while (p < e && out < limit) {
unsigned char c = *p++;
if (c < 0x80) {
*out++ = c;
} else if (c >= 0xA1 && c <= 0xFE && p < e) {
unsigned char c2 = *p++;
if (c2 >= 0xA1 && c2 <= 0xFE) {
unsigned int s = (c - 0xA1)*94 + c2 - 0xA1, w = 0;
if (s <= 137) {
if (s == 31) {
w = 0xFF3C; /* FULLWIDTH REVERSE SOLIDUS */
} else if (s == 32) {
w = 0xFF5E; /* FULLWIDTH TILDE */
} else if (s == 33) {
w = 0x2225; /* PARALLEL TO */
} else if (s == 60) {
w = 0xFF0D; /* FULLWIDTH HYPHEN-MINUS */
} else if (s == 80) {
w = 0xFFE0; /* FULLWIDTH CENT SIGN */
} else if (s == 81) {
w = 0xFFE1; /* FULLWIDTH POUND SIGN */
} else if (s == 137) {
w = 0xFFE2; /* FULLWIDTH NOT SIGN */
}
}
if (w == 0) {
if (s >= cp932ext1_ucs_table_min && s < cp932ext1_ucs_table_max) {
w = cp932ext1_ucs_table[s - cp932ext1_ucs_table_min];
} else if (s < jisx0208_ucs_table_size) {
w = jisx0208_ucs_table[s];
} else if (s >= (84 * 94)) {
w = s - (84 * 94) + 0xE000;
}
}
if (!w)
w = MBFL_BAD_INPUT;
*out++ = w;
} else {
*out++ = MBFL_BAD_INPUT;
}
} else if (c == 0x8E && p < e) {
unsigned char c2 = *p++;
if (c2 >= 0xA1 && c2 <= 0xDF) {
*out++ = 0xFEC0 + c2;
} else {
*out++ = MBFL_BAD_INPUT;
}
} else if (c == 0x8F && p < e) {
unsigned char c2 = *p++;
if (p == e) {
*out++ = MBFL_BAD_INPUT;
continue;
}
unsigned char c3 = *p++;
if (c2 >= 0xA1 && c2 <= 0xFE && c3 >= 0xA1 && c3 <= 0xFE) {
unsigned int s = (c2 - 0xA1)*94 + c3 - 0xA1, w = 0;
if (s < jisx0212_ucs_table_size) {
w = jisx0212_ucs_table[s];
if (w == 0x7E)
w = 0xFF5E; /* FULLWIDTH TILDE */
} else if (s >= (82*94) && s < (84*94)) {
s = (c2 << 8) | c3;
for (int i = 0; i < cp932ext3_eucjp_table_size; i++) {
if (cp932ext3_eucjp_table[i] == s) {
w = cp932ext3_ucs_table[i];
break;
}
}
} else if (s >= (84*94)) {
w = s - (84*94) + 0xE000 + (94*10);
}
if (w == 0xA6)
w = 0xFFE4; /* FULLWIDTH BROKEN BAR */
if (!w)
w = MBFL_BAD_INPUT;
*out++ = w;
} else {
*out++ = MBFL_BAD_INPUT;
}
} else {
*out++ = MBFL_BAD_INPUT;
}
}
*in_len = e - p;
*in = p;
return out - buf;
}
/*
 * Encode `len` Unicode codepoints from `in` as EUC-JP-win, appending the
 * bytes to `buf`.
 *
 * Each codepoint is mapped to an intermediate code `s`, which is then
 * written as one byte (s < 0x80), 0x8E plus one byte (s < 0x100), two bytes
 * OR-ed with 0x80 (s < 0x8080), or 0x8F plus two bytes OR-ed with 0x80.
 * Codepoints with no mapping go through MB_CONVERT_ERROR.
 * `end` is not used by this encoder.
 */
static void mb_wchar_to_eucjpwin(uint32_t *in, size_t len, mb_convert_buf *buf, bool end)
{
	unsigned char *out, *limit;
	MB_CONVERT_BUF_LOAD(buf, out, limit);
	/* Reserve two bytes per codepoint up front; three-byte output and the
	 * error path re-check the reservation below */
	MB_CONVERT_BUF_ENSURE(buf, out, limit, len * 2);

	while (len--) {
		uint32_t w = *in++;
		unsigned int s = 0;

		if (w == 0) {
			out = mb_convert_buf_add(out, 0);
			continue;
		} else if (w == 0xAF) { /* U+00AF is MACRON */
			s = 0xA2B4; /* Use JIS X 0212 overline */
		} else if (w == 0x203E) {
			s = 0x7E;
		} else if (w >= ucs_a1_jis_table_min && w < ucs_a1_jis_table_max) {
			s = ucs_a1_jis_table[w - ucs_a1_jis_table_min];
		} else if (w >= ucs_a2_jis_table_min && w < ucs_a2_jis_table_max) {
			s = ucs_a2_jis_table[w - ucs_a2_jis_table_min];
		} else if (w >= ucs_i_jis_table_min && w < ucs_i_jis_table_max) {
			s = ucs_i_jis_table[w - ucs_i_jis_table_min];
		} else if (w >= ucs_r_jis_table_min && w < ucs_r_jis_table_max) {
			s = ucs_r_jis_table[w - ucs_r_jis_table_min];
		} else if (w >= 0xE000 && w < (0xE000 + 10*94)) {
			/* First block of 10*94 codepoints from U+E000: rows start at 0x75 */
			s = w - 0xE000;
			s = ((s/94 + 0x75) << 8) + (s%94) + 0x21;
		} else if (w >= (0xE000 + 10*94) && w < (0xE000 + 20*94)) {
			/* Second block of 10*94 codepoints: rows start at 0xF5 */
			s = w - (0xE000 + 10*94);
			s = ((s/94 + 0xF5) << 8) + (s%94) + 0xA1;
		}

		if (s == 0xA2F1) {
			s = 0x2D62; /* NUMERO SIGN */
		}

		if (s == 0) {
			if (w == 0xA5) { /* YEN SIGN */
				s = 0x5C;
			} else if (w == 0x2014) { /* EM DASH */
				s = 0x213D;
			} else if (w == 0xFF3C) { /* FULLWIDTH REVERSE SOLIDUS */
				s = 0x2140;
			} else if (w == 0x2225) { /* PARALLEL TO */
				s = 0x2142;
			} else if (w == 0xFF0D) { /* FULLWIDTH HYPHEN-MINUS */
				s = 0x215D;
			} else if (w == 0xFFE0) { /* FULLWIDTH CENT SIGN */
				s = 0x2171;
			} else if (w == 0xFFE1) { /* FULLWIDTH POUND SIGN */
				s = 0x2172;
			} else if (w == 0xFFE2) { /* FULLWIDTH NOT SIGN */
				s = 0x224C;
			} else {
				/* Linear search of the CP932 ext1 table; the index is turned
				 * back into a row/cell pair offset by 0x21 */
				for (int i = 0; i < cp932ext1_ucs_table_max - cp932ext1_ucs_table_min; i++) {
					if (cp932ext1_ucs_table[i] == w) {
						s = (((i/94) + (cp932ext1_ucs_table_min/94) + 0x21) << 8) + (i%94) + 0x21;
						break;
					}
				}

				if (!s) {
					for (int i = 0; i < cp932ext3_ucs_table_max - cp932ext3_ucs_table_min; i++) {
						if (cp932ext3_ucs_table[i] == w) {
							/* The two tables are declared independently; only
							 * read the EUC-JP table when the index is inside it */
							if (i < cp932ext3_eucjp_table_size) {
								s = cp932ext3_eucjp_table[i];
							}
							break;
						}
					}
				}
			}
		}

		if (!s) {
			MB_CONVERT_ERROR(buf, out, limit, w, mb_wchar_to_eucjpwin);
			MB_CONVERT_BUF_ENSURE(buf, out, limit, len * 2);
		} else if (s < 0x80) {
			out = mb_convert_buf_add(out, s);
		} else if (s < 0x100) {
			out = mb_convert_buf_add2(out, 0x8E, s);
		} else if (s < 0x8080) {
			out = mb_convert_buf_add2(out, ((s >> 8) & 0xFF) | 0x80, (s & 0xFF) | 0x80);
		} else {
			MB_CONVERT_BUF_ENSURE(buf, out, limit, (len * 2) + 3);
			out = mb_convert_buf_add3(out, 0x8F, ((s >> 8) & 0xFF) | 0x80, (s & 0xFF) | 0x80);
		}
	}

	MB_CONVERT_BUF_STORE(buf, out, limit);
}

View file

@ -1,42 +0,0 @@
/*
* "streamable kanji code filter and converter"
* Copyright (c) 1998-2002 HappySize, Inc. All rights reserved.
*
* LICENSE NOTICES
*
* This file is part of "streamable kanji code filter and converter",
* which is distributed under the terms of GNU Lesser General Public
* License (version 2) as published by the Free Software Foundation.
*
* This software is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with "streamable kanji code filter and converter";
* if not, write to the Free Software Foundation, Inc., 59 Temple Place,
* Suite 330, Boston, MA 02111-1307 USA
*
* The author of this file:
*
*/
/*
* The source code included in this file was separated from mbfilter_ja.h
* by moriyoshi koizumi <moriyoshi@php.net> on 4 dec 2002.
*
*/
/* Public declarations for the EUC-JP-win text encoding */
#ifndef MBFL_MBFILTER_EUC_JP_WIN_H
#define MBFL_MBFILTER_EUC_JP_WIN_H
#include "mbfilter.h"
/* Encoding descriptor */
extern const mbfl_encoding mbfl_encoding_eucjp_win;
/* Conversion vtables, one per direction (EUC-JP-win -> wchar, wchar -> EUC-JP-win) */
extern const struct mbfl_convert_vtbl vtbl_eucjpwin_wchar;
extern const struct mbfl_convert_vtbl vtbl_wchar_eucjpwin;
/* Filter functions, one per direction; each takes a single input unit `c`
 * and the filter it belongs to */
int mbfl_filt_conv_eucjpwin_wchar(int c, mbfl_convert_filter *filter);
int mbfl_filt_conv_wchar_eucjpwin(int c, mbfl_convert_filter *filter);
#endif /* MBFL_MBFILTER_EUC_JP_WIN_H */

View file

@ -1,297 +0,0 @@
/*
* "streamable kanji code filter and converter"
* Copyright (c) 1998-2002 HappySize, Inc. All rights reserved.
*
* LICENSE NOTICES
*
* This file is part of "streamable kanji code filter and converter",
* which is distributed under the terms of GNU Lesser General Public
* License (version 2) as published by the Free Software Foundation.
*
* This software is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with "streamable kanji code filter and converter";
* if not, write to the Free Software Foundation, Inc., 59 Temple Place,
* Suite 330, Boston, MA 02111-1307 USA
*
* The author of this file:
*
*/
/*
* The source code included in this file was separated from mbfilter_kr.c
* by moriyoshi koizumi <moriyoshi@php.net> on 4 dec 2002.
*
*/
#include "mbfilter.h"
#include "mbfilter_euc_kr.h"
#include "unicode_table_uhc.h"
static int mbfl_filt_conv_euckr_wchar_flush(mbfl_convert_filter *filter);
static size_t mb_euckr_to_wchar(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state);
static void mb_wchar_to_euckr(uint32_t *in, size_t len, mb_convert_buf *buf, bool end);
/*
 * 256-entry table (16 rows of 16) installed as the length table of
 * mbfl_encoding_euc_kr. Entries 0xA1 through 0xFE hold 2; every other entry
 * (0x00-0xA0 and 0xFF) holds 1.
 * NOTE(review): presumably indexed by the first byte of a character to give
 * its length in bytes; confirm against the users of mbfl_encoding.
 */
static const unsigned char mblen_table_euckr[] = { /* 0xA1-0xFE */
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1
};
/* NULL-terminated list of alternative names for EUC-KR */
static const char *mbfl_encoding_euc_kr_aliases[] = {"EUC_KR", "eucKR", "x-euc-kr", NULL};
/*
 * Encoding descriptor for EUC-KR. It ties together the encoding id, the
 * name strings, the alias list, the byte-length table, the two legacy
 * conversion vtables and the two buffer-based conversion functions.
 * NOTE(review): the meaning of the literal 0 and the trailing NULL depends
 * on the mbfl_encoding declaration, which is not visible here.
 */
const mbfl_encoding mbfl_encoding_euc_kr = {
mbfl_no_encoding_euc_kr,
"EUC-KR",
"EUC-KR",
mbfl_encoding_euc_kr_aliases,
mblen_table_euckr,
0,
&vtbl_euckr_wchar,
&vtbl_wchar_euckr,
mb_euckr_to_wchar,
mb_wchar_to_euckr,
NULL
};
/*
 * Conversion vtable for the EUC-KR -> wchar direction: the two encoding ids,
 * the common constructor, the per-byte filter function and the flush
 * function that reports a truncated character.
 * NOTE(review): the two NULL slots are left unset; their meaning depends on
 * struct mbfl_convert_vtbl, which is not visible here.
 */
const struct mbfl_convert_vtbl vtbl_euckr_wchar = {
mbfl_no_encoding_euc_kr,
mbfl_no_encoding_wchar,
mbfl_filt_conv_common_ctor,
NULL,
mbfl_filt_conv_euckr_wchar,
mbfl_filt_conv_euckr_wchar_flush,
NULL,
};
/*
 * Conversion vtable for the wchar -> EUC-KR direction: the two encoding ids,
 * the common constructor, the per-codepoint filter function and the common
 * flush function.
 * NOTE(review): the two NULL slots are left unset; their meaning depends on
 * struct mbfl_convert_vtbl, which is not visible here.
 */
const struct mbfl_convert_vtbl vtbl_wchar_euckr = {
mbfl_no_encoding_wchar,
mbfl_no_encoding_euc_kr,
mbfl_filt_conv_common_ctor,
NULL,
mbfl_filt_conv_wchar_euckr,
mbfl_filt_conv_common_flush,
NULL,
};
/* Evaluate `statement`; if its value is negative, return -1 from the
 * enclosing function. Only usable inside functions returning int. */
#define CK(statement) do { if ((statement) < 0) return (-1); } while (0)
/*
 * Legacy per-byte filter: consumes one EUC-KR byte `c` per call and emits
 * Unicode codepoints through filter->output_function.
 *
 * filter->status is 0 between characters and 1 after a lead byte has been
 * stored in filter->cache. Bytes that cannot start a character, invalid
 * trail bytes and unmapped table entries all produce MBFL_BAD_INPUT.
 *
 * Returns 0 on the normal path, -1 via CK when an output call fails.
 */
int mbfl_filt_conv_euckr_wchar(int c, mbfl_convert_filter *filter)
{
	switch (filter->status) {
	case 0: {
		const int is_ascii = (c >= 0 && c < 0x80);
		const int is_lead = (c != 0xC9) && ((c >= 0xA1 && c <= 0xAC) || (c >= 0xB0 && c <= 0xFD));

		if (is_ascii) {
			/* Single-byte character, passed through unchanged */
			CK((*filter->output_function)(c, filter->data));
		} else if (is_lead) {
			/* Remember the lead byte and wait for the trail byte */
			filter->cache = c;
			filter->status = 1;
		} else {
			CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data));
		}
		break;
	}

	case 1: {
		const int lead = filter->cache;
		int wc = 0; /* stays 0 unless a table lookup is performed */

		filter->status = 0;

		if (c >= 0xA1 && c <= 0xFE) {
			if (lead >= 0xA1 && lead <= 0xC6) {
				/* Lead bytes 0xA1..0xC6 are looked up in uhc1_ucs_table */
				int idx = (lead - 0x81)*190 + c - 0x41;
				ZEND_ASSERT(idx < uhc1_ucs_table_size);
				wc = uhc1_ucs_table[idx];
			} else if (lead >= 0xC7 && lead <= 0xFE && lead != 0xC9) {
				/* Lead bytes 0xC7..0xFE (except 0xC9) are looked up in uhc3_ucs_table */
				int idx = (lead - 0xC7)*94 + c - 0xA1;
				ZEND_ASSERT(idx < uhc3_ucs_table_size);
				wc = uhc3_ucs_table[idx];
			}
		}

		/* No lookup, or an empty table slot: report bad input */
		if (wc <= 0) {
			wc = MBFL_BAD_INPUT;
		}
		CK((*filter->output_function)(wc, filter->data));
		break;
	}

		EMPTY_SWITCH_DEFAULT_CASE();
	}

	return 0;
}
/*
 * Legacy per-codepoint filter: encodes one Unicode codepoint `c` as EUC-KR
 * and passes the bytes to filter->output_function.
 *
 * The lookup uses the UHC tables; results whose high or low byte is below
 * 0xA1 are discarded. Codepoints below 0x80 without a table result are
 * written as themselves; everything else without a result goes to
 * mbfl_filt_conv_illegal_output().
 *
 * Returns 0 on the normal path, -1 via CK when an output call fails.
 */
int mbfl_filt_conv_wchar_euckr(int c, mbfl_convert_filter *filter)
{
	int code = 0;

	if (c >= ucs_a1_uhc_table_min && c < ucs_a1_uhc_table_max) {
		code = ucs_a1_uhc_table[c - ucs_a1_uhc_table_min];
	} else if (c >= ucs_a2_uhc_table_min && c < ucs_a2_uhc_table_max) {
		code = ucs_a2_uhc_table[c - ucs_a2_uhc_table_min];
	} else if (c >= ucs_a3_uhc_table_min && c < ucs_a3_uhc_table_max) {
		code = ucs_a3_uhc_table[c - ucs_a3_uhc_table_min];
	} else if (c >= ucs_i_uhc_table_min && c < ucs_i_uhc_table_max) {
		code = ucs_i_uhc_table[c - ucs_i_uhc_table_min];
	} else if (c >= ucs_s_uhc_table_min && c < ucs_s_uhc_table_max) {
		code = ucs_s_uhc_table[c - ucs_s_uhc_table_min];
	} else if (c >= ucs_r1_uhc_table_min && c < ucs_r1_uhc_table_max) {
		code = ucs_r1_uhc_table[c - ucs_r1_uhc_table_min];
	} else if (c >= ucs_r2_uhc_table_min && c < ucs_r2_uhc_table_max) {
		code = ucs_r2_uhc_table[c - ucs_r2_uhc_table_min];
	}

	/* The tables cover all of UHC; keep only codes whose two bytes are both
	 * at least 0xA1 */
	const int hi = (code >> 8) & 0xFF;
	const int lo = code & 0xFF;
	if (hi < 0xA1 || lo < 0xA1) {
		code = 0;
	}

	if (code <= 0) {
		code = (c < 0x80) ? c : -1;
	}

	if (code < 0) {
		CK(mbfl_filt_conv_illegal_output(c, filter));
		return 0;
	}

	if (code < 0x80) {
		/* Single byte */
		CK((*filter->output_function)(code, filter->data));
		return 0;
	}

	/* Two bytes, high byte first */
	CK((*filter->output_function)((code >> 8) & 0xff, filter->data));
	CK((*filter->output_function)(code & 0xff, filter->data));
	return 0;
}
/*
 * Flush handler for the EUC-KR -> wchar filter. If a lead byte is still
 * pending (status 1), the character was cut off, so MBFL_BAD_INPUT is
 * emitted for it. The downstream flush function is then called if set.
 *
 * Returns 0 on the normal path, -1 via CK when the output call fails.
 */
static int mbfl_filt_conv_euckr_wchar_flush(mbfl_convert_filter *filter)
{
	const int lead_byte_pending = (filter->status == 1);

	if (lead_byte_pending) {
		filter->status = 0;
		CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data));
	}

	if (filter->flush_function != NULL) {
		(*filter->flush_function)(filter->data);
	}

	return 0;
}
/*
 * Decode a run of EUC-KR bytes into Unicode codepoints.
 *
 * in, in_len    input cursor and remaining length; both are updated before
 *               returning to reflect the bytes consumed
 * buf, bufsize  output array and its capacity in codepoints
 * state         not used by this decoder
 *
 * Each loop iteration writes exactly one codepoint; invalid bytes, truncated
 * characters and empty table slots yield MBFL_BAD_INPUT.
 * Returns the number of codepoints written to buf.
 */
static size_t mb_euckr_to_wchar(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state)
{
	unsigned char *src = *in;
	unsigned char *src_end = src + *in_len;
	uint32_t *dst = buf;
	uint32_t *dst_end = buf + bufsize;

	while (src < src_end && dst < dst_end) {
		unsigned char lead = *src++;

		if (lead < 0x80) {
			*dst++ = lead;
			continue;
		}

		int lead_ok = lead != 0xC9 && ((lead >= 0xA1 && lead <= 0xAC) || (lead >= 0xB0 && lead <= 0xFD));
		if (!lead_ok || src >= src_end) {
			/* Not a lead byte, or no trail byte left in the input */
			*dst++ = MBFL_BAD_INPUT;
			continue;
		}

		unsigned char trail = *src++;
		if (trail < 0xA1 || trail == 0xFF) {
			*dst++ = MBFL_BAD_INPUT;
			continue;
		}

		unsigned int wc;
		if (lead <= 0xC6) {
			unsigned int idx = (lead - 0x81)*190 + trail - 0x41;
			ZEND_ASSERT(idx < uhc1_ucs_table_size);
			wc = uhc1_ucs_table[idx];
		} else {
			unsigned int idx = (lead - 0xC7)*94 + trail - 0xA1;
			ZEND_ASSERT(idx < uhc3_ucs_table_size);
			wc = uhc3_ucs_table[idx];
		}

		/* A zero table entry means the byte pair has no mapping */
		if (!wc) {
			wc = MBFL_BAD_INPUT;
		}
		*dst++ = wc;
	}

	*in_len = src_end - src;
	*in = src;
	return dst - buf;
}
/*
 * Encode `len` Unicode codepoints from `in` as EUC-KR, appending the bytes
 * to `buf`.
 *
 * One byte per codepoint is reserved up front; the two-byte path and the
 * error path re-check the reservation. Codepoints below 0x80 with no table
 * result are written as themselves; other unmapped codepoints go through
 * MB_CONVERT_ERROR. `end` is not used by this encoder.
 */
static void mb_wchar_to_euckr(uint32_t *in, size_t len, mb_convert_buf *buf, bool end)
{
	unsigned char *out, *limit;
	MB_CONVERT_BUF_LOAD(buf, out, limit);
	MB_CONVERT_BUF_ENSURE(buf, out, limit, len);

	while (len--) {
		uint32_t cp = *in++;
		unsigned int code = 0;

		if (cp >= ucs_a1_uhc_table_min && cp < ucs_a1_uhc_table_max) {
			code = ucs_a1_uhc_table[cp - ucs_a1_uhc_table_min];
		} else if (cp >= ucs_a2_uhc_table_min && cp < ucs_a2_uhc_table_max) {
			code = ucs_a2_uhc_table[cp - ucs_a2_uhc_table_min];
		} else if (cp >= ucs_a3_uhc_table_min && cp < ucs_a3_uhc_table_max) {
			code = ucs_a3_uhc_table[cp - ucs_a3_uhc_table_min];
		} else if (cp >= ucs_i_uhc_table_min && cp < ucs_i_uhc_table_max) {
			code = ucs_i_uhc_table[cp - ucs_i_uhc_table_min];
		} else if (cp >= ucs_s_uhc_table_min && cp < ucs_s_uhc_table_max) {
			code = ucs_s_uhc_table[cp - ucs_s_uhc_table_min];
		} else if (cp >= ucs_r1_uhc_table_min && cp < ucs_r1_uhc_table_max) {
			code = ucs_r1_uhc_table[cp - ucs_r1_uhc_table_min];
		} else if (cp >= ucs_r2_uhc_table_min && cp < ucs_r2_uhc_table_max) {
			code = ucs_r2_uhc_table[cp - ucs_r2_uhc_table_min];
		}

		/* The tables cover all of UHC; keep only codes whose two bytes are
		 * both at least 0xA1 */
		if (((code >> 8) & 0xFF) < 0xA1 || (code & 0xFF) < 0xA1) {
			code = 0;
		}

		if (code >= 0x80) {
			/* Two-byte character: needs one byte more than was reserved */
			MB_CONVERT_BUF_ENSURE(buf, out, limit, len + 2);
			out = mb_convert_buf_add2(out, (code >> 8) & 0xFF, code & 0xFF);
		} else if (code) {
			out = mb_convert_buf_add(out, code);
		} else if (cp < 0x80) {
			out = mb_convert_buf_add(out, cp);
		} else {
			MB_CONVERT_ERROR(buf, out, limit, cp, mb_wchar_to_euckr);
			MB_CONVERT_BUF_ENSURE(buf, out, limit, len);
		}
	}

	MB_CONVERT_BUF_STORE(buf, out, limit);
}

View file

@ -1,42 +0,0 @@
/*
* "streamable kanji code filter and converter"
* Copyright (c) 1998-2002 HappySize, Inc. All rights reserved.
*
* LICENSE NOTICES
*
* This file is part of "streamable kanji code filter and converter",
* which is distributed under the terms of GNU Lesser General Public
* License (version 2) as published by the Free Software Foundation.
*
* This software is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with "streamable kanji code filter and converter";
* if not, write to the Free Software Foundation, Inc., 59 Temple Place,
* Suite 330, Boston, MA 02111-1307 USA
*
* The author of this file:
*
*/
/*
* The source code included in this file was separated from mbfilter_kr.h
* by moriyoshi koizumi <moriyoshi@php.net> on 4 dec 2002.
*
*/
/* Public declarations for the EUC-KR text encoding */
#ifndef MBFL_MBFILTER_EUC_KR_H
#define MBFL_MBFILTER_EUC_KR_H
#include "mbfilter.h"
/* Encoding descriptor */
extern const mbfl_encoding mbfl_encoding_euc_kr;
/* Conversion vtables, one per direction (EUC-KR -> wchar, wchar -> EUC-KR) */
extern const struct mbfl_convert_vtbl vtbl_euckr_wchar;
extern const struct mbfl_convert_vtbl vtbl_wchar_euckr;
/* Filter functions, one per direction; each takes a single input unit `c`
 * and the filter it belongs to */
int mbfl_filt_conv_euckr_wchar(int c, mbfl_convert_filter *filter);
int mbfl_filt_conv_wchar_euckr(int c, mbfl_convert_filter *filter);
#endif /* MBFL_MBFILTER_EUC_KR_H */

View file

@ -1,375 +0,0 @@
/*
* "streamable kanji code filter and converter"
* Copyright (c) 1998-2002 HappySize, Inc. All rights reserved.
*
* LICENSE NOTICES
*
* This file is part of "streamable kanji code filter and converter",
* which is distributed under the terms of GNU Lesser General Public
* License (version 2) as published by the Free Software Foundation.
*
* This software is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with "streamable kanji code filter and converter";
* if not, write to the Free Software Foundation, Inc., 59 Temple Place,
* Suite 330, Boston, MA 02111-1307 USA
*
* The author of this file: Rui Hirokawa <hirokawa@php.net>
*
*/
/*
* The source code included in this file was separated from mbfilter_tw.c
* by moriyoshi koizumi <moriyoshi@php.net> on 4 dec 2002.
*
*/
#include "mbfilter.h"
#include "mbfilter_euc_tw.h"
#include "unicode_table_cns11643.h"
static int mbfl_filt_conv_euctw_wchar_flush(mbfl_convert_filter *filter);
static size_t mb_euctw_to_wchar(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state);
static void mb_wchar_to_euctw(uint32_t *in, size_t len, mb_convert_buf *buf, bool end);
/*
 * 256-entry table (16 rows of 16) installed as the length table of
 * mbfl_encoding_euc_tw. Entry 0x8E holds 4, entries 0xA1 through 0xFE hold
 * 2, and every other entry holds 1.
 * NOTE(review): presumably indexed by the first byte of a character to give
 * its length in bytes; confirm against the users of mbfl_encoding.
 */
static const unsigned char mblen_table_euctw[] = { /* 0xA1-0xFE */
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 4, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1
};
/* NULL-terminated list of alternative names for EUC-TW */
static const char *mbfl_encoding_euc_tw_aliases[] = {"EUC_TW", "eucTW", "x-euc-tw", NULL};
/*
 * Encoding descriptor for EUC-TW. It ties together the encoding id, the
 * name strings, the alias list, the byte-length table, the two legacy
 * conversion vtables and the two buffer-based conversion functions.
 * NOTE(review): the meaning of the literal 0 and the trailing NULL depends
 * on the mbfl_encoding declaration, which is not visible here.
 */
const mbfl_encoding mbfl_encoding_euc_tw = {
mbfl_no_encoding_euc_tw,
"EUC-TW",
"EUC-TW",
mbfl_encoding_euc_tw_aliases,
mblen_table_euctw,
0,
&vtbl_euctw_wchar,
&vtbl_wchar_euctw,
mb_euctw_to_wchar,
mb_wchar_to_euctw,
NULL
};
/*
 * Conversion vtable for the EUC-TW -> wchar direction: the two encoding ids,
 * the common constructor, the per-byte filter function and the flush
 * function that reports a truncated character.
 * NOTE(review): the two NULL slots are left unset; their meaning depends on
 * struct mbfl_convert_vtbl, which is not visible here.
 */
const struct mbfl_convert_vtbl vtbl_euctw_wchar = {
mbfl_no_encoding_euc_tw,
mbfl_no_encoding_wchar,
mbfl_filt_conv_common_ctor,
NULL,
mbfl_filt_conv_euctw_wchar,
mbfl_filt_conv_euctw_wchar_flush,
NULL,
};
/*
 * Conversion vtable for the wchar -> EUC-TW direction: the two encoding ids,
 * the common constructor, the per-codepoint filter function and the common
 * flush function.
 * NOTE(review): the two NULL slots are left unset; their meaning depends on
 * struct mbfl_convert_vtbl, which is not visible here.
 */
const struct mbfl_convert_vtbl vtbl_wchar_euctw = {
mbfl_no_encoding_wchar,
mbfl_no_encoding_euc_tw,
mbfl_filt_conv_common_ctor,
NULL,
mbfl_filt_conv_wchar_euctw,
mbfl_filt_conv_common_flush,
NULL,
};
/* Evaluate `statement`; if its value is negative, return -1 from the
 * enclosing function. Only usable inside functions returning int. */
#define CK(statement) do { if ((statement) < 0) return (-1); } while (0)
/*
 * Legacy per-byte filter: consumes one EUC-TW byte `c` per call and emits
 * Unicode codepoints through filter->output_function.
 *
 * filter->status tracks the position inside a character:
 *   0  between characters
 *   1  lead byte of a 2-byte character stored in filter->cache
 *   2  0x8E seen, waiting for the plane byte
 *   3  plane byte accepted; cache holds (plane byte - 0xA1)
 *   4  third byte accepted; cache holds (plane index << 8) + (third byte - 0xA1)
 * Any byte that does not fit the current state resets the filter and
 * produces MBFL_BAD_INPUT, as does an empty table slot.
 *
 * Returns 0 on the normal path, -1 via CK when an output call fails.
 */
int mbfl_filt_conv_euctw_wchar(int c, mbfl_convert_filter *filter)
{
int c1, s, w;
switch (filter->status) {
case 0:
if (c >= 0 && c < 0x80) { /* latin */
CK((*filter->output_function)(c, filter->data));
} else if (((c >= 0xA1 && c <= 0xA6) || (c >= 0xC2 && c <= 0xFD)) && c != 0xC3) { /* 2-byte character, first byte */
filter->status = 1;
filter->cache = c;
} else if (c == 0x8E) { /* 4-byte character, first byte */
filter->status = 2;
} else {
CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data));
}
break;
case 1: /* 2-byte character, second byte */
filter->status = 0;
c1 = filter->cache;
if (c > 0xA0 && c < 0xFF) {
/* Row/cell index into the plane 1 table, 94 cells per row */
w = (c1 - 0xA1)*94 + (c - 0xA1);
if (w >= 0 && w < cns11643_1_ucs_table_size) {
w = cns11643_1_ucs_table[w];
} else {
w = 0;
}
if (w <= 0) {
w = MBFL_BAD_INPUT;
}
CK((*filter->output_function)(w, filter->data));
} else {
filter->status = filter->cache = 0;
CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data));
}
break;
case 2: /* got 0x8e, second byte */
/* Only the plane bytes 0xA1, 0xA2 and 0xAE are accepted */
if (c == 0xA1 || c == 0xA2 || c == 0xAE) {
filter->status = 3;
filter->cache = c - 0xA1;
} else {
filter->status = filter->cache = 0;
CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data));
}
break;
case 3: /* got 0x8e, third byte */
filter->status = 0;
c1 = filter->cache;
/* The accepted range of the third byte depends on the plane index in c1 */
if (c >= 0xA1 && ((c1 == 0 && ((c >= 0xA1 && c <= 0xA6) || (c >= 0xC2 && c <= 0xFD)) && c != 0xC3) ||
(c1 == 1 && c <= 0xF2) || (c1 == 13 && c <= 0xE7))) {
filter->status = 4;
filter->cache = (c1 << 8) + c - 0xA1;
} else {
filter->status = filter->cache = 0;
CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data));
}
break;
case 4: /* multi-byte character, fourth byte */
filter->status = 0;
c1 = filter->cache;
if (c1 <= 0xDFF && c > 0xA0 && c < 0xFF) {
int plane = (c1 & 0xF00) >> 8; /* This is actually the CNS-11643 plane minus one */
/* Low byte of the cache is the row; combine with the cell from this byte */
s = (c1 & 0xFF)*94 + c - 0xA1;
w = 0;
if (s >= 0) {
/* A later version of CNS-11643 moved all the characters in "plane 14" to "plane 3",
* and added tens of thousands more characters in planes 4, 5, 6, and 7
* We only support the older version of CNS-11643
* This is the same as iconv from glibc 2.2 */
if (plane == 0 && s < cns11643_1_ucs_table_size) {
w = cns11643_1_ucs_table[s];
} else if (plane == 1 && s < cns11643_2_ucs_table_size) {
w = cns11643_2_ucs_table[s];
} else if (plane == 13 && s < cns11643_14_ucs_table_size) {
w = cns11643_14_ucs_table[s];
}
}
if (w <= 0) {
w = MBFL_BAD_INPUT;
}
CK((*filter->output_function)(w, filter->data));
} else {
filter->status = filter->cache = 0;
CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data));
}
break;
EMPTY_SWITCH_DEFAULT_CASE();
}
return 0;
}
/*
 * Legacy per-codepoint filter: encodes one Unicode codepoint `c` as EUC-TW
 * and passes the bytes to filter->output_function.
 *
 * The table result carries a plane number in bits 16-20 and a two-byte code
 * in the low 16 bits. Plane values 0 and 1 are written as one byte (result
 * below 0x80) or as two bytes OR-ed with 0x80; higher planes are written as
 * 0x8E, 0xA0 + plane, then the two bytes OR-ed with 0x80. U+0000 is written
 * as a zero byte; any other codepoint without a table result goes to
 * mbfl_filt_conv_illegal_output().
 *
 * Returns 0 on the normal path, -1 via CK when an output call fails.
 */
int mbfl_filt_conv_wchar_euctw(int c, mbfl_convert_filter *filter)
{
	int s = 0;

	if (c >= ucs_a1_cns11643_table_min && c < ucs_a1_cns11643_table_max) {
		s = ucs_a1_cns11643_table[c - ucs_a1_cns11643_table_min];
	} else if (c >= ucs_a2_cns11643_table_min && c < ucs_a2_cns11643_table_max) {
		s = ucs_a2_cns11643_table[c - ucs_a2_cns11643_table_min];
	} else if (c >= ucs_a3_cns11643_table_min && c < ucs_a3_cns11643_table_max) {
		s = ucs_a3_cns11643_table[c - ucs_a3_cns11643_table_min];
	} else if (c >= ucs_i_cns11643_table_min && c < ucs_i_cns11643_table_max) {
		s = ucs_i_cns11643_table[c - ucs_i_cns11643_table_min];
	} else if (c >= ucs_r_cns11643_table_min && c < ucs_r_cns11643_table_max) {
		s = ucs_r_cns11643_table[c - ucs_r_cns11643_table_min];
	}

	if (s <= 0) {
		/* Only U+0000 may legitimately have no table result */
		s = (c == 0) ? 0 : -1;
	}

	if (s < 0) {
		CK(mbfl_filt_conv_illegal_output(c, filter));
		return 0;
	}

	int plane = (s & 0x1F0000) >> 16;

	if (plane <= 1) {
		if (s < 0x80) { /* latin */
			CK((*filter->output_function)(s, filter->data));
		} else {
			CK((*filter->output_function)(((s >> 8) & 0xFF) | 0x80, filter->data));
			CK((*filter->output_function)((s & 0xFF) | 0x80, filter->data));
		}
	} else {
		/* Emit the four bytes directly rather than packing them into an int
		 * as 0x8EA0xxxx first: that value does not fit a signed 32-bit int,
		 * so the packing relied on implementation-defined conversion and on
		 * right-shifting a negative value */
		CK((*filter->output_function)(0x8e, filter->data));
		CK((*filter->output_function)(0xA0 + plane, filter->data));
		CK((*filter->output_function)(((s >> 8) & 0xFF) | 0x80, filter->data));
		CK((*filter->output_function)((s & 0xFF) | 0x80, filter->data));
	}

	return 0;
}
/*
 * Flush handler for the EUC-TW -> wchar filter. A non-zero status means a
 * 2-byte or 4-byte character was cut off, so MBFL_BAD_INPUT is emitted for
 * it. The downstream flush function is then called if set.
 *
 * Returns 0 on the normal path, -1 via CK when the output call fails.
 */
static int mbfl_filt_conv_euctw_wchar_flush(mbfl_convert_filter *filter)
{
	const int mid_character = (filter->status != 0);

	if (mid_character) {
		filter->status = 0;
		CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data));
	}

	if (filter->flush_function != NULL) {
		(*filter->flush_function)(filter->data);
	}

	return 0;
}
/*
 * Decode a run of EUC-TW bytes into Unicode codepoints.
 *
 * in, in_len    input cursor and remaining length; both are updated before
 *               returning to reflect the bytes consumed
 * buf, bufsize  output array and its capacity in codepoints
 * state         not used by this decoder
 *
 * Each loop iteration writes exactly one codepoint; invalid bytes, truncated
 * characters and empty table slots yield MBFL_BAD_INPUT.
 * Returns the number of codepoints written to buf.
 */
static size_t mb_euctw_to_wchar(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state)
{
unsigned char *p = *in, *e = p + *in_len;
uint32_t *out = buf, *limit = buf + bufsize;
while (p < e && out < limit) {
unsigned char c = *p++;
if (c < 0x80) {
*out++ = c;
} else if (((c >= 0xA1 && c <= 0xA6) || (c >= 0xC2 && c <= 0xFD)) && c != 0xC3 && p < e) {
/* 2-byte character: row/cell index into the plane 1 table */
unsigned char c2 = *p++;
if (c2 >= 0xA1 && c2 <= 0xFE) {
unsigned int w = (c - 0xA1)*94 + (c2 - 0xA1);
if (w < cns11643_1_ucs_table_size) {
w = cns11643_1_ucs_table[w];
} else {
w = 0;
}
if (!w)
w = MBFL_BAD_INPUT;
*out++ = w;
} else {
*out++ = MBFL_BAD_INPUT;
}
} else if (c == 0x8E && p < e) {
/* 4-byte character: 0x8E, plane byte, row byte, cell byte. Every failed
 * check below falls through to the single MBFL_BAD_INPUT at the end of
 * this branch; bytes read up to that point stay consumed */
unsigned char c2 = *p++;
if ((c2 == 0xA1 || c2 == 0xA2 || c2 == 0xAE) && p < e) {
unsigned int plane = c2 - 0xA1; /* This is actually the CNS-11643 plane minus one */
unsigned char c3 = *p++;
/* The accepted range of the row byte depends on the plane */
if (c3 >= 0xA1 && ((plane == 0 && ((c3 >= 0xA1 && c3 <= 0xA6) || (c3 >= 0xC2 && c3 <= 0xFD)) && c3 != 0xC3) || (plane == 1 && c3 <= 0xF2) || (plane == 13 && c3 <= 0xE7)) && p < e) {
unsigned char c4 = *p++;
if (c2 <= 0xAE && c4 > 0xA0 && c4 < 0xFF) {
unsigned int s = (c3 - 0xA1)*94 + c4 - 0xA1, w = 0;
/* A later version of CNS-11643 moved all the characters in "plane 14" to "plane 3",
* and added tens of thousands more characters in planes 4, 5, 6, and 7
* We only support the older version of CNS-11643
* This is the same as iconv from glibc 2.2 */
if (plane == 0 && s < cns11643_1_ucs_table_size) {
w = cns11643_1_ucs_table[s];
} else if (plane == 1 && s < cns11643_2_ucs_table_size) {
w = cns11643_2_ucs_table[s];
} else if (plane == 13 && s < cns11643_14_ucs_table_size) {
w = cns11643_14_ucs_table[s];
}
if (!w)
w = MBFL_BAD_INPUT;
*out++ = w;
/* Success: skip the shared error output below */
continue;
}
}
}
*out++ = MBFL_BAD_INPUT;
} else {
/* Invalid lead byte, or a lead byte with nothing after it */
*out++ = MBFL_BAD_INPUT;
}
}
*in_len = e - p;
*in = p;
return out - buf;
}
/*
 * Encode `len` Unicode codepoints from `in` as EUC-TW, appending the bytes
 * to `buf`.
 *
 * Two bytes per codepoint are reserved up front; the four-byte path and the
 * error path re-check the reservation. The table result carries a plane
 * number above bit 15: plane values 0 and 1 produce one or two bytes, higher
 * planes produce 0x8E, 0xA0 + plane and two bytes. U+0000 is written as a
 * zero byte; other unmapped codepoints go through MB_CONVERT_ERROR.
 * `end` is not used by this encoder.
 */
static void mb_wchar_to_euctw(uint32_t *in, size_t len, mb_convert_buf *buf, bool end)
{
	unsigned char *out, *limit;
	MB_CONVERT_BUF_LOAD(buf, out, limit);
	MB_CONVERT_BUF_ENSURE(buf, out, limit, len * 2);

	while (len--) {
		uint32_t cp = *in++;
		unsigned int code = 0;

		if (cp >= ucs_a1_cns11643_table_min && cp < ucs_a1_cns11643_table_max) {
			code = ucs_a1_cns11643_table[cp - ucs_a1_cns11643_table_min];
		} else if (cp >= ucs_a2_cns11643_table_min && cp < ucs_a2_cns11643_table_max) {
			code = ucs_a2_cns11643_table[cp - ucs_a2_cns11643_table_min];
		} else if (cp >= ucs_a3_cns11643_table_min && cp < ucs_a3_cns11643_table_max) {
			code = ucs_a3_cns11643_table[cp - ucs_a3_cns11643_table_min];
		} else if (cp >= ucs_i_cns11643_table_min && cp < ucs_i_cns11643_table_max) {
			code = ucs_i_cns11643_table[cp - ucs_i_cns11643_table_min];
		} else if (cp >= ucs_r_cns11643_table_min && cp < ucs_r_cns11643_table_max) {
			code = ucs_r_cns11643_table[cp - ucs_r_cns11643_table_min];
		}

		if (code == 0) {
			if (cp != 0) {
				MB_CONVERT_ERROR(buf, out, limit, cp, mb_wchar_to_euctw);
				MB_CONVERT_BUF_ENSURE(buf, out, limit, len * 2);
			} else {
				out = mb_convert_buf_add(out, 0);
			}
			continue;
		}

		unsigned int plane = code >> 16;

		if (plane > 1) {
			/* Four-byte form: needs more room than the two bytes reserved */
			MB_CONVERT_BUF_ENSURE(buf, out, limit, (len * 2) + 4);
			out = mb_convert_buf_add4(out, 0x8E, 0xA0 + plane, ((code >> 8) & 0xFF) | 0x80, (code & 0xFF) | 0x80);
		} else if (code < 0x80) {
			out = mb_convert_buf_add(out, code);
		} else {
			out = mb_convert_buf_add2(out, ((code >> 8) & 0xFF) | 0x80, (code & 0xFF) | 0x80);
		}
	}

	MB_CONVERT_BUF_STORE(buf, out, limit);
}

View file

@ -1,42 +0,0 @@
/*
* "streamable kanji code filter and converter"
* Copyright (c) 1998-2002 HappySize, Inc. All rights reserved.
*
* LICENSE NOTICES
*
* This file is part of "streamable kanji code filter and converter",
* which is distributed under the terms of GNU Lesser General Public
* License (version 2) as published by the Free Software Foundation.
*
* This software is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with "streamable kanji code filter and converter";
* if not, write to the Free Software Foundation, Inc., 59 Temple Place,
* Suite 330, Boston, MA 02111-1307 USA
*
* The author of this file: Rui Hirokawa <hirokawa@php.net>
*
*/
/*
* The source code included in this file was separated from mbfilter_tw.h
* by moriyoshi koizumi <moriyoshi@php.net> on 4 dec 2002.
*
*/
/* Public declarations for the EUC-TW text encoding */
#ifndef MBFL_MBFILTER_EUC_TW_H
#define MBFL_MBFILTER_EUC_TW_H
#include "mbfilter.h"
/* Encoding descriptor */
extern const mbfl_encoding mbfl_encoding_euc_tw;
/* Conversion vtables, one per direction (EUC-TW -> wchar, wchar -> EUC-TW) */
extern const struct mbfl_convert_vtbl vtbl_euctw_wchar;
extern const struct mbfl_convert_vtbl vtbl_wchar_euctw;
/* Filter functions, one per direction; each takes a single input unit `c`
 * and the filter it belongs to */
int mbfl_filt_conv_euctw_wchar(int c, mbfl_convert_filter *filter);
int mbfl_filt_conv_wchar_euctw(int c, mbfl_convert_filter *filter);
#endif /* MBFL_MBFILTER_EUC_TW_H */

View file

@ -1,644 +0,0 @@
/*
* "streamable kanji code filter and converter"
* Copyright (c) 1998-2002 HappySize, Inc. All rights reserved.
*
* LICENSE NOTICES
*
* This file is part of "streamable kanji code filter and converter",
* which is distributed under the terms of GNU Lesser General Public
* License (version 2) as published by the Free Software Foundation.
*
* This software is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with "streamable kanji code filter and converter";
* if not, write to the Free Software Foundation, Inc., 59 Temple Place,
* Suite 330, Boston, MA 02111-1307 USA
*
* The author of this file:
*
*/
/*
* the source code included in this file was separated from mbfilter_cp936.c
* by rui hirokawa <hirokawa@php.net> on 11 Aug 2011.
*
*/
#include "mbfilter.h"
#include "mbfilter_gb18030.h"
#include "unicode_table_cp936.h"
#include "unicode_table_gb18030.h"
/* Forward declarations of the file-local functions referenced by the tables below */
static int mbfl_filt_conv_gb18030_wchar_flush(mbfl_convert_filter *filter);
static size_t mb_gb18030_to_wchar(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state);
static void mb_wchar_to_gb18030(uint32_t *in, size_t len, mb_convert_buf *buf, bool end);
/* NULL-terminated list of alternative names accepted for GB18030 */
static const char *mbfl_encoding_gb18030_aliases[] = {"gb-18030", "gb-18030-2000", NULL};
/* Encoding descriptor for GB18030.
 * This is a positional initializer; the field names live in mbfilter.h (not visible here).
 * It bundles the encoding ID, the names and aliases, both filter vtables and both
 * buffer-based converters defined in this file. */
const mbfl_encoding mbfl_encoding_gb18030 = {
mbfl_no_encoding_gb18030,
"GB18030",
"GB18030",
mbfl_encoding_gb18030_aliases,
NULL,
MBFL_ENCTYPE_GL_UNSAFE,
&vtbl_gb18030_wchar,
&vtbl_wchar_gb18030,
mb_gb18030_to_wchar,
mb_wchar_to_gb18030,
NULL
};
/* Filter vtable for GB18030 -> wchar: per-unit callback mbfl_filt_conv_gb18030_wchar
 * plus a dedicated flush handler which reports a truncated multi-byte sequence */
const struct mbfl_convert_vtbl vtbl_gb18030_wchar = {
mbfl_no_encoding_gb18030,
mbfl_no_encoding_wchar,
mbfl_filt_conv_common_ctor,
NULL,
mbfl_filt_conv_gb18030_wchar,
mbfl_filt_conv_gb18030_wchar_flush,
NULL,
};
/* Filter vtable for wchar -> GB18030: per-unit callback mbfl_filt_conv_wchar_gb18030;
 * flushing is delegated to the shared mbfl_filt_conv_common_flush (defined elsewhere) */
const struct mbfl_convert_vtbl vtbl_wchar_gb18030 = {
mbfl_no_encoding_wchar,
mbfl_no_encoding_gb18030,
mbfl_filt_conv_common_ctor,
NULL,
mbfl_filt_conv_wchar_gb18030,
mbfl_filt_conv_common_flush,
NULL,
};
/* Evaluate `statement`; if its value is negative, return -1 from the enclosing function */
#define CK(statement) do { if ((statement) < 0) return (-1); } while (0)
/* Binary search over a table of inclusive ranges.
 *
 * `tbl` holds `n` ranges stored as consecutive pairs of unsigned shorts:
 * tbl[2*i] is the lower bound and tbl[2*i + 1] the upper bound of range `i`.
 * The ranges must be in ascending order for the bisection to be meaningful.
 *
 * Returns the index of the range containing `w`, or -1 if no range contains it
 * (including when `n` is zero). */
int mbfl_bisec_srch(int w, const unsigned short *tbl, int n)
{
	int first = 0;
	int last = n - 1;

	while (first <= last) {
		int mid = (first + last) >> 1;
		const unsigned short *range = &tbl[2 * mid];
		unsigned short lower = range[0];
		unsigned short upper = range[1];

		if (w >= lower && w <= upper) {
			return mid;
		}

		if (w < lower) {
			/* Target lies before this range; continue in the left half */
			last = mid - 1;
		} else {
			/* Target lies after this range; continue in the right half */
			first = mid + 1;
		}
	}

	return -1;
}
/* Binary search over a table of single values (not ranges).
 *
 * `tbl` holds `n` unsigned shorts, which must be in ascending order for the
 * bisection to be meaningful.
 *
 * Returns the index at which `w` is stored, or -1 if it is not present
 * (including when `n` is zero). */
int mbfl_bisec_srch2(int w, const unsigned short tbl[], int n)
{
	int first = 0;
	int last = n - 1;

	while (first <= last) {
		int mid = (first + last) >> 1;
		unsigned short entry = tbl[mid];

		if (w == entry) {
			return mid;
		}

		if (w < entry) {
			last = mid - 1;
		} else {
			first = mid + 1;
		}
	}

	return -1;
}
/* Filter callback: GB18030 -> wchar, consuming one input unit `c` per call.
 *
 * filter->status counts how many bytes of the current character are already cached:
 *   0 = at a character boundary, 1 = lead byte cached,
 *   2 = first two bytes of a 4-byte code cached, 3 = first three bytes cached.
 * filter->cache holds those bytes packed big-endian.
 *
 * Each decoded codepoint, or MBFL_BAD_INPUT for an invalid sequence, is passed to
 * filter->output_function. Returns 0, or -1 (through CK) if the output function
 * returns a negative value. */
int mbfl_filt_conv_gb18030_wchar(int c, mbfl_convert_filter *filter)
{
int k;
/* w stays -1 until some branch has produced a codepoint */
int c1, c2, c3, w = -1;
switch (filter->status) {
case 0:
if (c >= 0 && c < 0x80) { /* latin */
CK((*filter->output_function)(c, filter->data));
} else if (c > 0x80 && c < 0xff) { /* dbcs/qbcs lead byte */
filter->status = 1;
filter->cache = c;
} else {
/* 0x80, 0xFF and anything outside the byte range cannot start a character */
CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data));
}
break;
case 1: /* dbcs/qbcs second byte */
c1 = filter->cache;
filter->status = 0;
if (c1 >= 0x81 && c1 <= 0x84 && c >= 0x30 && c <= 0x39) {
/* 4 byte range: Unicode BMP */
filter->status = 2;
filter->cache = (c1 << 8) | c;
return 0;
} else if (c1 >= 0x90 && c1 <= 0xe3 && c >= 0x30 && c <= 0x39) {
/* 4 byte range: Unicode 16 planes */
filter->status = 2;
filter->cache = (c1 << 8) | c;
return 0;
} else if (((c1 >= 0xaa && c1 <= 0xaf) || (c1 >= 0xf8 && c1 <= 0xfe)) && (c >= 0xa1 && c <= 0xfe)) {
/* UDA part 1,2: U+E000-U+E4C5 */
w = 94*(c1 >= 0xf8 ? c1 - 0xf2 : c1 - 0xaa) + (c - 0xa1) + 0xe000;
CK((*filter->output_function)(w, filter->data));
} else if (c1 >= 0xa1 && c1 <= 0xa7 && c >= 0x40 && c < 0xa1 && c != 0x7f) {
/* UDA part3 : U+E4C6-U+E765*/
w = 96*(c1 - 0xa1) + c - (c >= 0x80 ? 0x41 : 0x40) + 0xe4c6;
CK((*filter->output_function)(w, filter->data));
}
/* Not yet decoded: try the PUA table. Each row is used as
 * {first codepoint, last codepoint, first 2-byte code}; the outer range test
 * only skips the linear scan for codes which cannot be in the table. */
c2 = (c1 << 8) | c;
if (w <= 0 &&
((c2 >= 0xa2ab && c2 <= 0xa9f0 + (0xe80f-0xe801)) ||
(c2 >= 0xd7fa && c2 <= 0xd7fa + (0xe814-0xe810)) ||
(c2 >= 0xfe50 && c2 <= 0xfe80 + (0xe864-0xe844)))) {
for (k = 0; k < mbfl_gb18030_pua_tbl_max; k++) {
if (c2 >= mbfl_gb18030_pua_tbl[k][2] && c2 <= mbfl_gb18030_pua_tbl[k][2] + mbfl_gb18030_pua_tbl[k][1] - mbfl_gb18030_pua_tbl[k][0]) {
w = c2 - mbfl_gb18030_pua_tbl[k][2] + mbfl_gb18030_pua_tbl[k][0];
CK((*filter->output_function)(w, filter->data));
break;
}
}
}
/* Still not decoded: fall back to the CP936 table, indexed with 192 slots per
 * lead byte starting at lead 0x81 / trail 0x40 */
if (w <= 0) {
if ((c1 >= 0xa1 && c1 <= 0xa9 && c >= 0xa1 && c <= 0xfe) ||
(c1 >= 0xb0 && c1 <= 0xf7 && c >= 0xa1 && c <= 0xfe) ||
(c1 >= 0x81 && c1 <= 0xa0 && c >= 0x40 && c <= 0xfe && c != 0x7f) ||
(c1 >= 0xaa && c1 <= 0xfe && c >= 0x40 && c <= 0xa0 && c != 0x7f) ||
(c1 >= 0xa8 && c1 <= 0xa9 && c >= 0x40 && c <= 0xa0 && c != 0x7f)) {
w = (c1 - 0x81)*192 + c - 0x40;
ZEND_ASSERT(w < cp936_ucs_table_size);
CK((*filter->output_function)(cp936_ucs_table[w], filter->data));
} else {
CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data));
}
}
break;
case 2: /* qbcs third byte */
c1 = (filter->cache >> 8) & 0xff;
c2 = filter->cache & 0xff;
filter->status = filter->cache = 0;
if (((c1 >= 0x81 && c1 <= 0x84) || (c1 >= 0x90 && c1 <= 0xe3)) && c2 >= 0x30 && c2 <= 0x39 && c >= 0x81 && c <= 0xfe) {
filter->cache = (c1 << 16) | (c2 << 8) | c;
filter->status = 3;
} else {
CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data));
}
break;
case 3: /* qbcs fourth byte */
c1 = (filter->cache >> 16) & 0xff;
c2 = (filter->cache >> 8) & 0xff;
c3 = filter->cache & 0xff;
filter->status = filter->cache = 0;
if (((c1 >= 0x81 && c1 <= 0x84) || (c1 >= 0x90 && c1 <= 0xe3)) && c2 >= 0x30 && c2 <= 0x39 && c3 >= 0x81 && c3 <= 0xfe && c >= 0x30 && c <= 0x39) {
if (c1 >= 0x90 && c1 <= 0xe3) {
/* Supplementary planes: the four bytes form a mixed-radix number
 * (radices 10, 126, 10) which is a linear offset from U+10000 */
w = ((((c1 - 0x90)*10 + (c2 - 0x30))*126 + (c3 - 0x81)))*10 + (c - 0x30) + 0x10000;
if (w > 0x10FFFF) {
CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data));
return 0;
}
} else { /* Unicode BMP */
/* Same mixed-radix number, mapped to a codepoint by adding a per-range offset */
w = (((c1 - 0x81)*10 + (c2 - 0x30))*126 + (c3 - 0x81))*10 + (c - 0x30);
if (w >= 0 && w <= 39419) {
/* NOTE(review): mbfl_bisec_srch returns -1 when no range matches and `k` is
 * used unchecked here; this relies on mbfl_gb2uni_tbl covering every value
 * in 0..39419 - confirm against the table */
k = mbfl_bisec_srch(w, mbfl_gb2uni_tbl, mbfl_gb_uni_max);
w += mbfl_gb_uni_ofst[k];
} else {
CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data));
return 0;
}
}
CK((*filter->output_function)(w, filter->data));
} else {
CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data));
}
break;
EMPTY_SWITCH_DEFAULT_CASE();
}
return 0;
}
/* Flush handler for the GB18030 -> wchar filter.
 *
 * A non-zero status means the input ended in the middle of a multi-byte
 * character; the state is reset and MBFL_BAD_INPUT is emitted for it.
 * The flush is then forwarded to the downstream flush_function, if one is set.
 * Returns 0, or -1 (through CK) if emitting the error marker fails. */
static int mbfl_filt_conv_gb18030_wchar_flush(mbfl_convert_filter *filter)
{
	int truncated = (filter->status != 0);

	if (truncated) {
		/* Clear the state first, then report the incomplete character */
		filter->status = 0;
		CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data));
	}

	if (!filter->flush_function) {
		return 0;
	}

	(*filter->flush_function)(filter->data);
	return 0;
}
/* Filter callback: wchar -> GB18030, consuming one codepoint `c` per call.
 *
 * `s` accumulates the encoded form: a single byte, a 2-byte code, or the last three
 * bytes of a 4-byte code; `s1` is non-zero only for 4-byte codes and holds their first byte.
 * The lookups run in this order: CP936 tables (with a few hard-coded overrides),
 * the GB18030-specific difference table, the PUA rules (which overwrite any earlier
 * result for U+E000..U+E864), and finally the algorithmic 4-byte forms.
 *
 * Output bytes go to filter->output_function; an unmappable codepoint is handed to
 * mbfl_filt_conv_illegal_output. Returns 0, or -1 (through CK) on output failure. */
int mbfl_filt_conv_wchar_gb18030(int c, mbfl_convert_filter *filter)
{
int k, k1, k2;
int c1, s = 0, s1 = 0;
if (c >= ucs_a1_cp936_table_min && c < ucs_a1_cp936_table_max) {
if (c == 0x01f9) {
s = 0xa8bf;
} else {
s = ucs_a1_cp936_table[c - ucs_a1_cp936_table_min];
}
} else if (c >= ucs_a2_cp936_table_min && c < ucs_a2_cp936_table_max) {
if (c == 0x20ac) { /* euro-sign */
s = 0xa2e3;
} else {
s = ucs_a2_cp936_table[c - ucs_a2_cp936_table_min];
}
} else if (c >= ucs_a3_cp936_table_min && c < ucs_a3_cp936_table_max) {
s = ucs_a3_cp936_table[c - ucs_a3_cp936_table_min];
} else if (c >= ucs_i_cp936_table_min && c < ucs_i_cp936_table_max) {
s = ucs_i_cp936_table[c - ucs_i_cp936_table_min];
} else if (c >= ucs_ci_cp936_table_min && c < ucs_ci_cp936_table_max) {
/* U+F900-FA2F CJK Compatibility Ideographs */
if (c == 0xf92c) {
s = 0xfd9c;
} else if (c == 0xf979) {
s = 0xfd9d;
} else if (c == 0xf995) {
s = 0xfd9e;
} else if (c == 0xf9e7) {
s = 0xfd9f;
} else if (c == 0xf9f1) {
s = 0xfda0;
} else if (c >= 0xfa0c && c <= 0xfa29) {
s = ucs_ci_s_cp936_table[c - 0xfa0c];
}
} else if (c >= ucs_cf_cp936_table_min && c < ucs_cf_cp936_table_max) {
/* FE30h CJK Compatibility Forms */
s = ucs_cf_cp936_table[c - ucs_cf_cp936_table_min];
} else if (c >= ucs_sfv_cp936_table_min && c < ucs_sfv_cp936_table_max) {
/* U+FE50-FE6F Small Form Variants */
s = ucs_sfv_cp936_table[c - ucs_sfv_cp936_table_min];
} else if (c >= ucs_hff_cp936_table_min && c < ucs_hff_cp936_table_max) {
/* U+FF00-FFFF HW/FW Forms */
if (c == 0xff04) {
s = 0xa1e7;
} else if (c == 0xff5e) {
s = 0xa1ab;
} else if (c >= 0xff01 && c <= 0xff5d) {
s = c - 0xff01 + 0xa3a1;
} else if (c >= 0xffe0 && c <= 0xffe5) {
s = ucs_hff_s_cp936_table[c-0xffe0];
}
}
/* While GB18030 and CP936 are very similar, some mappings are different between these encodings;
* do a binary search in a table of differing codepoints to see if we have one */
if (s <= 0 && c >= mbfl_gb18030_c_tbl_key[0] && c <= mbfl_gb18030_c_tbl_key[mbfl_gb18030_c_tbl_max-1]) {
k1 = mbfl_bisec_srch2(c, mbfl_gb18030_c_tbl_key, mbfl_gb18030_c_tbl_max);
if (k1 >= 0) {
s = mbfl_gb18030_c_tbl_val[k1];
}
}
/* This block is not conditional on `s`, so for U+E000..U+E864 it replaces any
 * value found by the lookups above */
if (c >= 0xe000 && c <= 0xe864) { /* PUA */
if (c < 0xe766) {
if (c < 0xe4c6) {
/* U+E000..U+E4C5: 94 codes per lead byte, trail bytes 0xA1..0xFE;
 * lead bytes 0xAA..0xAF for the first six rows, then 0xF8 onwards */
c1 = c - 0xe000;
s = (c1 % 94) + 0xa1;
c1 /= 94;
s |= (c1 < 0x06 ? c1 + 0xaa : c1 + 0xf2) << 8;
} else {
/* U+E4C6..U+E765: 96 codes per lead byte starting at 0xA1,
 * trail bytes from 0x40 with 0x7F skipped */
c1 = c - 0xe4c6;
s = ((c1 / 96) + 0xa1) << 8;
c1 %= 96;
s |= c1 + (c1 >= 0x3f ? 0x41 : 0x40);
}
} else {
/* U+E766..U+E864 */
/* Bisect mbfl_gb18030_pua_tbl, whose rows are used as
 * {first codepoint, last codepoint, first 2-byte code} */
k1 = 0;
k2 = mbfl_gb18030_pua_tbl_max;
while (k1 < k2) {
k = (k1 + k2) >> 1;
if (c < mbfl_gb18030_pua_tbl[k][0]) {
k2 = k;
} else if (c > mbfl_gb18030_pua_tbl[k][1]) {
k1 = k + 1;
} else {
s = c - mbfl_gb18030_pua_tbl[k][0] + mbfl_gb18030_pua_tbl[k][2];
break;
}
}
}
}
/* If we have not yet found a suitable mapping for this codepoint, it requires a 4-byte code */
if (s <= 0 && c >= 0x0080 && c <= 0xffff) {
/* BMP */
/* `s` temporarily holds the range index; if the search fails it stays -1 */
s = mbfl_bisec_srch(c, mbfl_uni2gb_tbl, mbfl_gb_uni_max);
if (s >= 0) {
/* Split the linear index into mixed-radix digits (10, 126, 10) */
c1 = c - mbfl_gb_uni_ofst[s];
s = (c1 % 10) + 0x30;
c1 /= 10;
s |= ((c1 % 126) + 0x81) << 8;
c1 /= 126;
s |= ((c1 % 10) + 0x30) << 16;
c1 /= 10;
s1 = c1 + 0x81;
}
} else if (c >= 0x10000 && c <= 0x10ffff) {
/* Code set 3: Unicode U+10000..U+10FFFF */
c1 = c - 0x10000;
s = (c1 % 10) + 0x30;
c1 /= 10;
s |= ((c1 % 126) + 0x81) << 8;
c1 /= 126;
s |= ((c1 % 10) + 0x30) << 16;
c1 /= 10;
s1 = c1 + 0x90;
}
/* U+0000 legitimately encodes as 0; any other codepoint with s == 0 is unmappable */
if (c == 0) {
s = 0;
} else if (s == 0) {
s = -1;
}
if (s >= 0) {
/* NOTE(review): this test accepts s == 0x80 as a single byte, while
 * mb_wchar_to_gb18030 uses s < 0x80 - confirm whether 0x80 can occur here */
if (s <= 0x80) { /* latin */
CK((*filter->output_function)(s, filter->data));
} else if (s1 > 0) { /* qbcs */
CK((*filter->output_function)(s1 & 0xff, filter->data));
CK((*filter->output_function)((s >> 16) & 0xff, filter->data));
CK((*filter->output_function)((s >> 8) & 0xff, filter->data));
CK((*filter->output_function)(s & 0xff, filter->data));
} else { /* dbcs */
CK((*filter->output_function)((s >> 8) & 0xff, filter->data));
CK((*filter->output_function)(s & 0xff, filter->data));
}
} else {
CK(mbfl_filt_conv_illegal_output(c, filter));
}
return 0;
}
/* Codepoint overrides for some 2-byte codes with lead byte 0xFE, consulted by
 * mb_gb18030_to_wchar. The table is indexed by (w - 0x5DD0), where
 * w = (lead - 0x81)*192 + (trail - 0x40), so index 0 corresponds to bytes FE 50.
 * A zero entry means "no override"; the caller then continues with its other lookups. */
static const unsigned short gb18030_pua_tbl3[] = {
/* 0xFE50 */
0x0000,0xE816,0xE817,0xE818,0x0000,0x0000,0x0000,0x0000,
0x0000,0xE81E,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,
0x0000,0xE826,0x0000,0x0000,0x0000,0x0000,0xE82B,0xE82C,
0x0000,0x0000,0x0000,0x0000,0xE831,0xE832,0x0000,0x0000,
0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0xE83B,0x0000,
0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0xE843,0x0000,
0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,
0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,
0xE854,0xE855,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,
0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,
/* 0xFEA0 */
0xE864
};
/* Buffer-based decoder: GB18030 -> wchar.
 *
 * Reads bytes from *in (length *in_len) and writes at most `bufsize` codepoints to `buf`;
 * invalid or truncated sequences produce MBFL_BAD_INPUT. On return, *in and *in_len
 * describe the unconsumed remainder of the input. `state` is not used by this function.
 * Returns the number of codepoints written. */
static size_t mb_gb18030_to_wchar(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state)
{
unsigned char *p = *in, *e = p + *in_len;
uint32_t *out = buf, *limit = buf + bufsize;
/* Each iteration consumes one character and writes exactly one output value */
while (p < e && out < limit) {
unsigned char c = *p++;
if (c < 0x80) {
*out++ = c;
} else if (c == 0x80 || c == 0xFF) {
/* These two bytes can never start a character */
*out++ = MBFL_BAD_INPUT;
} else {
if (p == e) {
/* Lead byte at the very end of the input */
*out++ = MBFL_BAD_INPUT;
break;
}
unsigned char c2 = *p++;
if (((c >= 0x81 && c <= 0x84) || (c >= 0x90 && c <= 0xE3)) && c2 >= 0x30 && c2 <= 0x39) {
/* A digit as second byte marks a 4-byte code */
if (p >= e) {
*out++ = MBFL_BAD_INPUT;
break;
}
unsigned char c3 = *p++;
if (c3 >= 0x81 && c3 <= 0xFE && p < e) {
unsigned char c4 = *p++;
if (c4 >= 0x30 && c4 <= 0x39) {
if (c >= 0x90 && c <= 0xE3) {
/* Supplementary planes: mixed-radix number (10, 126, 10) offset from U+10000 */
unsigned int w = ((((c - 0x90)*10 + (c2 - 0x30))*126 + (c3 - 0x81)))*10 + (c4 - 0x30) + 0x10000;
*out++ = (w > 0x10FFFF) ? MBFL_BAD_INPUT : w;
} else {
/* Unicode BMP */
unsigned int w = (((c - 0x81)*10 + (c2 - 0x30))*126 + (c3 - 0x81))*10 + (c4 - 0x30);
if (w <= 39419) {
/* NOTE(review): the result of mbfl_bisec_srch is used as an index without
 * checking for -1; relies on mbfl_gb2uni_tbl covering 0..39419 - confirm */
*out++ = w + mbfl_gb_uni_ofst[mbfl_bisec_srch(w, mbfl_gb2uni_tbl, mbfl_gb_uni_max)];
} else {
*out++ = MBFL_BAD_INPUT;
}
}
} else {
*out++ = MBFL_BAD_INPUT;
}
} else {
/* Bad third byte, or input ended after the third byte */
*out++ = MBFL_BAD_INPUT;
}
} else if (((c >= 0xAA && c <= 0xAF) || (c >= 0xF8 && c <= 0xFE)) && (c2 >= 0xA1 && c2 <= 0xFE)) {
/* UDA part 1, 2: U+E000-U+E4C5 */
*out++ = 94*(c >= 0xF8 ? c - 0xF2 : c - 0xAA) + (c2 - 0xA1) + 0xE000;
} else if (c >= 0xA1 && c <= 0xA7 && c2 >= 0x40 && c2 < 0xA1 && c2 != 0x7F) {
/* UDA part 3: U+E4C6-U+E765 */
*out++ = 96*(c - 0xA1) + c2 - (c2 >= 0x80 ? 0x41 : 0x40) + 0xE4C6;
} else if (c2 >= 0x40 && c2 != 0x7F && c2 != 0xFF) {
/* Linear index into the 2-byte space: 192 slots per lead byte */
unsigned int w = (c - 0x81)*192 + c2 - 0x40;
/* Three override tables take precedence over cp936_ucs_table for selected
 * index ranges; when none applies, control falls through to the range check below */
if (w >= 0x192B) {
if (w <= 0x1EBE) {
if (w != 0x1963 && w != 0x1DBF && (w < 0x1E49 || w > 0x1E55) && w != 0x1E7F) {
*out++ = cp936_pua_tbl1[w - 0x192B];
continue;
}
} else if (w >= 0x413A) {
if (w <= 0x413E) {
*out++ = cp936_pua_tbl2[w - 0x413A];
continue;
} else if (w >= 0x5DD0 && w <= 0x5E20) {
/* NOTE(review): this local `c` shadows the input byte `c` for the
 * duration of this block only; the outer `c` is used again below */
unsigned int c = gb18030_pua_tbl3[w - 0x5DD0];
if (c) {
*out++ = c;
continue;
}
}
}
}
if ((c >= 0x81 && c <= 0xA9) || (c >= 0xB0 && c <= 0xF7 && c2 >= 0xA1) || (c >= 0xAA && c <= 0xFE && c2 <= 0xA0)) {
ZEND_ASSERT(w < cp936_ucs_table_size);
*out++ = cp936_ucs_table[w];
} else {
*out++ = MBFL_BAD_INPUT;
}
} else {
*out++ = MBFL_BAD_INPUT;
}
}
}
/* Report how much input is left for the next call */
*in_len = e - p;
*in = p;
return out - buf;
}
/* Buffer-based encoder: wchar -> GB18030.
 *
 * Encodes `len` codepoints from `in` into `buf`. `s` holds the encoded form of the
 * current codepoint: a value below 0x80 is one byte, a value above 0xFFFFFF is a
 * 4-byte code, anything else is a 2-byte code; 0 means "no mapping found".
 * Unmappable codepoints are passed to MB_CONVERT_ERROR. `end` is not used by this function.
 *
 * The MB_CONVERT_BUF_* macros are defined elsewhere; presumably MB_CONVERT_BUF_ENSURE
 * guarantees room for the given number of further bytes - TODO confirm. */
static void mb_wchar_to_gb18030(uint32_t *in, size_t len, mb_convert_buf *buf, bool end)
{
unsigned char *out, *limit;
MB_CONVERT_BUF_LOAD(buf, out, limit);
/* Initial reservation: one byte per input codepoint */
MB_CONVERT_BUF_ENSURE(buf, out, limit, len);
while (len--) {
uint32_t w = *in++;
unsigned int s = 0;
if (w == 0) {
/* U+0000 is emitted directly, since s == 0 otherwise means "unmapped" */
out = mb_convert_buf_add(out, 0);
continue;
} else if (w >= ucs_a1_cp936_table_min && w < ucs_a1_cp936_table_max) {
if (w == 0x1F9) {
s = 0xA8Bf;
} else {
s = ucs_a1_cp936_table[w - ucs_a1_cp936_table_min];
}
} else if (w >= ucs_a2_cp936_table_min && w < ucs_a2_cp936_table_max) {
if (w == 0x20AC) { /* Euro sign */
s = 0xA2E3;
} else {
s = ucs_a2_cp936_table[w - ucs_a2_cp936_table_min];
}
} else if (w >= ucs_a3_cp936_table_min && w < ucs_a3_cp936_table_max) {
s = ucs_a3_cp936_table[w - ucs_a3_cp936_table_min];
} else if (w >= ucs_i_cp936_table_min && w < ucs_i_cp936_table_max) {
s = ucs_i_cp936_table[w - ucs_i_cp936_table_min];
} else if (w >= ucs_ci_cp936_table_min && w < ucs_ci_cp936_table_max) {
/* U+F900-U+FA2F CJK Compatibility Ideographs */
if (w == 0xF92C) {
s = 0xFD9C;
} else if (w == 0xF979) {
s = 0xFD9D;
} else if (w == 0xF995) {
s = 0xFD9E;
} else if (w == 0xF9E7) {
s = 0xFD9F;
} else if (w == 0xF9F1) {
s = 0xFDA0;
} else if (w >= 0xFA0C && w <= 0xFA29) {
s = ucs_ci_s_cp936_table[w - 0xFA0C];
}
} else if (w >= ucs_cf_cp936_table_min && w < ucs_cf_cp936_table_max) {
/* CJK Compatibility Forms */
s = ucs_cf_cp936_table[w - ucs_cf_cp936_table_min];
} else if (w >= ucs_sfv_cp936_table_min && w < ucs_sfv_cp936_table_max) {
/* U+FE50-U+FE6F Small Form Variants */
s = ucs_sfv_cp936_table[w - ucs_sfv_cp936_table_min];
} else if (w >= ucs_hff_cp936_table_min && w < ucs_hff_cp936_table_max) {
/* U+FF00-U+FFFF HW/FW Forms */
if (w == 0xFF04) {
s = 0xA1E7;
} else if (w == 0xFF5E) {
s = 0xA1AB;
} else if (w >= 0xFF01 && w <= 0xFF5D) {
s = w - 0xFF01 + 0xA3A1;
} else if (w >= 0xFFE0 && w <= 0xFFE5) {
s = ucs_hff_s_cp936_table[w - 0xFFE0];
}
} else if (w >= 0xE000 && w <= 0xE864) {
/* PUA */
if (w < 0xE766) {
if (w < 0xE4C6) {
/* U+E000..U+E4C5: 94 codes per lead byte, trail bytes 0xA1..0xFE;
 * lead bytes 0xAA..0xAF for the first six rows, then 0xF8 onwards */
unsigned int c1 = w - 0xE000;
s = (c1 % 94) + 0xA1;
c1 /= 94;
s |= (c1 + (c1 < 0x06 ? 0xAA : 0xF2)) << 8;
} else {
/* U+E4C6..U+E765: 96 codes per lead byte starting at 0xA1,
 * trail bytes from 0x40 with 0x7F skipped */
unsigned int c1 = w - 0xE4C6;
s = ((c1 / 96) + 0xA1) << 8;
c1 %= 96;
s |= c1 + (c1 >= 0x3F ? 0x41 : 0x40);
}
} else {
/* U+E766-U+E864 */
/* Bisect mbfl_gb18030_pua_tbl, whose rows are used as
 * {first codepoint, last codepoint, first 2-byte code} */
unsigned int k1 = 0, k2 = mbfl_gb18030_pua_tbl_max;
while (k1 < k2) {
unsigned int k = (k1 + k2) >> 1;
if (w < mbfl_gb18030_pua_tbl[k][0]) {
k2 = k;
} else if (w > mbfl_gb18030_pua_tbl[k][1]) {
k1 = k + 1;
} else {
s = w - mbfl_gb18030_pua_tbl[k][0] + mbfl_gb18030_pua_tbl[k][2];
break;
}
}
}
}
/* While GB18030 and CP936 are very similar, some mappings are different between these encodings;
* do a binary search in a table of differing codepoints to see if we have one */
if (!s && w >= mbfl_gb18030_c_tbl_key[0] && w <= mbfl_gb18030_c_tbl_key[mbfl_gb18030_c_tbl_max-1]) {
int i = mbfl_bisec_srch2(w, mbfl_gb18030_c_tbl_key, mbfl_gb18030_c_tbl_max);
if (i >= 0) {
s = mbfl_gb18030_c_tbl_val[i];
}
}
/* If we have not yet found a suitable mapping for this codepoint, it requires a 4-byte code */
if (!s && w >= 0x80 && w <= 0xFFFF) {
/* BMP */
int i = mbfl_bisec_srch(w, mbfl_uni2gb_tbl, mbfl_gb_uni_max);
if (i >= 0) {
/* Split the linear index into mixed-radix digits (10, 126, 10);
 * all four bytes are packed into `s`, first byte in the top 8 bits */
unsigned int c1 = w - mbfl_gb_uni_ofst[i];
s = (c1 % 10) + 0x30;
c1 /= 10;
s |= ((c1 % 126) + 0x81) << 8;
c1 /= 126;
s |= ((c1 % 10) + 0x30) << 16;
c1 /= 10;
s |= (c1 + 0x81) << 24;
}
} else if (w >= 0x10000 && w <= 0x10FFFF) {
/* Code set 3: Unicode U+10000-U+10FFFF */
unsigned int c1 = w - 0x10000;
s = (c1 % 10) + 0x30;
c1 /= 10;
s |= ((c1 % 126) + 0x81) << 8;
c1 /= 126;
s |= ((c1 % 10) + 0x30) << 16;
c1 /= 10;
s |= (c1 + 0x90) << 24;
}
if (!s) {
/* No mapping: hand over to the error macro, then re-reserve one byte for
 * each codepoint still to be processed */
MB_CONVERT_ERROR(buf, out, limit, w, mb_wchar_to_gb18030);
MB_CONVERT_BUF_ENSURE(buf, out, limit, len);
} else if (s < 0x80) {
/* Single byte: covered by the existing one-byte-per-codepoint reservation */
out = mb_convert_buf_add(out, s);
} else if (s > 0xFFFFFF) {
/* 4-byte code: these bytes plus one byte for each remaining codepoint */
MB_CONVERT_BUF_ENSURE(buf, out, limit, len + 4);
out = mb_convert_buf_add4(out, (s >> 24) & 0xFF, (s >> 16) & 0xFF, (s >> 8) & 0xFF, s & 0xFF);
} else {
/* 2-byte code: these bytes plus one byte for each remaining codepoint */
MB_CONVERT_BUF_ENSURE(buf, out, limit, len + 2);
out = mb_convert_buf_add2(out, (s >> 8) & 0xFF, s & 0xFF);
}
}
MB_CONVERT_BUF_STORE(buf, out, limit);
}

View file

@ -1,42 +0,0 @@
/*
* "streamable kanji code filter and converter"
* Copyright (c) 1998-2002 HappySize, Inc. All rights reserved.
*
* LICENSE NOTICES
*
* This file is part of "streamable kanji code filter and converter",
* which is distributed under the terms of GNU Lesser General Public
* License (version 2) as published by the Free Software Foundation.
*
* This software is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with "streamable kanji code filter and converter";
* if not, write to the Free Software Foundation, Inc., 59 Temple Place,
* Suite 330, Boston, MA 02111-1307 USA
*
* The author of this file:
*
*/
/*
* the source code included in this file was separated from mbfilter_cn.h
* by moriyoshi koizumi <moriyoshi@php.net> on 4 dec 2002.
*
*/
/* Include guard: the declarations below are emitted once per translation unit */
#ifndef MBFL_MBFILTER_GB18030_H
#define MBFL_MBFILTER_GB18030_H
#include "mbfilter.h"
/* Encoding descriptor and the two conversion vtables exported for GB18030 */
extern const mbfl_encoding mbfl_encoding_gb18030;
extern const struct mbfl_convert_vtbl vtbl_gb18030_wchar;
extern const struct mbfl_convert_vtbl vtbl_wchar_gb18030;
/* Filter callbacks; each receives one input unit `c` and the filter instance */
int mbfl_filt_conv_gb18030_wchar(int c, mbfl_convert_filter *filter);
int mbfl_filt_conv_wchar_gb18030(int c, mbfl_convert_filter *filter);
#endif /* MBFL_MBFILTER_GB18030_H */

View file

@ -1,409 +0,0 @@
/*
* "streamable kanji code filter and converter"
* Copyright (c) 1998-2002 HappySize, Inc. All rights reserved.
*
* LICENSE NOTICES
*
* This file is part of "streamable kanji code filter and converter",
* which is distributed under the terms of GNU Lesser General Public
* License (version 2) as published by the Free Software Foundation.
*
* This software is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with "streamable kanji code filter and converter";
* if not, write to the Free Software Foundation, Inc., 59 Temple Place,
* Suite 330, Boston, MA 02111-1307 USA
*
* The author of this file:
*
*/
/*
* The source code included in this file was separated from mbfilter_cn.c
* by moriyoshi koizumi <moriyoshi@php.net> on 4 dec 2002.
*
*/
#include "mbfilter.h"
#include "mbfilter_hz.h"
#include "unicode_table_cp936.h"
#include "unicode_table_gb2312.h"
/* Forward declarations of the file-local functions referenced by the tables below */
static int mbfl_filt_conv_hz_wchar_flush(mbfl_convert_filter *filter);
static size_t mb_hz_to_wchar(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state);
static void mb_wchar_to_hz(uint32_t *in, size_t len, mb_convert_buf *buf, bool end);
/* Encoding descriptor for HZ.
 * This is a positional initializer; the field names live in mbfilter.h (not visible here).
 * It bundles the encoding ID, the names "HZ" and "HZ-GB-2312", both filter vtables and
 * both buffer-based converters defined in this file. No alias list is supplied. */
const mbfl_encoding mbfl_encoding_hz = {
mbfl_no_encoding_hz,
"HZ",
"HZ-GB-2312",
NULL,
NULL,
MBFL_ENCTYPE_GL_UNSAFE,
&vtbl_hz_wchar,
&vtbl_wchar_hz,
mb_hz_to_wchar,
mb_wchar_to_hz,
NULL
};
/* Filter vtable for HZ -> wchar: per-unit callback mbfl_filt_conv_hz_wchar plus a
 * flush handler which reports a truncated 2-byte character */
const struct mbfl_convert_vtbl vtbl_hz_wchar = {
mbfl_no_encoding_hz,
mbfl_no_encoding_wchar,
mbfl_filt_conv_common_ctor,
NULL,
mbfl_filt_conv_hz_wchar,
mbfl_filt_conv_hz_wchar_flush,
NULL,
};
/* Filter vtable for wchar -> HZ: per-unit callback mbfl_filt_conv_wchar_hz plus a
 * flush handler which emits the closing "~}" when the output is not in ASCII mode */
const struct mbfl_convert_vtbl vtbl_wchar_hz = {
mbfl_no_encoding_wchar,
mbfl_no_encoding_hz,
mbfl_filt_conv_common_ctor,
NULL,
mbfl_filt_conv_wchar_hz,
mbfl_filt_conv_any_hz_flush,
NULL,
};
/* Evaluate `statement`; if its value is negative, return -1 from the enclosing function */
#define CK(statement) do { if ((statement) < 0) return (-1); } while (0)
/* Filter callback: HZ -> wchar, consuming one input unit `c` per call.
 *
 * filter->status packs two pieces of state:
 *   high nibble: 0x00 = ASCII mode, 0x10 = GB2312 mode
 *   low nibble:  0 = at a character boundary, 1 = first byte of a 2-byte character
 *                cached in filter->cache, 2 = a '~' escape introducer was just seen
 *
 * Decoded codepoints, or MBFL_BAD_INPUT for invalid input, go to filter->output_function.
 * Returns 0, or -1 (through CK) if the output function returns a negative value. */
int mbfl_filt_conv_hz_wchar(int c, mbfl_convert_filter *filter)
{
int c1, s, w;
switch (filter->status & 0xf) {
/* case 0x00: ASCII */
/* case 0x10: GB2312 */
case 0:
if (c == '~') {
/* Remember the escape introducer; the mode nibble is kept */
filter->status += 2;
} else if (filter->status == 0x10 && ((c > 0x20 && c <= 0x29) || (c >= 0x30 && c <= 0x77))) {
/* DBCS first char */
filter->cache = c;
filter->status += 1;
} else if (filter->status == 0 && c >= 0 && c < 0x80) { /* latin, CTLs */
CK((*filter->output_function)(c, filter->data));
} else {
CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data));
}
break;
/* case 0x11: GB2312 second char */
case 1:
/* Back to a character boundary; the mode nibble is kept */
filter->status &= ~0xf;
c1 = filter->cache;
if (c1 > 0x20 && c1 < 0x7F && c > 0x20 && c < 0x7F) {
/* Index into the CP936 table (192 slots per lead byte); the constants account
 * for HZ bytes being the 7-bit form of the table's 8-bit lead/trail bytes */
s = (c1 - 1)*192 + c + 0x40; /* GB2312 */
ZEND_ASSERT(s < cp936_ucs_table_size);
/* A few positions are mapped differently from the CP936 table, and several
 * index ranges are treated as unassigned (w = 0) */
if (s == 0x1864) {
w = 0x30FB;
} else if (s == 0x186A) {
w = 0x2015;
} else if (s == 0x186C) {
w = 0x2225;
} else if ((s >= 0x1920 && s <= 0x192A) || s == 0x1963 || (s >= 0x1C60 && s <= 0x1C7F) || (s >= 0x1DBB && s <= 0x1DC4)) {
w = 0;
} else {
w = cp936_ucs_table[s];
}
if (w <= 0) {
w = MBFL_BAD_INPUT;
}
CK((*filter->output_function)(w, filter->data));
} else {
CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data));
}
break;
/* '~' */
case 2:
if (c == '}' && filter->status == 0x12) {
/* "~}" in GB2312 mode: switch to ASCII mode */
filter->status = 0;
} else if (c == '{' && filter->status == 2) {
/* "~{" in ASCII mode: switch to GB2312 mode */
filter->status = 0x10;
} else if (c == '~' && filter->status == 2) {
/* "~~" in ASCII mode stands for a literal tilde */
CK((*filter->output_function)('~', filter->data));
filter->status -= 2;
} else if (c == '\n') {
/* "~\n" is a line continuation; no output is needed, nor should we shift modes */
filter->status -= 2;
} else {
/* Invalid character after ~ */
filter->status -= 2;
CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data));
}
break;
EMPTY_SWITCH_DEFAULT_CASE();
}
return 0;
}
/* Flush handler for the HZ -> wchar filter.
 *
 * Status 0x11 means the input ended after the first byte of a 2-byte character;
 * MBFL_BAD_INPUT is emitted for it. The state is then reset and the flush is
 * forwarded to the downstream flush_function, if one is set.
 * Returns 0, or -1 (through CK) if emitting the error marker fails. */
static int mbfl_filt_conv_hz_wchar_flush(mbfl_convert_filter *filter)
{
	int lead_byte_pending = (filter->status == 0x11);

	if (lead_byte_pending) {
		/* 2-byte character was cut off by the end of input */
		CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data));
	}

	filter->status = 0;

	if (!filter->flush_function) {
		return 0;
	}

	(*filter->flush_function)(filter->data);
	return 0;
}
/* Filter callback: wchar -> HZ, consuming one codepoint `c` per call.
 *
 * The codepoint is looked up in the CP936/GB2312 tables, with explicit exclusion lists
 * forcing s = 0 for codepoints which are not to be encoded in HZ. The 8-bit table code
 * is then reduced to its 7-bit form.
 * filter->status records the output mode: 0 = ASCII, 0x200 = GB2312. Mode switches
 * ("~{" / "~}") are emitted only when the mode actually changes.
 *
 * An unmappable codepoint is handed to mbfl_filt_conv_illegal_output.
 * Returns 0, or -1 (through CK) on output failure. */
int mbfl_filt_conv_wchar_hz(int c, mbfl_convert_filter *filter)
{
int s = 0;
if (c >= ucs_a1_cp936_table_min && c < ucs_a1_cp936_table_max) {
if (c == 0xB7 || c == 0x144 || c == 0x148 || c == 0x251 || c == 0x261 || c == 0x2CA || c == 0x2CB || c == 0x2D9) {
s = 0;
} else {
s = ucs_a1_cp936_table[c - ucs_a1_cp936_table_min];
}
} else if (c >= ucs_a2_cp936_table_min && c < ucs_a2_cp936_table_max) {
if (c == 0x2015) {
s = 0xA1AA;
} else if (c == 0x2010 || c == 0x2013 || c == 0x2014 || c == 0x2016 || c == 0x2025 || c == 0x2035 ||
c == 0x2105 || c == 0x2109 || c == 0x2121 || (c >= 0x2170 && c <= 0x2179) || (c >= 0x2196 && c <= 0x2199) ||
c == 0x2215 || c == 0x221F || c == 0x2223 || c == 0x2252 || c == 0x2266 || c == 0x2267 || c == 0x2295 ||
(c >= 0x2550 && c <= 0x2573) || c == 0x22BF || c == 0x2609 || (c >= 0x2581 && c <= 0x258F) ||
(c >= 0x2593 && c <= 0x2595) || c == 0x25BC || c == 0x25BD || (c >= 0x25E2 && c <= 0x25E5)) {
s = 0;
} else {
s = ucs_a2_cp936_table[c - ucs_a2_cp936_table_min];
}
} else if (c >= ucs_a3_cp936_table_min && c < ucs_a3_cp936_table_max) {
if (c == 0x30FB) {
s = 0xA1A4;
} else if (c == 0x3006 || c == 0x3007 || c == 0x3012 || c == 0x3231 || c == 0x32A3 || c >= 0x3300 ||
(c >= 0x3018 && c <= 0x3040) || (c >= 0x309B && c <= 0x309E) || (c >= 0x30FC && c <= 0x30FE)) {
s = 0;
} else {
s = ucs_a3_cp936_table[c - ucs_a3_cp936_table_min];
}
} else if (c >= ucs_i_gb2312_table_min && c < ucs_i_gb2312_table_max) {
s = ucs_i_gb2312_table[c - ucs_i_gb2312_table_min];
} else if (c >= ucs_hff_cp936_table_min && c < ucs_hff_cp936_table_max) {
if (c == 0xFF04) {
s = 0xA1E7;
} else if (c == 0xFF5E) {
s = 0xA1AB;
} else if (c >= 0xFF01 && c <= 0xFF5D) {
s = c - 0xFF01 + 0xA3A1;
} else if (c == 0xFFE0 || c == 0xFFE1 || c == 0xFFE3 || c == 0xFFE5) {
s = ucs_hff_s_cp936_table[c - 0xFFE0];
}
}
/* Convert an 8-bit 2-byte code to the 7-bit form used by HZ */
if (s & 0x8000) {
s -= 0x8080;
}
/* s <= 0 is valid only for U+0000; codes outside 0x2121..0x8080 (other than
 * single bytes below 0x80) are rejected */
if (s <= 0) {
s = (c == 0) ? 0 : -1;
} else if ((s >= 0x80 && s < 0x2121) || s > 0x8080) {
s = -1;
}
if (s >= 0) {
if (s < 0x80) { /* ASCII */
if ((filter->status & 0xff00) != 0) {
/* Leave GB2312 mode first */
CK((*filter->output_function)('~', filter->data));
CK((*filter->output_function)('}', filter->data));
}
filter->status = 0;
if (s == 0x7E) {
/* A literal tilde is written as "~~" */
CK((*filter->output_function)('~', filter->data));
}
CK((*filter->output_function)(s, filter->data));
} else { /* GB 2312-80 */
if ((filter->status & 0xFF00) != 0x200) {
/* Enter GB2312 mode first */
CK((*filter->output_function)('~', filter->data));
CK((*filter->output_function)('{', filter->data));
}
filter->status = 0x200;
CK((*filter->output_function)((s >> 8) & 0x7F, filter->data));
CK((*filter->output_function)(s & 0x7F, filter->data));
}
} else {
CK(mbfl_filt_conv_illegal_output(c, filter));
}
return 0;
}
/* Flush handler for filters which produce HZ output.
 *
 * If the output is currently in a non-ASCII mode (any bit of 0xFF00 set in
 * filter->status), the closing sequence "~}" is emitted so the stream ends in
 * ASCII mode; the state is then reset.
 *
 * The flush is afterwards forwarded to the downstream flush_function, if one is
 * set, in the same way as mbfl_filt_conv_hz_wchar_flush does. Without this, a
 * stage chained after this filter would never be told to flush.
 *
 * Returns 0, or -1 (through CK) if emitting the closing sequence fails. */
int mbfl_filt_conv_any_hz_flush(mbfl_convert_filter *filter)
{
	/* back to latin */
	if (filter->status & 0xFF00) {
		CK((*filter->output_function)('~', filter->data));
		CK((*filter->output_function)('}', filter->data));
	}
	filter->status = 0;

	/* Propagate the flush down the filter chain */
	if (filter->flush_function) {
		(*filter->flush_function)(filter->data);
	}
	return 0;
}
/* Mode values stored in `*state` by mb_hz_to_wchar and in `buf->state` by mb_wchar_to_hz */
#define ASCII 0
#define GB2312 1
/* Buffer-based decoder: HZ -> wchar.
 *
 * Reads bytes from *in (length *in_len) and writes at most `bufsize` codepoints to
 * `buf`. `*state` carries the current mode (ASCII or GB2312) across calls.
 * Invalid input produces MBFL_BAD_INPUT. A '~' which is the very last input byte
 * is consumed without producing any output.
 * On return, *in and *in_len describe the unconsumed remainder of the input.
 * Returns the number of codepoints written. */
static size_t mb_hz_to_wchar(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state)
{
	unsigned char *cursor = *in;
	unsigned char *input_end = cursor + *in_len;
	uint32_t *dst = buf;
	uint32_t *dst_end = buf + bufsize;

	while (cursor < input_end && dst < dst_end) {
		unsigned char lead = *cursor++;

		/* Escape sequences: '~' followed by one more byte */
		if (lead == '~') {
			if (cursor == input_end) {
				break;
			}

			unsigned char esc = *cursor++;

			switch (esc) {
			case '}':
				/* "~}" leaves GB2312 mode; it is only valid while in that mode */
				if (*state == GB2312) {
					*state = ASCII;
				} else {
					*dst++ = MBFL_BAD_INPUT;
				}
				break;
			case '{':
				/* "~{" enters GB2312 mode; it is only valid while in ASCII mode */
				if (*state == ASCII) {
					*state = GB2312;
				} else {
					*dst++ = MBFL_BAD_INPUT;
				}
				break;
			case '~':
				/* "~~" is a literal tilde, but only in ASCII mode */
				if (*state == ASCII) {
					*dst++ = '~';
				} else {
					*dst++ = MBFL_BAD_INPUT;
				}
				break;
			case '\n':
				/* "~\n" is a line continuation: nothing is output and the mode is kept */
				break;
			default:
				/* Any other byte after '~' is invalid */
				*dst++ = MBFL_BAD_INPUT;
				break;
			}
			continue;
		}

		int is_lead_byte = (lead >= 0x21 && lead <= 0x29) || (lead >= 0x30 && lead <= 0x77);

		/* 2-byte GB2312 character; needs GB2312 mode and one more byte of input */
		if (is_lead_byte && cursor < input_end && *state == GB2312) {
			unsigned char trail = *cursor++;

			if (!(lead > 0x20 && lead < 0x7F && trail > 0x20 && trail < 0x7F)) {
				*dst++ = MBFL_BAD_INPUT;
				continue;
			}

			/* Index into the CP936 table, which has 192 slots per lead byte */
			unsigned int idx = (lead - 1)*192 + trail + 0x40;
			unsigned int wc;
			ZEND_ASSERT(idx < cp936_ucs_table_size);

			switch (idx) {
			case 0x1864:
				wc = 0x30FB;
				break;
			case 0x186A:
				wc = 0x2015;
				break;
			case 0x186C:
				wc = 0x2225;
				break;
			case 0x1963:
				wc = 0;
				break;
			default:
				if ((idx >= 0x1920 && idx <= 0x192A) || (idx >= 0x1C60 && idx <= 0x1C7F) || (idx >= 0x1DBB && idx <= 0x1DC4)) {
					/* Index ranges treated as unassigned */
					wc = 0;
				} else {
					wc = cp936_ucs_table[idx];
				}
				break;
			}

			if (!wc) {
				wc = MBFL_BAD_INPUT;
			}
			*dst++ = wc;
			continue;
		}

		/* Plain 7-bit byte in ASCII mode; everything else is an error */
		if (lead < 0x80 && *state == ASCII) {
			*dst++ = lead;
		} else {
			*dst++ = MBFL_BAD_INPUT;
		}
	}

	*in_len = input_end - cursor;
	*in = cursor;
	return dst - buf;
}
/* Buffer-based encoder: wchar -> HZ.
 *
 * Encodes `len` codepoints from `in` into `buf`. buf->state carries the current output
 * mode (ASCII or GB2312) across calls; "~{" and "~}" are emitted only when the mode
 * changes. When `end` is true and the output is not in ASCII mode, a closing "~}" is
 * appended. Unmappable codepoints are passed to MB_CONVERT_ERROR.
 *
 * The MB_CONVERT_BUF_* macros are defined elsewhere; presumably MB_CONVERT_BUF_ENSURE
 * guarantees room for the given number of further bytes - TODO confirm. */
static void mb_wchar_to_hz(uint32_t *in, size_t len, mb_convert_buf *buf, bool end)
{
unsigned char *out, *limit;
MB_CONVERT_BUF_LOAD(buf, out, limit);
/* Initial reservation: one byte per input codepoint */
MB_CONVERT_BUF_ENSURE(buf, out, limit, len);
while (len--) {
uint32_t w = *in++;
unsigned int s = 0;
/* Table lookups; the explicit lists force s = 0 for codepoints which are not to be encoded in HZ */
if (w >= ucs_a1_cp936_table_min && w < ucs_a1_cp936_table_max) {
if (w == 0xB7 || w == 0x144 || w == 0x148 || w == 0x251 || w == 0x261 || w == 0x2CA || w == 0x2CB || w == 0x2D9) {
s = 0;
} else {
s = ucs_a1_cp936_table[w - ucs_a1_cp936_table_min];
}
} else if (w >= ucs_a2_cp936_table_min && w < ucs_a2_cp936_table_max) {
if (w == 0x2015) {
s = 0xA1AA;
} else if (w == 0x2010 || w == 0x2013 || w == 0x2014 || w == 0x2016 || w == 0x2025 || w == 0x2035 || w == 0x2105 || w == 0x2109 || w == 0x2121 || (w >= 0x2170 && w <= 0x2179) || (w >= 0x2196 && w <= 0x2199) || w == 0x2215 || w == 0x221F || w == 0x2223 || w == 0x2252 || w == 0x2266 || w == 0x2267 || w == 0x2295 || (w >= 0x2550 && w <= 0x2573) || w == 0x22BF || w == 0x2609 || (w >= 0x2581 && w <= 0x258F) || (w >= 0x2593 && w <= 0x2595) || w == 0x25BC || w == 0x25BD || (w >= 0x25E2 && w <= 0x25E5)) {
s = 0;
} else {
s = ucs_a2_cp936_table[w - ucs_a2_cp936_table_min];
}
} else if (w >= ucs_a3_cp936_table_min && w < ucs_a3_cp936_table_max) {
if (w == 0x30FB) {
s = 0xA1A4;
} else if (w == 0x3006 || w == 0x3007 || w == 0x3012 || w == 0x3231 || w == 0x32A3 || w >= 0x3300 || (w >= 0x3018 && w <= 0x3040) || (w >= 0x309B && w <= 0x309E) || (w >= 0x30FC && w <= 0x30FE)) {
s = 0;
} else {
s = ucs_a3_cp936_table[w - ucs_a3_cp936_table_min];
}
} else if (w >= ucs_i_gb2312_table_min && w < ucs_i_gb2312_table_max) {
s = ucs_i_gb2312_table[w - ucs_i_gb2312_table_min];
} else if (w >= ucs_hff_cp936_table_min && w < ucs_hff_cp936_table_max) {
if (w == 0xFF04) {
s = 0xA1E7;
} else if (w == 0xFF5E) {
s = 0xA1AB;
} else if (w >= 0xFF01 && w <= 0xFF5D) {
s = w - 0xFF01 + 0xA3A1;
} else if (w == 0xFFE0 || w == 0xFFE1 || w == 0xFFE3 || w == 0xFFE5) {
s = ucs_hff_s_cp936_table[w - 0xFFE0];
}
}
/* Clear the 0x80 bit of both bytes to obtain the 7-bit form used by HZ */
s &= ~0x8080;
/* s == 0 is valid only for U+0000; values in 0x80..0x2120 are not valid 2-byte codes */
if ((!s && w) || (s >= 0x80 && s < 0x2121)) {
MB_CONVERT_ERROR(buf, out, limit, w, mb_wchar_to_hz);
/* Re-reserve one byte for each codepoint still to be processed */
MB_CONVERT_BUF_ENSURE(buf, out, limit, len);
} else if (s < 0x80) {
/* ASCII */
if (buf->state != ASCII) {
/* "~}" (2 bytes), at least one byte for this character, plus the remaining codepoints */
MB_CONVERT_BUF_ENSURE(buf, out, limit, len + 3);
out = mb_convert_buf_add2(out, '~', '}');
buf->state = ASCII;
}
if (s == '~') {
/* A literal tilde is written as "~~" */
MB_CONVERT_BUF_ENSURE(buf, out, limit, len + 2);
out = mb_convert_buf_add2(out, '~', '~');
} else {
out = mb_convert_buf_add(out, s);
}
} else {
/* GB 2312-80 */
if (buf->state != GB2312) {
/* "~{" (2 bytes), the 2-byte character, plus the remaining codepoints */
MB_CONVERT_BUF_ENSURE(buf, out, limit, len + 4);
out = mb_convert_buf_add2(out, '~', '{');
buf->state = GB2312;
} else {
MB_CONVERT_BUF_ENSURE(buf, out, limit, len + 2);
}
out = mb_convert_buf_add2(out, (s >> 8) & 0x7F, s & 0x7F);
}
}
if (end && buf->state != ASCII) {
/* If not in ASCII state, need to emit closing control chars */
MB_CONVERT_BUF_ENSURE(buf, out, limit, 2);
out = mb_convert_buf_add2(out, '~', '}');
}
MB_CONVERT_BUF_STORE(buf, out, limit);
}

View file

@ -1,43 +0,0 @@
/*
* "streamable kanji code filter and converter"
* Copyright (c) 1998-2002 HappySize, Inc. All rights reserved.
*
* LICENSE NOTICES
*
* This file is part of "streamable kanji code filter and converter",
* which is distributed under the terms of GNU Lesser General Public
* License (version 2) as published by the Free Software Foundation.
*
* This software is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with "streamable kanji code filter and converter";
* if not, write to the Free Software Foundation, Inc., 59 Temple Place,
* Suite 330, Boston, MA 02111-1307 USA
*
* The author of this file:
*
*/
/*
* The source code included in this file was separated from mbfilter_cn.h
* by moriyoshi koizumi <moriyoshi@php.net> on 4 dec 2002.
*
*/
/* Declarations for the HZ converter: the encoding descriptor, the two
 * conversion vtables (HZ -> wchar and wchar -> HZ), and the filter
 * callbacks those vtables are expected to reference. */
#ifndef MBFL_MBFILTER_HZ_H
#define MBFL_MBFILTER_HZ_H
#include "mbfilter.h"
/* Encoding descriptor */
extern const mbfl_encoding mbfl_encoding_hz;
/* Conversion vtables, one per direction */
extern const struct mbfl_convert_vtbl vtbl_hz_wchar;
extern const struct mbfl_convert_vtbl vtbl_wchar_hz;
/* Per-unit filter callbacks; `c` is the unit to convert and `filter`
 * carries the conversion state */
int mbfl_filt_conv_hz_wchar(int c, mbfl_convert_filter *filter);
int mbfl_filt_conv_wchar_hz(int c, mbfl_convert_filter *filter);
/* Flush callback */
int mbfl_filt_conv_any_hz_flush(mbfl_convert_filter *filter);
#endif /* MBFL_MBFILTER_HZ_H */

View file

@ -1,584 +0,0 @@
/*
* "streamable kanji code filter and converter"
* Copyright (c) 1998-2002 HappySize, Inc. All rights reserved.
*
* LICENSE NOTICES
*
* This file is part of "streamable kanji code filter and converter",
* which is distributed under the terms of GNU Lesser General Public
* License (version 2) as published by the Free Software Foundation.
*
* This software is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with "streamable kanji code filter and converter";
* if not, write to the Free Software Foundation, Inc., 59 Temple Place,
* Suite 330, Boston, MA 02111-1307 USA
*
* The author of this file:
*
*/
/*
* The source code included in this file was separated from mbfilter_ja.c
* by moriyoshi koizumi <moriyoshi@php.net> on 4 dec 2002.
*
*/
#include "mbfilter.h"
#include "mbfilter_iso2022_jp_ms.h"
#include "unicode_table_cp932_ext.h"
#include "unicode_table_jis.h"
#include "cp932_table.h"
/* Forward declarations of file-local functions which are referenced by the
 * encoding descriptor and vtable initializers before they are defined. */
static size_t mb_iso2022jpms_to_wchar(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state);
static void mb_wchar_to_iso2022jpms(uint32_t *in, size_t len, mb_convert_buf *buf, bool end);
static int mbfl_filt_conv_2022jpms_wchar_flush(mbfl_convert_filter *filter);
/* NULL-terminated list of alternative names for this encoding */
static const char *mbfl_encoding_2022jpms_aliases[] = {"ISO2022JPMS", NULL};
/* Descriptor for ISO-2022-JP-MS. The initializer is positional, so the entries
 * must stay in the order of the mbfl_encoding members.
 * NOTE(review): mbfl_encoding is declared elsewhere; the two strings look like
 * the canonical name and the MIME name, and the two NULLs are unset optional
 * members -- confirm against the struct declaration. */
const mbfl_encoding mbfl_encoding_2022jpms = {
mbfl_no_encoding_2022jpms,
"ISO-2022-JP-MS",
"ISO-2022-JP",
mbfl_encoding_2022jpms_aliases,
NULL,
MBFL_ENCTYPE_GL_UNSAFE,
&vtbl_2022jpms_wchar,
&vtbl_wchar_2022jpms,
mb_iso2022jpms_to_wchar,
mb_wchar_to_iso2022jpms,
NULL
};
/* Legacy filter vtable for the ISO-2022-JP-MS -> wchar direction: source and
 * destination encoding IDs, the common constructor, no destructor (NULL),
 * then the per-byte filter function and its flush function.
 * NOTE(review): the meaning of the NULL slots is inferred from position;
 * confirm against struct mbfl_convert_vtbl. */
const struct mbfl_convert_vtbl vtbl_2022jpms_wchar = {
mbfl_no_encoding_2022jpms,
mbfl_no_encoding_wchar,
mbfl_filt_conv_common_ctor,
NULL,
mbfl_filt_conv_2022jpms_wchar,
mbfl_filt_conv_2022jpms_wchar_flush,
NULL,
};
/* Legacy filter vtable for the wchar -> ISO-2022-JP-MS direction; same layout
 * as vtbl_2022jpms_wchar with the encoding IDs swapped and the encoder's
 * filter/flush functions plugged in. */
const struct mbfl_convert_vtbl vtbl_wchar_2022jpms = {
mbfl_no_encoding_wchar,
mbfl_no_encoding_2022jpms,
mbfl_filt_conv_common_ctor,
NULL,
mbfl_filt_conv_wchar_2022jpms,
mbfl_filt_conv_any_2022jpms_flush,
NULL,
};
/* Evaluate `statement`; if it yields a negative value, return -1 from the
 * ENCLOSING function. Only usable inside functions returning int. */
#define CK(statement) do { if ((statement) < 0) return (-1); } while (0)
/* Map a Shift-JIS byte pair (c1 = lead, c2 = trail) to a linear index with
 * 188 trail positions per lead byte. Lead bytes above 0x9f are rebased from
 * 0xc1, all others from 0x81; trail bytes above 0x7e are shifted down one
 * extra position (0x41 instead of 0x40). Arguments are evaluated repeatedly. */
#define sjistoidx(c1, c2) \
(((c1) > 0x9f) \
? (((c1) - 0xc1) * 188 + (c2) - (((c2) > 0x7e) ? 0x41 : 0x40)) \
: (((c1) - 0x81) * 188 + (c2) - (((c2) > 0x7e) ? 0x41 : 0x40)))
/* Split a linear index into two bytes: 94 positions per row, each byte
 * offset by 0x21 */
#define idxtojis1(c) (((c) / 94) + 0x21)
#define idxtojis2(c) (((c) % 94) + 0x21)
/* Character-set states stored in filter->status (legacy decoder) and in
 * *state / buf->state (bulk converters). All have a zero low nibble, which the
 * legacy decoder uses for its parser position. */
#define ASCII 0
#define JISX0201_KANA 0x20
#define JISX0208_KANJI 0x80
#define UDC 0xA0
/* Legacy byte-at-a-time decoder for ISO-2022-JP-MS.
 *
 * Consumes one input byte `c` and passes each decoded codepoint (or
 * MBFL_BAD_INPUT for invalid input) to filter->output_function.
 *
 * filter->status packs two things: the low 4 bits hold the parser position
 * (0 = idle, 1 = waiting for the 2nd byte of a double-byte character,
 * 2..5 = inside an escape sequence) and the remaining bits hold the selected
 * character set (ASCII, JISX0201_KANA, JISX0208_KANJI or UDC).
 * filter->cache keeps the first byte of a double-byte character.
 *
 * Returns 0, or -1 (through CK) as soon as the output function fails. */
int mbfl_filt_conv_2022jpms_wchar(int c, mbfl_convert_filter *filter)
{
int c1, s, w;
switch (filter->status & 0xF) {
case 0:
if (c == 0x1B) {
/* ESC: enter escape-sequence parsing, keeping the character set bits */
filter->status += 2;
} else if (filter->status == JISX0201_KANA && c > 0x20 && c < 0x60) {
/* 7-bit kana byte in kana mode; 0xFF40 + c lands in U+FF61..U+FF9F */
CK((*filter->output_function)(0xFF40 + c, filter->data));
} else if ((filter->status == JISX0208_KANJI || filter->status == UDC) && c > 0x20 && c < 0x80) {
/* First byte of a double-byte character: remember it and wait for the 2nd */
filter->cache = c;
filter->status += 1;
} else if (c >= 0 && c < 0x80) { /* ASCII */
CK((*filter->output_function)(c, filter->data));
} else if (c > 0xA0 && c < 0xE0) { /* Kana */
/* 8-bit kana byte is accepted in any mode; 0xFEC0 + c gives the same
 * U+FF61..U+FF9F range as above */
CK((*filter->output_function)(0xFEC0 + c, filter->data));
} else {
CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data));
}
break;
/* Kanji, second byte */
case 1:
w = 0;
/* Back to idle position; character set bits are left as they were */
filter->status &= ~0xF;
c1 = filter->cache;
if (c > 0x20 && c < 0x7F) {
/* Linear index: 94 cells per row, both bytes offset by 0x21 */
s = ((c1 - 0x21) * 94) + c - 0x21;
if (filter->status == JISX0208_KANJI) {
/* A handful of symbols get fixed mappings which take priority over the tables */
if (s <= 137) {
if (s == 31) {
w = 0xFF3C; /* FULLWIDTH REVERSE SOLIDUS */
} else if (s == 32) {
w = 0xFF5E; /* FULLWIDTH TILDE */
} else if (s == 33) {
w = 0x2225; /* PARALLEL TO */
} else if (s == 60) {
w = 0xFF0D; /* FULLWIDTH HYPHEN-MINUS */
} else if (s == 80) {
w = 0xFFE0; /* FULLWIDTH CENT SIGN */
} else if (s == 81) {
w = 0xFFE1; /* FULLWIDTH POUND SIGN */
} else if (s == 137) {
w = 0xFFE2; /* FULLWIDTH NOT SIGN */
}
}
/* Otherwise consult the tables; the vendor ext1 range is checked before JIS X 0208 */
if (w == 0) {
if (s >= cp932ext1_ucs_table_min && s < cp932ext1_ucs_table_max) { /* vendor ext1 (13ku) */
w = cp932ext1_ucs_table[s - cp932ext1_ucs_table_min];
} else if (s >= 0 && s < jisx0208_ucs_table_size) {
w = jisx0208_ucs_table[s];
} else if (s >= cp932ext2_ucs_table_min && s < cp932ext2_ucs_table_max) { /* vendor ext2 (89ku - 92ku) */
w = cp932ext2_ucs_table[s - cp932ext2_ucs_table_min];
}
}
/* A zero table entry means "no mapping" */
if (w <= 0) {
w = MBFL_BAD_INPUT;
}
} else {
/* UDC mode: first bytes 0x21..0x34 (20 rows) map linearly onto U+E000 and up */
if (c1 > 0x20 && c1 < 0x35) {
w = 0xE000 + ((c1 - 0x21) * 94) + c - 0x21;
} else {
w = MBFL_BAD_INPUT;
}
}
CK((*filter->output_function)(w, filter->data));
} else {
/* Second byte out of range */
CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data));
}
break;
/* ESC */
case 2:
if (c == '$') {
filter->status++;
} else if (c == '(') {
/* Jump from position 2 to position 5 (ESC '(') */
filter->status += 3;
} else {
/* Unknown sequence: report it and resume in the previous character set */
filter->status &= ~0xF;
CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data));
}
break;
/* ESC $ */
case 3:
if (c == '@' || c == 'B') {
filter->status = JISX0208_KANJI;
} else if (c == '(') {
filter->status++;
} else {
filter->status &= ~0xF;
CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data));
}
break;
/* ESC $ ( */
case 4:
if (c == '@' || c == 'B') {
filter->status = JISX0208_KANJI;
} else if (c == '?') {
filter->status = UDC;
} else {
filter->status &= ~0xF;
CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data));
}
break;
/* ESC ( */
case 5:
if (c == 'B' || c == 'J') {
/* Both final bytes select the ASCII state (status 0) */
filter->status = 0;
} else if (c == 'I') {
filter->status = JISX0201_KANA;
} else {
filter->status &= ~0xF;
CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data));
}
}
return 0;
}
/* End-of-input handler for the ISO-2022-JP-MS decoder.
 *
 * If the input stopped in the middle of an escape sequence or a double-byte
 * character (non-zero parser position in the low nibble of the status), one
 * MBFL_BAD_INPUT marker is emitted. The filter is then reset and the flush
 * request is passed on if a flush function is set. Always returns 0; results
 * of the callbacks are deliberately not inspected. */
static int mbfl_filt_conv_2022jpms_wchar_flush(mbfl_convert_filter *filter)
{
	const int truncated = (filter->status & 0xF) != 0;

	if (truncated) {
		(*filter->output_function)(MBFL_BAD_INPUT, filter->data);
	}

	filter->status = 0;

	if (filter->flush_function != NULL) {
		(*filter->flush_function)(filter->data);
	}

	return 0;
}
/* Translate an offset `c` into the cp932ext3 table (which begins at Shift-JIS
 * 0xFA40) into a two-byte code: high byte idxtojis1(), low byte idxtojis2().
 *
 * The linear index is relocated depending on which of three sub-ranges it
 * falls in:
 *   - from 0xFA5C onwards -> moved so that 0xFA5C lands on 0xED40
 *   - 0xFA55 .. 0xFA5B    -> moved so that 0xFA55 lands on 0xEEFA
 *   - 0xFA40 .. 0xFA54    -> moved so that 0xFA40 lands on 0xEEEF
 * An index below 0xFA40 (negative `c`) is left unmoved. */
static int cp932ext3_cp932ext2_jis(int c)
{
	const int start = sjistoidx(0xfa, 0x40);
	int pos = start + c;
	int shift = 0;

	if (pos >= sjistoidx(0xfa, 0x5c)) {
		shift = sjistoidx(0xfa, 0x5c) - sjistoidx(0xed, 0x40);
	} else if (pos >= sjistoidx(0xfa, 0x55)) {
		shift = sjistoidx(0xfa, 0x55) - sjistoidx(0xee, 0xfa);
	} else if (pos >= start) {
		shift = start - sjistoidx(0xee, 0xef);
	}
	pos -= shift;

	return (idxtojis1(pos) << 8) | idxtojis2(pos);
}
/* Legacy codepoint-at-a-time encoder for ISO-2022-JP-MS.
 *
 * Maps the codepoint `c` to a code, then writes it through
 * filter->output_function, preceded by an escape sequence whenever the
 * character set differs from the one recorded in filter->status
 * (0 = ASCII, 0x100 = JIS X 0201 kana, 0x200 = JIS X 0208, 0x800 = UDC).
 * Codepoints without a mapping go to mbfl_filt_conv_illegal_output().
 *
 * Returns 0, or -1 (through CK) as soon as an output call fails. */
int mbfl_filt_conv_wchar_2022jpms(int c, mbfl_convert_filter *filter)
{
	int code = 0;

	/* Step 1: the main Unicode -> JIS tables, then the private use area */
	if (c >= ucs_a1_jis_table_min && c < ucs_a1_jis_table_max) {
		code = ucs_a1_jis_table[c - ucs_a1_jis_table_min];
	} else if (c >= ucs_a2_jis_table_min && c < ucs_a2_jis_table_max) {
		code = ucs_a2_jis_table[c - ucs_a2_jis_table_min];
	} else if (c >= ucs_i_jis_table_min && c < ucs_i_jis_table_max) {
		code = ucs_i_jis_table[c - ucs_i_jis_table_min];
	} else if (c >= ucs_r_jis_table_min && c < ucs_r_jis_table_max) {
		code = ucs_r_jis_table[c - ucs_r_jis_table_min];
	} else if (c >= 0xE000 && c < (0xE000 + 20*94)) {
		/* Private User Area (95ku - 114ku) */
		int offset = c - 0xE000;
		int hi = (offset / 94) + 0x7f;
		int lo = (offset % 94) + 0x21;
		code = (hi << 8) | lo;
	}

	/* Step 2: fixed mappings for a few symbols the tables did not resolve */
	if (code <= 0) {
		switch (c) {
		case 0xA5:   /* YEN SIGN */
			code = 0x216F; /* FULLWIDTH YEN SIGN */
			break;
		case 0xFF3C: /* FULLWIDTH REVERSE SOLIDUS */
			code = 0x2140;
			break;
		case 0x2225: /* PARALLEL TO */
			code = 0x2142;
			break;
		case 0xFF0D: /* FULLWIDTH HYPHEN-MINUS */
			code = 0x215d;
			break;
		case 0xFFE0: /* FULLWIDTH CENT SIGN */
			code = 0x2171;
			break;
		case 0xFFE1: /* FULLWIDTH POUND SIGN */
			code = 0x2172;
			break;
		case 0xFFE2: /* FULLWIDTH NOT SIGN */
			code = 0x224C;
			break;
		default:
			break;
		}
	}

	/* Step 3: still nothing (or a JIS X 0212 code, which cannot be used here):
	 * linear search through the CP932 extension tables */
	if (code <= 0 || code >= 0xa1a1) {
		int i;

		code = -1;
		for (i = 0; i < cp932ext1_ucs_table_max - cp932ext1_ucs_table_min; i++) {
			if (c == cp932ext1_ucs_table[i]) {
				code = (((i / 94) + 0x2D) << 8) + (i % 94) + 0x21;
				break;
			}
		}
		if (code <= 0) {
			for (i = 0; i < cp932ext3_ucs_table_max - cp932ext3_ucs_table_min; i++) {
				if (c == cp932ext3_ucs_table[i]) {
					code = cp932ext3_cp932ext2_jis(i);
					break;
				}
			}
		}
		/* U+0000 is always representable as a NUL byte */
		if (c == 0) {
			code = 0;
		}
	}

	if (code < 0) {
		CK(mbfl_filt_conv_illegal_output(c, filter));
		return 0;
	}

	if (code < 0x80) { /* latin */
		if (filter->status & 0xFF00) {
			CK((*filter->output_function)(0x1B, filter->data)); /* ESC */
			CK((*filter->output_function)('(', filter->data));
			CK((*filter->output_function)('B', filter->data));
		}
		CK((*filter->output_function)(code, filter->data));
		filter->status = 0;
	} else if (code > 0xA0 && code < 0xE0) { /* kana */
		if ((filter->status & 0xFF00) != 0x100) {
			CK((*filter->output_function)(0x1B, filter->data)); /* ESC */
			CK((*filter->output_function)('(', filter->data));
			CK((*filter->output_function)('I', filter->data));
		}
		filter->status = 0x100;
		CK((*filter->output_function)(code & 0x7F, filter->data));
	} else if (code < 0x7E7F) { /* X 0208 */
		if ((filter->status & 0xFF00) != 0x200) {
			CK((*filter->output_function)(0x1B, filter->data)); /* ESC */
			CK((*filter->output_function)('$', filter->data));
			CK((*filter->output_function)('B', filter->data));
		}
		filter->status = 0x200;
		CK((*filter->output_function)((code >> 8) & 0xFF, filter->data));
		CK((*filter->output_function)(code & 0x7F, filter->data));
	} else if (code < 0x927F) { /* UDC */
		if ((filter->status & 0xFF00) != 0x800) {
			CK((*filter->output_function)(0x1B, filter->data)); /* ESC */
			CK((*filter->output_function)('$', filter->data));
			CK((*filter->output_function)('(', filter->data));
			CK((*filter->output_function)('?', filter->data));
		}
		filter->status = 0x800;
		/* Rebase the first byte by 0x5E so it fits into 7 bits */
		CK((*filter->output_function)(((code >> 8) - 0x5E) & 0x7F, filter->data));
		CK((*filter->output_function)(code & 0x7F, filter->data));
	}
	/* Codes of 0x927F and above produce no output, as before */

	return 0;
}
/* End-of-output handler for the ISO-2022-JP-MS encoder.
 *
 * When the output is not in the ASCII state, ESC ( B is written so that the
 * result ends in ASCII and can safely be concatenated with other strings.
 * The status is then cleared and the flush request is forwarded.
 * Returns 0, or -1 if writing the escape sequence fails. */
int mbfl_filt_conv_any_2022jpms_flush(mbfl_convert_filter *filter)
{
	if (filter->status & 0xFF00) {
		static const int back_to_ascii[3] = { 0x1B /* ESC */, '(', 'B' };
		unsigned int i;

		for (i = 0; i < 3; i++) {
			CK((*filter->output_function)(back_to_ascii[i], filter->data));
		}
	}

	filter->status = 0;

	if (filter->flush_function != NULL) {
		(*filter->flush_function)(filter->data);
	}

	return 0;
}
/* Bulk decoder: ISO-2022-JP-MS bytes -> codepoints.
 *
 * in / in_len  input cursor and remaining byte count; both are advanced past
 *              the bytes consumed
 * buf, bufsize output array and its capacity in codepoints
 * state        selected character set (ASCII, JISX0201_KANA, JISX0208_KANJI
 *              or UDC), carried over between calls
 *
 * Returns the number of codepoints written to buf. Invalid input produces
 * MBFL_BAD_INPUT. Each loop iteration writes at most one codepoint, so the
 * `out < limit` loop condition is enough to stay within buf. */
static size_t mb_iso2022jpms_to_wchar(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state)
{
unsigned char *p = *in, *e = p + *in_len;
uint32_t *out = buf, *limit = buf + bufsize;
while (p < e && out < limit) {
unsigned char c = *p++;
if (c == 0x1B) {
/* An escape sequence needs at least 2 more bytes; if they are missing,
 * report one error and swallow the rest of the input */
if ((e - p) < 2) {
*out++ = MBFL_BAD_INPUT;
p = e;
break;
}
unsigned char c2 = *p++;
unsigned char c3 = *p++;
if (c2 == '$') {
if (c3 == '@' || c3 == 'B') {
*state = JISX0208_KANJI;
} else if (c3 == '(' && p < e) {
/* 4-byte form: ESC $ ( F */
unsigned char c4 = *p++;
if (c4 == '@' || c4 == 'B') {
*state = JISX0208_KANJI;
} else if (c4 == '?') {
*state = UDC;
} else {
*out++ = MBFL_BAD_INPUT;
}
} else {
*out++ = MBFL_BAD_INPUT;
}
} else if (c2 == '(') {
if (c3 == 'B' || c3 == 'J') {
*state = ASCII;
} else if (c3 == 'I') {
*state = JISX0201_KANA;
} else {
*out++ = MBFL_BAD_INPUT;
}
} else {
/* Byte after ESC was neither '$' nor '(': un-read the third byte so that
 * it is decoded on its own in the next iteration */
p--;
*out++ = MBFL_BAD_INPUT;
}
} else if (*state == JISX0201_KANA && c >= 0x21 && c <= 0x5F) {
/* 7-bit kana; 0xFF40 + c lands in U+FF61..U+FF9F */
*out++ = 0xFF40 + c;
} else if ((*state == JISX0208_KANJI || *state == UDC) && c >= 0x21 && c <= 0x7F) {
/* Double-byte character; a missing second byte is a truncation error */
if (p == e) {
*out++ = MBFL_BAD_INPUT;
break;
}
unsigned char c2 = *p++;
unsigned int w = 0;
if (c2 >= 0x21 && c2 <= 0x7E) {
/* Linear index: 94 cells per row, both bytes offset by 0x21 */
unsigned int s = ((c - 0x21) * 94) + c2 - 0x21;
if (*state == JISX0208_KANJI) {
/* Fixed mappings which take priority over the tables */
if (s <= 137) {
if (s == 31) {
w = 0xFF3C; /* FULLWIDTH REVERSE SOLIDUS */
} else if (s == 32) {
w = 0xFF5E; /* FULLWIDTH TILDE */
} else if (s == 33) {
w = 0x2225; /* PARALLEL TO */
} else if (s == 60) {
w = 0xFF0D; /* FULLWIDTH HYPHEN-MINUS */
} else if (s == 80) {
w = 0xFFE0; /* FULLWIDTH CENT SIGN */
} else if (s == 81) {
w = 0xFFE1; /* FULLWIDTH POUND SIGN */
} else if (s == 137) {
w = 0xFFE2; /* FULLWIDTH NOT SIGN */
}
}
/* Table lookup; the vendor ext1 range is checked before JIS X 0208 */
if (!w) {
if (s >= cp932ext1_ucs_table_min && s < cp932ext1_ucs_table_max) {
w = cp932ext1_ucs_table[s - cp932ext1_ucs_table_min];
} else if (s < jisx0208_ucs_table_size) {
w = jisx0208_ucs_table[s];
} else if (s >= cp932ext2_ucs_table_min && s < cp932ext2_ucs_table_max) {
w = cp932ext2_ucs_table[s - cp932ext2_ucs_table_min];
}
}
} else if (c >= 0x21 && c <= 0x34) {
/* UDC mode: 20 rows map linearly onto U+E000 and up */
w = 0xE000 + ((c - 0x21) * 94) + c2 - 0x21;
}
/* w == 0 means no mapping was found */
*out++ = w ? w : MBFL_BAD_INPUT;
} else {
*out++ = MBFL_BAD_INPUT;
}
} else if (c <= 0x7F) {
/* Plain 7-bit byte, passed through unchanged */
*out++ = c;
} else if (c >= 0xA1 && c <= 0xDF) {
/* 8-bit kana, accepted in any state; same target range as 7-bit kana */
*out++ = 0xFEC0 + c;
} else {
*out++ = MBFL_BAD_INPUT;
}
}
/* Report how far we got */
*in_len = e - p;
*in = p;
return out - buf;
}
/* Bulk encoder: codepoints -> ISO-2022-JP-MS bytes.
 *
 * in, len  codepoints to encode
 * buf      output buffer; buf->state remembers the character set currently
 *          selected in the output (ASCII, JISX0201_KANA, JISX0208_KANJI, UDC)
 * end      true when this is the last chunk; the output is then switched
 *          back to ASCII
 *
 * NOTE(review): MB_CONVERT_BUF_LOAD/ENSURE/STORE and MB_CONVERT_ERROR are
 * defined elsewhere. From their use here, ENSURE appears to guarantee room for
 * the given number of bytes and ERROR to emit the error marker for an
 * unencodable codepoint -- confirm against their definitions. */
static void mb_wchar_to_iso2022jpms(uint32_t *in, size_t len, mb_convert_buf *buf, bool end)
{
unsigned char *out, *limit;
MB_CONVERT_BUF_LOAD(buf, out, limit);
/* Baseline reservation: one byte per remaining codepoint; branches which
 * write more than one byte top this up themselves */
MB_CONVERT_BUF_ENSURE(buf, out, limit, len);
while (len--) {
uint32_t w = *in++;
unsigned int s = 0;
/* Main Unicode -> JIS tables, then the private use area */
if (w >= ucs_a1_jis_table_min && w < ucs_a1_jis_table_max) {
s = ucs_a1_jis_table[w - ucs_a1_jis_table_min];
} else if (w >= ucs_a2_jis_table_min && w < ucs_a2_jis_table_max) {
s = ucs_a2_jis_table[w - ucs_a2_jis_table_min];
} else if (w >= ucs_i_jis_table_min && w < ucs_i_jis_table_max) {
s = ucs_i_jis_table[w - ucs_i_jis_table_min];
} else if (w >= ucs_r_jis_table_min && w < ucs_r_jis_table_max) {
s = ucs_r_jis_table[w - ucs_r_jis_table_min];
} else if (w >= 0xE000 && w < (0xE000 + 20*94)) {
/* Private User Area (95ku - 114ku) */
s = ((((w - 0xE000) / 94) + 0x7F) << 8) | (((w - 0xE000) % 94) + 0x21);
}
/* Fixed mappings for a few symbols the tables did not resolve */
if (!s) {
if (w == 0xA5) { /* YEN SIGN */
s = 0x216F; /* FULLWIDTH YEN SIGN */
} else if (w == 0xFF3C) { /* FULLWIDTH REVERSE SOLIDUS */
s = 0x2140;
} else if (w == 0x2225) { /* PARALLEL TO */
s = 0x2142;
} else if (w == 0xFF0D) { /* FULLWIDTH HYPHEN-MINUS */
s = 0x215D;
} else if (w == 0xFFE0) { /* FULLWIDTH CENT SIGN */
s = 0x2171;
} else if (w == 0xFFE1) { /* FULLWIDTH POUND SIGN */
s = 0x2172;
} else if (w == 0xFFE2) { /* FULLWIDTH NOT SIGN */
s = 0x224C;
}
}
/* JIS X 0212 codes cannot be used here; treat them as "not found" */
if (s >= 0xA1A1) /* JISX 0212 */
s = 0;
/* Last resort: linear search through the CP932 extension tables */
if (!s && w) {
for (int i = 0; i < cp932ext1_ucs_table_max - cp932ext1_ucs_table_min; i++) {
if (w == cp932ext1_ucs_table[i]) {
s = (((i / 94) + 0x2D) << 8) + (i % 94) + 0x21;
break;
}
}
if (!s) {
for (int i = 0; i < cp932ext3_ucs_table_max - cp932ext3_ucs_table_min; i++) {
if (w == cp932ext3_ucs_table[i]) {
s = cp932ext3_cp932ext2_jis(i);
break;
}
}
}
}
/* s == 0 is only acceptable for U+0000 itself */
if (!s && w) {
MB_CONVERT_ERROR(buf, out, limit, w, mb_wchar_to_iso2022jpms);
MB_CONVERT_BUF_ENSURE(buf, out, limit, len);
} else if (s <= 0x7F) {
/* Single byte in the ASCII state; ESC ( B first if another set is active */
if (buf->state != ASCII) {
MB_CONVERT_BUF_ENSURE(buf, out, limit, len + 4);
out = mb_convert_buf_add3(out, 0x1B, '(', 'B');
buf->state = ASCII;
}
out = mb_convert_buf_add(out, s);
} else if (s >= 0xA1 && s <= 0xDF) {
/* Kana: ESC ( I if needed, then the code with the top bit stripped */
if (buf->state != JISX0201_KANA) {
MB_CONVERT_BUF_ENSURE(buf, out, limit, len + 4);
out = mb_convert_buf_add3(out, 0x1B, '(', 'I');
buf->state = JISX0201_KANA;
}
out = mb_convert_buf_add(out, s & 0x7F);
} else if (s <= 0x7E7E) {
/* JIS X 0208: ESC $ B if needed, then two bytes */
if (buf->state != JISX0208_KANJI) {
MB_CONVERT_BUF_ENSURE(buf, out, limit, len + 5);
out = mb_convert_buf_add3(out, 0x1B, '$', 'B');
buf->state = JISX0208_KANJI;
} else {
MB_CONVERT_BUF_ENSURE(buf, out, limit, len + 2);
}
out = mb_convert_buf_add2(out, (s >> 8) & 0xFF, s & 0x7F);
} else if (s < 0x927F) {
/* User-defined characters: ESC $ ( ? if needed; the first byte is rebased
 * by 0x5E so that it fits into 7 bits */
if (buf->state != UDC) {
MB_CONVERT_BUF_ENSURE(buf, out, limit, len + 6);
out = mb_convert_buf_add4(out, 0x1B, '$', '(', '?');
buf->state = UDC;
} else {
MB_CONVERT_BUF_ENSURE(buf, out, limit, len + 2);
}
out = mb_convert_buf_add2(out, ((s >> 8) - 0x5E) & 0x7F, s & 0x7F);
} else {
/* Code outside every range this encoding can express */
MB_CONVERT_ERROR(buf, out, limit, w, mb_wchar_to_iso2022jpms);
MB_CONVERT_BUF_ENSURE(buf, out, limit, len);
}
}
/* At the very end, return the output to ASCII. buf->state itself is left
 * unchanged here. */
if (end && buf->state != ASCII) {
MB_CONVERT_BUF_ENSURE(buf, out, limit, 3);
out = mb_convert_buf_add3(out, 0x1B, '(', 'B');
}
MB_CONVERT_BUF_STORE(buf, out, limit);
}

View file

@ -1,43 +0,0 @@
/*
* "streamable kanji code filter and converter"
* Copyright (c) 1998-2002 HappySize, Inc. All rights reserved.
*
* LICENSE NOTICES
*
* This file is part of "streamable kanji code filter and converter",
* which is distributed under the terms of GNU Lesser General Public
* License (version 2) as published by the Free Software Foundation.
*
* This software is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with "streamable kanji code filter and converter";
* if not, write to the Free Software Foundation, Inc., 59 Temple Place,
* Suite 330, Boston, MA 02111-1307 USA
*
* The author of this file:
*
*/
/*
* The source code included in this file was separated from mbfilter_ja.c
* by moriyoshi koizumi <moriyoshi@php.net> on 4 dec 2002.
*
*/
/* Declarations for the ISO-2022-JP-MS converter: the encoding descriptor, the
 * two conversion vtables, and the legacy filter callbacks with external
 * linkage. */
#ifndef MBFL_MBFILTER_ISO2022_JP_MS_H
#define MBFL_MBFILTER_ISO2022_JP_MS_H
#include "mbfilter.h"
/* Encoding descriptor */
extern const mbfl_encoding mbfl_encoding_2022jpms;
/* Conversion vtables, one per direction */
extern const struct mbfl_convert_vtbl vtbl_2022jpms_wchar;
extern const struct mbfl_convert_vtbl vtbl_wchar_2022jpms;
/* Per-unit filter callbacks (decoder, encoder) and the encoder's flush */
int mbfl_filt_conv_2022jpms_wchar(int c, mbfl_convert_filter *filter);
int mbfl_filt_conv_wchar_2022jpms(int c, mbfl_convert_filter *filter);
int mbfl_filt_conv_any_2022jpms_flush(mbfl_convert_filter *filter);
#endif /* MBFL_MBFILTER_ISO2022_JP_MS_H */

View file

@ -1,431 +0,0 @@
/*
* "streamable kanji code filter and converter"
* Copyright (c) 1998-2002 HappySize, Inc. All rights reserved.
*
* LICENSE NOTICES
*
* This file is part of "streamable kanji code filter and converter",
* which is distributed under the terms of GNU Lesser General Public
* License (version 2) as published by the Free Software Foundation.
*
* This software is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with "streamable kanji code filter and converter";
* if not, write to the Free Software Foundation, Inc., 59 Temple Place,
* Suite 330, Boston, MA 02111-1307 USA
*
* The author of this file:
*
*/
/*
* The source code included in this file was separated from mbfilter_kr.c
* by moriyoshi koizumi <moriyoshi@php.net> on 4 dec 2002.
*
*/
/* ISO-2022-KR is defined in RFC 1557
*
* The RFC says that ESC $ ) C must appear once in an ISO-2022-KR string,
* at the beginning of a line, before any instances of the Shift In or
* Shift Out bytes which are used to switch between ASCII/KSC 5601 modes
*
* We don't enforce that for ISO-2022-KR input */
#include "mbfilter.h"
#include "mbfilter_iso2022_kr.h"
#include "unicode_table_uhc.h"
/* Forward declarations of file-local functions which are referenced by the
 * encoding descriptor and vtable initializers before they are defined. */
static int mbfl_filt_conv_2022kr_wchar_flush(mbfl_convert_filter *filter);
static int mbfl_filt_conv_any_2022kr_flush(mbfl_convert_filter *filter);
static size_t mb_iso2022kr_to_wchar(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state);
static void mb_wchar_to_iso2022kr(uint32_t *in, size_t len, mb_convert_buf *buf, bool end);
/* Descriptor for ISO-2022-KR. The initializer is positional, so the entries
 * must stay in the order of the mbfl_encoding members.
 * NOTE(review): mbfl_encoding is declared elsewhere; the two strings look like
 * the canonical name and the MIME name, and the first NULL sits where other
 * descriptors pass an alias list -- confirm against the struct declaration. */
const mbfl_encoding mbfl_encoding_2022kr = {
mbfl_no_encoding_2022kr,
"ISO-2022-KR",
"ISO-2022-KR",
NULL,
NULL,
MBFL_ENCTYPE_GL_UNSAFE,
&vtbl_2022kr_wchar,
&vtbl_wchar_2022kr,
mb_iso2022kr_to_wchar,
mb_wchar_to_iso2022kr,
NULL
};
/* Legacy filter vtable for the wchar -> ISO-2022-KR direction: source and
 * destination encoding IDs, the common constructor, no destructor (NULL),
 * then the per-codepoint filter function and its flush function.
 * NOTE(review): the meaning of the NULL slots is inferred from position;
 * confirm against struct mbfl_convert_vtbl. */
const struct mbfl_convert_vtbl vtbl_wchar_2022kr = {
mbfl_no_encoding_wchar,
mbfl_no_encoding_2022kr,
mbfl_filt_conv_common_ctor,
NULL,
mbfl_filt_conv_wchar_2022kr,
mbfl_filt_conv_any_2022kr_flush,
NULL,
};
/* Legacy filter vtable for the ISO-2022-KR -> wchar direction; same layout as
 * vtbl_wchar_2022kr with the encoding IDs swapped and the decoder's
 * filter/flush functions plugged in. */
const struct mbfl_convert_vtbl vtbl_2022kr_wchar = {
mbfl_no_encoding_2022kr,
mbfl_no_encoding_wchar,
mbfl_filt_conv_common_ctor,
NULL,
mbfl_filt_conv_2022kr_wchar,
mbfl_filt_conv_2022kr_wchar_flush,
NULL,
};
/* Evaluate `statement`; if it yields a negative value, return -1 from the
 * ENCLOSING function. Only usable inside functions returning int. */
#define CK(statement) do { if ((statement) < 0) return (-1); } while (0)
/* Legacy byte-at-a-time decoder for ISO-2022-KR.
 *
 * Consumes one input byte `c` and passes each decoded codepoint (or
 * MBFL_BAD_INPUT for invalid input) to filter->output_function.
 *
 * filter->status: bit 0x10 is set while in KSC5601 (shifted-out) mode; the low
 * 4 bits hold the parser position (0 = idle, 1 = waiting for the 2nd byte of a
 * double-byte character, 2..4 = inside the ESC $ ) C sequence).
 * filter->cache keeps the first byte of a double-byte character.
 *
 * Returns 0, or -1 (through CK) as soon as the output function fails. */
int mbfl_filt_conv_2022kr_wchar(int c, mbfl_convert_filter *filter)
{
int w = 0;
switch (filter->status & 0xf) {
/* case 0x00: ASCII */
/* case 0x10: KSC5601 */
case 0:
if (c == 0x1b) { /* ESC */
filter->status += 2;
} else if (c == 0x0f) { /* shift in (ASCII) */
filter->status = 0;
} else if (c == 0x0e) { /* shift out (KSC5601) */
filter->status = 0x10;
} else if ((filter->status & 0x10) && c > 0x20 && c < 0x7f) {
/* KSC5601 lead byte */
filter->cache = c;
filter->status = 0x11;
} else if ((filter->status & 0x10) == 0 && c >= 0 && c < 0x80) {
/* latin, CTLs */
CK((*filter->output_function)(c, filter->data));
} else {
CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data));
}
break;
case 1: /* dbcs second byte */
/* Stay in KSC5601 mode, back at the idle position */
filter->status = 0x10;
int c1 = filter->cache;
/* flag selects the lookup table: 1 = uhc1 (lead 0x21..0x46),
 * 2 = uhc3 (lead 0x47..0x7e except 0x49), 0 = invalid lead byte */
int flag = 0;
if (c1 > 0x20 && c1 < 0x47) {
flag = 1;
} else if (c1 >= 0x47 && c1 <= 0x7e && c1 != 0x49) {
flag = 2;
}
if (flag > 0 && c > 0x20 && c < 0x7f) {
if (flag == 1) {
/* Lead byte 0x22 only has entries up to trail byte 0x65; anything beyond
 * leaves w at 0 and is reported as bad input below */
if (c1 != 0x22 || c <= 0x65) {
w = (c1 - 1)*190 + (c - 0x41) + 0x80;
ZEND_ASSERT(w < uhc1_ucs_table_size);
w = uhc1_ucs_table[w];
}
} else {
w = (c1 - 0x47)*94 + c - 0x21;
if (w < uhc3_ucs_table_size) {
w = uhc3_ucs_table[w];
} else {
w = MBFL_BAD_INPUT;
}
}
/* A zero table entry means "no mapping" */
if (w <= 0) {
w = MBFL_BAD_INPUT;
}
CK((*filter->output_function)(w, filter->data));
} else {
CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data));
}
break;
case 2: /* ESC */
if (c == '$') {
filter->status++;
} else {
/* Unknown sequence: report it and resume in the previous mode */
filter->status &= ~0xF;
CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data));
}
break;
case 3: /* ESC $ */
if (c == ')') {
filter->status++;
} else {
filter->status &= ~0xF;
CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data));
}
break;
case 4: /* ESC $ ) */
/* Whether or not the final byte is 'C', the decoder ends up in ASCII mode */
filter->status = 0;
if (c != 'C') {
CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data));
}
break;
EMPTY_SWITCH_DEFAULT_CASE();
}
return 0;
}
/* End-of-input handler for the ISO-2022-KR decoder.
 *
 * A non-zero parser position (low nibble of the status) means the input ended
 * part-way through a multi-byte unit, so one MBFL_BAD_INPUT marker is emitted;
 * a failure of that output call is returned as -1. Afterwards the filter is
 * reset and the flush request forwarded. Returns 0 otherwise. */
static int mbfl_filt_conv_2022kr_wchar_flush(mbfl_convert_filter *filter)
{
	const int pending = filter->status & 0xF;

	if (pending != 0) {
		/* 2-byte character was truncated */
		CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data));
	}

	filter->status = 0;

	if (filter->flush_function != NULL) {
		(*filter->flush_function)(filter->data);
	}

	return 0;
}
/* Legacy codepoint-at-a-time encoder for ISO-2022-KR.
 *
 * Writes the ESC $ ) C designator once (tracked by status bit 0x100), then
 * encodes codepoint `c` either as a single ASCII byte or as a two-byte
 * KS C 5601 code, inserting Shift In (0x0f) / Shift Out (0x0e) whenever the
 * mode (status bit 0x10) has to change. Codepoints which cannot be encoded
 * are passed to mbfl_filt_conv_illegal_output().
 *
 * Returns 0, or -1 (through CK) as soon as an output call fails. */
int mbfl_filt_conv_wchar_2022kr(int c, mbfl_convert_filter *filter)
{
	int c1, c2, s = 0;

	if ((filter->status & 0x100) == 0) {
		CK((*filter->output_function)(0x1b, filter->data)); /* ESC */
		CK((*filter->output_function)('$', filter->data));
		CK((*filter->output_function)(')', filter->data));
		CK((*filter->output_function)('C', filter->data));
		filter->status |= 0x100;
	}

	/* Look the codepoint up in the Unicode -> UHC tables */
	if (c >= ucs_a1_uhc_table_min && c < ucs_a1_uhc_table_max) {
		s = ucs_a1_uhc_table[c - ucs_a1_uhc_table_min];
	} else if (c >= ucs_a2_uhc_table_min && c < ucs_a2_uhc_table_max) {
		s = ucs_a2_uhc_table[c - ucs_a2_uhc_table_min];
	} else if (c >= ucs_a3_uhc_table_min && c < ucs_a3_uhc_table_max) {
		s = ucs_a3_uhc_table[c - ucs_a3_uhc_table_min];
	} else if (c >= ucs_i_uhc_table_min && c < ucs_i_uhc_table_max) {
		s = ucs_i_uhc_table[c - ucs_i_uhc_table_min];
	} else if (c >= ucs_s_uhc_table_min && c < ucs_s_uhc_table_max) {
		s = ucs_s_uhc_table[c - ucs_s_uhc_table_min];
	} else if (c >= ucs_r1_uhc_table_min && c < ucs_r1_uhc_table_max) {
		s = ucs_r1_uhc_table[c - ucs_r1_uhc_table_min];
	} else if (c >= ucs_r2_uhc_table_min && c < ucs_r2_uhc_table_max) {
		s = ucs_r2_uhc_table[c - ucs_r2_uhc_table_min];
	}

	c1 = (s >> 8) & 0xff;
	c2 = s & 0xff;
	/* exclude UHC extension area */
	if (c1 < 0xa1 || c2 < 0xa1) {
		/* No usable two-byte code (no table entry, or the entry lies in the UHC
		 * extension area). Only ASCII may pass through unchanged. Previously
		 * the raw codepoint was used as the code, so an unmappable codepoint
		 * in 0x2121..0x8080 was written out as two arbitrary bytes. */
		s = (c >= 0 && c < 0x80) ? c : -1;
	} else if (s & 0x8000) {
		/* Strip the high bits: 8-bit code -> 7-bit KS C 5601 code */
		s -= 0x8080;
	}

	if (s <= 0) {
		/* Zero is only a valid result for U+0000 itself */
		if (c == 0) {
			s = 0;
		} else {
			s = -1;
		}
	} else if ((s >= 0x80 && s < 0x2121) || (s > 0x8080)) {
		s = -1;
	}

	if (s >= 0) {
		if (s < 0x80 && s >= 0) { /* ASCII */
			if (filter->status & 0x10) {
				CK((*filter->output_function)(0x0f, filter->data)); /* shift in */
				filter->status &= ~0x10;
			}
			CK((*filter->output_function)(s, filter->data));
		} else {
			if ((filter->status & 0x10) == 0) {
				CK((*filter->output_function)(0x0e, filter->data)); /* shift out */
				filter->status |= 0x10;
			}
			CK((*filter->output_function)((s >> 8) & 0xff, filter->data));
			CK((*filter->output_function)(s & 0xff, filter->data));
		}
	} else {
		CK(mbfl_filt_conv_illegal_output(c, filter));
	}

	return 0;
}
/* Flush handler installed in vtbl_wchar_2022kr (the encoder direction).
 *
 * Emits MBFL_BAD_INPUT if the low nibble of the status is set, writes a Shift
 * In byte if the output is still in KSC5601 mode (status bit 0x10), clears
 * status and cache, and finally forwards the flush request.
 * Returns the result of the downstream flush function (0 if there is none),
 * or -1 if writing the Shift In byte fails.
 *
 * NOTE(review): mbfl_filt_conv_wchar_2022kr only ever sets status bits 0x100
 * and 0x10, so the low-nibble branch looks unreachable from the encoder;
 * it is kept as-is. */
static int mbfl_filt_conv_any_2022kr_flush(mbfl_convert_filter *filter)
{
	if ((filter->status & 0xF) != 0) {
		/* Escape sequence or 2-byte character was truncated */
		(*filter->output_function)(MBFL_BAD_INPUT, filter->data);
	}

	/* back to ascii */
	if ((filter->status & 0x10) != 0) {
		CK((*filter->output_function)(0x0f, filter->data)); /* shift in */
	}

	filter->cache = 0;
	filter->status = 0;

	if (filter->flush_function == NULL) {
		return 0;
	}
	return (*filter->flush_function)(filter->data);
}
/* Modes used by the bulk converters below. The decoder stores one of them in
 * *state; the encoder keeps the mode in bit 0 of buf->state. */
#define ASCII 0
#define KSC5601 1
/* Bulk decoder: ISO-2022-KR bytes -> codepoints.
 *
 * in / in_len  input cursor and remaining byte count; both are advanced past
 *              the bytes consumed
 * buf, bufsize output array and its capacity in codepoints
 * state        current mode (ASCII or KSC5601), carried over between calls
 *
 * Returns the number of codepoints written to buf. Invalid input produces
 * MBFL_BAD_INPUT. Each loop iteration writes at most one codepoint. */
static size_t mb_iso2022kr_to_wchar(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state)
{
unsigned char *p = *in, *e = p + *in_len;
uint32_t *out = buf, *limit = buf + bufsize;
while (p < e && out < limit) {
unsigned char c = *p++;
if (c == 0x1B) {
/* The only escape sequence is ESC $ ) C, i.e. 3 more bytes. If fewer
 * remain, report one error, then skip the next byte (if any) and, when
 * that byte was '$', one more byte after it. */
if ((e - p) < 3) {
*out++ = MBFL_BAD_INPUT;
if (p < e && *p++ == '$') {
if (p < e) {
p++;
}
}
continue;
}
unsigned char c2 = *p++;
unsigned char c3 = *p++;
unsigned char c4 = *p++;
if (c2 == '$' && c3 == ')' && c4 == 'C') {
*state = ASCII;
} else {
/* Malformed sequence. All three bytes were consumed above; when the third
 * sequence byte is not ')' the last byte is handed back, and when the
 * second is not '$' either, one more is handed back so that they are
 * decoded on their own. */
if (c3 != ')') {
p--;
if (c2 != '$')
p--;
}
*out++ = MBFL_BAD_INPUT;
}
} else if (c == 0xF) {
/* Shift In */
*state = ASCII;
} else if (c == 0xE) {
/* Shift Out */
*state = KSC5601;
} else if (c >= 0x21 && c <= 0x7E && *state == KSC5601) {
/* Double-byte character; a missing second byte is a truncation error */
if (p == e) {
*out++ = MBFL_BAD_INPUT;
break;
}
unsigned char c2 = *p++;
unsigned int w = 0;
if (c2 < 0x21 || c2 > 0x7E) {
*out++ = MBFL_BAD_INPUT;
continue;
}
if (c < 0x47) {
/* Lead byte 0x22 only has entries up to trail byte 0x65; beyond that w
 * stays 0 and becomes MBFL_BAD_INPUT below */
if (c != 0x22 || c2 <= 0x65) {
w = (c - 1)*190 + c2 - 0x41 + 0x80;
ZEND_ASSERT(w < uhc1_ucs_table_size);
w = uhc1_ucs_table[w];
}
} else if (c != 0x49 && c <= 0x7D) {
w = (c - 0x47)*94 + c2 - 0x21;
ZEND_ASSERT(w < uhc3_ucs_table_size);
w = uhc3_ucs_table[w];
}
/* A zero table entry (or an excluded lead byte) means "no mapping" */
if (!w)
w = MBFL_BAD_INPUT;
*out++ = w;
} else if (c < 0x80 && *state == ASCII) {
*out++ = c;
} else {
*out++ = MBFL_BAD_INPUT;
}
}
/* Report how far we got */
*in_len = e - p;
*in = p;
return out - buf;
}
/* Bit in buf->state recording that the ESC $ ) C designator was already written */
#define EMITTED_ESC_SEQUENCE 0x10

/* Bulk encoder: codepoints -> ISO-2022-KR bytes.
 *
 * in, len  codepoints to encode
 * buf      output buffer; bit 0 of buf->state is the current mode (ASCII or
 *          KSC5601) and EMITTED_ESC_SEQUENCE records the designator
 * end      true when this is the last chunk; the output is then shifted back
 *          to ASCII
 *
 * Codepoints without a KS C 5601 mapping are reported through
 * MB_CONVERT_ERROR unless they are plain ASCII. */
static void mb_wchar_to_iso2022kr(uint32_t *in, size_t len, mb_convert_buf *buf, bool end)
{
	unsigned char *out, *limit;
	MB_CONVERT_BUF_LOAD(buf, out, limit);

	/* This escape sequence needs to come *somewhere* at the beginning of a line before
	 * we can use the Shift In/Shift Out bytes, but it only needs to come once in a string
	 * Rather than tracking newlines, we can just emit the sequence once at the beginning
	 * of the output string... since that will always be "the beginning of a line" */
	if (len && !(buf->state & EMITTED_ESC_SEQUENCE)) {
		MB_CONVERT_BUF_ENSURE(buf, out, limit, 4 + len);
		out = mb_convert_buf_add4(out, 0x1B, '$', ')', 'C');
		buf->state |= EMITTED_ESC_SEQUENCE;
	} else {
		MB_CONVERT_BUF_ENSURE(buf, out, limit, len);
	}

	while (len--) {
		uint32_t w = *in++;
		unsigned int s = 0;

		/* Look the codepoint up in the Unicode -> UHC tables */
		if (w >= ucs_a1_uhc_table_min && w < ucs_a1_uhc_table_max) {
			s = ucs_a1_uhc_table[w - ucs_a1_uhc_table_min];
		} else if (w >= ucs_a2_uhc_table_min && w < ucs_a2_uhc_table_max) {
			s = ucs_a2_uhc_table[w - ucs_a2_uhc_table_min];
		} else if (w >= ucs_a3_uhc_table_min && w < ucs_a3_uhc_table_max) {
			s = ucs_a3_uhc_table[w - ucs_a3_uhc_table_min];
		} else if (w >= ucs_i_uhc_table_min && w < ucs_i_uhc_table_max) {
			s = ucs_i_uhc_table[w - ucs_i_uhc_table_min];
		} else if (w >= ucs_s_uhc_table_min && w < ucs_s_uhc_table_max) {
			s = ucs_s_uhc_table[w - ucs_s_uhc_table_min];
		} else if (w >= ucs_r1_uhc_table_min && w < ucs_r1_uhc_table_max) {
			s = ucs_r1_uhc_table[w - ucs_r1_uhc_table_min];
		} else if (w >= ucs_r2_uhc_table_min && w < ucs_r2_uhc_table_max) {
			s = ucs_r2_uhc_table[w - ucs_r2_uhc_table_min];
		}

		/* Only codes with both bytes >= 0xA1 can be used; anything else is
		 * either "no table entry" or part of the UHC extension area */
		const bool in_ksc5601 = ((s >> 8) & 0xFF) >= 0xA1 && (s & 0xFF) >= 0xA1;
		if (in_ksc5601) {
			/* Strip the high bits: 8-bit code -> 7-bit KS C 5601 code */
			s -= 0x8080;
		} else {
			s = w;
		}

		/* Without a mapping only ASCII may pass through. Previously the raw
		 * codepoint was used as the code, so an unmappable codepoint in
		 * 0x2121..0x8080 was written out as two arbitrary bytes. */
		if ((!in_ksc5601 && w >= 0x80) || (s >= 0x80 && s < 0x2121) || (s > 0x8080)) {
			MB_CONVERT_ERROR(buf, out, limit, w, mb_wchar_to_iso2022kr);
			MB_CONVERT_BUF_ENSURE(buf, out, limit, len);
		} else if (s < 0x80) {
			if ((buf->state & 1) != ASCII) {
				/* Shift In before the ASCII byte */
				MB_CONVERT_BUF_ENSURE(buf, out, limit, len + 2);
				out = mb_convert_buf_add(out, 0xF);
				buf->state &= ~KSC5601;
			}
			out = mb_convert_buf_add(out, s);
		} else {
			if ((buf->state & 1) != KSC5601) {
				/* Shift Out before the two-byte code */
				MB_CONVERT_BUF_ENSURE(buf, out, limit, len + 3);
				out = mb_convert_buf_add(out, 0xE);
				buf->state |= KSC5601;
			} else {
				MB_CONVERT_BUF_ENSURE(buf, out, limit, len + 2);
			}
			out = mb_convert_buf_add2(out, (s >> 8) & 0xFF, s & 0xFF);
		}
	}

	/* At the very end, shift the output back to ASCII */
	if (end && (buf->state & 1) != ASCII) {
		MB_CONVERT_BUF_ENSURE(buf, out, limit, 1);
		out = mb_convert_buf_add(out, 0xF);
	}

	MB_CONVERT_BUF_STORE(buf, out, limit);
}

View file

@ -1,42 +0,0 @@
/*
* "streamable kanji code filter and converter"
* Copyright (c) 1998-2002 HappySize, Inc. All rights reserved.
*
* LICENSE NOTICES
*
* This file is part of "streamable kanji code filter and converter",
* which is distributed under the terms of GNU Lesser General Public
* License (version 2) as published by the Free Software Foundation.
*
* This software is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with "streamable kanji code filter and converter";
* if not, write to the Free Software Foundation, Inc., 59 Temple Place,
* Suite 330, Boston, MA 02111-1307 USA
*
* The author of this file:
*
*/
/*
* The source code included in this file was separated from mbfilter_kr.h
* by moriyoshi koizumi <moriyoshi@php.net> on 4 dec 2002.
*
*/
/* Declarations for the ISO-2022-KR converter: the encoding descriptor, the
 * two conversion vtables, and the legacy filter callbacks with external
 * linkage. */
#ifndef MBFL_MBFILTER_ISO2022_KR_H
#define MBFL_MBFILTER_ISO2022_KR_H
#include "mbfilter.h"
/* Encoding descriptor */
extern const mbfl_encoding mbfl_encoding_2022kr;
/* Conversion vtables, one per direction */
extern const struct mbfl_convert_vtbl vtbl_wchar_2022kr;
extern const struct mbfl_convert_vtbl vtbl_2022kr_wchar;
/* Per-unit filter callbacks (decoder, encoder) */
int mbfl_filt_conv_2022kr_wchar(int c, mbfl_convert_filter *filter);
int mbfl_filt_conv_wchar_2022kr(int c, mbfl_convert_filter *filter);
#endif /* MBFL_MBFILTER_ISO2022_KR_H */

View file

@ -1,757 +0,0 @@
/*
* "streamable kanji code filter and converter"
* Copyright (c) 1998-2002 HappySize, Inc. All rights reserved.
*
* LICENSE NOTICES
*
* This file is part of "streamable kanji code filter and converter",
* which is distributed under the terms of GNU Lesser General Public
* License (version 2) as published by the Free Software Foundation.
*
* This software is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with "streamable kanji code filter and converter";
* if not, write to the Free Software Foundation, Inc., 59 Temple Place,
* Suite 330, Boston, MA 02111-1307 USA
*
* The author of this file:
*
*/
/*
* The source code included in this file was separated from mbfilter_iso2022_jp_ms.c
* by Rui Hirokawa <hirokawa@php.net> on 25 July 2011.
*
*/
#include "mbfilter.h"
#include "mbfilter_iso2022jp_mobile.h"
#include "unicode_table_cp932_ext.h"
#include "unicode_table_jis.h"
#include "cp932_table.h"
#include "emoji2uni.h"
/* Forward declarations of file-local functions which are referenced by the
 * encoding descriptor and vtable initializers before they are defined. */
static size_t mb_iso2022jp_kddi_to_wchar(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state);
static void mb_wchar_to_iso2022jp_kddi(uint32_t *in, size_t len, mb_convert_buf *buf, bool end);
static int mbfl_filt_conv_2022jp_mobile_wchar(int c, mbfl_convert_filter *filter);
static int mbfl_filt_conv_wchar_2022jp_mobile(int c, mbfl_convert_filter *filter);
static int mbfl_filt_conv_2022jp_mobile_wchar_flush(mbfl_convert_filter *filter);
static int mbfl_filt_conv_wchar_2022jp_mobile_flush(mbfl_convert_filter *filter);
/* Defined in another translation unit.
 * NOTE(review): judging by the name this is a binary search over `tbl`
 * (n entries) -- confirm against its definition. */
extern int mbfl_bisec_srch2(int w, const unsigned short tbl[], int n);
/* Regional Indicator Symbols occupy U+1F1E6..U+1F1FF and correspond to the
 * letters A-Z. The flag emoji for a country is written as two such codepoints,
 * spelling that country's two-letter code.
 * This macro converts an uppercase ASCII letter to its Regional Indicator
 * codepoint: 'A' is 0x41, and 0x1F1A5 + 0x41 = 0x1F1E6. */
#define NFLAGS(c) (0x1F1A5+((unsigned int)(c)))
/* Two-letter country codes of the ten flags which have a KDDI emoji.
 * Each entry is exactly two characters, with no terminating NUL. */
static const char nflags_s[10][2] = {
	{'C', 'N'}, {'D', 'E'}, {'E', 'S'}, {'F', 'R'}, {'G', 'B'},
	{'I', 'T'}, {'J', 'P'}, {'K', 'R'}, {'R', 'U'}, {'U', 'S'}
};
/* KDDI emoji code of each flag; entry i belongs to the country in nflags_s[i] */
static const int nflags_code_kddi[10] = {
	0x2549, /* CN */
	0x2546, /* DE */
	0x24C0, /* ES */
	0x2545, /* FR */
	0x2548, /* GB */
	0x2547, /* IT */
	0x2750, /* JP */
	0x254A, /* KR */
	0x24C1, /* RU */
	0x27F7  /* US */
};
/* Encoding descriptor for the KDDI mobile variant of ISO-2022-JP.
 * Besides the encoding's names and alias list, it references the two legacy
 * filter vtables and the two buffer-based conversion functions for this
 * encoding. */
static const char *mbfl_encoding_2022jp_kddi_aliases[] = {"ISO-2022-JP-KDDI", NULL};
const mbfl_encoding mbfl_encoding_2022jp_kddi = {
mbfl_no_encoding_2022jp_kddi,
"ISO-2022-JP-MOBILE#KDDI",
"ISO-2022-JP",
mbfl_encoding_2022jp_kddi_aliases,
NULL,
MBFL_ENCTYPE_GL_UNSAFE,
/* Legacy byte-at-a-time filters: decoder first, then encoder */
&vtbl_2022jp_kddi_wchar,
&vtbl_wchar_2022jp_kddi,
/* Buffer-based converters: decoder first, then encoder */
mb_iso2022jp_kddi_to_wchar,
mb_wchar_to_iso2022jp_kddi,
/* NOTE(review): this last slot looks like an optional validity-check function
 * which this encoding does not provide -- confirm against the definition of
 * mbfl_encoding */
NULL
};
/* Legacy filter vtable for decoding ISO-2022-JP-MOBILE#KDDI to wchar.
 * It uses the common constructor, mbfl_filt_conv_2022jp_mobile_wchar to process
 * each byte, and mbfl_filt_conv_2022jp_mobile_wchar_flush at end of input. */
const struct mbfl_convert_vtbl vtbl_2022jp_kddi_wchar = {
mbfl_no_encoding_2022jp_kddi,
mbfl_no_encoding_wchar,
mbfl_filt_conv_common_ctor,
NULL,
mbfl_filt_conv_2022jp_mobile_wchar,
mbfl_filt_conv_2022jp_mobile_wchar_flush,
NULL,
};
/* Legacy filter vtable for encoding wchar to ISO-2022-JP-MOBILE#KDDI.
 * It uses the common constructor, mbfl_filt_conv_wchar_2022jp_mobile to process
 * each codepoint, and mbfl_filt_conv_wchar_2022jp_mobile_flush at end of input. */
const struct mbfl_convert_vtbl vtbl_wchar_2022jp_kddi = {
mbfl_no_encoding_wchar,
mbfl_no_encoding_2022jp_kddi,
mbfl_filt_conv_common_ctor,
NULL,
mbfl_filt_conv_wchar_2022jp_mobile,
mbfl_filt_conv_wchar_2022jp_mobile_flush,
NULL,
};
/* Evaluate `statement`; if its value is negative, make the enclosing function
 * return -1 immediately. Used to propagate failures of the output function. */
#define CK(statement) do { if ((statement) < 0) return (-1); } while (0)
/* Convert the two bytes of a JIS code (c1 = first byte, c2 = second byte) to
 * the two bytes of the equivalent Shift-JIS code (s1 = first, s2 = second).
 *
 * Two JIS rows share one Shift-JIS lead byte: odd rows land in the lower part
 * of the trail byte range and even rows in the upper part.
 *
 * Every parameter is parenthesized so that expressions may be passed safely.
 * Note that c1 and c2 are evaluated more than once; do not pass arguments with
 * side effects. s1 and s2 must be assignable. */
#define SJIS_ENCODE(c1,c2,s1,s2) \
	do { \
		(s1) = (((c1) - 1) >> 1) + ((c1) < 0x5F ? 0x71 : 0xB1); \
		(s2) = (c2); \
		if ((c1) & 1) { \
			if ((c2) < 0x60) { \
				(s2)--; \
			} \
			(s2) += 0x20; \
		} else { \
			(s2) += 0x7e; \
		} \
	} while (0)
/* Convert the two bytes of a Shift-JIS code (c1 = lead byte, c2 = trail byte)
 * to the two bytes of the equivalent JIS code (s1 = first, s2 = second).
 *
 * Each Shift-JIS lead byte covers two JIS rows; a trail byte of 0x9f or above
 * selects the second of the two rows.
 *
 * Every parameter is parenthesized so that expressions may be passed safely.
 * Note that c1 and c2 are evaluated more than once; do not pass arguments with
 * side effects. s1 and s2 must be assignable. */
#define SJIS_DECODE(c1,c2,s1,s2) \
	do { \
		if ((c1) < 0xa0) { \
			(s1) = (((c1) - 0x81) << 1) + 0x21; \
		} else { \
			(s1) = (((c1) - 0xc1) << 1) + 0x21; \
		} \
		(s2) = (c2); \
		if ((c2) < 0x9f) { \
			if ((c2) < 0x7f) { \
				(s2)++; \
			} \
			(s2) -= 0x20; \
		} else { \
			(s1)++; \
			(s2) -= 0x7e; \
		} \
	} while (0)
/* Linear (row*94)+cell value -> two-byte code with both bytes offset by 0x21.
 *
 * On entry, `s1` holds the linear value. On exit:
 *  - `c1` and `c2` hold the first and second byte (row + 0x21, cell + 0x21)
 *  - `s1` holds both bytes packed together as (c1 << 8) | c2
 *  - `s2` is 1 (the caller tests `s2 == 0` before trying a fallback table)
 *
 * The body is wrapped in do/while(0) so that the macro acts as one statement,
 * for example when it is the body of an unbraced `if`. */
#define CODE2JIS(c1,c2,s1,s2) \
	do { \
		(c1) = (s1)/94 + 0x21; \
		(c2) = (s1) - 94*((c1) - 0x21) + 0x21; \
		(s1) = ((c1) << 8) | (c2); \
		(s2) = 1; \
	} while (0)
/* Character set currently selected by escape sequences. The decoders keep one
 * of these values in their state; the buffer-based encoder keeps one in
 * buf->state. */
#define ASCII 0
#define JISX0201_KANA 0x20
#define JISX0208_KANJI 0x80
/* The two macros below expect an `int *snd` to be in scope and RETURN from the
 * enclosing function; they are used for emoji which decode to two codepoints.
 *
 * Keypad emoji: store the key's character `c` in *snd and return U+20E3 */
#define EMIT_KEYPAD_EMOJI(c) do { *snd = (c); return 0x20E3; } while(0)
/* Flag emoji: store the Regional Indicator for the first letter of `country`
 * in *snd and return the Regional Indicator for its second letter */
#define EMIT_FLAG_EMOJI(country) do { *snd = NFLAGS((country)[0]); return NFLAGS((country)[1]); } while(0)
/* Country codes for KDDI emoji 0x2545..0x254A, indexed by (code - 0x2545) */
static const char nflags_kddi[6][2] = {"FR", "DE", "IT", "GB", "CN", "KR"};
/* Expand a value from the emoji conversion tables into a full codepoint.
 * Values above 0xF000 are shifted up by 0x10000, other values above 0xE000
 * are shifted up by 0xF0000, and everything else is returned unchanged. */
static inline int convert_emoji_cp(int cp)
{
	int offset = 0;

	if (cp > 0xF000) {
		offset = 0x10000;
	} else if (cp > 0xE000) {
		offset = 0xF0000;
	}

	return cp + offset;
}
/* Convert KDDI emoji code `s` to Unicode.
 *
 * Most emoji are a single codepoint: it is returned and *snd is set to 0.
 * Flag and keypad emoji are two codepoints: the first is stored in *snd and
 * the second is returned.
 * If `s` is outside both KDDI code tables, 0 is returned and *snd is not
 * modified. */
static int mbfilter_sjis_emoji_kddi2unicode(int s, int *snd)
{
	/* Each EMIT_* macro below returns from this function */
	if (s >= mb_tbl_code2uni_kddi1_min && s <= mb_tbl_code2uni_kddi1_max) {
		if (s == 0x24C0) { /* Spain */
			EMIT_FLAG_EMOJI("ES");
		}
		if (s == 0x24C1) { /* Russia */
			EMIT_FLAG_EMOJI("RU");
		}
		if (s >= 0x2545 && s <= 0x254A) { /* One of the six flags in nflags_kddi */
			EMIT_FLAG_EMOJI(nflags_kddi[s - 0x2545]);
		}
		if (s == 0x25BC) {
			EMIT_KEYPAD_EMOJI('#');
		}

		int entry = mb_tbl_code2uni_kddi1[s - mb_tbl_code2uni_kddi1_min];
		*snd = 0;
		return convert_emoji_cp(entry);
	}

	if (s >= mb_tbl_code2uni_kddi2_min && s <= mb_tbl_code2uni_kddi2_max) {
		if (s == 0x2750) { /* Japan */
			EMIT_FLAG_EMOJI("JP");
		}
		if (s >= 0x27A6 && s <= 0x27AE) { /* Keypad keys '1' to '9' */
			EMIT_KEYPAD_EMOJI(s - 0x27A6 + '1');
		}
		if (s == 0x27F7) { /* United States */
			EMIT_FLAG_EMOJI("US");
		}
		if (s == 0x2830) {
			EMIT_KEYPAD_EMOJI('0');
		}

		int entry = mb_tbl_code2uni_kddi2[s - mb_tbl_code2uni_kddi2_min];
		*snd = 0;
		return convert_emoji_cp(entry);
	}

	return 0;
}
/* Legacy byte-at-a-time decoder: ISO-2022-JP-MOBILE#KDDI -> Unicode codepoints.
 *
 * The low 4 bits of filter->status track progress through an escape sequence
 * or a 2-byte character (see the case labels below); the remaining bits hold
 * the currently selected character set (ASCII, JISX0201_KANA or
 * JISX0208_KANJI). The first byte of a 2-byte character is kept in
 * filter->cache.
 *
 * `c` is the next input byte. Returns 0 on success, or -1 as soon as the
 * output function reports an error. */
static int mbfl_filt_conv_2022jp_mobile_wchar(int c, mbfl_convert_filter *filter)
{
	int c1, s, w, snd = 0;

	switch (filter->status & 0xF) {
	/* Not inside an escape sequence or 2-byte character */
	case 0:
		if (c == 0x1B) {
			filter->status += 2;
		} else if (filter->status == JISX0201_KANA && c > 0x20 && c < 0x60) {
			CK((*filter->output_function)(0xFF40 + c, filter->data));
		} else if (filter->status == JISX0208_KANJI && c > 0x20 && c < 0x80) {
			filter->cache = c;
			filter->status += 1;
		} else if (c >= 0 && c < 0x80) { /* ASCII */
			CK((*filter->output_function)(c, filter->data));
		} else if (c > 0xA0 && c < 0xE0) { /* Kana */
			CK((*filter->output_function)(0xFEC0 + c, filter->data));
		} else {
			CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data));
		}
		break;

	/* JISX 0208, second byte */
	case 1:
		w = 0;
		filter->status &= ~0xF;
		c1 = filter->cache;
		if (c > 0x20 && c < 0x7F) {
			/* Linear index: (row * 94) + cell */
			s = ((c1 - 0x21) * 94) + c - 0x21;

			/* A few characters are mapped differently from the JIS X 0208 table */
			if (s <= 137) {
				if (s == 31) {
					w = 0xFF3C; /* FULLWIDTH REVERSE SOLIDUS */
				} else if (s == 32) {
					w = 0xFF5E; /* FULLWIDTH TILDE */
				} else if (s == 33) {
					w = 0x2225; /* PARALLEL TO */
				} else if (s == 60) {
					w = 0xFF0D; /* FULLWIDTH HYPHEN-MINUS */
				} else if (s == 80) {
					w = 0xFFE0; /* FULLWIDTH CENT SIGN */
				} else if (s == 81) {
					w = 0xFFE1; /* FULLWIDTH POUND SIGN */
				} else if (s == 137) {
					w = 0xFFE2; /* FULLWIDTH NOT SIGN */
				}
			}

			/* Rows 84-90 hold KDDI emoji; the emoji tables are indexed 22 rows higher */
			if (s >= (84 * 94) && s < (91 * 94)) {
				s += 22 * 94;
				w = mbfilter_sjis_emoji_kddi2unicode(s, &snd);
				if (w > 0 && snd > 0) {
					/* Flag and keypad emoji are two codepoints; emit the first one
					 * here, propagating any output error like every other emit */
					CK((*filter->output_function)(snd, filter->data));
				}
			}

			if (w == 0) {
				if (s >= cp932ext1_ucs_table_min && s < cp932ext1_ucs_table_max) {
					w = cp932ext1_ucs_table[s - cp932ext1_ucs_table_min];
				} else if (s >= 0 && s < jisx0208_ucs_table_size) {
					w = jisx0208_ucs_table[s];
				}
			}

			if (w <= 0) {
				w = MBFL_BAD_INPUT;
			}
			CK((*filter->output_function)(w, filter->data));
		} else {
			CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data));
		}
		break;

	/* ESC */
	case 2:
		if (c == '$') {
			filter->status++;
		} else if (c == '(') {
			filter->status += 3;
		} else {
			filter->status &= ~0xF;
			CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data));
		}
		break;

	/* ESC $ */
	case 3:
		if (c == '@' || c == 'B') {
			filter->status = JISX0208_KANJI;
		} else if (c == '(') {
			filter->status++;
		} else {
			filter->status &= ~0xF;
			CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data));
		}
		break;

	/* ESC $ ( */
	case 4:
		if (c == '@' || c == 'B') {
			filter->status = JISX0208_KANJI;
		} else {
			filter->status &= ~0xF;
			CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data));
		}
		break;

	/* ESC ( */
	case 5:
		if (c == 'B' || c == 'J') {
			filter->status = 0; /* ASCII mode */
		} else if (c == 'I') {
			filter->status = JISX0201_KANA;
		} else {
			filter->status &= ~0xF;
			CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data));
		}
		break;
	}

	return 0;
}
/* End-of-input handler for the legacy ISO-2022-JP-MOBILE#KDDI decoder.
 * If input stopped in the middle of an escape sequence or 2-byte character,
 * one error marker is emitted. The filter state is then reset and the flush
 * function, if there is one, is invoked. Always returns 0. */
static int mbfl_filt_conv_2022jp_mobile_wchar_flush(mbfl_convert_filter *filter)
{
	/* Non-zero low nibble: something was left unfinished */
	if ((filter->status & 0xF) != 0) {
		filter->output_function(MBFL_BAD_INPUT, filter->data);
	}

	filter->status = 0;

	if (filter->flush_function != NULL) {
		filter->flush_function(filter->data);
	}

	return 0;
}
/* Helper for the legacy wchar -> ISO-2022-JP-MOBILE#KDDI filter: try to convert
 * codepoint `c` to a KDDI emoji code.
 *
 * Returns 1, with the emoji code stored in *s1, if an emoji was recognized.
 * Returns 0 otherwise; this includes the case where `c` is '#' or '0'-'9' and
 * was only stored in filter->cache, since it may be the first half of a keypad
 * emoji (the low nibble of filter->status is then set to 1).
 * Returns -1 (through CK) if the output function fails while writing out a
 * previously cached character. */
static int mbfilter_unicode2sjis_emoji_kddi(int c, int *s1, mbfl_convert_filter *filter)
{
/* Is a '#' or digit from the previous call waiting in the cache? */
if ((filter->status & 0xF) == 1) {
int c1 = filter->cache;
filter->cache = 0;
filter->status &= ~0xFF;
if (c == 0x20E3) {
/* Cached character followed by U+20E3: together they form a keypad emoji */
if (c1 == '#') {
*s1 = 0x25BC;
} else if (c1 == '0') {
*s1 = 0x2830;
} else { /* Previous character was '1'-'9' */
*s1 = 0x27A6 + (c1 - '1');
}
return 1;
} else {
/* Not a keypad emoji after all; write the cached character out as ASCII,
 * switching to ASCII mode first if another character set is selected.
 * Afterwards, processing of `c` itself continues below. */
if (filter->status & 0xFF00) {
CK((*filter->output_function)(0x1B, filter->data)); /* ESC */
CK((*filter->output_function)('(', filter->data));
CK((*filter->output_function)('B', filter->data));
}
CK((*filter->output_function)(c1, filter->data));
filter->status = 0;
}
}
/* '#' and digits may start a keypad emoji; hold them back until the next
 * codepoint (or end of input) is seen */
if (c == '#' || (c >= '0' && c <= '9')) {
filter->status |= 1;
filter->cache = c;
return 0;
}
if (c == 0xA9) { /* Copyright sign */
*s1 = 0x27DC;
return 1;
} else if (c == 0xAE) { /* Registered sign */
*s1 = 0x27DD;
return 1;
} else if (c >= mb_tbl_uni_kddi2code2_min && c <= mb_tbl_uni_kddi2code2_max) {
int i = mbfl_bisec_srch2(c, mb_tbl_uni_kddi2code2_key, mb_tbl_uni_kddi2code2_len);
if (i >= 0) {
*s1 = mb_tbl_uni_kddi2code2_value[i];
return 1;
}
} else if (c >= mb_tbl_uni_kddi2code3_min && c <= mb_tbl_uni_kddi2code3_max) {
/* The search key is the codepoint minus 0x10000 */
int i = mbfl_bisec_srch2(c - 0x10000, mb_tbl_uni_kddi2code3_key, mb_tbl_uni_kddi2code3_len);
if (i >= 0) {
*s1 = mb_tbl_uni_kddi2code3_value[i];
return 1;
}
} else if (c >= mb_tbl_uni_kddi2code5_min && c <= mb_tbl_uni_kddi2code5_max) {
/* The search key is the codepoint minus 0xF0000 */
int i = mbfl_bisec_srch2(c - 0xF0000, mb_tbl_uni_kddi2code5_key, mb_tbl_uni_kddi2code5_len);
if (i >= 0) {
*s1 = mb_tbl_uni_kddi2code5_val[i];
return 1;
}
}
return 0;
}
/* Legacy codepoint-at-a-time encoder: Unicode -> ISO-2022-JP-MOBILE#KDDI.
 *
 * Bits 8-15 of filter->status record which character set was selected by the
 * last escape sequence written: 0 for ASCII, 0x100 for JIS X 0201 Kana, 0x200
 * for JIS X 0208. The low bits and filter->cache are used by
 * mbfilter_unicode2sjis_emoji_kddi to hold back a possible keypad emoji prefix.
 *
 * Returns 0 on success, or -1 if an output call fails. Codepoints which cannot
 * be represented are passed to mbfl_filt_conv_illegal_output. */
static int mbfl_filt_conv_wchar_2022jp_mobile(int c, mbfl_convert_filter *filter)
{
int c1, c2, s1 = 0, s2 = 0;
/* Look the codepoint up in the Unicode -> JIS tables */
if (c >= ucs_a1_jis_table_min && c < ucs_a1_jis_table_max) {
s1 = ucs_a1_jis_table[c - ucs_a1_jis_table_min];
} else if (c >= ucs_a2_jis_table_min && c < ucs_a2_jis_table_max) {
s1 = ucs_a2_jis_table[c - ucs_a2_jis_table_min];
} else if (c >= ucs_i_jis_table_min && c < ucs_i_jis_table_max) {
s1 = ucs_i_jis_table[c - ucs_i_jis_table_min];
} else if (c >= ucs_r_jis_table_min && c < ucs_r_jis_table_max) {
s1 = ucs_r_jis_table[c - ucs_r_jis_table_min];
}
/* Characters which the tables do not cover but which have a fixed mapping */
if (s1 <= 0) {
if (c == 0xA5) { /* YEN SIGN */
s1 = 0x216F; /* FULLWIDTH YEN SIGN */
} else if (c == 0xFF3C) { /* FULLWIDTH REVERSE SOLIDUS */
s1 = 0x2140;
} else if (c == 0x2225) { /* PARALLEL TO */
s1 = 0x2142;
} else if (c == 0xFF0D) { /* FULLWIDTH HYPHEN-MINUS */
s1 = 0x215d;
} else if (c == 0xFFE0) { /* FULLWIDTH CENT SIGN */
s1 = 0x2171;
} else if (c == 0xFFE1) { /* FULLWIDTH POUND SIGN */
s1 = 0x2172;
} else if (c == 0xFFE2) { /* FULLWIDTH NOT SIGN */
s1 = 0x224c;
}
}
if (mbfilter_unicode2sjis_emoji_kddi(c, &s1, filter) > 0) {
/* A KDDI emoji was detected and stored in s1 */
CODE2JIS(c1,c2,s1,s2);
/* The decoder adds 22 rows to reach the emoji tables; undo that here
 * (the row number is in the high byte, and 22 == 0x16) */
s1 -= 0x1600;
} else if ((filter->status & 0xFF) == 1 && filter->cache) {
/* We are just processing one of KDDI's special emoji for a phone keypad button */
return 0;
}
if ((s1 <= 0) || (s1 >= 0xa1a1 && s2 == 0)) { /* not found or X 0212 */
s1 = -1;
/* Linear search of the CP932 extension table for this codepoint */
for (c1 = 0; c1 < cp932ext1_ucs_table_max - cp932ext1_ucs_table_min; c1++) {
if (c == cp932ext1_ucs_table[c1]) {
s1 = (((c1 / 94) + 0x2D) << 8) + (c1 % 94) + 0x21;
break;
}
}
/* U+0000 is always encodable, as a zero byte */
if (c == 0) {
s1 = 0;
}
}
if (s1 >= 0) {
if (s1 < 0x80) { /* ASCII */
if (filter->status & 0xFF00) {
CK((*filter->output_function)(0x1B, filter->data)); /* ESC */
CK((*filter->output_function)('(', filter->data));
CK((*filter->output_function)('B', filter->data));
}
CK((*filter->output_function)(s1, filter->data));
filter->status = 0;
} else if (s1 > 0xA0 && s1 < 0xE0) { /* Kana */
if ((filter->status & 0xFF00) != 0x100) {
CK((*filter->output_function)(0x1B, filter->data)); /* ESC */
CK((*filter->output_function)('(', filter->data));
CK((*filter->output_function)('I', filter->data));
}
filter->status = 0x100;
CK((*filter->output_function)(s1 & 0x7F, filter->data));
} else if (s1 < 0x7E7F) { /* JIS X 0208 */
if ((filter->status & 0xFF00) != 0x200) {
CK((*filter->output_function)(0x1B, filter->data)); /* ESC */
CK((*filter->output_function)('$', filter->data));
CK((*filter->output_function)('B', filter->data));
}
filter->status = 0x200;
CK((*filter->output_function)((s1 >> 8) & 0xFF, filter->data));
CK((*filter->output_function)(s1 & 0x7F, filter->data));
}
/* NOTE(review): a value of 0x7E7F or above matches none of the branches
 * above, so nothing is written and no error is reported for it */
} else {
CK(mbfl_filt_conv_illegal_output(c, filter));
}
return 0;
}
/* End-of-input handler for the legacy ISO-2022-JP-MOBILE#KDDI encoder.
 * Switches the output back to ASCII mode if needed, writes out any '#' or
 * digit which was being held back as a possible keypad emoji prefix, resets
 * the filter state, and invokes the flush function if there is one.
 * Always returns 0. */
static int mbfl_filt_conv_wchar_2022jp_mobile_flush(mbfl_convert_filter *filter)
{
	/* Finish in ASCII mode, so that encoded strings can be safely concatenated */
	if ((filter->status & 0xFF00) != 0) {
		static const int to_ascii[3] = {0x1B, '(', 'B'}; /* ESC ( B */
		for (int i = 0; i < 3; i++) {
			filter->output_function(to_ascii[i], filter->data);
		}
	}

	int held = filter->cache;
	if ((filter->status & 0xFF) == 1) {
		int is_digit = (held >= '0' && held <= '9');
		if (held == '#' || is_digit) {
			filter->output_function(held, filter->data);
		}
	}

	filter->cache = 0;
	filter->status = 0;

	if (filter->flush_function != NULL) {
		filter->flush_function(filter->data);
	}

	return 0;
}
/* Buffer-based decoder: ISO-2022-JP-MOBILE#KDDI -> Unicode codepoints.
 *
 * Reads bytes starting at *in (at most *in_len of them) and writes codepoints
 * to `buf`, which has room for `bufsize` entries. *state holds the currently
 * selected character set (ASCII, JISX0201_KANA or JISX0208_KANJI) and is
 * updated when escape sequences are seen.
 *
 * On return, *in and *in_len have been advanced past the consumed bytes.
 * Returns the number of codepoints written to `buf`. Invalid input is
 * represented by MBFL_BAD_INPUT. */
static size_t mb_iso2022jp_kddi_to_wchar(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state)
{
unsigned char *p = *in, *e = p + *in_len;
/* One slot is held in reserve, since a flag or keypad emoji decodes to two
 * codepoints in a single loop iteration */
uint32_t *out = buf, *limit = buf + bufsize - 1;
while (p < e && out < limit) {
unsigned char c = *p++;
if (c == 0x1B) {
/* Escape sequence; at least two more bytes are needed. If they are not
 * there, consume whatever is left and report one error. */
if ((e - p) < 2) {
p = e;
*out++ = MBFL_BAD_INPUT;
break;
}
unsigned char c2 = *p++;
unsigned char c3 = *p++;
if (c2 == '$') {
if (c3 == '@' || c3 == 'B') {
*state = JISX0208_KANJI;
} else if (c3 == '(') {
/* Four-byte form: ESC $ ( @  or  ESC $ ( B */
if (p == e) {
*out++ = MBFL_BAD_INPUT;
break;
}
unsigned char c4 = *p++;
if (c4 == '@' || c4 == 'B') {
*state = JISX0208_KANJI;
} else {
*out++ = MBFL_BAD_INPUT;
}
} else {
*out++ = MBFL_BAD_INPUT;
}
} else if (c2 == '(') {
if (c3 == 'B' || c3 == 'J') {
*state = ASCII;
} else if (c3 == 'I') {
*state = JISX0201_KANA;
} else {
*out++ = MBFL_BAD_INPUT;
}
} else {
/* Unrecognized byte after ESC: back up by one so that c3 is processed
 * again as ordinary input */
p--;
*out++ = MBFL_BAD_INPUT;
}
} else if (*state == JISX0201_KANA && c >= 0x21 && c <= 0x5F) {
*out++ = 0xFF40 + c;
} else if (*state == JISX0208_KANJI && c >= 0x21 && c <= 0x7F) {
/* First byte of a 2-byte character */
if (p == e) {
*out++ = MBFL_BAD_INPUT;
break;
}
unsigned char c2 = *p++;
if (c2 >= 0x21 && c2 <= 0x7E) {
/* Linear index: (row * 94) + cell */
unsigned int s = ((c - 0x21) * 94) + c2 - 0x21;
uint32_t w = 0;
/* A few characters are mapped differently from the JIS X 0208 table */
if (s <= 137) {
if (s == 31) {
w = 0xFF3C; /* FULLWIDTH REVERSE SOLIDUS */
} else if (s == 32) {
w = 0xFF5E; /* FULLWIDTH TILDE */
} else if (s == 33) {
w = 0x2225; /* PARALLEL TO */
} else if (s == 60) {
w = 0xFF0D; /* FULLWIDTH HYPHEN-MINUS */
} else if (s == 80) {
w = 0xFFE0; /* FULLWIDTH CENT SIGN */
} else if (s == 81) {
w = 0xFFE1; /* FULLWIDTH POUND SIGN */
} else if (s == 137) {
w = 0xFFE2; /* FULLWIDTH NOT SIGN */
}
}
/* Rows 84-90 hold KDDI emoji; the emoji tables are indexed 22 rows higher */
if (s >= (84 * 94) && s < (91 * 94)) {
int snd = 0;
s += 22 * 94;
w = mbfilter_sjis_emoji_kddi2unicode(s, &snd);
if (w && snd) {
/* Two-codepoint emoji: write the first codepoint now, the second below */
*out++ = snd;
}
}
if (!w) {
if (s >= cp932ext1_ucs_table_min && s < cp932ext1_ucs_table_max) {
w = cp932ext1_ucs_table[s - cp932ext1_ucs_table_min];
} else if (s < jisx0208_ucs_table_size) {
w = jisx0208_ucs_table[s];
}
}
*out++ = w ? w : MBFL_BAD_INPUT;
} else {
*out++ = MBFL_BAD_INPUT;
}
} else if (c <= 0x7F) {
*out++ = c;
} else if (c >= 0xA1 && c <= 0xDF) {
/* 8-bit kana byte */
*out++ = 0xFEC0 + c;
} else {
*out++ = MBFL_BAD_INPUT;
}
}
*in_len = e - p;
*in = p;
return out - buf;
}
/* Buffer-based encoder: Unicode codepoints -> ISO-2022-JP-MOBILE#KDDI.
 *
 * Converts `len` codepoints starting at `in` and appends the resulting bytes
 * to `buf`. buf->state holds the character set selected by the last escape
 * sequence written (ASCII, JISX0201_KANA or JISX0208_KANJI). If `end` is true
 * and the output is not in ASCII mode, a final ESC ( B is appended.
 *
 * NOTE(review): MB_CONVERT_BUF_ENSURE presumably guarantees that at least the
 * given number of bytes can still be written, and MB_CONVERT_ERROR presumably
 * handles a codepoint which cannot be encoded -- confirm against their
 * definitions. */
static void mb_wchar_to_iso2022jp_kddi(uint32_t *in, size_t len, mb_convert_buf *buf, bool end)
{
unsigned char *out, *limit;
MB_CONVERT_BUF_LOAD(buf, out, limit);
MB_CONVERT_BUF_ENSURE(buf, out, limit, len);
while (len--) {
uint32_t w = *in++;
unsigned int s = 0;
/* Look the codepoint up in the Unicode -> JIS tables */
if (w >= ucs_a1_jis_table_min && w < ucs_a1_jis_table_max) {
s = ucs_a1_jis_table[w - ucs_a1_jis_table_min];
} else if (w >= ucs_a2_jis_table_min && w < ucs_a2_jis_table_max) {
s = ucs_a2_jis_table[w - ucs_a2_jis_table_min];
} else if (w >= ucs_i_jis_table_min && w < ucs_i_jis_table_max) {
s = ucs_i_jis_table[w - ucs_i_jis_table_min];
} else if (w >= ucs_r_jis_table_min && w < ucs_r_jis_table_max) {
s = ucs_r_jis_table[w - ucs_r_jis_table_min];
}
/* Characters which the tables do not cover but which have a fixed mapping */
if (!s) {
if (w == 0xA5) { /* YEN SIGN */
s = 0x216F; /* FULLWIDTH YEN SIGN */
} else if (w == 0xFF3C) { /* FULLWIDTH REVERSE SOLIDUS */
s = 0x2140;
} else if (w == 0x2225) { /* PARALLEL TO */
s = 0x2142;
} else if (w == 0xFF0D) { /* FULLWIDTH HYPHEN-MINUS */
s = 0x215D;
} else if (w == 0xFFE0) { /* FULLWIDTH CENT SIGN */
s = 0x2171;
} else if (w == 0xFFE1) { /* FULLWIDTH POUND SIGN */
s = 0x2172;
} else if (w == 0xFFE2) { /* FULLWIDTH NOT SIGN */
s = 0x224C;
}
}
/* Keypad emoji: '#' or a digit, followed by U+20E3. Peek at the next
 * codepoint, and un-read it again if it is something else. */
if ((w == '#' || (w >= '0' && w <= '9')) && len) {
uint32_t w2 = *in++; len--;
if (w2 == 0x20E3) {
unsigned int s1 = 0;
if (w == '#') {
s1 = 0x25BC;
} else if (w == '0') {
s1 = 0x2830;
} else { /* Previous character was '1'-'9' */
s1 = 0x27A6 + (w - '1');
}
/* Emoji code -> two bytes (row + 0x21, cell + 0x21), minus 0x1600 to undo
 * the 22-row offset which the decoder adds for emoji */
s = (((s1 / 94) + 0x21) << 8) + ((s1 % 94) + 0x21) - 0x1600;
} else {
in--; len++;
}
} else if (w >= NFLAGS('C') && w <= NFLAGS('U') && len) { /* C for CN, U for US */
/* Flag emoji: a pair of Regional Indicator codepoints. Peek at the next
 * codepoint, and un-read it again unless the pair is a supported flag. */
uint32_t w2 = *in++; len--;
if (w2 >= NFLAGS('B') && w2 <= NFLAGS('U')) { /* B for GB, U for RU */
for (int i = 0; i < 10; i++) {
if (w == NFLAGS(nflags_s[i][0]) && w2 == NFLAGS(nflags_s[i][1])) {
unsigned int s1 = nflags_code_kddi[i];
s = (((s1 / 94) + 0x21) << 8) + ((s1 % 94) + 0x21) - 0x1600;
goto found_flag_emoji;
}
}
}
in--; len++;
found_flag_emoji: ;
}
/* Other emoji, which are a single codepoint */
if (w == 0xA9) { /* Copyright sign */
unsigned int s1 = 0x27DC;
s = (((s1 / 94) + 0x21) << 8) + ((s1 % 94) + 0x21) - 0x1600;
} else if (w == 0xAE) { /* Registered sign */
unsigned int s1 = 0x27DD;
s = (((s1 / 94) + 0x21) << 8) + ((s1 % 94) + 0x21) - 0x1600;
} else if (w >= mb_tbl_uni_kddi2code2_min && w <= mb_tbl_uni_kddi2code2_max) {
int i = mbfl_bisec_srch2(w, mb_tbl_uni_kddi2code2_key, mb_tbl_uni_kddi2code2_len);
if (i >= 0) {
unsigned int s1 = mb_tbl_uni_kddi2code2_value[i];
s = (((s1 / 94) + 0x21) << 8) + ((s1 % 94) + 0x21) - 0x1600;
}
} else if (w >= mb_tbl_uni_kddi2code3_min && w <= mb_tbl_uni_kddi2code3_max) {
/* The search key is the codepoint minus 0x10000 */
int i = mbfl_bisec_srch2(w - 0x10000, mb_tbl_uni_kddi2code3_key, mb_tbl_uni_kddi2code3_len);
if (i >= 0) {
unsigned int s1 = mb_tbl_uni_kddi2code3_value[i];
s = (((s1 / 94) + 0x21) << 8) + ((s1 % 94) + 0x21) - 0x1600;
}
} else if (w >= mb_tbl_uni_kddi2code5_min && w <= mb_tbl_uni_kddi2code5_max) {
/* The search key is the codepoint minus 0xF0000 */
int i = mbfl_bisec_srch2(w - 0xF0000, mb_tbl_uni_kddi2code5_key, mb_tbl_uni_kddi2code5_len);
if (i >= 0) {
unsigned int s1 = mb_tbl_uni_kddi2code5_val[i];
s = (((s1 / 94) + 0x21) << 8) + ((s1 % 94) + 0x21) - 0x1600;
}
}
/* Nothing usable so far: try a linear search of the CP932 extension table */
if (!s || s >= 0xA1A1) {
s = 0;
for (int i = 0; i < cp932ext1_ucs_table_max - cp932ext1_ucs_table_min; i++) {
if (w == cp932ext1_ucs_table[i]) {
s = (((i / 94) + 0x2D) << 8) + (i % 94) + 0x21;
break;
}
}
if (w == 0)
s = 0;
}
/* Write out the result, preceded by an escape sequence if the character set
 * has to change. s == 0 means "not encodable", except for U+0000 itself. */
if (!s && w) {
MB_CONVERT_ERROR(buf, out, limit, w, mb_wchar_to_iso2022jp_kddi);
MB_CONVERT_BUF_ENSURE(buf, out, limit, len);
} else if (s <= 0x7F) {
if (buf->state != ASCII) {
MB_CONVERT_BUF_ENSURE(buf, out, limit, len + 4);
out = mb_convert_buf_add3(out, 0x1B, '(', 'B');
buf->state = ASCII;
}
out = mb_convert_buf_add(out, s);
} else if (s >= 0xA1 && s <= 0xDF) {
if (buf->state != JISX0201_KANA) {
MB_CONVERT_BUF_ENSURE(buf, out, limit, len + 4);
out = mb_convert_buf_add3(out, 0x1B, '(', 'I');
buf->state = JISX0201_KANA;
}
out = mb_convert_buf_add(out, s & 0x7F);
} else if (s <= 0x7E7E) {
if (buf->state != JISX0208_KANJI) {
MB_CONVERT_BUF_ENSURE(buf, out, limit, len + 5);
out = mb_convert_buf_add3(out, 0x1B, '$', 'B');
buf->state = JISX0208_KANJI;
} else {
MB_CONVERT_BUF_ENSURE(buf, out, limit, len + 2);
}
out = mb_convert_buf_add2(out, (s >> 8) & 0xFF, s & 0xFF);
} else {
MB_CONVERT_ERROR(buf, out, limit, w, mb_wchar_to_iso2022jp_kddi);
MB_CONVERT_BUF_ENSURE(buf, out, limit, len);
}
}
/* Finish in ASCII mode at the very end of the input */
if (end && buf->state != ASCII) {
MB_CONVERT_BUF_ENSURE(buf, out, limit, 3);
out = mb_convert_buf_add3(out, 0x1B, '(', 'B');
}
MB_CONVERT_BUF_STORE(buf, out, limit);
}

View file

@ -1,39 +0,0 @@
/*
* "streamable kanji code filter and converter"
* Copyright (c) 1998-2002 HappySize, Inc. All rights reserved.
*
* LICENSE NOTICES
*
* This file is part of "streamable kanji code filter and converter",
* which is distributed under the terms of GNU Lesser General Public
* License (version 2) as published by the Free Software Foundation.
*
* This software is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with "streamable kanji code filter and converter";
* if not, write to the Free Software Foundation, Inc., 59 Temple Place,
* Suite 330, Boston, MA 02111-1307 USA
*
* The author of this file:
*
*/
/*
* The source code included in this file was separated from mbfilter_iso2022_jp_ms.h
* by Rui Hirokawa <hirokawa@php.net> on 25 July 2011.
*
*/
/* Declarations for the ISO-2022-JP-MOBILE#KDDI text encoding: the encoding
 * descriptor itself and its two legacy conversion filter vtables. */
#ifndef MBFL_MBFILTER_ISO2022_JP_MOBILE_H
#define MBFL_MBFILTER_ISO2022_JP_MOBILE_H
#include "mbfilter.h"
extern const mbfl_encoding mbfl_encoding_2022jp_kddi;
/* KDDI -> wchar (decoder) and wchar -> KDDI (encoder) */
extern const struct mbfl_convert_vtbl vtbl_2022jp_kddi_wchar;
extern const struct mbfl_convert_vtbl vtbl_wchar_2022jp_kddi;
#endif /* MBFL_MBFILTER_ISO2022_JP_MOBILE_H */

View file

@ -1,944 +0,0 @@
/*
* "streamable kanji code filter and converter"
* Copyright (c) 1998-2002 HappySize, Inc. All rights reserved.
*
* LICENSE NOTICES
*
* This file is part of "streamable kanji code filter and converter",
* which is distributed under the terms of GNU Lesser General Public
* License (version 2) as published by the Free Software Foundation.
*
* This software is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with "streamable kanji code filter and converter";
* if not, write to the Free Software Foundation, Inc., 59 Temple Place,
* Suite 330, Boston, MA 02111-1307 USA
*
* The author of this file:
*
*/
/*
* The source code included in this file was separated from mbfilter_ja.c
* by moriyoshi koizumi <moriyoshi@php.net> on 4 dec 2002.
*
*/
#include "mbfilter.h"
#include "mbfilter_jis.h"
#include "unicode_table_cp932_ext.h"
#include "unicode_table_jis.h"
static int mbfl_filt_conv_jis_wchar_flush(mbfl_convert_filter *filter);
static size_t mb_iso2022jp_to_wchar(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state);
static void mb_wchar_to_iso2022jp(uint32_t *in, size_t len, mb_convert_buf *buf, bool end);
static void mb_wchar_to_jis(uint32_t *in, size_t len, mb_convert_buf *buf, bool end);
static bool mb_check_iso2022jp(unsigned char *in, size_t in_len);
static bool mb_check_jis(unsigned char *in, size_t in_len);
/* Encoding descriptor for "JIS". It shares its buffer-based decoder
 * (mb_iso2022jp_to_wchar) with ISO-2022-JP, but has its own encoder and its
 * own check function. */
const mbfl_encoding mbfl_encoding_jis = {
mbfl_no_encoding_jis,
"JIS",
"ISO-2022-JP",
NULL,
NULL,
MBFL_ENCTYPE_GL_UNSAFE,
/* Legacy byte-at-a-time filters: decoder first, then encoder */
&vtbl_jis_wchar,
&vtbl_wchar_jis,
/* Buffer-based decoder, encoder and check function */
mb_iso2022jp_to_wchar,
mb_wchar_to_jis,
mb_check_jis
};
/* Encoding descriptor for ISO-2022-JP. It shares its buffer-based decoder
 * (mb_iso2022jp_to_wchar) with "JIS", but has its own encoder and its own
 * check function. */
const mbfl_encoding mbfl_encoding_2022jp = {
mbfl_no_encoding_2022jp,
"ISO-2022-JP",
"ISO-2022-JP",
NULL,
NULL,
MBFL_ENCTYPE_GL_UNSAFE,
/* Legacy byte-at-a-time filters: decoder first, then encoder */
&vtbl_2022jp_wchar,
&vtbl_wchar_2022jp,
/* Buffer-based decoder, encoder and check function */
mb_iso2022jp_to_wchar,
mb_wchar_to_iso2022jp,
mb_check_iso2022jp
};
/* Legacy filter vtable for decoding JIS to wchar: common constructor,
 * mbfl_filt_conv_jis_wchar for each byte, mbfl_filt_conv_jis_wchar_flush at
 * end of input. */
const struct mbfl_convert_vtbl vtbl_jis_wchar = {
mbfl_no_encoding_jis,
mbfl_no_encoding_wchar,
mbfl_filt_conv_common_ctor,
NULL,
mbfl_filt_conv_jis_wchar,
mbfl_filt_conv_jis_wchar_flush,
NULL,
};
/* Legacy filter vtable for encoding wchar to JIS: common constructor,
 * mbfl_filt_conv_wchar_jis for each codepoint, mbfl_filt_conv_any_jis_flush at
 * end of input. */
const struct mbfl_convert_vtbl vtbl_wchar_jis = {
mbfl_no_encoding_wchar,
mbfl_no_encoding_jis,
mbfl_filt_conv_common_ctor,
NULL,
mbfl_filt_conv_wchar_jis,
mbfl_filt_conv_any_jis_flush,
NULL,
};
/* Legacy filter vtable for decoding ISO-2022-JP to wchar. The filter and flush
 * functions are the same ones which are used for JIS. */
const struct mbfl_convert_vtbl vtbl_2022jp_wchar = {
mbfl_no_encoding_2022jp,
mbfl_no_encoding_wchar,
mbfl_filt_conv_common_ctor,
NULL,
mbfl_filt_conv_jis_wchar,
mbfl_filt_conv_jis_wchar_flush,
NULL,
};
/* Legacy filter vtable for encoding wchar to ISO-2022-JP: common constructor,
 * mbfl_filt_conv_wchar_2022jp for each codepoint, and the same flush function
 * which is used for JIS. */
const struct mbfl_convert_vtbl vtbl_wchar_2022jp = {
mbfl_no_encoding_wchar,
mbfl_no_encoding_2022jp,
mbfl_filt_conv_common_ctor,
NULL,
mbfl_filt_conv_wchar_2022jp,
mbfl_filt_conv_any_jis_flush,
NULL,
};
/* Evaluate `statement`; if its value is negative, make the enclosing function
 * return -1 immediately. Used to propagate failures of the output function. */
#define CK(statement) do { if ((statement) < 0) return (-1); } while (0)
/*
* JIS => wchar
*
* Legacy byte-at-a-time decoder, used for both JIS and ISO-2022-JP.
*
* The low 4 bits of filter->status track progress through an escape sequence
* or a 2-byte character; the remaining bits identify the selected character
* set (see the commented-out case labels below). The first byte of a 2-byte
* character is kept in filter->cache.
*
* `c` is the next input byte. Returns 0 on success, or -1 as soon as the
* output function reports an error.
*/
int
mbfl_filt_conv_jis_wchar(int c, mbfl_convert_filter *filter)
{
int c1, s, w;
/* After an invalid escape sequence, the low bits of status are cleared and
 * control jumps back here so that `c` is processed again as ordinary input */
retry:
switch (filter->status & 0xf) {
/* case 0x00: ASCII */
/* case 0x10: X 0201 latin */
/* case 0x20: X 0201 kana */
/* case 0x80: X 0208 */
/* case 0x90: X 0212 */
case 0:
if (c == 0x1b) {
filter->status += 2;
} else if (c == 0x0e) { /* "kana in" */
filter->status = 0x20;
} else if (c == 0x0f) { /* "kana out" */
filter->status = 0;
} else if (filter->status == 0x10 && c == 0x5c) { /* YEN SIGN */
CK((*filter->output_function)(0xa5, filter->data));
} else if (filter->status == 0x10 && c == 0x7e) { /* OVER LINE */
CK((*filter->output_function)(0x203e, filter->data));
} else if (filter->status == 0x20 && c > 0x20 && c < 0x60) { /* kana */
CK((*filter->output_function)(0xff40 + c, filter->data));
} else if ((filter->status == 0x80 || filter->status == 0x90) && c > 0x20 && c < 0x7f) { /* kanji first char */
filter->cache = c;
filter->status += 1;
} else if (c >= 0 && c < 0x80) { /* latin, CTLs */
CK((*filter->output_function)(c, filter->data));
} else if (c > 0xa0 && c < 0xe0) { /* GR kana */
CK((*filter->output_function)(0xfec0 + c, filter->data));
} else {
CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data));
}
break;
/* case 0x81: X 0208 second char */
/* case 0x91: X 0212 second char */
case 1:
filter->status &= ~0xf;
c1 = filter->cache;
if (c > 0x20 && c < 0x7f) {
/* Linear index: (row * 94) + cell */
s = (c1 - 0x21)*94 + c - 0x21;
if (filter->status == 0x80) {
if (s >= 0 && s < jisx0208_ucs_table_size) {
w = jisx0208_ucs_table[s];
} else {
w = 0;
}
if (w <= 0) {
w = MBFL_BAD_INPUT;
}
} else {
if (s >= 0 && s < jisx0212_ucs_table_size) {
w = jisx0212_ucs_table[s];
} else {
w = 0;
}
if (w <= 0) {
w = MBFL_BAD_INPUT;
}
}
CK((*filter->output_function)(w, filter->data));
} else {
CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data));
}
break;
/* ESC */
/* case 0x02: */
/* case 0x12: */
/* case 0x22: */
/* case 0x82: */
/* case 0x92: */
case 2:
if (c == 0x24) { /* '$' */
filter->status++;
} else if (c == 0x28) { /* '(' */
filter->status += 3;
} else {
filter->status &= ~0xf;
CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data));
goto retry;
}
break;
/* ESC $ */
/* case 0x03: */
/* case 0x13: */
/* case 0x23: */
/* case 0x83: */
/* case 0x93: */
case 3:
if (c == 0x40 || c == 0x42) { /* '@' or 'B' */
filter->status = 0x80;
} else if (c == 0x28) { /* '(' */
filter->status++;
} else {
/* Error marker stands in for the ESC; the '$' which was already consumed
 * is passed through as-is */
filter->status &= ~0xf;
CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data));
CK((*filter->output_function)(0x24, filter->data));
goto retry;
}
break;
/* ESC $ ( */
/* case 0x04: */
/* case 0x14: */
/* case 0x24: */
/* case 0x84: */
/* case 0x94: */
case 4:
if (c == 0x40 || c == 0x42) { /* '@' or 'B' */
filter->status = 0x80;
} else if (c == 0x44) { /* 'D' */
filter->status = 0x90;
} else {
/* Error marker, then the already-consumed '$' and '(' are passed through */
filter->status &= ~0xf;
CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data));
CK((*filter->output_function)(0x24, filter->data));
CK((*filter->output_function)(0x28, filter->data));
goto retry;
}
break;
/* ESC ( */
/* case 0x05: */
/* case 0x15: */
/* case 0x25: */
/* case 0x85: */
/* case 0x95: */
case 5:
if (c == 0x42 || c == 0x48) { /* 'B' or 'H' */
filter->status = 0;
} else if (c == 0x4a) { /* 'J' */
filter->status = 0x10;
} else if (c == 0x49) { /* 'I' */
filter->status = 0x20;
} else {
/* Error marker, then the already-consumed '(' is passed through */
filter->status &= ~0xf;
CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data));
CK((*filter->output_function)(0x28, filter->data));
goto retry;
}
break;
EMPTY_SWITCH_DEFAULT_CASE();
}
return 0;
}
/* End-of-input handler for the legacy JIS / ISO-2022-JP decoder.
 * Emits one error marker if the input stopped part-way through a 2-byte
 * character or an escape sequence, then resets the filter state and invokes
 * the flush function if there is one.
 * Returns -1 if emitting the error marker fails, and 0 otherwise. */
static int mbfl_filt_conv_jis_wchar_flush(mbfl_convert_filter *filter)
{
	int unfinished = filter->status & 0xF;

	if (unfinished != 0) {
		/* Something was left incomplete: either the second byte of a JIS X 0208
		 * or 0212 character, or the remainder of an escape sequence */
		if (filter->output_function(MBFL_BAD_INPUT, filter->data) < 0) {
			return -1;
		}
	}

	filter->status = 0;

	if (filter->flush_function != NULL) {
		filter->flush_function(filter->data);
	}

	return 0;
}
/*
* wchar => JIS
*
* Legacy codepoint-at-a-time encoder. Bits 8-15 of filter->status record which
* character set was selected by the last escape sequence written: 0 for ASCII,
* 0x200 for JIS X 0208, 0x300 for JIS X 0212, 0x400 for JIS X 0201 Latin.
*
* Returns 0 on success, or -1 if an output call fails. Codepoints which cannot
* be represented are passed to mbfl_filt_conv_illegal_output.
*/
int
mbfl_filt_conv_wchar_jis(int c, mbfl_convert_filter *filter)
{
int s = 0;
/* Look the codepoint up in the Unicode -> JIS tables. Values of 0x10000 and
 * above are used here to mark JIS X 0201 Latin characters (see the final
 * output branch below). */
if (c >= ucs_a1_jis_table_min && c < ucs_a1_jis_table_max) {
s = ucs_a1_jis_table[c - ucs_a1_jis_table_min];
} else if (c == 0x203E) { /* OVERLINE */
s = 0x1007E; /* Convert to JISX 0201 OVERLINE */
} else if (c >= ucs_a2_jis_table_min && c < ucs_a2_jis_table_max) {
s = ucs_a2_jis_table[c - ucs_a2_jis_table_min];
} else if (c >= ucs_i_jis_table_min && c < ucs_i_jis_table_max) {
s = ucs_i_jis_table[c - ucs_i_jis_table_min];
} else if (c >= ucs_r_jis_table_min && c < ucs_r_jis_table_max) {
s = ucs_r_jis_table[c - ucs_r_jis_table_min];
}
/* Characters which the tables do not cover but which have a fixed mapping */
if (s <= 0) {
if (c == 0xa5) { /* YEN SIGN */
s = 0x1005c;
} else if (c == 0xff3c) { /* FULLWIDTH REVERSE SOLIDUS */
s = 0x2140;
} else if (c == 0x2225) { /* PARALLEL TO */
s = 0x2142;
} else if (c == 0xff0d) { /* FULLWIDTH HYPHEN-MINUS */
s = 0x215d;
} else if (c == 0xffe0) { /* FULLWIDTH CENT SIGN */
s = 0x2171;
} else if (c == 0xffe1) { /* FULLWIDTH POUND SIGN */
s = 0x2172;
} else if (c == 0xffe2) { /* FULLWIDTH NOT SIGN */
s = 0x224c;
}
/* U+0000 is encoded as a zero byte; anything else still unmapped is an error */
if (c == 0) {
s = 0;
} else if (s <= 0) {
s = -1;
}
}
/* Write out the result, preceded by an escape sequence if the character set
 * has to change */
if (s >= 0) {
if (s < 0x80) { /* ASCII */
if ((filter->status & 0xff00) != 0) {
CK((*filter->output_function)(0x1b, filter->data)); /* ESC */
CK((*filter->output_function)(0x28, filter->data)); /* '(' */
CK((*filter->output_function)(0x42, filter->data)); /* 'B' */
}
filter->status = 0;
CK((*filter->output_function)(s, filter->data));
} else if (s < 0x8080) { /* X 0208 */
if ((filter->status & 0xff00) != 0x200) {
CK((*filter->output_function)(0x1b, filter->data)); /* ESC */
CK((*filter->output_function)(0x24, filter->data)); /* '$' */
CK((*filter->output_function)(0x42, filter->data)); /* 'B' */
}
filter->status = 0x200;
CK((*filter->output_function)((s >> 8) & 0x7f, filter->data));
CK((*filter->output_function)(s & 0x7f, filter->data));
} else if (s < 0x10000) { /* X 0212 */
if ((filter->status & 0xff00) != 0x300) {
CK((*filter->output_function)(0x1b, filter->data)); /* ESC */
CK((*filter->output_function)(0x24, filter->data)); /* '$' */
CK((*filter->output_function)(0x28, filter->data)); /* '(' */
CK((*filter->output_function)(0x44, filter->data)); /* 'D' */
}
filter->status = 0x300;
CK((*filter->output_function)((s >> 8) & 0x7f, filter->data));
CK((*filter->output_function)(s & 0x7f, filter->data));
} else { /* X 0201 latin */
if ((filter->status & 0xff00) != 0x400) {
CK((*filter->output_function)(0x1b, filter->data)); /* ESC */
CK((*filter->output_function)(0x28, filter->data)); /* '(' */
CK((*filter->output_function)(0x4a, filter->data)); /* 'J' */
}
filter->status = 0x400;
CK((*filter->output_function)(s & 0x7f, filter->data));
}
} else {
CK(mbfl_filt_conv_illegal_output(c, filter));
}
return 0;
}
/*
* wchar => ISO-2022-JP
*
* Legacy codepoint-at-a-time encoder. Only three character sets are written:
* ASCII (status 0), JIS X 0208 (status 0x200) and JIS X 0201 Latin (status
* 0x400); the active one is recorded in bits 8-15 of filter->status.
*
* Returns 0 on success, or -1 if an output call fails. Codepoints which cannot
* be represented are passed to mbfl_filt_conv_illegal_output.
*/
int
mbfl_filt_conv_wchar_2022jp(int c, mbfl_convert_filter *filter)
{
int s;
s = 0;
/* Look the codepoint up in the Unicode -> JIS tables */
if (c >= ucs_a1_jis_table_min && c < ucs_a1_jis_table_max) {
s = ucs_a1_jis_table[c - ucs_a1_jis_table_min];
} else if (c >= ucs_a2_jis_table_min && c < ucs_a2_jis_table_max) {
s = ucs_a2_jis_table[c - ucs_a2_jis_table_min];
} else if (c >= ucs_i_jis_table_min && c < ucs_i_jis_table_max) {
s = ucs_i_jis_table[c - ucs_i_jis_table_min];
} else if (c >= ucs_r_jis_table_min && c < ucs_r_jis_table_max) {
s = ucs_r_jis_table[c - ucs_r_jis_table_min];
}
if (s <= 0) {
/* Characters which the tables do not cover but which have a fixed mapping.
 * Values of 0x10000 and above mark JIS X 0201 Latin characters. */
if (c == 0xa5) { /* YEN SIGN */
s = 0x1005c;
} else if (c == 0xff3c) { /* FULLWIDTH REVERSE SOLIDUS */
s = 0x2140;
} else if (c == 0x2225) { /* PARALLEL TO */
s = 0x2142;
} else if (c == 0xff0d) { /* FULLWIDTH HYPHEN-MINUS */
s = 0x215d;
} else if (c == 0xffe0) { /* FULLWIDTH CENT SIGN */
s = 0x2171;
} else if (c == 0xffe1) { /* FULLWIDTH POUND SIGN */
s = 0x2172;
} else if (c == 0xffe2) { /* FULLWIDTH NOT SIGN */
s = 0x224c;
}
/* U+0000 is encoded as a zero byte; anything else still unmapped is an error */
if (c == 0) {
s = 0;
} else if (s <= 0) {
s = -1;
}
} else if ((s >= 0x80 && s < 0x2121) || (s > 0x8080)) {
/* Reject table results which are neither ASCII nor in the 2-byte range
 * 0x2121..0x8080 written below as X 0208.
 * NOTE(review): these presumably are JIS X 0201 kana and JIS X 0212
 * characters -- confirm against the table contents */
s = -1;
}
/* Write out the result, preceded by an escape sequence if the character set
 * has to change */
if (s >= 0) {
if (s < 0x80) { /* ASCII */
if ((filter->status & 0xff00) != 0) {
CK((*filter->output_function)(0x1b, filter->data)); /* ESC */
CK((*filter->output_function)(0x28, filter->data)); /* '(' */
CK((*filter->output_function)(0x42, filter->data)); /* 'B' */
}
filter->status = 0;
CK((*filter->output_function)(s, filter->data));
} else if (s < 0x10000) { /* X 0208 */
if ((filter->status & 0xff00) != 0x200) {
CK((*filter->output_function)(0x1b, filter->data)); /* ESC */
CK((*filter->output_function)(0x24, filter->data)); /* '$' */
CK((*filter->output_function)(0x42, filter->data)); /* 'B' */
}
filter->status = 0x200;
CK((*filter->output_function)((s >> 8) & 0x7f, filter->data));
CK((*filter->output_function)(s & 0x7f, filter->data));
} else { /* X 0201 latin */
if ((filter->status & 0xff00) != 0x400) {
CK((*filter->output_function)(0x1b, filter->data)); /* ESC */
CK((*filter->output_function)(0x28, filter->data)); /* '(' */
CK((*filter->output_function)(0x4a, filter->data)); /* 'J' */
}
filter->status = 0x400;
CK((*filter->output_function)(s & 0x7f, filter->data));
}
} else {
CK(mbfl_filt_conv_illegal_output(c, filter));
}
return 0;
}
/* End-of-output handler shared by the legacy JIS and ISO-2022-JP encoders.
 * If a character set other than ASCII is selected, ESC ( B is written to
 * switch back. The filter state is then reset.
 * Returns -1 if writing the escape sequence fails; otherwise returns the
 * result of the flush function, or 0 if there is no flush function. */
int
mbfl_filt_conv_any_jis_flush(mbfl_convert_filter *filter)
{
	int charset = filter->status & 0xff00;

	if (charset != 0) {
		/* Return to ASCII: ESC ( B */
		static const int to_ascii[3] = {0x1b, 0x28, 0x42};
		for (int i = 0; i < 3; i++) {
			CK((*filter->output_function)(to_ascii[i], filter->data));
		}
	}

	filter->status = 0;

	if (filter->flush_function == NULL) {
		return 0;
	}
	return (*filter->flush_function)(filter->data);
}
/* Character set currently selected by escape sequences; the buffer-based
 * decoder mb_iso2022jp_to_wchar keeps one of these values in *state. The
 * 2-byte character sets have the highest values, so that decoder can test for
 * them with `*state >= JISX_0208`. */
#define ASCII 0
#define JISX_0201_LATIN 1
#define JISX_0201_KANA 2
#define JISX_0208 3
#define JISX_0212 4
/*
 * Decode a chunk of ISO-2022-JP / JIS text into Unicode codepoints.
 *
 * in, in_len   - input cursor and remaining byte count; both are updated
 *                before returning to reflect what was consumed
 * buf, bufsize - output array for codepoints and its capacity (asserted >= 3)
 * state        - current character set (ASCII, JISX_0201_LATIN, ...); updated
 *                by escape sequences and by the 0xE / 0xF bytes
 *
 * Returns the number of codepoints written to buf. Undecodable input is
 * reported in-band as MBFL_BAD_INPUT.
 */
static size_t mb_iso2022jp_to_wchar(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state)
{
ZEND_ASSERT(bufsize >= 3);
unsigned char *p = *in, *e = p + *in_len;
uint32_t *out = buf, *limit = buf + bufsize;
while (p < e && out < limit) {
unsigned char c = *p++;
if (c == 0x1B) {
/* ESC seen; this is an escape sequence */
if ((e - p) < 2) {
/* Fewer than two bytes follow ESC, so no complete escape sequence fits.
 * Emit one error marker and also swallow a dangling '$' or '('. */
*out++ = MBFL_BAD_INPUT;
if (p != e && (*p == '$' || *p == '('))
p++;
continue;
}
unsigned char c2 = *p++;
if (c2 == '$') {
unsigned char c3 = *p++;
if (c3 == '@' || c3 == 'B') {
*state = JISX_0208;
} else if (c3 == '(') {
/* Four-byte form: ESC $ ( F */
if (p == e) {
*out++ = MBFL_BAD_INPUT;
break;
}
unsigned char c4 = *p++;
if (c4 == '@' || c4 == 'B') {
*state = JISX_0208;
} else if (c4 == 'D') {
*state = JISX_0212;
} else {
if ((limit - out) < 3) {
/* Not enough room for the three codepoints emitted below; step back
 * over ESC $ ( and c4 so the sequence is read again on the next call */
p -= 4;
break;
}
*out++ = MBFL_BAD_INPUT;
*out++ = '$';
*out++ = '(';
/* Un-read c4 so it is processed as an ordinary byte */
p--;
}
} else {
if ((limit - out) < 2) {
/* Step back over ESC $ and c3; retry once there is room for two codepoints */
p -= 3;
break;
}
*out++ = MBFL_BAD_INPUT;
*out++ = '$';
/* Un-read c3 so it is processed as an ordinary byte */
p--;
}
} else if (c2 == '(') {
unsigned char c3 = *p++;
if (c3 == 'B' || c3 == 'H') {
*state = ASCII;
} else if (c3 == 'J') {
*state = JISX_0201_LATIN;
} else if (c3 == 'I') {
*state = JISX_0201_KANA;
} else {
if ((limit - out) < 2) {
/* Step back over ESC ( and c3; retry once there is room for two codepoints */
p -= 3;
break;
}
*out++ = MBFL_BAD_INPUT;
*out++ = '(';
/* Un-read c3 so it is processed as an ordinary byte */
p--;
}
} else {
/* ESC followed by something other than '$' or '(': flag ESC itself as bad
 * and un-read c2 so it is processed as an ordinary byte */
*out++ = MBFL_BAD_INPUT;
p--;
}
} else if (c == 0xE) {
/* "Kana In" marker; this is just for JIS-7/8, but we also accept it for ISO-2022-JP */
*state = JISX_0201_KANA;
} else if (c == 0xF) {
/* "Kana Out" marker */
*state = ASCII;
} else if (*state == JISX_0201_LATIN && c == 0x5C) { /* YEN SIGN */
*out++ = 0xA5;
} else if (*state == JISX_0201_LATIN && c == 0x7E) { /* OVER LINE */
*out++ = 0x203E;
} else if (*state == JISX_0201_KANA && c > 0x20 && c < 0x60) {
/* 7-bit kana: 0x21..0x5F map onto U+FF61..U+FF9F */
*out++ = 0xFF40 + c;
} else if (*state >= JISX_0208 && c > 0x20 && c < 0x7F) {
/* First byte of a two-byte character (state is JISX_0208 or JISX_0212) */
if (p == e) {
*out++ = MBFL_BAD_INPUT;
break;
}
unsigned char c2 = *p++;
if (c2 > 0x20 && c2 < 0x7F) {
/* Linear index into a table laid out as 94 cells per row */
unsigned int s = (c - 0x21)*94 + c2 - 0x21;
uint32_t w = 0;
if (*state == JISX_0208) {
if (s < jisx0208_ucs_table_size) {
w = jisx0208_ucs_table[s];
}
if (!w) {
w = MBFL_BAD_INPUT;
}
} else {
if (s < jisx0212_ucs_table_size) {
w = jisx0212_ucs_table[s];
}
if (!w) {
w = MBFL_BAD_INPUT;
}
}
*out++ = w;
} else {
*out++ = MBFL_BAD_INPUT;
}
} else if (c < 0x80) {
*out++ = c;
} else if (c >= 0xA1 && c <= 0xDF) {
/* GR-invoked Kana; "GR" stands for "graphics right" and refers to bytes
 * with the MSB set (in the context of ISO-2022 encoding).
 *
 * In this regard, Wikipedia states:
 * "Other, older variants known as JIS7 and JIS8 build directly on the 7-bit and 8-bit
 * encodings defined by JIS X 0201 and allow use of JIS X 0201 kana from G1 without
 * escape sequences, using Shift Out and Shift In or setting the eighth bit
 * (GR-invoked), respectively."
 *
 * Note that we support both the 'JIS7' use of 0xE/0xF Shift In/Shift Out codes
 * and the 'JIS8' use of GR-invoked Kana */
/* 0xA1..0xDF map onto U+FF61..U+FF9F */
*out++ = 0xFEC0 + c;
} else {
*out++ = MBFL_BAD_INPUT;
}
}
/* Report the unconsumed remainder back to the caller */
*in_len = e - p;
*in = p;
return out - buf;
}
/*
 * Encode a run of Unicode codepoints as ISO-2022-JP.
 *
 * in, len - codepoints to encode
 * buf     - output buffer; buf->state holds the character set currently
 *           selected in the output stream (ASCII, JISX_0208, ...)
 * end     - when true, "ESC ( B" is appended if the stream is not already
 *           in ASCII
 *
 * Each codepoint is looked up in the ucs_*_jis tables. Codepoints with no
 * table entry and no special-case mapping, and table results falling in
 * 0x80..0x2120 or above 0x8080, are reported through MB_CONVERT_ERROR.
 */
static void mb_wchar_to_iso2022jp(uint32_t *in, size_t len, mb_convert_buf *buf, bool end)
{
	unsigned char *out, *limit;
	MB_CONVERT_BUF_LOAD(buf, out, limit);
	MB_CONVERT_BUF_ENSURE(buf, out, limit, len * 2);

	while (len-- > 0) {
		uint32_t cp = *in++;
		unsigned int jis = 0;

		if (cp >= ucs_a1_jis_table_min && cp < ucs_a1_jis_table_max) {
			jis = ucs_a1_jis_table[cp - ucs_a1_jis_table_min];
		} else if (cp >= ucs_a2_jis_table_min && cp < ucs_a2_jis_table_max) {
			jis = ucs_a2_jis_table[cp - ucs_a2_jis_table_min];
		} else if (cp >= ucs_i_jis_table_min && cp < ucs_i_jis_table_max) {
			jis = ucs_i_jis_table[cp - ucs_i_jis_table_min];
		} else if (cp >= ucs_r_jis_table_min && cp < ucs_r_jis_table_max) {
			jis = ucs_r_jis_table[cp - ucs_r_jis_table_min];
		}

		if (jis == 0) {
			/* No table entry: try the hand-written mappings */
			switch (cp) {
			case 0:
				/* NUL is emitted as a zero byte by the ASCII branch below */
				break;
			case 0xA5: /* YEN SIGN */
				jis = 0x1005C;
				break;
			case 0xFF3C: /* FULLWIDTH REVERSE SOLIDUS */
				jis = 0x2140;
				break;
			case 0x2225: /* PARALLEL TO */
				jis = 0x2142;
				break;
			case 0xFF0D: /* FULLWIDTH HYPHEN-MINUS */
				jis = 0x215D;
				break;
			case 0xFFE0: /* FULLWIDTH CENT SIGN */
				jis = 0x2171;
				break;
			case 0xFFE1: /* FULLWIDTH POUND SIGN */
				jis = 0x2172;
				break;
			case 0xFFE2: /* FULLWIDTH NOT SIGN */
				jis = 0x224C;
				break;
			default:
				MB_CONVERT_ERROR(buf, out, limit, cp, mb_wchar_to_iso2022jp);
				MB_CONVERT_BUF_ENSURE(buf, out, limit, len * 2);
				continue;
			}
		} else if ((jis >= 0x80 && jis < 0x2121) || (jis > 0x8080)) {
			/* Table hit, but outside what this encoder will emit */
			MB_CONVERT_ERROR(buf, out, limit, cp, mb_wchar_to_iso2022jp);
			MB_CONVERT_BUF_ENSURE(buf, out, limit, len * 2);
			continue;
		}

		/* Switch character set if needed, then write the character itself */
		if (jis < 0x80) { /* ASCII */
			if (buf->state != ASCII) {
				MB_CONVERT_BUF_ENSURE(buf, out, limit, (len * 2) + 4);
				out = mb_convert_buf_add3(out, 0x1B, '(', 'B');
				buf->state = ASCII;
			}
			out = mb_convert_buf_add(out, jis);
		} else if (jis < 0x8080) { /* JIS X 0208 */
			if (buf->state != JISX_0208) {
				MB_CONVERT_BUF_ENSURE(buf, out, limit, (len * 2) + 5);
				out = mb_convert_buf_add3(out, 0x1B, '$', 'B');
				buf->state = JISX_0208;
			}
			out = mb_convert_buf_add2(out, (jis >> 8) & 0x7F, jis & 0x7F);
		} else if (jis < 0x10000) { /* JIS X 0212 */
			if (buf->state != JISX_0212) {
				MB_CONVERT_BUF_ENSURE(buf, out, limit, (len * 2) + 6);
				out = mb_convert_buf_add4(out, 0x1B, '$', '(', 'D');
				buf->state = JISX_0212;
			}
			out = mb_convert_buf_add2(out, (jis >> 8) & 0x7F, jis & 0x7F);
		} else { /* JIS X 0201 Latin */
			if (buf->state != JISX_0201_LATIN) {
				MB_CONVERT_BUF_ENSURE(buf, out, limit, (len * 2) + 4);
				out = mb_convert_buf_add3(out, 0x1B, '(', 'J');
				buf->state = JISX_0201_LATIN;
			}
			out = mb_convert_buf_add(out, jis & 0x7F);
		}
	}

	if (end && buf->state != ASCII) {
		MB_CONVERT_BUF_ENSURE(buf, out, limit, 3);
		out = mb_convert_buf_add3(out, 0x1B, '(', 'B');
	}

	MB_CONVERT_BUF_STORE(buf, out, limit);
}
/*
 * Encode a run of Unicode codepoints as JIS.
 *
 * in, len - codepoints to encode
 * buf     - output buffer; buf->state holds the character set currently
 *           selected in the output stream (ASCII, JISX_0201_KANA, ...)
 * end     - when true, "ESC ( B" is appended if the stream is not already
 *           in ASCII
 *
 * Unlike mb_wchar_to_iso2022jp, this encoder also emits JIS X 0201 kana
 * (table values 0xA1..0xDF, after "ESC ( I") and maps U+203E OVERLINE to
 * JIS X 0201 Latin 0x7E. Codepoints with no mapping are reported through
 * MB_CONVERT_ERROR.
 */
static void mb_wchar_to_jis(uint32_t *in, size_t len, mb_convert_buf *buf, bool end)
{
	unsigned char *out, *limit;
	MB_CONVERT_BUF_LOAD(buf, out, limit);
	MB_CONVERT_BUF_ENSURE(buf, out, limit, len * 2);

	while (len--) {
		uint32_t w = *in++;
		unsigned int s = 0;

		if (w >= ucs_a1_jis_table_min && w < ucs_a1_jis_table_max) {
			s = ucs_a1_jis_table[w - ucs_a1_jis_table_min];
		} else if (w == 0x203E) { /* OVERLINE */
			s = 0x1007E; /* Convert to JISX 0201 OVERLINE */
		} else if (w >= ucs_a2_jis_table_min && w < ucs_a2_jis_table_max) {
			s = ucs_a2_jis_table[w - ucs_a2_jis_table_min];
		} else if (w >= ucs_i_jis_table_min && w < ucs_i_jis_table_max) {
			s = ucs_i_jis_table[w - ucs_i_jis_table_min];
		} else if (w >= ucs_r_jis_table_min && w < ucs_r_jis_table_max) {
			s = ucs_r_jis_table[w - ucs_r_jis_table_min];
		}

		if (s == 0) {
			/* No table entry: try the hand-written mappings */
			if (w == 0xA5) { /* YEN SIGN */
				s = 0x1005C;
			} else if (w == 0xFF3C) { /* FULLWIDTH REVERSE SOLIDUS */
				s = 0x2140;
			} else if (w == 0x2225) { /* PARALLEL TO */
				s = 0x2142;
			} else if (w == 0xFF0D) { /* FULLWIDTH HYPHEN-MINUS */
				s = 0x215D;
			} else if (w == 0xFFE0) { /* FULLWIDTH CENT SIGN */
				s = 0x2171;
			} else if (w == 0xFFE1) { /* FULLWIDTH POUND SIGN */
				s = 0x2172;
			} else if (w == 0xFFE2) { /* FULLWIDTH NOT SIGN */
				s = 0x224C;
			} else if (w != 0) {
				/* Hand the error handler this same converter, so that any
				 * replacement output is encoded with JIS rules (including
				 * kana) rather than the stricter ISO-2022-JP ones */
				MB_CONVERT_ERROR(buf, out, limit, w, mb_wchar_to_jis);
				MB_CONVERT_BUF_ENSURE(buf, out, limit, len * 2);
				continue;
			}
			/* w == 0 falls through and is emitted as a zero byte below */
		}

		/* Switch character set if needed, then write the character itself */
		if (s < 0x80) { /* ASCII */
			if (buf->state != ASCII) {
				MB_CONVERT_BUF_ENSURE(buf, out, limit, (len * 2) + 4);
				out = mb_convert_buf_add3(out, 0x1B, '(', 'B');
				buf->state = ASCII;
			}
			out = mb_convert_buf_add(out, s);
		} else if (s >= 0xA1 && s <= 0xDF) { /* JIS X 0201 Kana */
			if (buf->state != JISX_0201_KANA) {
				MB_CONVERT_BUF_ENSURE(buf, out, limit, (len * 2) + 4);
				out = mb_convert_buf_add3(out, 0x1B, '(', 'I');
				buf->state = JISX_0201_KANA;
			}
			out = mb_convert_buf_add(out, s & 0x7F);
		} else if (s < 0x8080) { /* JIS X 0208 */
			if (buf->state != JISX_0208) {
				MB_CONVERT_BUF_ENSURE(buf, out, limit, (len * 2) + 5);
				out = mb_convert_buf_add3(out, 0x1B, '$', 'B');
				buf->state = JISX_0208;
			}
			out = mb_convert_buf_add2(out, (s >> 8) & 0x7F, s & 0x7F);
		} else if (s < 0x10000) { /* JIS X 0212 */
			if (buf->state != JISX_0212) {
				MB_CONVERT_BUF_ENSURE(buf, out, limit, (len * 2) + 6);
				out = mb_convert_buf_add4(out, 0x1B, '$', '(', 'D');
				buf->state = JISX_0212;
			}
			out = mb_convert_buf_add2(out, (s >> 8) & 0x7F, s & 0x7F);
		} else { /* JIS X 0201 Latin */
			if (buf->state != JISX_0201_LATIN) {
				MB_CONVERT_BUF_ENSURE(buf, out, limit, (len * 2) + 4);
				out = mb_convert_buf_add3(out, 0x1B, '(', 'J');
				buf->state = JISX_0201_LATIN;
			}
			out = mb_convert_buf_add(out, s & 0x7F);
		}
	}

	if (end && buf->state != ASCII) {
		MB_CONVERT_BUF_ENSURE(buf, out, limit, 3);
		out = mb_convert_buf_add3(out, 0x1B, '(', 'B');
	}

	MB_CONVERT_BUF_STORE(buf, out, limit);
}
/* Extra validator-only state: kana mode entered via the 0xE byte, as opposed
 * to JISX_0201_KANA which is entered via ESC ( I */
#define JISX_0201_KANA_SO 5

/*
 * Validate a byte string as JIS.
 *
 * Returns true only if every escape sequence, shift byte and character in
 * the input is accepted and the string ends back in the ASCII state.
 */
static bool mb_check_jis(unsigned char *in, size_t in_len)
{
	unsigned char *cur = in;
	unsigned char *const stop = in + in_len;
	unsigned int charset = ASCII;

	while (cur < stop) {
		unsigned char byte = *cur++;

		switch (byte) {
		case 0x1B: {
			/* Escape sequence: not allowed inside a 0xE..0xF kana run, and at
			 * least two more bytes must follow */
			if (charset == JISX_0201_KANA_SO || (stop - cur) < 2) {
				return false;
			}
			unsigned char kind = *cur++;
			unsigned char final = *cur++;

			if (kind == '$') {
				if (final == '(') {
					/* Four-byte form: ESC $ ( F */
					if (cur == stop) {
						return false;
					}
					final = *cur++;
					if (final == 'D') {
						charset = JISX_0212;
						break;
					}
				}
				if (final != '@' && final != 'B') {
					return false;
				}
				charset = JISX_0208;
			} else if (kind == '(') {
				switch (final) {
				/* ESC ( H is treated as a sequence transitioning to ASCII for historical reasons.
				 * see https://github.com/php/php-src/pull/10828#issuecomment-1478342432. */
				case 'B':
				case 'H':
					charset = ASCII;
					break;
				case 'J':
					charset = JISX_0201_LATIN;
					break;
				case 'I':
					charset = JISX_0201_KANA;
					break;
				default:
					return false;
				}
			} else {
				return false;
			}
			break;
		}

		case 0xE:
			/* "Kana In" marker: only valid from ASCII */
			if (charset != ASCII) {
				return false;
			}
			charset = JISX_0201_KANA_SO;
			break;

		case 0xF:
			/* "Kana Out" marker: only valid after a matching "Kana In" */
			if (charset != JISX_0201_KANA_SO) {
				return false;
			}
			charset = ASCII;
			break;

		default:
			if ((charset == JISX_0208 || charset == JISX_0212) && byte > 0x20 && byte < 0x7F) {
				/* Two-byte character: the second byte must exist, be in range,
				 * and the pair must have a non-zero entry in the active table */
				if (cur == stop) {
					return false;
				}
				unsigned char trail = *cur++;
				if (trail <= 0x20 || trail >= 0x7F) {
					return false;
				}
				unsigned int idx = (byte - 0x21)*94 + trail - 0x21;
				if (charset == JISX_0208) {
					if (!(idx < jisx0208_ucs_table_size && jisx0208_ucs_table[idx])) {
						return false;
					}
				} else {
					if (!(idx < jisx0212_ucs_table_size && jisx0212_ucs_table[idx])) {
						return false;
					}
				}
			} else if (byte >= 0x80 && (byte < 0xA1 || byte > 0xDF)) {
				/* 8-bit bytes are only accepted as GR-invoked kana (0xA1..0xDF) */
				return false;
			}
			break;
		}
	}

	return charset == ASCII;
}
/*
 * Validate a byte string as ISO-2022-JP.
 *
 * Only the escape sequences ESC $ @, ESC $ B, ESC ( B and ESC ( J are
 * accepted here; shift bytes and 8-bit bytes are rejected. Returns true only
 * if the whole input is accepted and the string ends in the ASCII state.
 */
static bool mb_check_iso2022jp(unsigned char *in, size_t in_len)
{
	unsigned char *cur = in;
	unsigned char *const stop = in + in_len;
	unsigned int charset = ASCII;

	while (cur < stop) {
		unsigned char byte = *cur++;

		if (byte == 0x1B) {
			/* Escape sequence: exactly two more bytes are required */
			if ((stop - cur) < 2) {
				return false;
			}
			unsigned char kind = *cur++;
			unsigned char final = *cur++;

			if (kind == '$' && (final == '@' || final == 'B')) {
				charset = JISX_0208;
			} else if (kind == '(' && final == 'B') {
				charset = ASCII;
			} else if (kind == '(' && final == 'J') {
				charset = JISX_0201_LATIN;
			} else {
				return false;
			}
			continue;
		}

		if (byte == 0xE || byte == 0xF) {
			/* "Kana In" / "Kana Out" markers are not accepted in ISO-2022-JP */
			return false;
		}

		if (byte >= 0x80) {
			/* No 8-bit bytes in ISO-2022-JP */
			return false;
		}

		if (charset == JISX_0208 && byte > 0x20 && byte < 0x7F) {
			/* Two-byte character: the second byte must exist, be in range,
			 * and the pair must have a non-zero entry in the JIS X 0208 table */
			if (cur == stop) {
				return false;
			}
			unsigned char trail = *cur++;
			if (trail <= 0x20 || trail >= 0x7F) {
				return false;
			}
			unsigned int idx = (byte - 0x21)*94 + trail - 0x21;
			if (!(idx < jisx0208_ucs_table_size && jisx0208_ucs_table[idx])) {
				return false;
			}
		}
	}

	return charset == ASCII;
}

View file

@ -1,47 +0,0 @@
/*
* "streamable kanji code filter and converter"
* Copyright (c) 1998-2002 HappySize, Inc. All rights reserved.
*
* LICENSE NOTICES
*
* This file is part of "streamable kanji code filter and converter",
* which is distributed under the terms of GNU Lesser General Public
* License (version 2) as published by the Free Software Foundation.
*
* This software is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with "streamable kanji code filter and converter";
* if not, write to the Free Software Foundation, Inc., 59 Temple Place,
* Suite 330, Boston, MA 02111-1307 USA
*
* The author of this file:
*
*/
/*
* The source code included in this file was separated from mbfilter_ja.c
* by moriyoshi koizumi <moriyoshi@php.net> on 4 dec 2002.
*
*/
#ifndef MBFL_MBFILTER_JIS_H
#define MBFL_MBFILTER_JIS_H
#include "mbfilter.h"
extern const mbfl_encoding mbfl_encoding_jis;
extern const mbfl_encoding mbfl_encoding_2022jp;
extern const struct mbfl_convert_vtbl vtbl_jis_wchar;
extern const struct mbfl_convert_vtbl vtbl_wchar_jis;
extern const struct mbfl_convert_vtbl vtbl_2022jp_wchar;
extern const struct mbfl_convert_vtbl vtbl_wchar_2022jp;
int mbfl_filt_conv_jis_wchar(int c, mbfl_convert_filter *filter);
int mbfl_filt_conv_wchar_jis(int c, mbfl_convert_filter *filter);
int mbfl_filt_conv_wchar_2022jp(int c, mbfl_convert_filter *filter);
int mbfl_filt_conv_any_jis_flush(mbfl_convert_filter *filter);
#endif /* MBFL_MBFILTER_JIS_H */

File diff suppressed because it is too large Load diff

View file

@ -1,46 +0,0 @@
/*
* "streamable kanji code filter and converter"
* Copyright (c) 1998-2002 HappySize, Inc. All rights reserved.
*
* LICENSE NOTICES
*
* This file is part of "streamable kanji code filter and converter",
* which is distributed under the terms of GNU Lesser General Public
* License (version 2) as published by the Free Software Foundation.
*
* This software is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with "streamable kanji code filter and converter";
* if not, write to the Free Software Foundation, Inc., 59 Temple Place,
* Suite 330, Boston, MA 02111-1307 USA
*
* The author of this file:
*
*/
/*
* The source code included in this file was separated from mbfilter_ja.c
* by moriyoshi koizumi <moriyoshi@php.net> on 4 dec 2002.
*
*/
#ifndef MBFL_MBFILTER_SJIS_H
#define MBFL_MBFILTER_SJIS_H
#include "mbfilter.h"
extern const mbfl_encoding mbfl_encoding_sjis;
extern const struct mbfl_convert_vtbl vtbl_sjis_wchar;
extern const struct mbfl_convert_vtbl vtbl_wchar_sjis;
int mbfl_filt_conv_sjis_wchar(int c, mbfl_convert_filter *filter);
int mbfl_filt_conv_wchar_sjis(int c, mbfl_convert_filter *filter);
#endif /* MBFL_MBFILTER_SJIS_H */
/*
* charset=UTF-8
*/

File diff suppressed because it is too large Load diff

View file

@ -1,49 +0,0 @@
/*
* "streamable kanji code filter and converter"
* Copyright (c) 1998-2002 HappySize, Inc. All rights reserved.
*
* LICENSE NOTICES
*
* This file is part of "streamable kanji code filter and converter",
* which is distributed under the terms of GNU Lesser General Public
* License (version 2) as published by the Free Software Foundation.
*
* This software is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with "streamable kanji code filter and converter";
* if not, write to the Free Software Foundation, Inc., 59 Temple Place,
* Suite 330, Boston, MA 02111-1307 USA
*
* The author of this file:
*
*/
/*
* The source code included in this file was separated from mbfilter_sjis.c
* by rui hirokawa <hirokawa@php.net> on 15 aug 2011.
*
*/
#ifndef MBFL_MBFILTER_SJIS_2004_H
#define MBFL_MBFILTER_SJIS_2004_H
#include "mbfilter.h"
extern const mbfl_encoding mbfl_encoding_sjis2004;
extern const struct mbfl_convert_vtbl vtbl_sjis2004_wchar;
extern const struct mbfl_convert_vtbl vtbl_wchar_sjis2004;
int mbfl_filt_conv_jis2004_wchar(int c, mbfl_convert_filter *filter);
int mbfl_filt_conv_wchar_jis2004(int c, mbfl_convert_filter *filter);
int mbfl_filt_conv_wchar_jis2004_flush(mbfl_convert_filter *filter);
int mbfl_filt_conv_jis2004_wchar_flush(mbfl_convert_filter *filter);
#endif /* MBFL_MBFILTER_SJIS_2004_H */
/*
* charset=UTF-8
*/

View file

@ -1,39 +0,0 @@
/*
* "streamable kanji code filter and converter"
* Copyright (c) 1998-2002 HappySize, Inc. All rights reserved.
*
* LICENSE NOTICES
*
* This file is part of "streamable kanji code filter and converter",
* which is distributed under the terms of GNU Lesser General Public
* License (version 2) as published by the Free Software Foundation.
*
* This software is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with "streamable kanji code filter and converter";
* if not, write to the Free Software Foundation, Inc., 59 Temple Place,
* Suite 330, Boston, MA 02111-1307 USA
*
* The author of this file:
*
*/
/*
* the source code included in this file was separated from mbfilter_sjis_open.c
* by Rui Hirokawa <hirokawa@php.net> on 25 July 2011.
*
*/
#ifndef MBFL_MBFILTER_SJIS_MAC_H
#define MBFL_MBFILTER_SJIS_MAC_H
#include "mbfilter.h"
extern const mbfl_encoding mbfl_encoding_sjis_mac;
extern const struct mbfl_convert_vtbl vtbl_sjis_mac_wchar;
extern const struct mbfl_convert_vtbl vtbl_wchar_sjis_mac;
#endif /* MBFL_MBFILTER_SJIS_MAC_H */

View file

@ -1,64 +0,0 @@
/*
* "streamable kanji code filter and converter"
* Copyright (c) 1998-2002 HappySize, Inc. All rights reserved.
*
* LICENSE NOTICES
*
* This file is part of "streamable kanji code filter and converter",
* which is distributed under the terms of GNU Lesser General Public
* License (version 2) as published by the Free Software Foundation.
*
* This software is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with "streamable kanji code filter and converter";
* if not, write to the Free Software Foundation, Inc., 59 Temple Place,
* Suite 330, Boston, MA 02111-1307 USA
*
* The author of this file:
*
*/
/*
* the source code included in this file was separated from mbfilter_sjis_open.c
* by Rui Hirokawa <hirokawa@php.net> on 25 July 2011.
*
*/
#ifndef MBFL_MBFILTER_SJIS_MOBILE_H
#define MBFL_MBFILTER_SJIS_MOBILE_H
#include "mbfilter.h"
extern const mbfl_encoding mbfl_encoding_sjis_docomo;
extern const mbfl_encoding mbfl_encoding_sjis_kddi;
extern const mbfl_encoding mbfl_encoding_sjis_sb;
extern const struct mbfl_convert_vtbl vtbl_sjis_docomo_wchar;
extern const struct mbfl_convert_vtbl vtbl_wchar_sjis_docomo;
extern const struct mbfl_convert_vtbl vtbl_sjis_kddi_wchar;
extern const struct mbfl_convert_vtbl vtbl_wchar_sjis_kddi;
extern const struct mbfl_convert_vtbl vtbl_sjis_sb_wchar;
extern const struct mbfl_convert_vtbl vtbl_wchar_sjis_sb;
extern const unsigned short mbfl_docomo2uni_pua[4][3];
extern const unsigned short mbfl_kddi2uni_pua[7][3];
extern const unsigned short mbfl_sb2uni_pua[6][3];
extern const unsigned short mbfl_kddi2uni_pua_b[8][3];
int mbfl_filt_conv_sjis_mobile_flush(mbfl_convert_filter *filter);
int mbfilter_sjis_emoji_docomo2unicode(int s, int *snd);
int mbfilter_sjis_emoji_kddi2unicode(int s, int *snd);
int mbfilter_sjis_emoji_sb2unicode(int s, int *snd);
int mbfilter_unicode2sjis_emoji_docomo(int c, int *s1, mbfl_convert_filter *filter);
int mbfilter_unicode2sjis_emoji_kddi(int c, int *s1, mbfl_convert_filter *filter);
int mbfilter_unicode2sjis_emoji_sb(int c, int *s1, mbfl_convert_filter *filter);
int mbfilter_conv_map_tbl(int c, int *w, const unsigned short map[][3], int n);
int mbfilter_conv_r_map_tbl(int c, int *w, const unsigned short map[][3], int n);
#endif /* MBFL_MBFILTER_SJIS_MOBILE_H */

View file

@ -1,297 +0,0 @@
/*
* "streamable kanji code filter and converter"
* Copyright (c) 1998-2002 HappySize, Inc. All rights reserved.
*
* LICENSE NOTICES
*
* This file is part of "streamable kanji code filter and converter",
* which is distributed under the terms of GNU Lesser General Public
* License (version 2) as published by the Free Software Foundation.
*
* This software is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with "streamable kanji code filter and converter";
* if not, write to the Free Software Foundation, Inc., 59 Temple Place,
* Suite 330, Boston, MA 02111-1307 USA
*
* The author of this file:
*
*/
/*
* The source code included in this file was separated from mbfilter_kr.c
* by moriyoshi koizumi <moriyoshi@php.net> on 4 dec 2002.
*
*/
/* UHC was introduced by Microsoft in Windows 95, and is also known as CP949.
* It is the same as EUC-KR, but with 8,822 additional characters added to
* complete all the characters in the Johab charset. */
#include "mbfilter.h"
#include "mbfilter_uhc.h"
#define UNICODE_TABLE_UHC_DEF
#include "unicode_table_uhc.h"
static int mbfl_filt_conv_uhc_wchar_flush(mbfl_convert_filter *filter);
static size_t mb_uhc_to_wchar(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state);
static void mb_wchar_to_uhc(uint32_t *in, size_t len, mb_convert_buf *buf, bool end);
/* Character-length table for UHC with 256 entries (16 per row).
 * Entries 0x81..0xFE hold 2; every other entry (0x00..0x80 and 0xFF) holds 1.
 * NOTE(review): presumably indexed by the first byte of a character to get
 * its total byte length -- confirm against the users of mblen_table. */
static const unsigned char mblen_table_uhc[] = { /* 0x81-0xFE */
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1
};
/* NULL-terminated list of alternative names for UHC */
static const char *mbfl_encoding_uhc_aliases[] = {"CP949", NULL};
/* Encoding descriptor for UHC. It ties together the encoding id, the name
 * "UHC" (given twice), the alias list, the length table, the two filter
 * vtables and the two buffer-based converters defined in this file.
 * NOTE(review): the meaning of the `0` field and of the trailing NULL is
 * defined by the mbfl_encoding struct, which is not visible here. */
const mbfl_encoding mbfl_encoding_uhc = {
mbfl_no_encoding_uhc,
"UHC",
"UHC",
mbfl_encoding_uhc_aliases,
mblen_table_uhc,
0,
&vtbl_uhc_wchar,
&vtbl_wchar_uhc,
mb_uhc_to_wchar,
mb_wchar_to_uhc,
NULL
};
/* Filter vtable for the UHC -> wchar direction: uses the common constructor,
 * mbfl_filt_conv_uhc_wchar as the per-byte filter and
 * mbfl_filt_conv_uhc_wchar_flush as the flush handler. */
const struct mbfl_convert_vtbl vtbl_uhc_wchar = {
mbfl_no_encoding_uhc,
mbfl_no_encoding_wchar,
mbfl_filt_conv_common_ctor,
NULL,
mbfl_filt_conv_uhc_wchar,
mbfl_filt_conv_uhc_wchar_flush,
NULL,
};
/* Filter vtable for the wchar -> UHC direction: uses the common constructor,
 * mbfl_filt_conv_wchar_uhc as the per-codepoint filter and the common flush
 * handler. */
const struct mbfl_convert_vtbl vtbl_wchar_uhc = {
mbfl_no_encoding_wchar,
mbfl_no_encoding_uhc,
mbfl_filt_conv_common_ctor,
NULL,
mbfl_filt_conv_wchar_uhc,
mbfl_filt_conv_common_flush,
NULL,
};
/* Evaluate `statement`; if the result is negative, return -1 from the
 * enclosing function. Only usable inside functions returning int. */
#define CK(statement) do { if ((statement) < 0) return (-1); } while (0)
/*
 * Byte-at-a-time UHC decoder.
 *
 * filter->status is 0 when expecting the start of a character and 1 when a
 * lead byte is held in filter->cache and the second byte is expected.
 * Decoded codepoints (or MBFL_BAD_INPUT) are passed to
 * filter->output_function. Returns 0, or -1 via CK() if output fails.
 */
int mbfl_filt_conv_uhc_wchar(int c, mbfl_convert_filter *filter)
{
	switch (filter->status) {
	case 0:
		if (c >= 0 && c < 0x80) {
			/* Single-byte character, passed through unchanged */
			CK((*filter->output_function)(c, filter->data));
		} else if (c <= 0x80 || c >= 0xfe || c == 0xc9) {
			/* Not usable as a lead byte */
			CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data));
		} else {
			/* Lead byte of a two-byte character: remember it */
			filter->cache = c;
			filter->status = 1;
		}
		break;

	case 1: {
		/* Second byte: combine with the cached lead byte */
		int lead = filter->cache;
		int wc = 0;

		filter->status = 0;

		if (lead >= 0x81 && lead <= 0xc6 && c >= 0x41 && c <= 0xfe) {
			/* 190 cells per row */
			wc = (lead - 0x81)*190 + (c - 0x41);
			if (wc >= 0 && wc < uhc1_ucs_table_size) {
				wc = uhc1_ucs_table[wc];
			}
		} else if (lead >= 0xc7 && lead < 0xfe && c >= 0xa1 && c <= 0xfe) {
			/* 94 cells per row */
			wc = (lead - 0xc7)*94 + (c - 0xa1);
			if (wc >= 0 && wc < uhc3_ucs_table_size) {
				wc = uhc3_ucs_table[wc];
			}
		}

		if (wc == 0) {
			wc = MBFL_BAD_INPUT;
		}
		CK((*filter->output_function)(wc, filter->data));
		break;
	}

	EMPTY_SWITCH_DEFAULT_CASE();
	}

	return 0;
}
/*
 * Flush handler for the UHC decoder.
 *
 * If input ended while a lead byte was pending (status 1), the state is
 * reset and MBFL_BAD_INPUT is emitted for the truncated character. The flush
 * is then forwarded to the next stage when one is registered; its result is
 * not inspected. Returns 0, or -1 if emitting the error marker fails.
 */
static int mbfl_filt_conv_uhc_wchar_flush(mbfl_convert_filter *filter)
{
	if (filter->status == 1) {
		/* 2-byte character was cut off after its first byte */
		filter->status = 0;
		if ((*filter->output_function)(MBFL_BAD_INPUT, filter->data) < 0) {
			return -1;
		}
	}

	if (!filter->flush_function) {
		return 0;
	}
	(*filter->flush_function)(filter->data);
	return 0;
}
/*
 * Codepoint-at-a-time UHC encoder.
 *
 * Looks `c` up in the ucs_*_uhc tables. A result below 0x80 is written as
 * one byte, anything else as two bytes (high byte first). Codepoints other
 * than 0 with no table entry go to mbfl_filt_conv_illegal_output.
 * Returns 0, or -1 via CK() if output fails.
 */
int mbfl_filt_conv_wchar_uhc(int c, mbfl_convert_filter *filter)
{
	int code = 0;

	if (c >= ucs_a1_uhc_table_min && c < ucs_a1_uhc_table_max) {
		code = ucs_a1_uhc_table[c - ucs_a1_uhc_table_min];
	} else if (c >= ucs_a2_uhc_table_min && c < ucs_a2_uhc_table_max) {
		code = ucs_a2_uhc_table[c - ucs_a2_uhc_table_min];
	} else if (c >= ucs_a3_uhc_table_min && c < ucs_a3_uhc_table_max) {
		code = ucs_a3_uhc_table[c - ucs_a3_uhc_table_min];
	} else if (c >= ucs_i_uhc_table_min && c < ucs_i_uhc_table_max) {
		code = ucs_i_uhc_table[c - ucs_i_uhc_table_min];
	} else if (c >= ucs_s_uhc_table_min && c < ucs_s_uhc_table_max) {
		code = ucs_s_uhc_table[c - ucs_s_uhc_table_min];
	} else if (c >= ucs_r1_uhc_table_min && c < ucs_r1_uhc_table_max) {
		code = ucs_r1_uhc_table[c - ucs_r1_uhc_table_min];
	} else if (c >= ucs_r2_uhc_table_min && c < ucs_r2_uhc_table_max) {
		code = ucs_r2_uhc_table[c - ucs_r2_uhc_table_min];
	}

	/* A zero result is only legitimate for codepoint 0 itself */
	if (code == 0 && c != 0) {
		code = -1;
	}

	if (code < 0) {
		CK(mbfl_filt_conv_illegal_output(c, filter));
		return 0;
	}

	/* Two-byte codes get their high byte first; the low byte is written in
	 * both cases (for one-byte codes it is the whole value) */
	if (code >= 0x80) {
		CK((*filter->output_function)((code >> 8) & 0xff, filter->data));
	}
	CK((*filter->output_function)(code & 0xff, filter->data));

	return 0;
}
/*
 * Decode a chunk of UHC text into Unicode codepoints.
 *
 * in, in_len   - input cursor and remaining byte count; both are updated
 *                before returning to reflect what was consumed
 * buf, bufsize - output array for codepoints and its capacity
 * state        - not used by this function
 *
 * Returns the number of codepoints written to buf. Undecodable input is
 * reported in-band as MBFL_BAD_INPUT.
 */
static size_t mb_uhc_to_wchar(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state)
{
unsigned char *p = *in, *e = p + *in_len;
uint32_t *out = buf, *limit = buf + bufsize;
e--; /* Stop the main loop 1 byte short of the end of the input */
while (p < e && out < limit) {
unsigned char c = *p++;
if (c < 0x80) {
*out++ = c;
} else if (c > 0x80 && c < 0xFE) {
/* We don't need to check p < e here; it's not possible that this pointer dereference
* will be outside the input string, because of e-- above */
unsigned char c2 = *p++;
if (c2 < 0x41 || c2 == 0xFF) {
/* Second byte out of range: both bytes are consumed as one error */
*out++ = MBFL_BAD_INPUT;
continue;
}
unsigned int w = 0;
if (c <= 0xC6) {
/* Lead bytes 0x81..0xC6: 190 cells per row, second byte from 0x41 */
w = (c - 0x81)*190 + c2 - 0x41;
ZEND_ASSERT(w < uhc1_ucs_table_size);
w = uhc1_ucs_table[w];
} else if (c2 >= 0xA1) {
/* Lead bytes 0xC7..0xFD: 94 cells per row, second byte from 0xA1 */
w = (c - 0xC7)*94 + c2 - 0xA1;
ZEND_ASSERT(w < uhc3_ucs_table_size);
w = uhc3_ucs_table[w];
if (!w) {
/* If c == 0xC9, we shouldn't have tried to read a 2-byte char at all... but it is faster
* to fix up that rare case here rather than include an extra check in the hot path */
/* NOTE(review): this un-read only happens on this c2 >= 0xA1 path; with
* c == 0xC9 and c2 in 0x41..0xA0 both bytes are consumed below -- confirm
* that is intended */
if (c == 0xC9) {
p--;
}
*out++ = MBFL_BAD_INPUT;
continue;
}
}
/* Reached with w == 0 for an empty table cell, or when c > 0xC6 and c2 < 0xA1 */
if (!w) {
w = MBFL_BAD_INPUT;
}
*out++ = w;
} else {
*out++ = MBFL_BAD_INPUT;
}
}
/* Finish up last byte of input string if there is one */
if (p == e && out < limit) {
/* A lone final byte can only be valid if it is a single-byte character */
unsigned char c = *p++;
*out++ = (c < 0x80) ? c : MBFL_BAD_INPUT;
}
/* The +1 undoes the e-- at the top */
*in_len = e - p + 1;
*in = p;
return out - buf;
}
/*
 * Encode a run of Unicode codepoints as UHC.
 *
 * in, len - codepoints to encode
 * buf     - output buffer
 * end     - not used by this function
 *
 * Each codepoint is looked up in the ucs_*_uhc tables. Results below 0x80
 * are written as one byte, others as two bytes (high byte first). Codepoint
 * 0 is written as a zero byte; any other codepoint with no table entry is
 * reported through MB_CONVERT_ERROR.
 */
static void mb_wchar_to_uhc(uint32_t *in, size_t len, mb_convert_buf *buf, bool end)
{
	unsigned char *out, *limit;
	MB_CONVERT_BUF_LOAD(buf, out, limit);
	MB_CONVERT_BUF_ENSURE(buf, out, limit, len);

	while (len-- > 0) {
		uint32_t cp = *in++;
		unsigned int code = 0;

		if (cp >= ucs_a1_uhc_table_min && cp < ucs_a1_uhc_table_max) {
			code = ucs_a1_uhc_table[cp - ucs_a1_uhc_table_min];
		} else if (cp >= ucs_a2_uhc_table_min && cp < ucs_a2_uhc_table_max) {
			code = ucs_a2_uhc_table[cp - ucs_a2_uhc_table_min];
		} else if (cp >= ucs_a3_uhc_table_min && cp < ucs_a3_uhc_table_max) {
			code = ucs_a3_uhc_table[cp - ucs_a3_uhc_table_min];
		} else if (cp >= ucs_i_uhc_table_min && cp < ucs_i_uhc_table_max) {
			code = ucs_i_uhc_table[cp - ucs_i_uhc_table_min];
		} else if (cp >= ucs_s_uhc_table_min && cp < ucs_s_uhc_table_max) {
			code = ucs_s_uhc_table[cp - ucs_s_uhc_table_min];
		} else if (cp >= ucs_r1_uhc_table_min && cp < ucs_r1_uhc_table_max) {
			code = ucs_r1_uhc_table[cp - ucs_r1_uhc_table_min];
		} else if (cp >= ucs_r2_uhc_table_min && cp < ucs_r2_uhc_table_max) {
			code = ucs_r2_uhc_table[cp - ucs_r2_uhc_table_min];
		}

		if (code >= 0x80) {
			/* Two-byte character */
			MB_CONVERT_BUF_ENSURE(buf, out, limit, len + 2);
			out = mb_convert_buf_add2(out, (code >> 8) & 0xFF, code & 0xFF);
		} else if (code != 0) {
			/* One-byte character */
			out = mb_convert_buf_add(out, code);
		} else if (cp == 0) {
			/* NUL encodes as a zero byte */
			out = mb_convert_buf_add(out, 0);
		} else {
			/* No mapping for this codepoint */
			MB_CONVERT_ERROR(buf, out, limit, cp, mb_wchar_to_uhc);
			MB_CONVERT_BUF_ENSURE(buf, out, limit, len);
		}
	}

	MB_CONVERT_BUF_STORE(buf, out, limit);
}

View file

@ -1,42 +0,0 @@
/*
* "streamable kanji code filter and converter"
* Copyright (c) 1998-2002 HappySize, Inc. All rights reserved.
*
* LICENSE NOTICES
*
* This file is part of "streamable kanji code filter and converter",
* which is distributed under the terms of GNU Lesser General Public
* License (version 2) as published by the Free Software Foundation.
*
* This software is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with "streamable kanji code filter and converter";
* if not, write to the Free Software Foundation, Inc., 59 Temple Place,
* Suite 330, Boston, MA 02111-1307 USA
*
* The author of this file:
*
*/
/*
* The source code included in this file was separated from mbfilter_kr.h
* by moriyoshi koizumi <moriyoshi@php.net> on 4 dec 2002.
*
*/
#ifndef MBFL_MBFILTER_UHC_H
#define MBFL_MBFILTER_UHC_H
#include "mbfilter.h"
extern const mbfl_encoding mbfl_encoding_uhc;
extern const struct mbfl_convert_vtbl vtbl_uhc_wchar;
extern const struct mbfl_convert_vtbl vtbl_wchar_uhc;
int mbfl_filt_conv_uhc_wchar(int c, mbfl_convert_filter *filter);
int mbfl_filt_conv_wchar_uhc(int c, mbfl_convert_filter *filter);
#endif /* MBFL_MBFILTER_UHC_H */

View file

@ -30,7 +30,7 @@
#include "mbfilter.h"
#include "mbfilter_utf8_mobile.h"
#include "mbfilter_sjis_mobile.h"
#include "mbfilter_cjk.h"
#include "emoji2uni.h"
@ -47,6 +47,66 @@ static void mb_wchar_to_utf8_kddi_b(uint32_t *in, size_t len, mb_convert_buf *bu
static size_t mb_utf8_sb_to_wchar(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state);
static void mb_wchar_to_utf8_sb(uint32_t *in, size_t len, mb_convert_buf *buf, bool end);
/*
 * Translate `c` through a table of ranges.
 *
 * Each of the `n` rows of `map` is {first, last, target}. The first row
 * with first <= c <= last wins, and *w receives target + (c - first).
 *
 * Returns 1 when a row matched, 0 otherwise (*w is then left untouched).
 */
static int mbfilter_conv_map_tbl(int c, int *w, const unsigned short map[][3], int n)
{
	int row = 0;

	while (row < n) {
		const unsigned short *range = map[row++];
		int first = range[0];
		int last = range[1];

		if (c >= first && c <= last) {
			*w = range[2] + (c - first);
			return 1;
		}
	}

	return 0;
}
/*
 * Inverse of mbfilter_conv_map_tbl: translate `c` from the target side of a
 * range table back to the source side.
 *
 * Each of the `n` rows of `map` is {first, last, target}. The first row
 * with target <= c <= target + (last - first) wins, and *w receives
 * first + (c - target).
 *
 * Returns 1 when a row matched, 0 otherwise (*w is then left untouched).
 */
static int mbfilter_conv_r_map_tbl(int c, int *w, const unsigned short map[][3], int n)
{
	int row = 0;

	while (row < n) {
		const unsigned short *range = map[row++];
		int base = range[2];
		int span = range[1] - range[0];

		if (c >= base && c <= base + span) {
			*w = range[0] + (c - base);
			return 1;
		}
	}

	return 0;
}
/* DoCoMo range table in the {first, last, target} row format consumed by
 * mbfilter_conv_map_tbl / mbfilter_conv_r_map_tbl. All targets lie in the
 * Unicode Private Use Area (U+E000..U+F8FF). */
static const unsigned short mbfl_docomo2uni_pua[4][3] = {
{0x28c2, 0x292f, 0xe63e},
{0x2930, 0x2934, 0xe6ac},
{0x2935, 0x2951, 0xe6b1},
{0x2952, 0x29db, 0xe6ce},
};
/* KDDI range table (used for the utf8_kddi_a encoding) in the
 * {first, last, target} row format consumed by mbfilter_conv_map_tbl /
 * mbfilter_conv_r_map_tbl. Rows are not sorted by source code. All targets
 * lie in the Unicode Private Use Area (U+E000..U+F8FF). */
static const unsigned short mbfl_kddi2uni_pua[7][3] = {
{0x26ec, 0x2838, 0xe468},
{0x284c, 0x2863, 0xe5b5},
{0x24b8, 0x24ca, 0xe5cd},
{0x24cb, 0x2545, 0xea80},
{0x2839, 0x284b, 0xeafb},
{0x2546, 0x25c0, 0xeb0e},
{0x25c1, 0x25c6, 0xeb89},
};
/* Alternative KDDI range table (used for the utf8_kddi_b encoding) in the
 * {first, last, target} row format consumed by mbfilter_conv_map_tbl /
 * mbfilter_conv_r_map_tbl. All targets lie in the Unicode Private Use Area
 * (U+E000..U+F8FF). */
static const unsigned short mbfl_kddi2uni_pua_b[8][3] = {
{0x24b8, 0x24f6, 0xec40},
{0x24f7, 0x2573, 0xec80},
{0x2574, 0x25b2, 0xed40},
{0x25b3, 0x25c6, 0xed80},
{0x26ec, 0x272a, 0xef40},
{0x272b, 0x27a7, 0xef80},
{0x27a8, 0x27e6, 0xf040},
{0x27e7, 0x2863, 0xf080},
};
/* SoftBank ("sb") range table in the {first, last, target} row format
 * consumed by mbfilter_conv_map_tbl / mbfilter_conv_r_map_tbl. All targets
 * lie in the Unicode Private Use Area (U+E000..U+F8FF). */
static const unsigned short mbfl_sb2uni_pua[6][3] = {
{0x27a9, 0x2802, 0xe101},
{0x2808, 0x2861, 0xe201},
{0x2921, 0x297a, 0xe001},
{0x2980, 0x29cc, 0xe301},
{0x2a99, 0x2ae4, 0xe401},
{0x2af8, 0x2b35, 0xe501},
};
extern const unsigned char mblen_table_utf8[];
static const char *mbfl_encoding_utf8_docomo_aliases[] = {"UTF-8-DOCOMO", "UTF8-DOCOMO", NULL};
@ -298,8 +358,8 @@ int mbfl_filt_conv_wchar_utf8_mobile(int c, mbfl_convert_filter *filter)
int s1, c1;
if ((filter->to->no_encoding == mbfl_no_encoding_utf8_docomo && mbfilter_unicode2sjis_emoji_docomo(c, &s1, filter) > 0 && mbfilter_conv_map_tbl(s1, &c1, mbfl_docomo2uni_pua, 4) > 0) ||
(filter->to->no_encoding == mbfl_no_encoding_utf8_kddi_a && mbfilter_unicode2sjis_emoji_kddi(c, &s1, filter) > 0 && mbfilter_conv_map_tbl(s1, &c1, mbfl_kddi2uni_pua, 7) > 0) ||
(filter->to->no_encoding == mbfl_no_encoding_utf8_kddi_b && mbfilter_unicode2sjis_emoji_kddi(c, &s1, filter) > 0 && mbfilter_conv_map_tbl(s1, &c1, mbfl_kddi2uni_pua_b, 8) > 0) ||
(filter->to->no_encoding == mbfl_no_encoding_utf8_kddi_a && mbfilter_unicode2sjis_emoji_kddi_sjis(c, &s1, filter) > 0 && mbfilter_conv_map_tbl(s1, &c1, mbfl_kddi2uni_pua, 7) > 0) ||
(filter->to->no_encoding == mbfl_no_encoding_utf8_kddi_b && mbfilter_unicode2sjis_emoji_kddi_sjis(c, &s1, filter) > 0 && mbfilter_conv_map_tbl(s1, &c1, mbfl_kddi2uni_pua_b, 8) > 0) ||
(filter->to->no_encoding == mbfl_no_encoding_utf8_sb && mbfilter_unicode2sjis_emoji_sb(c, &s1, filter) > 0 && mbfilter_conv_map_tbl(s1, &c1, mbfl_sb2uni_pua, 6) > 0)) {
c = c1;
}

View file

@ -25,8 +25,6 @@
#ifndef UNICODE_TABLE_CP932_EXT_H
#define UNICODE_TABLE_CP932_EXT_H
#ifdef UNICODE_TABLE_CP932_DEF
const unsigned short cp932ext1_ucs_table[] = {
/* ku 13 */
0x2460,0x2461,0x2462,0x2463,0x2464,0x2465,0x2466,0x2467,
@ -169,19 +167,4 @@ const unsigned short cp932ext3_ucs_table[] = {
const int cp932ext3_ucs_table_min = (115 - 1)*94;
const int cp932ext3_ucs_table_max = (115 - 1)*94 + (sizeof (cp932ext3_ucs_table) / sizeof (unsigned short));
#else
extern const unsigned short cp932ext1_ucs_table[];
extern const unsigned short cp932ext2_ucs_table[];
extern const unsigned short cp932ext3_ucs_table[];
extern const int cp932ext1_ucs_table_min;
extern const int cp932ext1_ucs_table_max;
extern const int cp932ext2_ucs_table_min;
extern const int cp932ext2_ucs_table_max;
extern const int cp932ext3_ucs_table_min;
extern const int cp932ext3_ucs_table_max;
#endif
#endif /* UNICODE_TABLE_CP932_EXT_H */

View file

@ -19,17 +19,11 @@
* Suite 330, Boston, MA 02111-1307 USA
*
* The author of this file: Rui Hirokawa <hirokawa@php.net>
*
*/
#ifndef UNICODE_TABLE_CP936_H
#define UNICODE_TABLE_CP936_H
/*
* Unicode table
*/
#ifdef UNICODE_TABLE_CP936_DEF
/* CP936 -> Unicode, but without PUA codepoints used in CP936 and GB18030 */
const unsigned short cp936_ucs_table[] = {
/* 0x8140 */
@ -6634,41 +6628,4 @@ static const unsigned short mbfl_cp936_pua_tbl[][3] = {
static const int mbfl_cp936_pua_tbl_max = sizeof(mbfl_cp936_pua_tbl)/(sizeof(unsigned short)*3);
#else
extern const unsigned short cp936_ucs_table[];
extern const unsigned short cp936_pua_tbl1[];
extern const unsigned short cp936_pua_tbl2[];
extern const unsigned short cp936_pua_tbl3[];
extern const unsigned short ucs_a1_cp936_table[];
extern const unsigned short ucs_a2_cp936_table[];
extern const unsigned short ucs_a3_cp936_table[];
extern const unsigned short ucs_i_cp936_table[];
extern const unsigned short ucs_cf_cp936_table[];
extern const unsigned short ucs_sfv_cp936_table[];
extern const unsigned short ucs_ci_s_cp936_table[];
extern const unsigned short ucs_hff_s_cp936_table[];
extern const int cp936_ucs_table_size;
extern const int ucs_a1_cp936_table_min;
extern const int ucs_a1_cp936_table_max;
extern const int ucs_a2_cp936_table_min;
extern const int ucs_a2_cp936_table_max;
extern const int ucs_a3_cp936_table_min;
extern const int ucs_a3_cp936_table_max;
extern const int ucs_i_cp936_table_min;
extern const int ucs_i_cp936_table_max;
extern const int ucs_ci_cp936_table_min;
extern const int ucs_ci_cp936_table_max;
extern const int ucs_cf_cp936_table_min;
extern const int ucs_cf_cp936_table_max;
extern const int ucs_sfv_cp936_table_min;
extern const int ucs_sfv_cp936_table_max;
extern const int ucs_hff_cp936_table_min;
extern const int ucs_hff_cp936_table_max;
#endif
#endif /* UNICODE_TABLE_CP936_H */

View file

@ -21,17 +21,11 @@
* The authors of this file: PHP3 internationalization team
* You can contact the primary authors;   <sgk@happysize.co.jp>,
* Tsukada Takuya <tsukada@fminn.nagano.nagano.jp>.
*
*/
#ifndef UNICODE_TABLE_JIS_H
#define UNICODE_TABLE_JIS_H
#ifdef UNICODE_TABLE_JIS_DEF
/*
* Unicode table
*/
const unsigned short jisx0208_ucs_table[] = {
/* ku 1 */
0x3000,0x3001,0x3002,0xFF0C,0xFF0E,0x30FB,0xFF1A,0xFF1B,
@ -5846,27 +5840,4 @@ const unsigned short ucs_r_jis_table[] = {
int ucs_r_jis_table_min = 0xFF00;
int ucs_r_jis_table_max = 0xFF00 + (sizeof (ucs_r_jis_table) / sizeof (unsigned short));
#else
extern const unsigned short jisx0208_ucs_table[];
extern const unsigned short jisx0212_ucs_table[];
extern const unsigned short ucs_a1_jis_table[];
extern const unsigned short ucs_a2_jis_table[];
extern const unsigned short ucs_i_jis_table[];
extern const unsigned short ucs_r_jis_table[];
extern const int jisx0208_ucs_table_size;
extern const int jisx0212_ucs_table_size;
extern const int ucs_a1_jis_table_min;
extern const int ucs_a1_jis_table_max;
extern const int ucs_a2_jis_table_min;
extern const int ucs_a2_jis_table_max;
extern const int ucs_i_jis_table_min;
extern const int ucs_i_jis_table_max;
extern int ucs_r_jis_table_min;
extern int ucs_r_jis_table_max;
#endif
#endif /* UNICODE_TABLE_JIS_H */

View file

@ -25,11 +25,6 @@
#ifndef UNICODE_TABLE_UHC_H
#define UNICODE_TABLE_UHC_H
/*
* Unicode table
*/
#ifdef UNICODE_TABLE_UHC_DEF
const unsigned short uhc1_ucs_table[] = {
0xac02,0xac03,0xac05,0xac06,0xac0b,0xac0c,0xac0d,0xac0e,
0xac0f,0xac18,0xac1e,0xac1f,0xac21,0xac22,0xac23,0xac25,
@ -7178,42 +7173,4 @@ const unsigned short ucs_r2_uhc_table[] = {
const int ucs_r2_uhc_table_min = 0xff00;
const int ucs_r2_uhc_table_max = 0xff00 + (sizeof (ucs_r2_uhc_table) / sizeof (unsigned short));
#else
extern const unsigned short uhc1_ucs_table[];
extern const unsigned short uhc2_ucs_table[];
extern const unsigned short uhc3_ucs_table[];
extern const unsigned short ucs_a1_uhc_table[];
extern const unsigned short ucs_a2_uhc_table[];
extern const unsigned short ucs_a3_uhc_table[];
extern const unsigned short ucs_i_uhc_table[];
extern const unsigned short ucs_s_uhc_table[];
extern const unsigned short ucs_r1_uhc_table[];
extern const unsigned short ucs_r2_uhc_table[];
extern const int uhc1_ucs_table_size;
extern const int uhc2_ucs_table_size;
extern const int uhc3_ucs_table_size;
extern const int ucs_a1_uhc_table_min;
extern const int ucs_a1_uhc_table_max;
extern const int ucs_a2_uhc_table_min;
extern const int ucs_a2_uhc_table_max;
extern const int ucs_a3_uhc_table_min;
extern const int ucs_a3_uhc_table_max;
extern const int ucs_i_uhc_table_min;
extern const int ucs_i_uhc_table_max;
extern const int ucs_s_uhc_table_min;
extern const int ucs_s_uhc_table_max;
extern const int ucs_r1_uhc_table_min;
extern const int ucs_r1_uhc_table_max;
extern const int ucs_r2_uhc_table_min;
extern const int ucs_r2_uhc_table_max;
#endif
#endif /* UNICODE_TABLE_UHC_H */

View file

@ -36,30 +36,8 @@
#include "mbfilter_8bit.h"
#include "mbfilter_wchar.h"
#include "filters/mbfilter_euc_cn.h"
#include "filters/mbfilter_hz.h"
#include "filters/mbfilter_euc_tw.h"
#include "filters/mbfilter_big5.h"
#include "filters/mbfilter_uhc.h"
#include "filters/mbfilter_euc_kr.h"
#include "filters/mbfilter_iso2022_kr.h"
#include "filters/mbfilter_sjis.h"
#include "filters/mbfilter_sjis_2004.h"
#include "filters/mbfilter_sjis_mobile.h"
#include "filters/mbfilter_sjis_mac.h"
#include "filters/mbfilter_cp51932.h"
#include "filters/mbfilter_jis.h"
#include "filters/mbfilter_iso2022_jp_ms.h"
#include "filters/mbfilter_iso2022jp_2004.h"
#include "filters/mbfilter_iso2022jp_mobile.h"
#include "filters/mbfilter_euc_jp.h"
#include "filters/mbfilter_euc_jp_2004.h"
#include "filters/mbfilter_euc_jp_win.h"
#include "filters/mbfilter_gb18030.h"
#include "filters/mbfilter_cp932.h"
#include "filters/mbfilter_cp936.h"
#include "filters/mbfilter_cp5022x.h"
#include "filters/mbfilter_base64.h"
#include "filters/mbfilter_cjk.h"
#include "filters/mbfilter_qprint.h"
#include "filters/mbfilter_uuencode.h"
#include "filters/mbfilter_7bit.h"

View file

@ -39,30 +39,8 @@
#include "mbfilter_pass.h"
#include "mbfilter_8bit.h"
#include "filters/mbfilter_euc_cn.h"
#include "filters/mbfilter_hz.h"
#include "filters/mbfilter_euc_tw.h"
#include "filters/mbfilter_big5.h"
#include "filters/mbfilter_uhc.h"
#include "filters/mbfilter_euc_kr.h"
#include "filters/mbfilter_iso2022_kr.h"
#include "filters/mbfilter_sjis.h"
#include "filters/mbfilter_sjis_mobile.h"
#include "filters/mbfilter_sjis_mac.h"
#include "filters/mbfilter_sjis_2004.h"
#include "filters/mbfilter_cp51932.h"
#include "filters/mbfilter_jis.h"
#include "filters/mbfilter_iso2022_jp_ms.h"
#include "filters/mbfilter_iso2022jp_2004.h"
#include "filters/mbfilter_iso2022jp_mobile.h"
#include "filters/mbfilter_euc_jp.h"
#include "filters/mbfilter_euc_jp_win.h"
#include "filters/mbfilter_euc_jp_2004.h"
#include "filters/mbfilter_gb18030.h"
#include "filters/mbfilter_cp932.h"
#include "filters/mbfilter_cp936.h"
#include "filters/mbfilter_cp5022x.h"
#include "filters/mbfilter_base64.h"
#include "filters/mbfilter_cjk.h"
#include "filters/mbfilter_qprint.h"
#include "filters/mbfilter_uuencode.h"
#include "filters/mbfilter_7bit.h"

View file

@ -142,6 +142,8 @@ convertInvalidString("\xEA", "%", "SJIS-win", "UTF-8");
convertInvalidString("\x81\x20", "%", "SJIS-win", "UTF-8");
convertInvalidString("\xEA\xA9", "%", "SJIS-win", "UTF-8");
echo 'mb_strlen("\x80\x81", "CP932") == ' . mb_strlen("\x80\x81", "CP932") . PHP_EOL;
echo "Done!\n";
?>
--EXPECT--
@ -151,4 +153,5 @@ Unicode -> CP932 conversion works on all invalid codepoints
SJIS-win verification and conversion works on all valid characters
SJIS-win verification and conversion works on all invalid characters
Unicode -> SJIS-win conversion works on all invalid codepoints
mb_strlen("\x80\x81", "CP932") == 2
Done!