@mbfilter is changed to bundled version of libmbfl to prevent LGPL licence problem. mbregex is changed to oniguruma.

This commit is contained in:
Rui Hirokawa 2003-08-23 06:18:44 +00:00
parent d696f1267a
commit 5a80df26f3
235 changed files with 45638 additions and 24075 deletions

View file

@ -0,0 +1,16 @@
libmbfl is a streamable multibyte character code filter and
converter library.
libmbfl is distributed under LGPL 2.1 and bundled with PHP.
The original version of libmbfl is developed and distributed
at http://sourceforge.jp/project/php-i18n/ .
If you need to modify the bundled libmbfl, the change also has to be applied to
the libmbfl on sourceforge.jp.
If you have questions about libmbfl, please contact
hirokawa@php.net or moriyoshi@php.net.
See libmbfl/DISCLAIMER for licensing information of libmbfl.

View file

@ -2,41 +2,194 @@ dnl
dnl $Id$
dnl
PHP_ARG_ENABLE(mbstring, whether to enable multibyte string support,
[ --enable-mbstring Enable multibyte string support])
dnl
dnl PHP_MBSTRING_INIT
dnl
dnl Resets every shell variable that the other PHP_MBSTRING_* macros
dnl accumulate into or test, so that each configure run starts from empty
dnl source, build-dir, config-header and CFLAGS lists.
dnl
AC_DEFUN([PHP_MBSTRING_INIT], [
PHP_MBSTRING_SOURCES=""
PHP_MBSTRING_EXTRA_BUILD_DIRS=""
PHP_MBSTRING_EXTRA_CONFIG_HEADERS=""
PHP_MBSTRING=""
PHP_MBREGEX=""
PHP_MBSTRING_CFLAGS=""
])
if test "$PHP_MBSTRING" != "no"; then
AC_DEFINE(HAVE_MBSTRING,1,[whether to have multibyte string support])
dnl
dnl PHP_MBSTRING_ADD_SOURCES(sources)
dnl
dnl Appends the whitespace-separated source list given as the first argument
dnl to the PHP_MBSTRING_SOURCES shell variable.
dnl
AC_DEFUN([PHP_MBSTRING_ADD_SOURCES], [
PHP_MBSTRING_SOURCES="$PHP_MBSTRING_SOURCES $1"
])
if test "$PHP_MBSTRING" != "no" -o "$PHP_MBSTRING" = "ja"; then
AC_DEFINE(HAVE_MBSTR_JA,1,[whether to have japanese support])
fi
if test "$PHP_MBSTRING" = "cn"; then
AC_DEFINE(HAVE_MBSTR_CN,1,[whether to have simplified chinese support])
fi
if test "$PHP_MBSTRING" = "tw"; then
AC_DEFINE(HAVE_MBSTR_TW,1,[whether to have traditional chinese support])
fi
if test "$PHP_MBSTRING" = "kr"; then
AC_DEFINE(HAVE_MBSTR_KR,1,[whether to have korean support])
fi
if test "$PHP_MBSTRING" = "ru"; then
AC_DEFINE(HAVE_MBSTR_RU,1,[whether to have russian support])
fi
if test "$PHP_MBSTRING" = "all"; then
AC_DEFINE(HAVE_MBSTR_JA,1,[whether to have japanese support])
AC_DEFINE(HAVE_MBSTR_CN,1,[whether to have simplified chinese support])
AC_DEFINE(HAVE_MBSTR_TW,1,[whether to have traditional chinese support])
AC_DEFINE(HAVE_MBSTR_KR,1,[whether to have korean support])
AC_DEFINE(HAVE_MBSTR_RU,1,[whether to have russian support])
fi
dnl
dnl PHP_MBSTRING_ADD_BUILD_DIR(dir)
dnl
dnl Appends a directory to PHP_MBSTRING_EXTRA_BUILD_DIRS. The list is
dnl consumed later by PHP_MBSTRING_EXTENSION, which prefixes each entry
dnl with the extension build directory.
dnl
AC_DEFUN([PHP_MBSTRING_ADD_BUILD_DIR], [
PHP_MBSTRING_EXTRA_BUILD_DIRS="$PHP_MBSTRING_EXTRA_BUILD_DIRS $1"
])
PHP_NEW_EXTENSION(mbstring, mbfilter_ja.c mbfilter_cn.c mbfilter_tw.c mbfilter_kr.c mbfilter_ru.c mbfilter.c mbstring.c mbregex.c php_mbregex.c html_entities.c php_unicode.c mb_gpc.c, $ext_shared)
fi
dnl
dnl PHP_MBSTRING_CONFIG_HEADER(header)
dnl
dnl Appends a config header path to PHP_MBSTRING_EXTRA_CONFIG_HEADERS. The
dnl list is consumed later by PHP_MBSTRING_EXTENSION.
dnl
AC_DEFUN([PHP_MBSTRING_CONFIG_HEADER], [
PHP_MBSTRING_EXTRA_CONFIG_HEADERS="$PHP_MBSTRING_EXTRA_CONFIG_HEADERS $1"
])
PHP_ARG_ENABLE(mbregex, whether to enable multibyte regex support,
[ --disable-mbregex Disable multibyte regex support], yes, no)
dnl
dnl PHP_MBSTRING_EXTENSION
dnl
dnl Final step: registers the mbstring extension with the sources collected
dnl in PHP_MBSTRING_SOURCES and passes the make variable PHP_MBSTRING_CFLAGS
dnl as extra compiler flags. It then registers every queued build directory
dnl and config header relative to the extension build directory. When
dnl LIBMBFL_MODULE_TYPE is "builtin", the bundled libmbfl include paths are
dnl prepended to PHP_MBSTRING_CFLAGS before the variable is substituted.
dnl
AC_DEFUN([PHP_MBSTRING_EXTENSION], [
PHP_NEW_EXTENSION(mbstring, $PHP_MBSTRING_SOURCES, $ext_shared,, \\$(PHP_MBSTRING_CFLAGS))
for dir in $PHP_MBSTRING_EXTRA_BUILD_DIRS; do
PHP_ADD_BUILD_DIR([$ext_builddir/$dir])
done
for cfg in $PHP_MBSTRING_EXTRA_CONFIG_HEADERS; do
AC_CONFIG_HEADER([$ext_builddir/$cfg])
done
if test "$LIBMBFL_MODULE_TYPE" = "builtin"; then
PHP_MBSTRING_CFLAGS="-I$ext_srcdir/libmbfl/mbfl $PHP_MBSTRING_CFLAGS"
PHP_MBSTRING_CFLAGS="-I$ext_srcdir/libmbfl $PHP_MBSTRING_CFLAGS"
fi
PHP_SUBST(PHP_MBSTRING_CFLAGS)
])
dnl
dnl PHP_MBSTRING_SETUP
dnl
dnl Registers the --enable-mbstring option. When the option value is not
dnl "no", defines HAVE_MBSTRING, defines the HAVE_MBSTR_xx symbols selected
dnl by the option value (ja, cn, tw, kr, ru or all), and queues the core
dnl extension sources.
dnl
AC_DEFUN([PHP_MBSTRING_SETUP], [
  PHP_ARG_ENABLE(mbstring, whether to enable multibyte string support,
  [ --enable-mbstring Enable multibyte string support])

  if test "$PHP_MBSTRING" != "no"; then
    AC_DEFINE([HAVE_MBSTRING],1,[whether to have multibyte string support])

    if test -z "$PHP_MBSTRING" -o "$PHP_MBSTRING" = "all" -o "$PHP_MBSTRING" = "ja"; then
      AC_DEFINE([HAVE_MBSTR_JA],1,[whether to have japanese support])
    fi

    if test "$PHP_MBSTRING" = "all" -o "$PHP_MBSTRING" = "cn"; then
      AC_DEFINE([HAVE_MBSTR_CN],1,[whether to have simplified chinese support])
    fi

    if test "$PHP_MBSTRING" = "all" -o "$PHP_MBSTRING" = "tw"; then
      AC_DEFINE([HAVE_MBSTR_TW],1,[whether to have traditional chinese support])
    fi

    dnl The variable name was previously misspelled here, so the "kr" value
    dnl never matched and Korean support was only enabled through "all".
    if test "$PHP_MBSTRING" = "all" -o "$PHP_MBSTRING" = "kr"; then
      AC_DEFINE([HAVE_MBSTR_KR],1,[whether to have korean support])
    fi

    if test "$PHP_MBSTRING" = "all" -o "$PHP_MBSTRING" = "ru"; then
      AC_DEFINE([HAVE_MBSTR_RU],1,[whether to have russian support])
    fi

    PHP_MBSTRING_ADD_SOURCES([
      mbstring.c php_unicode.c mb_gpc.c php_mbfl_allocators.c
    ])
  fi
])
dnl
dnl PHP_MBSTRING_SETUP_MBREGEX
dnl
dnl Registers the --disable-mbregex option. When both mbregex and mbstring
dnl are enabled, it probes for stdarg.h style variadic functions, defines
dnl HAVE_MBREGEX, adds -DNOT_RUBY to the extension CFLAGS, and queues the
dnl bundled oniguruma build directory, config header and sources.
dnl
AC_DEFUN([PHP_MBSTRING_SETUP_MBREGEX], [
  PHP_ARG_ENABLE([mbregex], [whether to enable multibyte regex support],
  [ --disable-mbregex Disable multibyte regex support], yes, no)

  if test "$PHP_MBREGEX" != "no" -a "$PHP_MBSTRING" != "no"; then
    dnl The helper function lives in the prologue argument: the second
    dnl argument is pasted inside the body of main, where a function
    dnl definition only compiles with the GCC nested-function extension.
    dnl The cache variable name contains _cv_ so that configure caches it.
    AC_CACHE_CHECK(for variable length prototypes and stdarg.h, php_cv_mbstring_stdarg, [
      AC_TRY_COMPILE([
#include <stdarg.h>
int foo(int x, ...) {
  va_list va;
  va_start(va, x);
  va_arg(va, int);
  va_arg(va, char *);
  va_arg(va, double);
  va_end(va);
  return 0;
}
      ], [
        return foo(10, "", 3.14);
      ], [php_cv_mbstring_stdarg=yes], [php_cv_mbstring_stdarg=no])
    ])

    if test "$php_cv_mbstring_stdarg" = "yes"; then
      AC_DEFINE([HAVE_STDARG_PROTOTYPES], 1, [Define if stdarg.h is available])
    fi

    AC_DEFINE([HAVE_MBREGEX], 1, [whether to have multibyte regex support])

    PHP_MBSTRING_CFLAGS="-DNOT_RUBY $PHP_MBSTRING_CFLAGS"

    PHP_MBSTRING_ADD_BUILD_DIR([oniguruma])
    PHP_MBSTRING_CONFIG_HEADER([oniguruma/config.h])

    PHP_MBSTRING_ADD_SOURCES([
      php_mbregex.c
      oniguruma/regcomp.c
      oniguruma/regerror.c
      oniguruma/regexec.c
      oniguruma/reggnu.c
      oniguruma/regparse.c
      oniguruma/regposerr.c
    ])
  fi
])
dnl
dnl PHP_MBSTRING_SETUP_LIBMBFL
dnl
dnl Registers the --with-libmbfl option and, when mbstring is enabled, wires
dnl the bundled libmbfl into the extension: sets LIBMBFL_MODULE_TYPE to
dnl builtin, defines HAVE_LIBMBFL, and queues the libmbfl build directories,
dnl config header and sources.
dnl
dnl NOTE(review): the value given to --with-libmbfl is not consulted here;
dnl the bundled copy is always selected. Confirm whether that is intended.
dnl
AC_DEFUN([PHP_MBSTRING_SETUP_LIBMBFL], [
  PHP_ARG_WITH(libmbfl, [ include libmbfl support],
  [ --with-libmbfl[=DIR] Include libmbfl support where DIR is libmbfl install prefix.
If DIR is not set, the bundled libmbfl will be used.], no, no)

  if test "$PHP_MBSTRING" != "no"; then
    LIBMBFL_MODULE_TYPE=builtin
    AC_DEFINE([HAVE_LIBMBFL], 1, [whether to have libmbfl support])

    dnl Every directory that holds a source file below needs a matching
    dnl build directory, otherwise out-of-tree builds have nowhere to put
    dnl the object files.
    PHP_MBSTRING_ADD_BUILD_DIR([libmbfl])
    PHP_MBSTRING_ADD_BUILD_DIR([libmbfl/filters])
    PHP_MBSTRING_ADD_BUILD_DIR([libmbfl/mbfl])
    PHP_MBSTRING_ADD_BUILD_DIR([libmbfl/nls])
    PHP_MBSTRING_CONFIG_HEADER([libmbfl/config.h])

    PHP_MBSTRING_ADD_SOURCES([
      libmbfl/filters/html_entities.c
      libmbfl/filters/mbfilter_7bit.c
      libmbfl/filters/mbfilter_ascii.c
      libmbfl/filters/mbfilter_base64.c
      libmbfl/filters/mbfilter_big5.c
      libmbfl/filters/mbfilter_byte2.c
      libmbfl/filters/mbfilter_byte4.c
      libmbfl/filters/mbfilter_cp1251.c
      libmbfl/filters/mbfilter_cp1252.c
      libmbfl/filters/mbfilter_cp866.c
      libmbfl/filters/mbfilter_cp932.c
      libmbfl/filters/mbfilter_cp936.c
      libmbfl/filters/mbfilter_euc_cn.c
      libmbfl/filters/mbfilter_euc_jp.c
      libmbfl/filters/mbfilter_euc_jp_win.c
      libmbfl/filters/mbfilter_euc_kr.c
      libmbfl/filters/mbfilter_euc_tw.c
      libmbfl/filters/mbfilter_htmlent.c
      libmbfl/filters/mbfilter_hz.c
      libmbfl/filters/mbfilter_iso2022_kr.c
      libmbfl/filters/mbfilter_iso8859_1.c
      libmbfl/filters/mbfilter_iso8859_10.c
      libmbfl/filters/mbfilter_iso8859_13.c
      libmbfl/filters/mbfilter_iso8859_14.c
      libmbfl/filters/mbfilter_iso8859_15.c
      libmbfl/filters/mbfilter_iso8859_2.c
      libmbfl/filters/mbfilter_iso8859_3.c
      libmbfl/filters/mbfilter_iso8859_4.c
      libmbfl/filters/mbfilter_iso8859_5.c
      libmbfl/filters/mbfilter_iso8859_6.c
      libmbfl/filters/mbfilter_iso8859_7.c
      libmbfl/filters/mbfilter_iso8859_8.c
      libmbfl/filters/mbfilter_iso8859_9.c
      libmbfl/filters/mbfilter_jis.c
      libmbfl/filters/mbfilter_koi8r.c
      libmbfl/filters/mbfilter_qprint.c
      libmbfl/filters/mbfilter_sjis.c
      libmbfl/filters/mbfilter_ucs2.c
      libmbfl/filters/mbfilter_ucs4.c
      libmbfl/filters/mbfilter_uhc.c
      libmbfl/filters/mbfilter_utf16.c
      libmbfl/filters/mbfilter_utf32.c
      libmbfl/filters/mbfilter_utf7.c
      libmbfl/filters/mbfilter_utf7imap.c
      libmbfl/filters/mbfilter_utf8.c
      libmbfl/filters/mbfilter_uuencode.c
      libmbfl/mbfl/mbfilter.c
      libmbfl/mbfl/mbfilter_8bit.c
      libmbfl/mbfl/mbfilter_pass.c
      libmbfl/mbfl/mbfilter_wchar.c
      libmbfl/mbfl/mbfl_convert.c
      libmbfl/mbfl/mbfl_encoding.c
      libmbfl/mbfl/mbfl_filter_output.c
      libmbfl/mbfl/mbfl_ident.c
      libmbfl/mbfl/mbfl_language.c
      libmbfl/mbfl/mbfl_memory_device.c
      libmbfl/mbfl/mbfl_string.c
      libmbfl/nls/nls_de.c
      libmbfl/nls/nls_en.c
      libmbfl/nls/nls_ja.c
      libmbfl/nls/nls_kr.c
      libmbfl/nls/nls_neutral.c
      libmbfl/nls/nls_ru.c
      libmbfl/nls/nls_uni.c
      libmbfl/nls/nls_zh.c
    ])
  fi
])
dnl Run the configuration steps. Order matters: INIT clears the accumulator
dnl variables, the SETUP macros fill them in, and EXTENSION consumes them.
PHP_MBSTRING_INIT
PHP_MBSTRING_SETUP
PHP_MBSTRING_SETUP_MBREGEX
PHP_MBSTRING_SETUP_LIBMBFL
PHP_MBSTRING_EXTENSION
if test "$PHP_MBREGEX" != "no" -a "$PHP_MBSTRING" != "no"; then
AC_DEFINE(HAVE_MBREGEX, 1, [whether to have multibyte regex support])
fi

View file

@ -0,0 +1,8 @@
Hironori Sato <satoh@jpnnet.com>
Shigeru Kanemoto <sgk@happysize.co.jp>
Tsukada Takuya <tsukada@fminn.nagano.nagano.jp>
U. Kenkichi <kenkichi@axes.co.jp>
Tateyama <tateyan@amy.hi-ho.ne.jp>
Moriyoshi Koizumi <moriyoshi@php.net>
Rui Hirokawa <hirokawa@php.net>
Other gracious contributors

View file

@ -0,0 +1,68 @@
# charset=UTF-8
"streamable kanji code filter and converter"
Copyright (c) 1998,1999,2000,2001 HappySize, Inc. All rights reserved.
This software is released under the GNU Lesser General Public License.
(Version 2.1, February 1999)
Please read the following detail of the licence (in japanese).
◆使用許諾条件◆
このソフトウェアは株式会社ハッピーサイズによって開発されました。株式会社ハッ
ピーサイズは、著作権法および万国著作権条約の定めにより、このソフトウェアに関
するすべての権利を留保する権利を持ち、ここに行使します。株式会社ハッピーサイ
ズは以下に明記した条件に従って、このソフトウェアを使用する排他的ではない権利
をお客様に許諾します。何人たりとも、以下の条件に反してこのソフトウェアを使用
することはできません。
このソフトウェアを「GNU Lesser General Public License (Version 2.1, February
1999)」に示された条件で使用することを、全ての方に許諾します。「GNU Lesser
General Public License」を満たさない使用には、株式会社ハッピーサイズから書面
による許諾を得る必要があります。
「GNU Lesser General Public License」の全文は以下のウェブページから取得でき
ます。「GNU Lesser General Public License」とは、これまでLibrary General
Public Licenseと呼ばれていたものです。
http://www.gnu.org/ --- GNUウェブサイト
http://www.gnu.org/copyleft/lesser.html --- ライセンス文面
このライセンスの内容がわからない方、守れない方には使用を許諾しません。
しかしながら、当社とGNUプロジェクトとの特定の関係を示唆または主張するもので
はありません。
◆保証内容◆
このソフトウェアは、期待された動作・機能・性能を持つことを目標として設計され
開発されていますが、これを保証するものではありません。このソフトウェアは「こ
のまま」の状態で提供されており、たとえばこのソフトウェアの有用性ないし特定の
目的に合致することといった、何らかの保証内容が、明示されたり暗黙に示されてい
る場合であっても、その保証は無効です。このソフトウェアを使用した結果ないし使
用しなかった結果によって、直接あるいは間接に受けた身体的な傷害、財産上の損害
、データの損失あるいはその他の全ての損害については、その損害の可能性が使用者
、当社あるいは第三者によって警告されていた場合であっても、当社はその損害の賠
償および補填を行いません。この規定は他の全ての、書面上または書面に無い保証・
契約・規定に優先します。
◆著作権者の連絡先および使用条件についての問い合わせ先◆
〒102-0073
東京都千代田区九段北1-13-5日本地所第一ビル4F
株式会社ハッピーサイズ
Phone: 03-3512-3655, Fax: 03-3512-3656
Email: sales@happysize.co.jp
Web: http://happysize.com/
◆著者◆
金本 茂 <sgk@happysize.co.jp>
◆履歴◆
1998/11/10 sgk implementation in C++
1999/4/25 sgk Cで書きなおし。
1999/4/26 sgk 入力フィルタを実装。漢字コードを推定しながらフィルタを追加。
1999/6/?? Unicodeサポート。
1999/6/22 sgk ライセンスをLGPLに変更。

View file

@ -0,0 +1,12 @@
For UNIX compatible system,
./buildconf
./configure
make
make install
For Windows,
Visual C++ 6.0 and Borland C++ 5.0 are supported.

View file

@ -0,0 +1,2 @@
EXTRA_DIST=AUTHORS DISCLAIMER
SUBDIRS = nls filters mbfl

View file

@ -0,0 +1,8 @@
This is Libmbfl, a streamable multibyte character code filter and
converter library.
See DISCLAIMER for licensing information.
See the file INSTALL for instructions on how to build and install
libmbfl.

6
ext/mbstring/libmbfl/buildconf Executable file
View file

@ -0,0 +1,6 @@
#!/bin/sh
# Bootstrap script for the standalone libmbfl build: runs the autotools
# one after another to regenerate the build system files.
# NOTE(review): the exit status of each step is not checked, so a failing
# tool does not stop the later ones.
libtoolize -c -f --automake
aclocal
autoheader
automake -a -c --foreign
autoconf

View file

@ -0,0 +1,79 @@
/* ext/mbstring/libmbfl/config.h. Generated by configure. */
/* config.h.in. Generated from configure.in by autoheader. */
/* Define to 1 if you have the <assert.h> header file. */
#define HAVE_ASSERT_H 1
/* Define to 1 if you have the <inttypes.h> header file. */
/* #undef HAVE_INTTYPES_H */
/* Define to 1 if your system has a GNU libc compatible `malloc' function, and
to 0 otherwise. */
/* #undef HAVE_MALLOC */
/* Define to 1 if you have the <memory.h> header file. */
/* #undef HAVE_MEMORY_H */
/* Define to 1 if your system has a GNU libc compatible `realloc' function,
and to 0 otherwise. */
/* #undef HAVE_REALLOC */
/* Define to 1 if you have the <stddef.h> header file. */
/* #undef HAVE_STDDEF_H */
/* Define to 1 if you have the <stdint.h> header file. */
/* #undef HAVE_STDINT_H */
/* Define to 1 if you have the <stdlib.h> header file. */
#define HAVE_STDLIB_H 1
/* Define to 1 if you have the `strcasecmp' function. */
#define HAVE_STRCASECMP 1
/* Define to 1 if you have the `strchr' function. */
/* #undef HAVE_STRCHR */
/* Define to 1 if you have the <strings.h> header file. */
/* #undef HAVE_STRINGS_H */
/* Define to 1 if you have the <string.h> header file. */
#define HAVE_STRING_H 1
/* Define to 1 if you have the <sys/stat.h> header file. */
/* #undef HAVE_SYS_STAT_H */
/* Define to 1 if you have the <sys/types.h> header file. */
#define HAVE_SYS_TYPES_H 1
/* Define to 1 if you have the <unistd.h> header file. */
#define HAVE_UNISTD_H 1
/* Define to the address where bug reports for this package should be sent. */
#define PACKAGE_BUGREPORT ""
/* Define to the full name of this package. */
#define PACKAGE_NAME ""
/* Define to the full name and version of this package. */
#define PACKAGE_STRING ""
/* Define to the one symbol short name of this package. */
#define PACKAGE_TARNAME ""
/* Define to the version of this package. */
#define PACKAGE_VERSION ""
/* Define to 1 if you have the ANSI C header files. */
#define STDC_HEADERS 1
/* Define to empty if `const' does not conform to ANSI C. */
/* #undef const */
/* Define to rpl_malloc if the replacement function should be used. */
/* #undef malloc */
/* Define to rpl_realloc if the replacement function should be used. */
/* #undef realloc */
/* Define to `unsigned' if <sys/types.h> does not define. */
/* #undef size_t */

View file

@ -0,0 +1,12 @@
#define HAVE_STDIO_H 1
#define HAVE_STDLIB_H 1
#define HAVE_STDDEF_H 1
#define HAVE_ASSERT_H 1
#define HAVE_MEMORY_H 1
/* #undef HAVE_STRINGS_H */
#define HAVE_STRING_H 1
/* #undef HAVE_STRCASECMP */
#define HAVE_STRICMP 1
#define HAVE_WIN32_NATIVE_THREAD 1
#define USE_WIN32_NATIVE_THREAD 1
#define ENABLE_THREADS 1

View file

@ -0,0 +1,78 @@
/* config.h.in. Generated from configure.in by autoheader. */
/* Define to 1 if you have the <assert.h> header file. */
#undef HAVE_ASSERT_H
/* Define to 1 if you have the <inttypes.h> header file. */
#undef HAVE_INTTYPES_H
/* Define to 1 if your system has a GNU libc compatible `malloc' function, and
to 0 otherwise. */
#undef HAVE_MALLOC
/* Define to 1 if you have the <memory.h> header file. */
#undef HAVE_MEMORY_H
/* Define to 1 if your system has a GNU libc compatible `realloc' function,
and to 0 otherwise. */
#undef HAVE_REALLOC
/* Define to 1 if you have the <stddef.h> header file. */
#undef HAVE_STDDEF_H
/* Define to 1 if you have the <stdint.h> header file. */
#undef HAVE_STDINT_H
/* Define to 1 if you have the <stdlib.h> header file. */
#undef HAVE_STDLIB_H
/* Define to 1 if you have the `strcasecmp' function. */
#undef HAVE_STRCASECMP
/* Define to 1 if you have the `strchr' function. */
#undef HAVE_STRCHR
/* Define to 1 if you have the <strings.h> header file. */
#undef HAVE_STRINGS_H
/* Define to 1 if you have the <string.h> header file. */
#undef HAVE_STRING_H
/* Define to 1 if you have the <sys/stat.h> header file. */
#undef HAVE_SYS_STAT_H
/* Define to 1 if you have the <sys/types.h> header file. */
#undef HAVE_SYS_TYPES_H
/* Define to 1 if you have the <unistd.h> header file. */
#undef HAVE_UNISTD_H
/* Define to the address where bug reports for this package should be sent. */
#undef PACKAGE_BUGREPORT
/* Define to the full name of this package. */
#undef PACKAGE_NAME
/* Define to the full name and version of this package. */
#undef PACKAGE_STRING
/* Define to the one symbol short name of this package. */
#undef PACKAGE_TARNAME
/* Define to the version of this package. */
#undef PACKAGE_VERSION
/* Define to 1 if you have the ANSI C header files. */
#undef STDC_HEADERS
/* Define to empty if `const' does not conform to ANSI C. */
#undef const
/* Define to rpl_malloc if the replacement function should be used. */
#undef malloc
/* Define to rpl_realloc if the replacement function should be used. */
#undef realloc
/* Define to `unsigned' if <sys/types.h> does not define. */
#undef size_t

View file

@ -0,0 +1,14 @@
#define HAVE_STDIO_H 1
#define HAVE_STDLIB_H 1
#define HAVE_STDDEF_H 1
#define HAVE_ASSERT_H 1
#define HAVE_MEMORY_H 1
/* #undef HAVE_STRINGS_H */
#define HAVE_STRING_H 1
/* #undef HAVE_STRCASECMP */
#define HAVE_STRICMP 1
#define HAVE_WIN32_NATIVE_THREAD 1
#define USE_WIN32_NATIVE_THREAD 1
#define ENABLE_THREADS 1
#define strcasecmp(s1, s2) stricmp(s1, s2)

View file

@ -0,0 +1,29 @@
# Process this file with autoconf to produce a configure script.
AC_INIT([libmbfl],[1.0.0])
AM_INIT_AUTOMAKE
AC_CONFIG_SRCDIR([mbfl/mbfilter.c])
AC_CONFIG_HEADER([config.h])
# Checks for programs.
AC_PROG_CC
AC_PROG_CXX
AC_PROG_LIBTOOL
AC_PROG_RANLIB
# Checks for libraries.
# Checks for header files.
AC_HEADER_STDC
AC_CHECK_HEADERS([stdlib.h stddef.h assert.h])
# Checks for typedefs, structures, and compiler characteristics.
AC_C_CONST
AC_TYPE_SIZE_T
# Checks for library functions.
AC_FUNC_MALLOC
AC_FUNC_REALLOC
AC_CHECK_FUNCS([strcasecmp strchr])
AC_CONFIG_FILES([Makefile mbfl/Makefile filters/Makefile nls/Makefile])
AC_OUTPUT

14
ext/mbstring/libmbfl/cvsclean Executable file
View file

@ -0,0 +1,14 @@
#!/bin/sh
# cvsclean_sub DIR
#
# Removes every file or directory listed in DIR/.cvsignore, resolved
# relative to DIR. Does nothing when DIR cannot be entered or has no
# .cvsignore file.
#
# Defined with the portable name() syntax because the script runs under
# /bin/sh, which is not guaranteed to accept the "function" keyword.
cvsclean_sub() {
	# Work in a subshell so the caller's working directory is never
	# changed, and so a failed cd can never lead to deleting files in
	# the wrong directory.
	(
		cd "$1" || exit 1
		[ -f .cvsignore ] || exit 0
		while read -r fname; do
			# $fname is left unquoted on purpose: .cvsignore entries
			# may be glob patterns that have to be expanded.
			rm -r -f $fname
		done < .cvsignore
	)
}
cvsclean_sub .
cvsclean_sub mbfl
cvsclean_sub filters
cvsclean_sub nls

View file

@ -0,0 +1,3 @@
noinst_LTLIBRARIES=libmbfl_filters.la
INCLUDES=-I../mbfl
libmbfl_filters_la_SOURCES=mbfilter_cp936.c mbfilter_hz.c mbfilter_euc_tw.c mbfilter_big5.c mbfilter_euc_jp.c mbfilter_jis.c mbfilter_iso8859_1.c mbfilter_iso8859_2.c mbfilter_cp1252.c mbfilter_cp1251.c mbfilter_ascii.c mbfilter_iso8859_3.c mbfilter_iso8859_4.c mbfilter_iso8859_5.c mbfilter_iso8859_6.c mbfilter_iso8859_7.c mbfilter_iso8859_8.c mbfilter_iso8859_9.c mbfilter_iso8859_10.c mbfilter_iso8859_13.c mbfilter_iso8859_14.c mbfilter_iso8859_15.c mbfilter_htmlent.c mbfilter_byte2.c mbfilter_byte4.c mbfilter_uuencode.c mbfilter_base64.c mbfilter_sjis.c mbfilter_7bit.c mbfilter_qprint.c mbfilter_ucs4.c mbfilter_ucs2.c mbfilter_utf32.c mbfilter_utf16.c mbfilter_utf8.c mbfilter_utf7.c mbfilter_utf7imap.c mbfilter_euc_jp_win.c mbfilter_cp932.c mbfilter_euc_cn.c mbfilter_euc_kr.c mbfilter_uhc.c mbfilter_iso2022_kr.c mbfilter_cp866.c mbfilter_koi8r.c html_entities.c

View file

@ -1,3 +1,30 @@
/*
* "streamable kanji code filter and converter"
* Copyright (c) 1998-2002 HappySize, Inc. All rights reserved.
*
* LICENSE NOTICES
*
* This file is part of "streamable kanji code filter and converter",
* which is distributed under the terms of GNU Lesser General Public
* License (version 2) as published by the Free Software Foundation.
*
* This software is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with "streamable kanji code filter and converter";
* if not, write to the Free Software Foundation, Inc., 59 Temple Place,
* Suite 330, Boston, MA 02111-1307 USA
*
* The author of this file:
*
*/
#ifndef CP932_TABLE_H
#define CP932_TABLE_H
/*
* cp932 table
*/
@ -64,3 +91,5 @@ static const unsigned short cp932ext3_eucjp_table[] = {
};
static const int cp932ext3_eucjp_table_size = (sizeof (cp932ext3_eucjp_table) / sizeof (unsigned short));
#endif /* CP932_TABLE_H */

View file

@ -1,31 +1,36 @@
/*
+----------------------------------------------------------------------+
| PHP Version 4 |
+----------------------------------------------------------------------+
| Copyright (c) 1997-2003 The PHP Group |
+----------------------------------------------------------------------+
| This source file is subject to version 3.0 of the PHP license, |
| that is bundled with this package in the file LICENSE, and is |
| available through the world-wide-web at the following url: |
| http://www.php.net/license/3_0.txt. |
| If you did not receive a copy of the PHP license and are unable to |
| obtain it through the world-wide-web, please send a note to |
| license@php.net so we can mail you a copy immediately. |
+----------------------------------------------------------------------+
| Author: Marcus Boerger <helly@php.net> |
+----------------------------------------------------------------------+
* "streamable kanji code filter and converter"
* Copyright (c) 1998-2002 HappySize, Inc. All rights reserved.
*
* LICENSE NOTICES
*
* This file is part of "streamable kanji code filter and converter",
* which is distributed under the terms of GNU Lesser General Public
* License (version 2) as published by the Free Software Foundation.
*
* This software is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with "streamable kanji code filter and converter";
* if not, write to the Free Software Foundation, Inc., 59 Temple Place,
* Suite 330, Boston, MA 02111-1307 USA
*
* The author of this part: Marcus Boerger <marcus.boerger@t-online.de>
*
*/
/*
* The source code included in this file was separated from mbfilter.c
* by moriyoshi koizumi <moriyoshi@php.net> on 4 dec 2002.
*
*/
/* $Id$ */
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
#include "php.h"
#include "php_globals.h"
#include <stdlib.h>
#include "mbfilter.h"
const mbfl_html_entity mbfl_html_entity_list[] = { /*

View file

@ -0,0 +1,38 @@
/*
* "streamable kanji code filter and converter"
* Copyright (c) 1998-2002 HappySize, Inc. All rights reserved.
*
* LICENSE NOTICES
*
* This file is part of "streamable kanji code filter and converter",
* which is distributed under the terms of GNU Lesser General Public
* License (version 2) as published by the Free Software Foundation.
*
* This software is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with "streamable kanji code filter and converter";
* if not, write to the Free Software Foundation, Inc., 59 Temple Place,
* Suite 330, Boston, MA 02111-1307 USA
*
* The author of this part: Marcus Boerger <marcus.boerger@t-online.de>
*
*/
/*
* The source code included in this file was separated from mbfilter.h
* by moriyoshi koizumi <moriyoshi@php.net> on 4 dec 2002.
*
*/
#ifndef MBFL_HTML_ENTITIES_H
#define MBFL_HTML_ENTITIES_H
#include "mbfilter.h"
/*
 * Table of HTML entities.
 *
 * The table is defined as `const mbfl_html_entity mbfl_html_entity_list[]`,
 * so the declaration carries the same const qualifier: a non-const extern
 * would be an incompatible declaration of a read-only object.
 */
extern const mbfl_html_entity mbfl_html_entity_list[];
#endif /* MBFL_HTML_ENTITIES_H */

View file

@ -0,0 +1,82 @@
/*
* "streamable kanji code filter and converter"
* Copyright (c) 1998-2002 HappySize, Inc. All rights reserved.
*
* LICENSE NOTICES
*
* This file is part of "streamable kanji code filter and converter",
* which is distributed under the terms of GNU Lesser General Public
* License (version 2) as published by the Free Software Foundation.
*
* This software is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with "streamable kanji code filter and converter";
* if not, write to the Free Software Foundation, Inc., 59 Temple Place,
* Suite 330, Boston, MA 02111-1307 USA
*
* The author of this file:
*
*/
/*
* The source code included in this file was separated from mbfilter.c
* by Moriyoshi Koizumi <moriyoshi@php.net> on 4 Dec 2002. The file
* mbfilter.c is included in this package.
*
*/
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
#include "mbfilter.h"
#include "mbfilter_7bit.h"
/*
 * Encoding descriptor for "7bit".
 * NOTE(review): field meanings are inferred from the sibling ASCII
 * descriptor (id, name, MIME name, alias list, unknown pointer, type
 * flags) -- confirm against the mbfl_encoding declaration.
 */
const mbfl_encoding mbfl_encoding_7bit = {
mbfl_no_encoding_7bit,
"7bit",
"7bit",
NULL,
NULL,
MBFL_ENCTYPE_SBCS
};
/*
 * Conversion filter table: 8bit => 7bit.
 * NOTE(review): entries appear to be source encoding, destination encoding,
 * constructor, destructor, per-character filter function and flush
 * function -- confirm against struct mbfl_convert_vtbl.
 */
const struct mbfl_convert_vtbl vtbl_8bit_7bit = {
mbfl_no_encoding_8bit,
mbfl_no_encoding_7bit,
mbfl_filt_conv_common_ctor,
mbfl_filt_conv_common_dtor,
mbfl_filt_conv_any_7bit,
mbfl_filt_conv_common_flush
};
/*
 * Conversion filter table: 7bit => 8bit (same layout as above).
 */
const struct mbfl_convert_vtbl vtbl_7bit_8bit = {
mbfl_no_encoding_7bit,
mbfl_no_encoding_8bit,
mbfl_filt_conv_common_ctor,
mbfl_filt_conv_common_dtor,
mbfl_filt_conv_7bit_any,
mbfl_filt_conv_common_flush
};
/*
 * CK(statement): evaluates `statement` and makes the enclosing function
 * return -1 when the result is negative. The do/while(0) wrapper lets the
 * macro be used like a single statement.
 */
#define CK(statement) do { if ((statement) < 0) return (-1); } while (0)
/*
 * 7bit => any
 *
 * Hands the character `c` unchanged to the filter's output function,
 * together with the filter's data pointer, and returns whatever that
 * function returns.
 */
int mbfl_filt_conv_7bit_any(int c, mbfl_convert_filter *filter)
{
	return filter->output_function(c, filter->data);
}
/*
 * any => 7bit
 *
 * Forwards `c` to the filter's output function only when it fits in seven
 * bits (0x00..0x7f); any other value is dropped without being forwarded.
 *
 * Returns -1 when the output function reports a negative result,
 * otherwise returns `c`.
 */
int mbfl_filt_conv_any_7bit(int c, mbfl_convert_filter *filter)
{
	if (c < 0 || c >= 0x80) {
		/* outside the 7-bit range: nothing is emitted */
		return c;
	}

	if ((*filter->output_function)(c, filter->data) < 0) {
		return -1;
	}

	return c;
}

View file

@ -0,0 +1,43 @@
/*
* "streamable kanji code filter and converter"
* Copyright (c) 1998-2002 HappySize, Inc. All rights reserved.
*
* LICENSE NOTICES
*
* This file is part of "streamable kanji code filter and converter",
* which is distributed under the terms of GNU Lesser General Public
* License (version 2) as published by the Free Software Foundation.
*
* This software is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with "streamable kanji code filter and converter";
* if not, write to the Free Software Foundation, Inc., 59 Temple Place,
* Suite 330, Boston, MA 02111-1307 USA
*
* The author of this file:
*
*/
/*
* The source code included in this file was separated from mbfilter.c
* by Moriyoshi Koizumi <moriyoshi@php.net> on 4 Dec 2002. The file
* mbfilter.c is included in this package.
*
*/
/* Public declarations for the "7bit" encoding and its conversion filters. */
#ifndef MBFL_MBFILTER_7BIT_H
#define MBFL_MBFILTER_7BIT_H
#include "mbfilter.h"
/* Encoding descriptor for "7bit". */
extern const mbfl_encoding mbfl_encoding_7bit;
/* Conversion filter tables: 8bit => 7bit and 7bit => 8bit. */
extern const struct mbfl_convert_vtbl vtbl_8bit_7bit;
extern const struct mbfl_convert_vtbl vtbl_7bit_8bit;
/* Per-character filter functions referenced by the tables above. */
int mbfl_filt_conv_7bit_any(int c, mbfl_convert_filter *filter);
int mbfl_filt_conv_any_7bit(int c, mbfl_convert_filter *filter);
#endif /* MBFL_MBFILTER_7BIT_H */

View file

@ -0,0 +1,114 @@
/*
* "streamable kanji code filter and converter"
* Copyright (c) 1998-2002 HappySize, Inc. All rights reserved.
*
* LICENSE NOTICES
*
* This file is part of "streamable kanji code filter and converter",
* which is distributed under the terms of GNU Lesser General Public
* License (version 2) as published by the Free Software Foundation.
*
* This software is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with "streamable kanji code filter and converter";
* if not, write to the Free Software Foundation, Inc., 59 Temple Place,
* Suite 330, Boston, MA 02111-1307 USA
*
* The author of this file:
*
*/
/*
* The source code included in this file was separated from mbfilter.c
* by Moriyoshi Koizumi <moriyoshi@php.net> on 4 Dec 2002. The file
* mbfilter.c is included in this package.
*
*/
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
#include "mbfilter.h"
#include "mbfilter_ascii.h"
static int mbfl_filt_ident_ascii(int c, mbfl_identify_filter *filter);
/* Alternative names accepted for the ASCII encoding; the list ends with NULL. */
static const char *mbfl_encoding_ascii_aliases[] = {
	"ANSI_X3.4-1968",
	"iso-ir-6",
	"ANSI_X3.4-1986",
	"ISO_646.irv:1991",
	"US-ASCII",
	"ISO646-US",
	"us",
	"IBM367",
	"cp367",
	"csASCII",
	NULL
};
/*
 * Encoding descriptor for ASCII.
 * NOTE(review): apart from the MIME name, field meanings are inferred from
 * the initializer values (id, name, alias list, unknown pointer, type
 * flags) -- confirm against the mbfl_encoding declaration.
 */
const mbfl_encoding mbfl_encoding_ascii = {
mbfl_no_encoding_ascii,
"ASCII",
"US-ASCII", /* preferred MIME name */
(const char *(*)[])&mbfl_encoding_ascii_aliases, /* address of the alias array, cast to the field's pointer-to-array type */
NULL,
MBFL_ENCTYPE_SBCS
};
/*
 * Identification filter table for ASCII.
 * NOTE(review): entries appear to be encoding id, constructor, destructor
 * and per-character identify function -- confirm against
 * struct mbfl_identify_vtbl.
 */
const struct mbfl_identify_vtbl vtbl_identify_ascii = {
mbfl_no_encoding_ascii,
mbfl_filt_ident_common_ctor,
mbfl_filt_ident_common_dtor,
mbfl_filt_ident_ascii
};
/*
 * Conversion filter table: ASCII => wchar.
 * NOTE(review): entries appear to be source encoding, destination encoding,
 * constructor, destructor, per-character filter function and flush
 * function -- confirm against struct mbfl_convert_vtbl.
 */
const struct mbfl_convert_vtbl vtbl_ascii_wchar = {
mbfl_no_encoding_ascii,
mbfl_no_encoding_wchar,
mbfl_filt_conv_common_ctor,
mbfl_filt_conv_common_dtor,
mbfl_filt_conv_ascii_wchar,
mbfl_filt_conv_common_flush
};
/*
 * Conversion filter table: wchar => ASCII (same layout as above).
 */
const struct mbfl_convert_vtbl vtbl_wchar_ascii = {
mbfl_no_encoding_wchar,
mbfl_no_encoding_ascii,
mbfl_filt_conv_common_ctor,
mbfl_filt_conv_common_dtor,
mbfl_filt_conv_wchar_ascii,
mbfl_filt_conv_common_flush
};
/*
 * CK(statement): evaluates `statement` and makes the enclosing function
 * return -1 when the result is negative. The do/while(0) wrapper lets the
 * macro be used like a single statement.
 */
#define CK(statement) do { if ((statement) < 0) return (-1); } while (0)
/*
 * ASCII => wchar
 *
 * Hands the character `c` unchanged to the filter's output function,
 * together with the filter's data pointer, and returns whatever that
 * function returns.
 */
int mbfl_filt_conv_ascii_wchar(int c, mbfl_convert_filter *filter)
{
	return filter->output_function(c, filter->data);
}
/*
 * wchar => ASCII
 *
 * Values in 0x00..0x7f are passed to the filter's output function. Any
 * other value is handed to mbfl_filt_conv_illegal_output(), unless the
 * filter's illegal_mode is MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE, in which
 * case it is dropped.
 *
 * Returns -1 when the function that was called reports a negative result,
 * otherwise returns `c`.
 */
int mbfl_filt_conv_wchar_ascii(int c, mbfl_convert_filter *filter)
{
	if (c >= 0 && c < 0x80) {
		return ((*filter->output_function)(c, filter->data) < 0) ? -1 : c;
	}

	/* not representable in ASCII */
	if (filter->illegal_mode == MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE) {
		return c;
	}

	return (mbfl_filt_conv_illegal_output(c, filter) < 0) ? -1 : c;
}
/*
 * Identify filter for ASCII.
 *
 * Accepts 0x20..0x7f as well as CR, LF, HTAB and NUL. For any other value
 * it sets filter->flag to 1 (presumably marking the input as not ASCII --
 * confirm against the code that reads the flag). Always returns `c`.
 */
static int mbfl_filt_ident_ascii(int c, mbfl_identify_filter *filter)
{
	int in_text_range = (c >= 0x20 && c < 0x80);
	int is_allowed_control = (c == 0x0d || c == 0x0a || c == 0x09 || c == 0);

	if (!in_text_range && !is_allowed_control) {
		filter->flag = 1;
	}

	return c;
}

View file

@ -0,0 +1,46 @@
/*
* "streamable kanji code filter and converter"
* Copyright (c) 1998-2002 HappySize, Inc. All rights reserved.
*
* LICENSE NOTICES
*
* This file is part of "streamable kanji code filter and converter",
* which is distributed under the terms of GNU Lesser General Public
* License (version 2) as published by the Free Software Foundation.
*
* This software is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with "streamable kanji code filter and converter";
* if not, write to the Free Software Foundation, Inc., 59 Temple Place,
* Suite 330, Boston, MA 02111-1307 USA
*
* The author of this file:
*
*/
/*
* The source code included in this file was separated from mbfilter.c
* by Moriyoshi Koizumi <moriyoshi@php.net> on 4 Dec 2002. The file
* mbfilter.c is included in this package.
*
*/
/* Public declarations for the ASCII encoding and its filters. */
#ifndef MBFL_MBFILTER_ASCII_H
#define MBFL_MBFILTER_ASCII_H
#include "mbfilter.h"
/* Encoding descriptor for ASCII. */
extern const mbfl_encoding mbfl_encoding_ascii;
/* Identification filter table for ASCII. */
extern const struct mbfl_identify_vtbl vtbl_identify_ascii;
/* Conversion filter tables: ASCII => wchar and wchar => ASCII. */
extern const struct mbfl_convert_vtbl vtbl_ascii_wchar;
extern const struct mbfl_convert_vtbl vtbl_wchar_ascii;
/* Per-character filter functions referenced by the conversion tables. */
int mbfl_filt_conv_ascii_wchar(int c, mbfl_convert_filter *filter);
int mbfl_filt_conv_wchar_ascii(int c, mbfl_convert_filter *filter);
#endif /* MBFL_MBFILTER_ASCII_H */

View file

@ -0,0 +1,214 @@
/*
* "streamable kanji code filter and converter"
* Copyright (c) 1998-2002 HappySize, Inc. All rights reserved.
*
* LICENSE NOTICES
*
* This file is part of "streamable kanji code filter and converter",
* which is distributed under the terms of GNU Lesser General Public
* License (version 2) as published by the Free Software Foundation.
*
* This software is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with "streamable kanji code filter and converter";
* if not, write to the Free Software Foundation, Inc., 59 Temple Place,
* Suite 330, Boston, MA 02111-1307 USA
*
* The author of this file:
*
*/
/*
* The source code included in this file was separated from mbfilter.c
* by Moriyoshi Koizumi <moriyoshi@php.net> on 4 Dec 2002. The file
* mbfilter.c is included in this package.
*
*/
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
#include "mbfilter.h"
#include "mbfilter_base64.h"
/*
 * Encoding descriptor for BASE64.
 * NOTE(review): the field order is fixed by mbfl_encoding in mbfilter.h,
 * which is not shown here; the two strings are presumably the encoding
 * name and its MIME name, and the two NULLs the alias list and the
 * byte-length table - confirm.
 */
const mbfl_encoding mbfl_encoding_base64 = {
mbfl_no_encoding_base64,
"BASE64",
"BASE64",
NULL,
NULL,
MBFL_ENCTYPE_SBCS
};
/*
 * 8bit -> BASE64 converter table: source encoding id, destination
 * encoding id, constructor, destructor, per-character filter, flush.
 */
const struct mbfl_convert_vtbl vtbl_8bit_b64 = {
mbfl_no_encoding_8bit,
mbfl_no_encoding_base64,
mbfl_filt_conv_common_ctor,
mbfl_filt_conv_common_dtor,
mbfl_filt_conv_base64enc,
mbfl_filt_conv_base64enc_flush
};
/* BASE64 -> 8bit converter table (same layout as above). */
const struct mbfl_convert_vtbl vtbl_b64_8bit = {
mbfl_no_encoding_base64,
mbfl_no_encoding_8bit,
mbfl_filt_conv_common_ctor,
mbfl_filt_conv_common_dtor,
mbfl_filt_conv_base64dec,
mbfl_filt_conv_base64dec_flush
};
/*
 * Evaluate statement; if it yields a negative value, make the enclosing
 * function return -1 immediately.
 */
#define CK(statement) do { if ((statement) < 0) return (-1); } while (0)
/*
* any => BASE64
*/
/*
 * Base64 alphabet as byte values: entry i (0..63) is the output character
 * for the 6-bit value i ('A'-'Z', 'a'-'z', '0'-'9', '+', '/').  A 65th
 * entry holding 0x00 follows the alphabet.
 */
static const unsigned char mbfl_base64_table[] = {
/* 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', */
0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4a,0x4b,0x4c,0x4d,
/* 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', */
0x4e,0x4f,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5a,
/* 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', */
0x61,0x62,0x63,0x64,0x65,0x66,0x67,0x68,0x69,0x6a,0x6b,0x6c,0x6d,
/* 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', */
0x6e,0x6f,0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,0x78,0x79,0x7a,
/* '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '+', '/', '\0' */
0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,0x38,0x39,0x2b,0x2f,0x00
};
/*
 * Feed one byte into the Base64 encoder.
 *
 * The low 8 bits of filter->status count the bytes buffered so far (0..2)
 * and filter->cache holds them packed as a 24-bit group.  On the third
 * byte the group is written out as four alphabet characters.  Unless the
 * MBFL_BASE64_STS_MIME_HEADER flag is set in filter->status, bits 8-15 of
 * status track the output column: each group adds 4, and once the counter
 * exceeds 72 a CR LF pair is emitted before the group and the counter
 * restarts.
 *
 * Returns c, or -1 as soon as the output function reports a negative value.
 */
int mbfl_filt_conv_base64enc(int c, mbfl_convert_filter *filter)
{
	int octet = c & 0xff;
	int column;
	int group;

	switch (filter->status & 0xff) {
	case 0:
		/* first byte of a group */
		filter->cache = octet << 16;
		filter->status++;
		return c;
	case 1:
		/* second byte of a group */
		filter->cache |= octet << 8;
		filter->status++;
		return c;
	default:
		break;
	}

	/* third byte: reset the byte counter and emit the whole group */
	filter->status &= ~0xff;
	if ((filter->status & MBFL_BASE64_STS_MIME_HEADER) == 0) {
		column = (filter->status & 0xff00) >> 8;
		if (column > 72) {
			CK((*filter->output_function)(0x0d, filter->data));		/* CR */
			CK((*filter->output_function)(0x0a, filter->data));		/* LF */
			filter->status &= ~0xff00;
		}
		filter->status += 0x400;	/* four more characters on this line */
	}

	group = filter->cache | octet;
	CK((*filter->output_function)(mbfl_base64_table[(group >> 18) & 0x3f], filter->data));
	CK((*filter->output_function)(mbfl_base64_table[(group >> 12) & 0x3f], filter->data));
	CK((*filter->output_function)(mbfl_base64_table[(group >> 6) & 0x3f], filter->data));
	CK((*filter->output_function)(mbfl_base64_table[group & 0x3f], filter->data));

	return c;
}
/*
 * Finish a Base64 encoding run.
 *
 * Clears the byte counter, the column counter (low 16 bits of
 * filter->status) and filter->cache.  If one or two bytes were still
 * buffered they are written as a final four-character group padded with
 * '='; outside MIME-header mode a CR LF is emitted first when the column
 * counter was above 72.
 *
 * Returns 0, or -1 as soon as the output function reports a negative value.
 */
int mbfl_filt_conv_base64enc_flush(mbfl_convert_filter *filter)
{
	int pending = filter->status & 0xff;
	int column = (filter->status & 0xff00) >> 8;
	int bits = filter->cache;

	filter->cache = 0;
	filter->status &= ~0xffff;

	if (pending < 1) {
		return 0;	/* nothing buffered */
	}

	if ((filter->status & MBFL_BASE64_STS_MIME_HEADER) == 0 && column > 72) {
		CK((*filter->output_function)(0x0d, filter->data));		/* CR */
		CK((*filter->output_function)(0x0a, filter->data));		/* LF */
	}

	CK((*filter->output_function)(mbfl_base64_table[(bits >> 18) & 0x3f], filter->data));
	CK((*filter->output_function)(mbfl_base64_table[(bits >> 12) & 0x3f], filter->data));
	if (pending == 1) {
		/* only one byte buffered: third character is padding too */
		CK((*filter->output_function)(0x3d, filter->data));		/* '=' */
	} else {
		CK((*filter->output_function)(mbfl_base64_table[(bits >> 6) & 0x3f], filter->data));
	}
	CK((*filter->output_function)(0x3d, filter->data));		/* '=' */

	return 0;
}
/*
* BASE64 => any
*/
/*
 * Feed one character into the Base64 decoder.
 *
 * CR, LF, space, tab and '=' are skipped.  Every other character is
 * turned into a 6-bit value (characters outside the Base64 alphabet
 * count as 0) and accumulated in filter->cache; filter->status counts
 * the values collected so far (0..3).  The fourth value completes a
 * 24-bit group, which is written out as three bytes.
 *
 * Returns c, or -1 as soon as the output function reports a negative value.
 */
int mbfl_filt_conv_base64dec(int c, mbfl_convert_filter *filter)
{
	int sextet = 0;
	int group;

	switch (c) {
	case 0x0d:	/* CR */
	case 0x0a:	/* LF */
	case 0x20:	/* SPACE */
	case 0x09:	/* HTAB */
	case 0x3d:	/* '=' */
		return c;
	default:
		break;
	}

	if (c >= 0x41 && c <= 0x5a) {			/* A - Z => 0..25 */
		sextet = c - 0x41;
	} else if (c >= 0x61 && c <= 0x7a) {	/* a - z => 26..51 */
		sextet = c - 0x61 + 26;
	} else if (c >= 0x30 && c <= 0x39) {	/* 0 - 9 => 52..61 */
		sextet = c - 0x30 + 52;
	} else if (c == 0x2b) {					/* '+' */
		sextet = 62;
	} else if (c == 0x2f) {					/* '/' */
		sextet = 63;
	}

	if (filter->status == 0) {
		filter->cache = sextet << 18;
		filter->status = 1;
	} else if (filter->status == 1) {
		filter->cache |= sextet << 12;
		filter->status = 2;
	} else if (filter->status == 2) {
		filter->cache |= sextet << 6;
		filter->status = 3;
	} else {
		/* fourth value: the group is complete */
		filter->status = 0;
		group = sextet | filter->cache;
		CK((*filter->output_function)((group >> 16) & 0xff, filter->data));
		CK((*filter->output_function)((group >> 8) & 0xff, filter->data));
		CK((*filter->output_function)(group & 0xff, filter->data));
	}

	return c;
}
/*
 * Finish a Base64 decoding run.
 *
 * Resets filter->status and filter->cache, then writes the bytes that
 * are already complete in a partial group: one byte when two or more
 * 6-bit values were collected, a second byte when three were.
 *
 * Returns 0, or -1 as soon as the output function reports a negative value.
 */
int mbfl_filt_conv_base64dec_flush(mbfl_convert_filter *filter)
{
	int collected = filter->status;
	int bits = filter->cache;

	filter->cache = 0;
	filter->status = 0;

	if (collected < 2) {
		return 0;	/* fewer than 8 bits available */
	}

	CK((*filter->output_function)((bits >> 16) & 0xff, filter->data));
	if (collected >= 3) {
		CK((*filter->output_function)((bits >> 8) & 0xff, filter->data));
	}

	return 0;
}

View file

@ -0,0 +1,45 @@
/*
* "streamable kanji code filter and converter"
* Copyright (c) 1998-2002 HappySize, Inc. All rights reserved.
*
* LICENSE NOTICES
*
* This file is part of "streamable kanji code filter and converter",
* which is distributed under the terms of GNU Lesser General Public
* License (version 2) as published by the Free Software Foundation.
*
* This software is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with "streamable kanji code filter and converter";
* if not, write to the Free Software Foundation, Inc., 59 Temple Place,
* Suite 330, Boston, MA 02111-1307 USA
*
* The author of this file:
*
*/
/*
* The source code included in this file was separated from mbfilter.c
* by Moriyoshi Koizumi <moriyoshi@php.net> on 4 Dec 2002. The file
* mbfilter.c is included in this package.
*
*/
#ifndef MBFL_MBFILTER_BASE64_H
#define MBFL_MBFILTER_BASE64_H
#include "mbfilter.h"
/* Encoding descriptor for BASE64. */
extern const mbfl_encoding mbfl_encoding_base64;
/* Conversion-filter tables: BASE64 -> 8bit and 8bit -> BASE64. */
extern const struct mbfl_convert_vtbl vtbl_b64_8bit;
extern const struct mbfl_convert_vtbl vtbl_8bit_b64;
/* Encoder: per-byte callback and end-of-input flush. */
int mbfl_filt_conv_base64enc(int c, mbfl_convert_filter *filter);
int mbfl_filt_conv_base64enc_flush(mbfl_convert_filter *filter);
/* Decoder: per-character callback and end-of-input flush. */
int mbfl_filt_conv_base64dec(int c, mbfl_convert_filter *filter);
int mbfl_filt_conv_base64dec_flush(mbfl_convert_filter *filter);
#endif /* MBFL_MBFILTER_BASE64_H */

View file

@ -0,0 +1,227 @@
/*
* "streamable kanji code filter and converter"
* Copyright (c) 1998-2002 HappySize, Inc. All rights reserved.
*
* LICENSE NOTICES
*
* This file is part of "streamable kanji code filter and converter",
* which is distributed under the terms of GNU Lesser General Public
* License (version 2) as published by the Free Software Foundation.
*
* This software is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with "streamable kanji code filter and converter";
* if not, write to the Free Software Foundation, Inc., 59 Temple Place,
* Suite 330, Boston, MA 02111-1307 USA
*
* The author of this file: Rui Hirokawa <hirokawa@php.net>
*
*/
/*
* The source code included in this file was separated from mbfilter_tw.c
* by Moriyoshi Koizumi <moriyoshi@php.net> on 4 Dec 2002.
*
*/
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
#include "mbfilter.h"
#include "mbfilter_big5.h"
#include "unicode_table_big5.h"
/* Forward declaration; the identify filter is defined further down. */
static int mbfl_filt_ident_big5(int c, mbfl_identify_filter *filter);
/*
 * Byte-length table with 256 entries, one per byte value 0x00-0xFF:
 * entries 0x81-0xFE hold 2, every other entry holds 1.
 * NOTE(review): presumably looked up with the first byte of a character
 * to obtain its length in bytes - confirm against the users of the
 * descriptor's length table.
 */
static const unsigned char mblen_table_big5[] = { /* 0x81-0xFE */
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1
};
/* NULL-terminated list of alternative names for the Big5 encoding. */
static const char *mbfl_encoding_big5_aliases[] = {"CN-BIG5", "BIG-FIVE", "BIGFIVE", "CP950", NULL};
/*
 * Encoding descriptor for Big5: id, two name strings, the alias list,
 * the byte-length table and the encoding-type flag.
 * NOTE(review): the two strings are presumably the encoding name and its
 * MIME name; the field layout lives in mbfilter.h - confirm.
 */
const mbfl_encoding mbfl_encoding_big5 = {
mbfl_no_encoding_big5,
"BIG-5",
"BIG5",
(const char *(*)[])&mbfl_encoding_big5_aliases,
mblen_table_big5,
MBFL_ENCTYPE_MBCS
};
/* Identify-filter table: encoding id, constructor, destructor, per-byte check. */
const struct mbfl_identify_vtbl vtbl_identify_big5 = {
mbfl_no_encoding_big5,
mbfl_filt_ident_common_ctor,
mbfl_filt_ident_common_dtor,
mbfl_filt_ident_big5
};
/*
 * Big5 -> wchar converter table: source id, destination id, constructor,
 * destructor, per-character filter, flush.
 */
const struct mbfl_convert_vtbl vtbl_big5_wchar = {
mbfl_no_encoding_big5,
mbfl_no_encoding_wchar,
mbfl_filt_conv_common_ctor,
mbfl_filt_conv_common_dtor,
mbfl_filt_conv_big5_wchar,
mbfl_filt_conv_common_flush
};
/* wchar -> Big5 converter table (same layout as above). */
const struct mbfl_convert_vtbl vtbl_wchar_big5 = {
mbfl_no_encoding_wchar,
mbfl_no_encoding_big5,
mbfl_filt_conv_common_ctor,
mbfl_filt_conv_common_dtor,
mbfl_filt_conv_wchar_big5,
mbfl_filt_conv_common_flush
};
/*
 * Evaluate statement; if it yields a negative value, make the enclosing
 * function return -1 immediately.
 */
#define CK(statement) do { if ((statement) < 0) return (-1); } while (0)
/*
* Big5 => wchar
*/
/*
 * Convert one Big5 byte to a wide character.
 *
 * filter->status is 0 when a new character is expected and 1 when a lead
 * byte is waiting in filter->cache.
 *
 *  - status 0: bytes 0x00-0x7f are passed through; 0xa1-0xfe are stored
 *    as lead byte; anything else is emitted tagged with
 *    MBFL_WCSGROUP_THROUGH.
 *  - status 1: a trail byte in 0x40-0x7e or 0xa1-0xfe selects a cell of
 *    big5_ucs_table (157 cells per lead byte: 63 for the low trail range
 *    followed by 94 for the high one).  Cells without a mapping are
 *    emitted as the raw two-byte code tagged with MBFL_WCSPLANE_BIG5.
 *    Control bytes (0x00-0x20, 0x7f) are passed through, and every other
 *    byte is emitted together with the lead byte, tagged with
 *    MBFL_WCSGROUP_THROUGH.
 *
 * The trail-byte test starts at 0x40.  Accepting 0x3a-0x3f, as an earlier
 * revision did, produces a negative column offset and therefore decodes
 * a cell belonging to the previous lead byte; the identify filter in this
 * file also treats bytes below 0x40 as invalid trail bytes.
 *
 * Returns c, or -1 as soon as the output function reports a negative value.
 */
int
mbfl_filt_conv_big5_wchar(int c, mbfl_convert_filter *filter)
{
	int c1, w;

	switch (filter->status) {
	case 0:
		if (c >= 0 && c < 0x80) {	/* latin */
			CK((*filter->output_function)(c, filter->data));
		} else if (c > 0xa0 && c < 0xff) {	/* dbcs lead byte */
			filter->status = 1;
			filter->cache = c;
		} else {
			w = c & MBFL_WCSGROUP_MASK;
			w |= MBFL_WCSGROUP_THROUGH;
			CK((*filter->output_function)(w, filter->data));
		}
		break;

	case 1:		/* dbcs second byte */
		filter->status = 0;
		c1 = filter->cache;
		if ((c >= 0x40 && c < 0x7f) || (c > 0xa0 && c < 0xff)) {
			if (c < 0x7f) {
				w = (c1 - 0xa1)*157 + (c - 0x40);
			} else {
				w = (c1 - 0xa1)*157 + (c - 0xa1) + 0x3f;
			}
			if (w >= 0 && w < big5_ucs_table_size) {
				w = big5_ucs_table[w];
			} else {
				w = 0;
			}
			if (w <= 0) {
				/* no mapping: keep the raw code, tagged as Big5 */
				w = (c1 << 8) | c;
				w &= MBFL_WCSPLANE_MASK;
				w |= MBFL_WCSPLANE_BIG5;
			}
			CK((*filter->output_function)(w, filter->data));
		} else if ((c >= 0 && c < 0x21) || c == 0x7f) {		/* CTLs */
			CK((*filter->output_function)(c, filter->data));
		} else {
			w = (c1 << 8) | c;
			w &= MBFL_WCSGROUP_MASK;
			w |= MBFL_WCSGROUP_THROUGH;
			CK((*filter->output_function)(w, filter->data));
		}
		break;

	default:
		filter->status = 0;
		break;
	}

	return c;
}
/*
* wchar => Big5
*/
/*
 * Convert one wide character to Big5.
 *
 * The code point is looked up in whichever ucs_*_big5_table covers it.
 * If that yields nothing and the value carries the MBFL_WCSPLANE_BIG5
 * tag, the tagged raw code is used instead.  A result below 0x80 is
 * written as a single byte, anything else as two bytes (high, low).
 * Unconvertible input is handed to mbfl_filt_conv_illegal_output()
 * unless the filter's illegal mode is MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE.
 *
 * Returns c, or -1 as soon as a callee reports a negative value.
 */
int
mbfl_filt_conv_wchar_big5(int c, mbfl_convert_filter *filter)
{
	int code = 0;

	if (c >= ucs_a1_big5_table_min && c < ucs_a1_big5_table_max) {
		code = ucs_a1_big5_table[c - ucs_a1_big5_table_min];
	} else if (c >= ucs_a2_big5_table_min && c < ucs_a2_big5_table_max) {
		code = ucs_a2_big5_table[c - ucs_a2_big5_table_min];
	} else if (c >= ucs_a3_big5_table_min && c < ucs_a3_big5_table_max) {
		code = ucs_a3_big5_table[c - ucs_a3_big5_table_min];
	} else if (c >= ucs_i_big5_table_min && c < ucs_i_big5_table_max) {
		code = ucs_i_big5_table[c - ucs_i_big5_table_min];
	} else if (c >= ucs_pua_big5_table_min && c < ucs_pua_big5_table_max) {
		code = ucs_pua_big5_table[c - ucs_pua_big5_table_min];
	} else if (c >= ucs_r1_big5_table_min && c < ucs_r1_big5_table_max) {
		code = ucs_r1_big5_table[c - ucs_r1_big5_table_min];
	} else if (c >= ucs_r2_big5_table_min && c < ucs_r2_big5_table_max) {
		code = ucs_r2_big5_table[c - ucs_r2_big5_table_min];
	}

	if (code <= 0) {
		/* no table hit: accept a raw code tagged with the Big5 plane */
		if ((c & ~MBFL_WCSPLANE_MASK) == MBFL_WCSPLANE_BIG5) {
			code = c & MBFL_WCSPLANE_MASK;
		}
		if (c == 0) {
			code = 0;		/* NUL maps to NUL */
		} else if (code <= 0) {
			code = -1;		/* unconvertible */
		}
	}

	if (code < 0) {
		if (filter->illegal_mode != MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE) {
			CK(mbfl_filt_conv_illegal_output(c, filter));
		}
		return c;
	}

	if (code >= 0x80) {
		/* double-byte code: high byte first */
		CK((*filter->output_function)((code >> 8) & 0xff, filter->data));
	}
	CK((*filter->output_function)(code & 0xff, filter->data));

	return c;
}
/*
 * Identify filter for Big5: examines one byte and sets filter->flag to 1
 * when the byte cannot occur at this position.
 *
 * filter->status is non-zero while a trail byte is expected.  Valid
 * trail bytes are 0x40-0x7e and 0xa1-0xfe; valid first bytes are
 * 0x00-0x7f (single byte) and 0xa1-0xfe (lead byte).
 *
 * Always returns c.
 */
static int mbfl_filt_ident_big5(int c, mbfl_identify_filter *filter)
{
	if (filter->status) {
		/* second byte of a double-byte character */
		int trail_ok = (c >= 0x40 && c <= 0x7e) || (c >= 0xa1 && c <= 0xfe);

		if (!trail_ok) {
			filter->flag = 1;
		}
		filter->status = 0;
		return c;
	}

	if (c >= 0 && c < 0x80) {
		/* latin: nothing to record */
		return c;
	}

	if (c > 0xa0 && c < 0xff) {
		filter->status = 1;		/* DBCS lead byte */
	} else {
		filter->flag = 1;		/* bad */
	}

	return c;
}

View file

@ -0,0 +1,43 @@
/*
* "streamable kanji code filter and converter"
* Copyright (c) 1998-2002 HappySize, Inc. All rights reserved.
*
* LICENSE NOTICES
*
* This file is part of "streamable kanji code filter and converter",
* which is distributed under the terms of GNU Lesser General Public
* License (version 2) as published by the Free Software Foundation.
*
* This software is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with "streamable kanji code filter and converter";
* if not, write to the Free Software Foundation, Inc., 59 Temple Place,
* Suite 330, Boston, MA 02111-1307 USA
*
* The author of this file: Rui Hirokawa <hirokawa@php.net>
*
*/
/*
* The source code included in this file was separated from mbfilter_tw.h
* by Moriyoshi Koizumi <moriyoshi@php.net> on 4 Dec 2002.
*
*/
#ifndef MBFL_MBFILTER_BIG5_H
#define MBFL_MBFILTER_BIG5_H
#include "mbfilter.h"
/* Encoding descriptor for Big5. */
extern const mbfl_encoding mbfl_encoding_big5;
/* Identify-filter table for Big5. */
extern const struct mbfl_identify_vtbl vtbl_identify_big5;
/* Conversion-filter tables: Big5 -> wchar and wchar -> Big5. */
extern const struct mbfl_convert_vtbl vtbl_big5_wchar;
extern const struct mbfl_convert_vtbl vtbl_wchar_big5;
/* Per-character conversion callbacks; both return c, or -1 on failure. */
int mbfl_filt_conv_big5_wchar(int c, mbfl_convert_filter *filter);
int mbfl_filt_conv_wchar_big5(int c, mbfl_convert_filter *filter);
#endif /* MBFL_MBFILTER_BIG5_H */

View file

@ -0,0 +1,137 @@
/*
* "streamable kanji code filter and converter"
* Copyright (c) 1998-2002 HappySize, Inc. All rights reserved.
*
* LICENSE NOTICES
*
* This file is part of "streamable kanji code filter and converter",
* which is distributed under the terms of GNU Lesser General Public
* License (version 2) as published by the Free Software Foundation.
*
* This software is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with "streamable kanji code filter and converter";
* if not, write to the Free Software Foundation, Inc., 59 Temple Place,
* Suite 330, Boston, MA 02111-1307 USA
*
* The author of this file:
*
*/
/*
* The source code included in this file was separated from mbfilter.c
* by Moriyoshi Koizumi <moriyoshi@php.net> on 4 Dec 2002. The file
* mbfilter.c is included in this package.
*
*/
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
#include "mbfilter.h"
#include "mbfilter_byte2.h"
/*
 * Encoding descriptor for "byte2be" (two bytes per unit, high byte first).
 * NOTE(review): the three NULLs presumably stand for MIME name, alias
 * list and byte-length table; the field layout lives in mbfilter.h - confirm.
 */
const mbfl_encoding mbfl_encoding_byte2be = {
mbfl_no_encoding_byte2be,
"byte2be",
NULL,
NULL,
NULL,
MBFL_ENCTYPE_SBCS
};
/* Encoding descriptor for "byte2le" (two bytes per unit, low byte first). */
const mbfl_encoding mbfl_encoding_byte2le = {
mbfl_no_encoding_byte2le,
"byte2le",
NULL,
NULL,
NULL,
MBFL_ENCTYPE_SBCS
};
/*
 * Converter tables.  Each lists: source encoding id, destination encoding
 * id, constructor, destructor, per-character filter, flush.
 */
/* byte2be -> wchar */
const struct mbfl_convert_vtbl vtbl_byte2be_wchar = {
mbfl_no_encoding_byte2be,
mbfl_no_encoding_wchar,
mbfl_filt_conv_common_ctor,
mbfl_filt_conv_common_dtor,
mbfl_filt_conv_byte2be_wchar,
mbfl_filt_conv_common_flush
};
/* wchar -> byte2be */
const struct mbfl_convert_vtbl vtbl_wchar_byte2be = {
mbfl_no_encoding_wchar,
mbfl_no_encoding_byte2be,
mbfl_filt_conv_common_ctor,
mbfl_filt_conv_common_dtor,
mbfl_filt_conv_wchar_byte2be,
mbfl_filt_conv_common_flush };
/* byte2le -> wchar */
const struct mbfl_convert_vtbl vtbl_byte2le_wchar = {
mbfl_no_encoding_byte2le,
mbfl_no_encoding_wchar,
mbfl_filt_conv_common_ctor,
mbfl_filt_conv_common_dtor,
mbfl_filt_conv_byte2le_wchar,
mbfl_filt_conv_common_flush };
/* wchar -> byte2le */
const struct mbfl_convert_vtbl vtbl_wchar_byte2le = {
mbfl_no_encoding_wchar,
mbfl_no_encoding_byte2le,
mbfl_filt_conv_common_ctor,
mbfl_filt_conv_common_dtor,
mbfl_filt_conv_wchar_byte2le,
mbfl_filt_conv_common_flush };
/*
 * Evaluate statement; if it yields a negative value, make the enclosing
 * function return -1 immediately.
 */
#define CK(statement) do { if ((statement) < 0) return (-1); } while (0)
/*
 * Assemble a 16-bit unit from two bytes, high byte first.
 *
 * The first byte of a pair is parked in filter->cache (already shifted
 * into the high position) and filter->status is set to 1; the second
 * byte completes the unit, which is passed to the output function.
 *
 * Returns c, or -1 if the output function reports a negative value.
 */
int mbfl_filt_conv_byte2be_wchar(int c, mbfl_convert_filter *filter)
{
	int octet = c & 0xff;

	if (filter->status == 0) {
		/* high byte: wait for its partner */
		filter->cache = octet << 8;
		filter->status = 1;
		return c;
	}

	filter->status = 0;
	CK((*filter->output_function)(octet | filter->cache, filter->data));
	return c;
}
/*
 * Write the low 16 bits of c as two bytes, high byte first.
 *
 * Returns c, or -1 if the output function reports a negative value.
 */
int mbfl_filt_conv_wchar_byte2be(int c, mbfl_convert_filter *filter)
{
	int high = (c >> 8) & 0xff;
	int low = c & 0xff;

	CK((*filter->output_function)(high, filter->data));
	CK((*filter->output_function)(low, filter->data));
	return c;
}
/*
 * Assemble a 16-bit unit from two bytes, low byte first.
 *
 * The first byte of a pair is parked in filter->cache and filter->status
 * is set to 1; the second byte supplies the high 8 bits and the finished
 * unit is passed to the output function.
 *
 * Returns c, or -1 if the output function reports a negative value.
 */
int mbfl_filt_conv_byte2le_wchar(int c, mbfl_convert_filter *filter)
{
	int octet = c & 0xff;

	if (filter->status == 0) {
		/* low byte: wait for its partner */
		filter->cache = octet;
		filter->status = 1;
		return c;
	}

	filter->status = 0;
	CK((*filter->output_function)((octet << 8) | filter->cache, filter->data));
	return c;
}
/*
 * Write the low 16 bits of c as two bytes, low byte first.
 *
 * Returns c, or -1 if the output function reports a negative value.
 */
int mbfl_filt_conv_wchar_byte2le(int c, mbfl_convert_filter *filter)
{
	int low = c & 0xff;
	int high = (c >> 8) & 0xff;

	CK((*filter->output_function)(low, filter->data));
	CK((*filter->output_function)(high, filter->data));
	return c;
}

View file

@ -0,0 +1,48 @@
/*
* "streamable kanji code filter and converter"
* Copyright (c) 1998-2002 HappySize, Inc. All rights reserved.
*
* LICENSE NOTICES
*
* This file is part of "streamable kanji code filter and converter",
* which is distributed under the terms of GNU Lesser General Public
* License (version 2) as published by the Free Software Foundation.
*
* This software is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with "streamable kanji code filter and converter";
* if not, write to the Free Software Foundation, Inc., 59 Temple Place,
* Suite 330, Boston, MA 02111-1307 USA
*
* The author of this file:
*
*/
/*
* The source code included in this file was separated from mbfilter.c
* by Moriyoshi Koizumi <moriyoshi@php.net> on 4 Dec 2002. The file
* mbfilter.c is included in this package.
*
*/
#ifndef MBFL_MBFILTER_BYTE2_H
#define MBFL_MBFILTER_BYTE2_H
#include "mbfilter.h"
/* Encoding descriptors for the big-endian and little-endian 2-byte encodings. */
extern const mbfl_encoding mbfl_encoding_byte2be;
extern const mbfl_encoding mbfl_encoding_byte2le;
/* Conversion-filter tables between each 2-byte encoding and wchar. */
extern const struct mbfl_convert_vtbl vtbl_byte2be_wchar;
extern const struct mbfl_convert_vtbl vtbl_wchar_byte2be;
extern const struct mbfl_convert_vtbl vtbl_byte2le_wchar;
extern const struct mbfl_convert_vtbl vtbl_wchar_byte2le;
/* Per-character conversion callbacks; each returns c, or -1 on failure. */
int mbfl_filt_conv_wchar_byte2be(int c, mbfl_convert_filter *filter);
int mbfl_filt_conv_byte2be_wchar(int c, mbfl_convert_filter *filter);
int mbfl_filt_conv_wchar_byte2le(int c, mbfl_convert_filter *filter);
int mbfl_filt_conv_byte2le_wchar(int c, mbfl_convert_filter *filter);
#endif /* MBFL_MBFILTER_BYTE2_H */

View file

@ -0,0 +1,159 @@
/*
* "streamable kanji code filter and converter"
* Copyright (c) 1998-2002 HappySize, Inc. All rights reserved.
*
* LICENSE NOTICES
*
* This file is part of "streamable kanji code filter and converter",
* which is distributed under the terms of GNU Lesser General Public
* License (version 2) as published by the Free Software Foundation.
*
* This software is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with "streamable kanji code filter and converter";
* if not, write to the Free Software Foundation, Inc., 59 Temple Place,
* Suite 330, Boston, MA 02111-1307 USA
*
* The author of this file:
*
*/
/*
* The source code included in this file was separated from mbfilter.c
* by Moriyoshi Koizumi <moriyoshi@php.net> on 4 Dec 2002. The file
* mbfilter.c is included in this package.
*
*/
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
#include "mbfilter.h"
#include "mbfilter_byte4.h"
/*
 * Encoding descriptor for "byte4be" (four bytes per unit, highest byte first).
 * NOTE(review): the three NULLs presumably stand for MIME name, alias
 * list and byte-length table; the field layout lives in mbfilter.h - confirm.
 */
const mbfl_encoding mbfl_encoding_byte4be = {
mbfl_no_encoding_byte4be,
"byte4be",
NULL,
NULL,
NULL,
MBFL_ENCTYPE_SBCS
};
/* Encoding descriptor for "byte4le" (four bytes per unit, lowest byte first). */
const mbfl_encoding mbfl_encoding_byte4le = {
mbfl_no_encoding_byte4le,
"byte4le",
NULL,
NULL,
NULL,
MBFL_ENCTYPE_SBCS
};
/*
 * Converter tables.  Each lists: source encoding id, destination encoding
 * id, constructor, destructor, per-character filter, flush.
 */
/* byte4be -> wchar */
const struct mbfl_convert_vtbl vtbl_byte4be_wchar = {
mbfl_no_encoding_byte4be,
mbfl_no_encoding_wchar,
mbfl_filt_conv_common_ctor,
mbfl_filt_conv_common_dtor,
mbfl_filt_conv_byte4be_wchar,
mbfl_filt_conv_common_flush
};
/* wchar -> byte4be */
const struct mbfl_convert_vtbl vtbl_wchar_byte4be = {
mbfl_no_encoding_wchar,
mbfl_no_encoding_byte4be,
mbfl_filt_conv_common_ctor,
mbfl_filt_conv_common_dtor,
mbfl_filt_conv_wchar_byte4be,
mbfl_filt_conv_common_flush };
/* byte4le -> wchar */
const struct mbfl_convert_vtbl vtbl_byte4le_wchar = {
mbfl_no_encoding_byte4le,
mbfl_no_encoding_wchar,
mbfl_filt_conv_common_ctor,
mbfl_filt_conv_common_dtor,
mbfl_filt_conv_byte4le_wchar,
mbfl_filt_conv_common_flush
};
/* wchar -> byte4le */
const struct mbfl_convert_vtbl vtbl_wchar_byte4le = {
mbfl_no_encoding_wchar,
mbfl_no_encoding_byte4le,
mbfl_filt_conv_common_ctor,
mbfl_filt_conv_common_dtor,
mbfl_filt_conv_wchar_byte4le,
mbfl_filt_conv_common_flush
};
/*
 * Evaluate statement; if it yields a negative value, make the enclosing
 * function return -1 immediately.
 */
#define CK(statement) do { if ((statement) < 0) return (-1); } while (0)
/*
 * Assemble a 32-bit unit from four bytes, highest byte first.
 *
 * filter->status counts the bytes received so far (0..3) and
 * filter->cache accumulates them.  The fourth byte completes the unit,
 * which is passed to the output function.
 *
 * The byte is shifted as an unsigned value: left-shifting a signed int so
 * that a 1 bit reaches the sign position (any first byte >= 0x80 shifted
 * by 24) is undefined behaviour in C.
 *
 * Returns c, or -1 if the output function reports a negative value.
 */
int mbfl_filt_conv_byte4be_wchar(int c, mbfl_convert_filter *filter)
{
	unsigned int octet = (unsigned int)c & 0xffu;
	int n;

	switch (filter->status) {
	case 0:
		filter->status = 1;
		filter->cache = (int)(octet << 24);
		break;
	case 1:
		filter->status = 2;
		filter->cache |= (int)(octet << 16);
		break;
	case 2:
		filter->status = 3;
		filter->cache |= (int)(octet << 8);
		break;
	default:
		/* fourth byte: the unit is complete */
		filter->status = 0;
		n = (int)octet | filter->cache;
		CK((*filter->output_function)(n, filter->data));
		break;
	}

	return c;
}
/*
 * Write c as four bytes, highest byte first.
 *
 * Returns c, or -1 if the output function reports a negative value.
 */
int mbfl_filt_conv_wchar_byte4be(int c, mbfl_convert_filter *filter)
{
	int shift;

	for (shift = 24; shift >= 0; shift -= 8) {
		CK((*filter->output_function)((c >> shift) & 0xff, filter->data));
	}
	return c;
}
/*
 * Assemble a 32-bit unit from four bytes, lowest byte first.
 *
 * filter->status counts the bytes received so far (0..3) and
 * filter->cache accumulates them.  The fourth byte supplies the top
 * 8 bits and the finished unit is passed to the output function.
 *
 * The byte is shifted as an unsigned value: left-shifting a signed int so
 * that a 1 bit reaches the sign position (a fourth byte >= 0x80 shifted
 * by 24) is undefined behaviour in C.
 *
 * Returns c, or -1 if the output function reports a negative value.
 */
int mbfl_filt_conv_byte4le_wchar(int c, mbfl_convert_filter *filter)
{
	unsigned int octet = (unsigned int)c & 0xffu;
	int n;

	switch (filter->status) {
	case 0:
		filter->status = 1;
		filter->cache = (int)octet;
		break;
	case 1:
		filter->status = 2;
		filter->cache |= (int)(octet << 8);
		break;
	case 2:
		filter->status = 3;
		filter->cache |= (int)(octet << 16);
		break;
	default:
		/* fourth byte: the unit is complete */
		filter->status = 0;
		n = (int)(octet << 24) | filter->cache;
		CK((*filter->output_function)(n, filter->data));
		break;
	}

	return c;
}
/*
 * Write c as four bytes, lowest byte first.
 *
 * Returns c, or -1 if the output function reports a negative value.
 */
int mbfl_filt_conv_wchar_byte4le(int c, mbfl_convert_filter *filter)
{
	int shift;

	for (shift = 0; shift <= 24; shift += 8) {
		CK((*filter->output_function)((c >> shift) & 0xff, filter->data));
	}
	return c;
}

View file

@ -0,0 +1,46 @@
/*
* "streamable kanji code filter and converter"
* Copyright (c) 1998-2002 HappySize, Inc. All rights reserved.
*
* LICENSE NOTICES
*
* This file is part of "streamable kanji code filter and converter",
* which is distributed under the terms of GNU Lesser General Public
* License (version 2) as published by the Free Software Foundation.
*
* This software is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with "streamable kanji code filter and converter";
* if not, write to the Free Software Foundation, Inc., 59 Temple Place,
* Suite 330, Boston, MA 02111-1307 USA
*
* The author of this file:
*
*/
/*
* The source code included in this file was separated from mbfilter.c
* by Moriyoshi Koizumi <moriyoshi@php.net> on 4 Dec 2002. The file
* mbfilter.c is included in this package.
*
*/
#ifndef MBFL_MBFILTER_BYTE4_H
#define MBFL_MBFILTER_BYTE4_H
/*
 * NOTE(review): unlike the sibling filter headers, this one does not
 * include "mbfilter.h" itself, so mbfl_encoding, mbfl_convert_vtbl and
 * mbfl_convert_filter must already be declared by the includer
 * (mbfilter_byte4.c includes "mbfilter.h" first).  Consider adding the
 * include so the header is self-contained.
 */
/* Encoding descriptors for the big-endian and little-endian 4-byte encodings. */
extern const mbfl_encoding mbfl_encoding_byte4be;
extern const mbfl_encoding mbfl_encoding_byte4le;
/* Conversion-filter tables between each 4-byte encoding and wchar. */
extern const struct mbfl_convert_vtbl vtbl_byte4be_wchar;
extern const struct mbfl_convert_vtbl vtbl_wchar_byte4be;
extern const struct mbfl_convert_vtbl vtbl_byte4le_wchar;
extern const struct mbfl_convert_vtbl vtbl_wchar_byte4le;
/* Per-character conversion callbacks; each returns c, or -1 on failure. */
int mbfl_filt_conv_wchar_byte4be(int c, mbfl_convert_filter *filter);
int mbfl_filt_conv_byte4be_wchar(int c, mbfl_convert_filter *filter);
int mbfl_filt_conv_wchar_byte4le(int c, mbfl_convert_filter *filter);
int mbfl_filt_conv_byte4le_wchar(int c, mbfl_convert_filter *filter);
#endif /* MBFL_MBFILTER_BYTE4_H */

View file

@ -0,0 +1,152 @@
/*
* "streamable kanji code filter and converter"
* Copyright (c) 1998-2002 HappySize, Inc. All rights reserved.
*
* LICENSE NOTICES
*
* This file is part of "streamable kanji code filter and converter",
* which is distributed under the terms of GNU Lesser General Public
* License (version 2) as published by the Free Software Foundation.
*
* This software is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with "streamable kanji code filter and converter";
* if not, write to the Free Software Foundation, Inc., 59 Temple Place,
* Suite 330, Boston, MA 02111-1307 USA
*
* The author of this part: Den V. Tsopa <tdv@edisoft.ru>
*
*/
/*
* The source code included in this file was separated from mbfilter_ru.c
* by Moriyoshi Koizumi <moriyoshi@php.net> on 4 Dec 2002.
*
*/
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
#include "mbfilter.h"
#include "mbfilter_cp1251.h"
#include "unicode_table_cp1251.h"
/* Forward declaration; the identify filter is defined further down. */
static int mbfl_filt_ident_cp1251(int c, mbfl_identify_filter *filter);
/* NULL-terminated list of alternative names for Windows-1251. */
static const char *mbfl_encoding_cp1251_aliases[] = {"CP1251", "CP-1251", "WINDOWS-1251", NULL};
/*
 * Encoding descriptor for Windows-1251: id, two name strings, the alias
 * list, no byte-length table, and the encoding-type flag.
 * NOTE(review): the two strings are presumably the encoding name and its
 * MIME name; the field layout lives in mbfilter.h - confirm.
 */
const mbfl_encoding mbfl_encoding_cp1251 = {
mbfl_no_encoding_cp1251,
"Windows-1251",
"Windows-1251",
(const char *(*)[])&mbfl_encoding_cp1251_aliases,
NULL,
MBFL_ENCTYPE_SBCS
};
/* Identify-filter table: encoding id, constructor, destructor, per-byte check. */
const struct mbfl_identify_vtbl vtbl_identify_cp1251 = {
mbfl_no_encoding_cp1251,
mbfl_filt_ident_common_ctor,
mbfl_filt_ident_common_dtor,
mbfl_filt_ident_cp1251
};
/*
 * wchar -> cp1251 converter table: source id, destination id,
 * constructor, destructor, per-character filter, flush.
 */
const struct mbfl_convert_vtbl vtbl_wchar_cp1251 = {
mbfl_no_encoding_wchar,
mbfl_no_encoding_cp1251,
mbfl_filt_conv_common_ctor,
mbfl_filt_conv_common_dtor,
mbfl_filt_conv_wchar_cp1251,
mbfl_filt_conv_common_flush
};
/* cp1251 -> wchar converter table (same layout as above). */
const struct mbfl_convert_vtbl vtbl_cp1251_wchar = {
mbfl_no_encoding_cp1251,
mbfl_no_encoding_wchar,
mbfl_filt_conv_common_ctor,
mbfl_filt_conv_common_dtor,
mbfl_filt_conv_cp1251_wchar,
mbfl_filt_conv_common_flush
};
/*
 * Evaluate statement; if it yields a negative value, make the enclosing
 * function return -1 immediately.
 */
#define CK(statement) do { if ((statement) < 0) return (-1); } while (0)
/*
* cp1251 => wchar
*/
/*
 * Convert one cp1251 byte to a wide character.
 *
 * Values below cp1251_ucs_table_min are passed through unchanged.
 * Values from there up to 0xff are looked up in cp1251_ucs_table; an
 * entry <= 0 means "no mapping", in which case the byte is emitted
 * tagged with MBFL_WCSPLANE_CP1251.  Anything outside 0..0xff is emitted
 * tagged with MBFL_WCSGROUP_THROUGH.
 *
 * Returns c, or -1 if the output function reports a negative value.
 */
int
mbfl_filt_conv_cp1251_wchar(int c, mbfl_convert_filter *filter)
{
	int wc;

	if (c >= 0 && c < cp1251_ucs_table_min) {
		wc = c;
	} else if (c >= cp1251_ucs_table_min && c < 0x100) {
		wc = cp1251_ucs_table[c - cp1251_ucs_table_min];
		if (wc <= 0) {
			/* unmapped byte: keep it, tagged as cp1251 */
			wc = (c & MBFL_WCSPLANE_MASK) | MBFL_WCSPLANE_CP1251;
		}
	} else {
		wc = (c & MBFL_WCSGROUP_MASK) | MBFL_WCSGROUP_THROUGH;
	}

	CK((*filter->output_function)(wc, filter->data));
	return c;
}
/*
* wchar => cp1251
*/
/*
 * Convert one wide character to cp1251.
 *
 * Values below 0x80 are used as they are.  Other values are searched for
 * in cp1251_ucs_table, scanning from the last entry down to the first; a
 * hit at index i yields byte cp1251_ucs_table_min + i.  Without a hit, a
 * value tagged with MBFL_WCSPLANE_CP1251 is reduced to its untagged
 * part.  A negative result is handed to mbfl_filt_conv_illegal_output()
 * unless the filter's illegal mode is MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE.
 *
 * Returns c, or -1 as soon as a callee reports a negative value.
 */
int
mbfl_filt_conv_wchar_cp1251(int c, mbfl_convert_filter *filter)
{
	int byte = -1;
	int idx;

	if (c < 0x80) {
		byte = c;
	} else {
		for (idx = cp1251_ucs_table_len - 1; idx >= 0; idx--) {
			if (cp1251_ucs_table[idx] == c) {
				byte = cp1251_ucs_table_min + idx;
				break;
			}
		}
		if (byte <= 0 && (c & ~MBFL_WCSPLANE_MASK) == MBFL_WCSPLANE_CP1251) {
			byte = c & MBFL_WCSPLANE_MASK;
		}
	}

	if (byte < 0) {
		if (filter->illegal_mode != MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE) {
			CK(mbfl_filt_conv_illegal_output(c, filter));
		}
		return c;
	}

	CK((*filter->output_function)(byte, filter->data));
	return c;
}
/*
 * Identify filter for cp1251: sets filter->flag from the current byte
 * alone - 0 for a byte in 0x80-0xfe, 1 ("not it") for any other value.
 * The flag is overwritten on every call, so only the last byte seen
 * decides its value.  (The original author's remark on this code:
 * "all of this is so ugly now!")
 *
 * Always returns c.
 */
static int mbfl_filt_ident_cp1251(int c, mbfl_identify_filter *filter)
{
	filter->flag = (c >= 0x80 && c < 0xff) ? 0 : 1;
	return c;
}

View file

@ -0,0 +1,44 @@
/*
* "streamable kanji code filter and converter"
* Copyright (c) 1998-2002 HappySize, Inc. All rights reserved.
*
* LICENSE NOTICES
*
* This file is part of "streamable kanji code filter and converter",
* which is distributed under the terms of GNU Lesser General Public
* License (version 2) as published by the Free Software Foundation.
*
* This software is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with "streamable kanji code filter and converter";
* if not, write to the Free Software Foundation, Inc., 59 Temple Place,
* Suite 330, Boston, MA 02111-1307 USA
*
* The author of this part: Den V. Tsopa <tdv@edisoft.ru>
*
*/
/*
* The source code included in this file was separated from mbfilter_ru.h
* by Moriyoshi Koizumi <moriyoshi@php.net> on 4 Dec 2002.
*
*/
#ifndef MBFL_MBFILTER_CP1251_H
#define MBFL_MBFILTER_CP1251_H
#include "mbfilter.h"
/* Encoding descriptor for Windows-1251. */
extern const mbfl_encoding mbfl_encoding_cp1251;
/* Identify-filter table for Windows-1251. */
extern const struct mbfl_identify_vtbl vtbl_identify_cp1251;
/* Conversion-filter tables: wchar -> cp1251 and cp1251 -> wchar. */
extern const struct mbfl_convert_vtbl vtbl_wchar_cp1251;
extern const struct mbfl_convert_vtbl vtbl_cp1251_wchar;
/* Per-character conversion callbacks; both return c, or -1 on failure. */
int mbfl_filt_conv_wchar_cp1251(int c, mbfl_convert_filter *filter);
int mbfl_filt_conv_cp1251_wchar(int c, mbfl_convert_filter *filter);
#endif /* MBFL_MBFILTER_CP1251_H */

View file

@ -0,0 +1,147 @@
/*
* "streamable kanji code filter and converter"
* Copyright (c) 1998-2002 HappySize, Inc. All rights reserved.
*
* LICENSE NOTICES
*
* This file is part of "streamable kanji code filter and converter",
* which is distributed under the terms of GNU Lesser General Public
* License (version 2) as published by the Free Software Foundation.
*
* This software is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with "streamable kanji code filter and converter";
* if not, write to the Free Software Foundation, Inc., 59 Temple Place,
* Suite 330, Boston, MA 02111-1307 USA
*
* The author of this part: Wez Furlong <wez@thebrainroom.com>
*
*/
/*
* The source code included in this file was separated from mbfilter.c
* by Moriyoshi Koizumi <moriyoshi@php.net> on 4 Dec 2002.
*
*/
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
#include "mbfilter.h"
#include "mbfilter_cp1252.h"
#include "unicode_table_cp1252.h"
/* Forward declaration; the identify filter is defined further down. */
static int mbfl_filt_ident_cp1252(int c, mbfl_identify_filter *filter);
/* NULL-terminated list of alternative names for Windows-1252. */
static const char *mbfl_encoding_cp1252_aliases[] = {"cp1252", NULL};
/*
 * Encoding descriptor for Windows-1252: id, two name strings, the alias
 * list, no byte-length table, and the encoding-type flag.
 * NOTE(review): the two strings are presumably the encoding name and its
 * MIME name; the field layout lives in mbfilter.h - confirm.
 */
const mbfl_encoding mbfl_encoding_cp1252 = {
mbfl_no_encoding_cp1252,
"Windows-1252",
"Windows-1252",
(const char *(*)[])&mbfl_encoding_cp1252_aliases,
NULL,
MBFL_ENCTYPE_SBCS
};
/* Identify-filter table: encoding id, constructor, destructor, per-byte check. */
const struct mbfl_identify_vtbl vtbl_identify_cp1252 = {
mbfl_no_encoding_cp1252,
mbfl_filt_ident_common_ctor,
mbfl_filt_ident_common_dtor,
mbfl_filt_ident_cp1252
};
/*
 * cp1252 -> wchar converter table: source id, destination id,
 * constructor, destructor, per-character filter, flush.
 */
const struct mbfl_convert_vtbl vtbl_cp1252_wchar = {
mbfl_no_encoding_cp1252,
mbfl_no_encoding_wchar,
mbfl_filt_conv_common_ctor,
mbfl_filt_conv_common_dtor,
mbfl_filt_conv_cp1252_wchar,
mbfl_filt_conv_common_flush
};
/* wchar -> cp1252 converter table (same layout as above). */
const struct mbfl_convert_vtbl vtbl_wchar_cp1252 = {
mbfl_no_encoding_wchar,
mbfl_no_encoding_cp1252,
mbfl_filt_conv_common_ctor,
mbfl_filt_conv_common_dtor,
mbfl_filt_conv_wchar_cp1252,
mbfl_filt_conv_common_flush
};
/*
 * Evaluate statement; if it yields a negative value, make the enclosing
 * function return -1 immediately.
 */
#define CK(statement) do { if ((statement) < 0) return (-1); } while (0)
/*
 * wchar => cp1252
 *
 * Converts one wide character `c` to a single cp1252 byte and hands it to
 * filter->output_function.
 *   - 0x00..0xff is emitted unchanged.
 *   - 0x100 and above is searched for in the 32-entry cp1252_ucs_table
 *     (scanned from the last entry down); a hit at index i yields byte
 *     0x80 + i.  The value 0xfffe never matches.
 *   - If the table has no match but `c` lies in MBFL_WCSPLANE_8859_1, the
 *     bits under MBFL_WCSPLANE_MASK are emitted.
 *   - Anything else goes to mbfl_filt_conv_illegal_output() unless the
 *     filter's illegal_mode is MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE.
 *
 * Returns c, or -1 when an output call reports failure (via CK).
 */
int mbfl_filt_conv_wchar_cp1252(int c, mbfl_convert_filter *filter)
{
	int out = -1;
	int idx;

	if (c >= 0 && c < 0x100) {
		out = c;
	} else if (c >= 0x100) {
		/* reverse scan of the 0x80..0x9f mapping table */
		for (idx = 31; idx >= 0; idx--) {
			if (c != 0xfffe && c == cp1252_ucs_table[idx]) {
				out = 0x80 + idx;
				break;
			}
		}
		/* a table hit is always >= 0x80, so "< 0" means "no hit" */
		if (out < 0 && (c & ~MBFL_WCSPLANE_MASK) == MBFL_WCSPLANE_8859_1) {
			out = c & MBFL_WCSPLANE_MASK;
		}
	}

	if (out < 0) {
		if (filter->illegal_mode != MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE) {
			CK(mbfl_filt_conv_illegal_output(c, filter));
		}
		return c;
	}

	CK((*filter->output_function)(out, filter->data));
	return c;
}
/*
 * cp1252 => wchar
 *
 * Bytes 0x80..0x9f are translated through cp1252_ucs_table; every other
 * value is forwarded unchanged.  The result is passed to
 * filter->output_function.
 *
 * Returns c, or -1 when the output call reports failure (via CK).
 */
int mbfl_filt_conv_cp1252_wchar(int c, mbfl_convert_filter *filter)
{
	int wc = c;

	if (c >= 0x80 && c < 0xa0) {
		wc = cp1252_ucs_table[c - 0x80];
	}

	CK((*filter->output_function)(wc, filter->data));
	return c;
}
/* We only distinguish the MS extensions to ISO-8859-1.
 * Actually, this is pretty much a NO-OP, since the identification
 * system doesn't allow us to discriminate between a positive match,
 * a possible match and a definite non-match.
 * The problem here is that cp1252 looks like SJIS for certain chars.
 *
 * The flag is rewritten on every call: 0 for a byte in 0x80..0x9f,
 * 1 ("not it") for any other value.  Always returns c.
 */
static int mbfl_filt_ident_cp1252(int c, mbfl_identify_filter *filter)
{
	int in_ms_extension = (c >= 0x80 && c < 0xa0);

	filter->flag = in_ms_extension ? 0 : 1;
	return c;
}

View file

@ -0,0 +1,43 @@
/*
* "streamable kanji code filter and converter"
* Copyright (c) 1998-2002 HappySize, Inc. All rights reserved.
*
* LICENSE NOTICES
*
* This file is part of "streamable kanji code filter and converter",
* which is distributed under the terms of GNU Lesser General Public
* License (version 2) as published by the Free Software Foundation.
*
* This software is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with "streamable kanji code filter and converter";
* if not, write to the Free Software Foundation, Inc., 59 Temple Place,
* Suite 330, Boston, MA 02111-1307 USA
*
* The author of this part: Wez Furlong <wez@thebrainroom.com>
*
*/
/*
* the source code included in this file was separated from mbfilter.c
* by moriyoshi koizumi <moriyoshi@php.net> on 4 dec 2002.
*
*/
/* Include guard for the Windows-1252 (cp1252) filter declarations. */
#ifndef MBFL_MBFILTER_CP1252_H
#define MBFL_MBFILTER_CP1252_H
/* Pulled in for the mbfl_* types used in the declarations below. */
#include "mbfilter.h"
/* Encoding descriptor and filter vtables defined in the cp1252 source file. */
extern const mbfl_encoding mbfl_encoding_cp1252;
extern const struct mbfl_identify_vtbl vtbl_identify_cp1252;
extern const struct mbfl_convert_vtbl vtbl_cp1252_wchar;
extern const struct mbfl_convert_vtbl vtbl_wchar_cp1252;
/* Per-character conversion callbacks: wchar -> cp1252 and cp1252 -> wchar. */
int mbfl_filt_conv_wchar_cp1252(int c, mbfl_convert_filter *filter);
int mbfl_filt_conv_cp1252_wchar(int c, mbfl_convert_filter *filter);
#endif /* MBFL_MBFILTER_CP1252_H */

View file

@ -0,0 +1,151 @@
/*
* "streamable kanji code filter and converter"
* Copyright (c) 1998-2002 HappySize, Inc. All rights reserved.
*
* LICENSE NOTICES
*
* This file is part of "streamable kanji code filter and converter",
* which is distributed under the terms of GNU Lesser General Public
* License (version 2) as published by the Free Software Foundation.
*
* This software is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with "streamable kanji code filter and converter";
* if not, write to the Free Software Foundation, Inc., 59 Temple Place,
* Suite 330, Boston, MA 02111-1307 USA
*
* The author of this part: Den V. Tsopa <tdv@edisoft.ru>
*
*/
/*
* The source code included in this file was separated from mbfilter_ru.c
* by moriyoshi koizumi <moriyoshi@php.net> on 4 dec 2002.
*
*/
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
#include "mbfilter.h"
#include "mbfilter_cp866.h"
#include "unicode_table_cp866.h"
/* Forward declaration: the identify callback is defined at the end of this file. */
static int mbfl_filt_ident_cp866(int c, mbfl_identify_filter *filter);
/* NULL-terminated list of alternative names accepted for this encoding. */
static const char *mbfl_encoding_cp866_aliases[] = {"CP866", "CP-866", "IBM-866", NULL};
/*
 * Encoding descriptor for CP866.
 * NOTE(review): the per-field meanings noted below are inferred from the
 * values; confirm against the mbfl_encoding definition.
 */
const mbfl_encoding mbfl_encoding_cp866 = {
mbfl_no_encoding_cp866, /* encoding id */
"CP866", /* presumably the canonical name */
"CP866", /* presumably the MIME name */
(const char *(*)[])&mbfl_encoding_cp866_aliases, /* alias list defined above */
NULL, /* presumably the byte-length table slot; unused here */
MBFL_ENCTYPE_SBCS /* single-byte character set */
};
/* Identify-filter vtable: encoding id, shared ctor/dtor, per-byte check. */
const struct mbfl_identify_vtbl vtbl_identify_cp866 = {
mbfl_no_encoding_cp866,
mbfl_filt_ident_common_ctor,
mbfl_filt_ident_common_dtor,
mbfl_filt_ident_cp866
};
/* Conversion vtable, wchar -> cp866: from id, to id, ctor, dtor, filter, flush. */
const struct mbfl_convert_vtbl vtbl_wchar_cp866 = {
mbfl_no_encoding_wchar,
mbfl_no_encoding_cp866,
mbfl_filt_conv_common_ctor,
mbfl_filt_conv_common_dtor,
mbfl_filt_conv_wchar_cp866,
mbfl_filt_conv_common_flush
};
/* Conversion vtable, cp866 -> wchar: from id, to id, ctor, dtor, filter, flush. */
const struct mbfl_convert_vtbl vtbl_cp866_wchar = {
mbfl_no_encoding_cp866,
mbfl_no_encoding_wchar,
mbfl_filt_conv_common_ctor,
mbfl_filt_conv_common_dtor,
mbfl_filt_conv_cp866_wchar,
mbfl_filt_conv_common_flush
};
/*
 * CK: evaluate `statement`; if its result is negative, return -1 from the
 * enclosing function. Used to propagate output-function failures.
 */
#define CK(statement) do { if ((statement) < 0) return (-1); } while (0)
/*
 * cp866 => wchar
 *
 * Maps one input value `c` to a wide character and passes it to
 * filter->output_function:
 *   - 0 <= c < cp866_ucs_table_min is forwarded unchanged;
 *   - cp866_ucs_table_min <= c < 0x100 is looked up in cp866_ucs_table;
 *     an entry <= 0 is replaced by c tagged with MBFL_WCSPLANE_CP866;
 *   - any other value is tagged with MBFL_WCSGROUP_THROUGH.
 *
 * Returns c, or -1 when the output call reports failure (via CK).
 */
int
mbfl_filt_conv_cp866_wchar(int c, mbfl_convert_filter *filter)
{
	int wc;

	if (c >= 0 && c < cp866_ucs_table_min) {
		wc = c;
	} else if (c >= cp866_ucs_table_min && c < 0x100) {
		wc = cp866_ucs_table[c - cp866_ucs_table_min];
		if (wc <= 0) {
			/* no mapping: keep the byte, marked as a cp866 plane value */
			wc = (c & MBFL_WCSPLANE_MASK) | MBFL_WCSPLANE_CP866;
		}
	} else {
		wc = (c & MBFL_WCSGROUP_MASK) | MBFL_WCSGROUP_THROUGH;
	}

	CK((*filter->output_function)(wc, filter->data));
	return c;
}
/*
 * wchar => cp866
 *
 * Converts one wide character `c` to a cp866 byte:
 *   - c < 0x80 is used as-is (a negative c therefore ends up on the
 *     illegal-character path below);
 *   - otherwise cp866_ucs_table is scanned from its last entry down and a
 *     hit at index i yields cp866_ucs_table_min + i;
 *   - with no usable hit, a value in MBFL_WCSPLANE_CP866 yields the bits
 *     under MBFL_WCSPLANE_MASK.
 * A non-negative result goes to filter->output_function; otherwise
 * mbfl_filt_conv_illegal_output() is called unless illegal_mode is
 * MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE.
 *
 * Returns c, or -1 when an output call reports failure (via CK).
 */
int
mbfl_filt_conv_wchar_cp866(int c, mbfl_convert_filter *filter)
{
	int out;
	int idx;

	if (c < 0x80) {
		out = c;
	} else {
		out = -1;
		for (idx = cp866_ucs_table_len - 1; idx >= 0; idx--) {
			if (cp866_ucs_table[idx] == c) {
				out = cp866_ucs_table_min + idx;
				break;
			}
		}
		if (out <= 0 && (c & ~MBFL_WCSPLANE_MASK) == MBFL_WCSPLANE_CP866) {
			out = c & MBFL_WCSPLANE_MASK;
		}
	}

	if (out < 0) {
		if (filter->illegal_mode != MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE) {
			CK(mbfl_filt_conv_illegal_output(c, filter));
		}
		return c;
	}

	CK((*filter->output_function)(out, filter->data));
	return c;
}
/*
 * Identify filter for cp866.
 *
 * The flag is rewritten on every call: 0 for a value in 0x80..0xfe,
 * 1 ("not it") for anything else.  Always returns c.
 */
static int mbfl_filt_ident_cp866(int c, mbfl_identify_filter *filter)
{
	int in_high_range = (c >= 0x80 && c < 0xff);

	filter->flag = in_high_range ? 0 : 1;
	return c;
}

View file

@ -0,0 +1,41 @@
/*
* "streamable kanji code filter and converter"
* Copyright (c) 1998-2002 HappySize, Inc. All rights reserved.
*
* LICENSE NOTICES
*
* This file is part of "streamable kanji code filter and converter",
* which is distributed under the terms of GNU Lesser General Public
* License (version 2) as published by the Free Software Foundation.
*
* This software is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with "streamable kanji code filter and converter";
* if not, write to the Free Software Foundation, Inc., 59 Temple Place,
* Suite 330, Boston, MA 02111-1307 USA
*
* The author of this part: Den V. Tsopa <tdv@edisoft.ru>
*
*/
/*
* The source code included in this file was separated from mbfilter_ru.h
* by moriyoshi koizumi <moriyoshi@php.net> on 4 dec 2002.
*
*/
#ifndef MBFL_MBFILTER_CP866_H
#define MBFL_MBFILTER_CP866_H
extern const mbfl_encoding mbfl_encoding_cp866;
extern const struct mbfl_identify_vtbl vtbl_identify_cp866;
extern const struct mbfl_convert_vtbl vtbl_wchar_cp866;
extern const struct mbfl_convert_vtbl vtbl_cp866_wchar;
int mbfl_filt_conv_cp866_wchar(int c, mbfl_convert_filter *filter);
int mbfl_filt_conv_wchar_cp866(int c, mbfl_convert_filter *filter);
#endif /* MBFL_MBFILTER_CP866_H */

View file

@ -0,0 +1,350 @@
/*
* "streamable kanji code filter and converter"
* Copyright (c) 1998-2002 HappySize, Inc. All rights reserved.
*
* LICENSE NOTICES
*
* This file is part of "streamable kanji code filter and converter",
* which is distributed under the terms of GNU Lesser General Public
* License (version 2) as published by the Free Software Foundation.
*
* This software is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with "streamable kanji code filter and converter";
* if not, write to the Free Software Foundation, Inc., 59 Temple Place,
* Suite 330, Boston, MA 02111-1307 USA
*
* The author of this file:
*
*/
/*
* the source code included in this file was separated from mbfilter_ja.c
* by moriyoshi koizumi <moriyoshi@php.net> on 4 dec 2002.
*
*/
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
#include "mbfilter.h"
#include "mbfilter_cp932.h"
#include "unicode_table_cp932_ext.h"
#include "unicode_table_jis.h"
/* Forward declaration: the identify callback is defined at the end of this file. */
static int mbfl_filt_ident_sjiswin(int c, mbfl_identify_filter *filter);
/*
 * 256-entry table indexed by byte value: entries for 0x80-0x9f and
 * 0xe0-0xff are 2, all others are 1.
 * NOTE(review): presumably the character length implied by a lead byte;
 * confirm against the users of mbfl_encoding.
 */
static const unsigned char mblen_table_sjis[] = { /* 0x80-0x9f,0xE0-0xFF */
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
};
/* NULL-terminated list of alternative names accepted for this encoding. */
static const char *mbfl_encoding_sjis_win_aliases[] = {"SJIS-open", "CP932", "Windows-31J", "MS_Kanji", NULL};
/*
 * Encoding descriptor for SJIS-win.
 * NOTE(review): the per-field meanings noted below are inferred from the
 * values; confirm against the mbfl_encoding definition.
 */
const mbfl_encoding mbfl_encoding_sjis_win = {
mbfl_no_encoding_sjis_win, /* encoding id */
"SJIS-win", /* presumably the canonical name */
"Shift_JIS", /* presumably the MIME name */
(const char *(*)[])&mbfl_encoding_sjis_win_aliases, /* alias list defined above */
mblen_table_sjis, /* byte-length table defined above */
MBFL_ENCTYPE_MBCS /* multi-byte character set */
};
/* Identify-filter vtable: encoding id, shared ctor/dtor, per-byte check. */
const struct mbfl_identify_vtbl vtbl_identify_sjiswin = {
mbfl_no_encoding_sjis_win,
mbfl_filt_ident_common_ctor,
mbfl_filt_ident_common_dtor,
mbfl_filt_ident_sjiswin
};
/* Conversion vtable, SJIS-win -> wchar: from id, to id, ctor, dtor, filter, flush. */
const struct mbfl_convert_vtbl vtbl_sjiswin_wchar = {
mbfl_no_encoding_sjis_win,
mbfl_no_encoding_wchar,
mbfl_filt_conv_common_ctor,
mbfl_filt_conv_common_dtor,
mbfl_filt_conv_sjiswin_wchar,
mbfl_filt_conv_common_flush
};
/* Conversion vtable, wchar -> SJIS-win: from id, to id, ctor, dtor, filter, flush. */
const struct mbfl_convert_vtbl vtbl_wchar_sjiswin = {
mbfl_no_encoding_wchar,
mbfl_no_encoding_sjis_win,
mbfl_filt_conv_common_ctor,
mbfl_filt_conv_common_dtor,
mbfl_filt_conv_wchar_sjiswin,
mbfl_filt_conv_common_flush
};
/*
 * CK: evaluate `statement`; if its result is negative, return -1 from the
 * enclosing function. Used to propagate output-function failures.
 */
#define CK(statement) do { if ((statement) < 0) return (-1); } while (0)
/*
 * SJIS_ENCODE(c1, c2, s1, s2)
 *
 * Converts a two-byte JIS code (c1 = first byte, c2 = second byte, both
 * counted from 0x21) into the corresponding Shift_JIS byte pair, stored
 * in the lvalues s1 (lead byte) and s2 (trail byte).
 * Example: (0x21, 0x21) -> (0x81, 0x40); (0x30, 0x21) -> (0x88, 0x9f).
 *
 * c1 and c2 are evaluated more than once, so pass side-effect-free
 * expressions.  Every parameter is parenthesized so that arguments such
 * as `*p` or `a + b` expand correctly (an unparenthesized `s1--` would
 * turn `*p` into `*p--` and move the pointer instead of the value).
 */
#define SJIS_ENCODE(c1,c2,s1,s2)	\
	do {				\
		(s1) = (c1);		\
		(s1)--;			\
		(s1) >>= 1;		\
		if ((c1) < 0x5f) {	\
			(s1) += 0x71;	\
		} else {		\
			(s1) += 0xb1;	\
		}			\
		(s2) = (c2);		\
		if ((c1) & 1) {		\
			if ((c2) < 0x60) {	\
				(s2)--;	\
			}		\
			(s2) += 0x20;	\
		} else {		\
			(s2) += 0x7e;	\
		}			\
	} while (0)
/*
 * SJIS_DECODE(c1, c2, s1, s2)
 *
 * Inverse of SJIS_ENCODE: converts a Shift_JIS byte pair (c1 = lead byte,
 * c2 = trail byte) into a two-byte JIS code stored in the lvalues s1 and
 * s2 (both counted from 0x21).
 * Example: (0x81, 0x40) -> (0x21, 0x21); (0x88, 0x9f) -> (0x30, 0x21).
 *
 * No range validation is done here; callers must check the bytes first.
 * Every parameter is parenthesized so that arguments such as `*p` expand
 * correctly (an unparenthesized `s1++` would turn `*p` into `*p++`).
 */
#define SJIS_DECODE(c1,c2,s1,s2)	\
	do {				\
		(s1) = (c1);		\
		if ((s1) < 0xa0) {	\
			(s1) -= 0x81;	\
		} else {		\
			(s1) -= 0xc1;	\
		}			\
		(s1) <<= 1;		\
		(s1) += 0x21;		\
		(s2) = (c2);		\
		if ((s2) < 0x9f) {	\
			if ((s2) < 0x7f) {	\
				(s2)++;	\
			}		\
			(s2) -= 0x20;	\
		} else {		\
			(s1)++;		\
			(s2) -= 0x7e;	\
		}			\
	} while (0)
/*
 * SJIS-win => wchar
 *
 * Byte-at-a-time decoder.  filter->status is 0 when a new character is
 * expected and 1 when a lead byte is held in filter->cache.
 *
 * status 0:
 *   - 0x00..0x7f is emitted unchanged;
 *   - 0xa1..0xdf (kana) is emitted as 0xfec0 + c;
 *   - 0x81..0xfc except 0xa0 is stored as a lead byte;
 *   - anything else is emitted tagged with MBFL_WCSGROUP_THROUGH.
 * status 1:
 *   - a valid trail byte (0x40..0xfc except 0x7f) is combined with the
 *     lead byte, converted to a linear JIS index and looked up (CP932
 *     overrides, vendor extension tables, JIS X 0208, user area); an
 *     unmapped code is emitted tagged with MBFL_WCSPLANE_WINCP932;
 *   - a control byte (< 0x21 or 0x7f) is emitted on its own and the
 *     pending lead byte is dropped;
 *   - any other byte is emitted together with the lead byte, tagged with
 *     MBFL_WCSGROUP_THROUGH.
 *
 * The trail-byte lower bound is 0x40, the same bound the identify filter
 * for this encoding uses.  Accepting 0x3a..0x3f (as an earlier `c > 0x39`
 * test did) makes SJIS_DECODE produce a cell number below 0x21, which
 * then indexes the previous row of the tables and yields a wrong
 * character.
 *
 * Returns c, or -1 when an output call reports failure (via CK).
 */
int
mbfl_filt_conv_sjiswin_wchar(int c, mbfl_convert_filter *filter)
{
	int c1, s, s1, s2, w;

	switch (filter->status) {
	case 0:
		if (c >= 0 && c < 0x80) {	/* latin */
			CK((*filter->output_function)(c, filter->data));
		} else if (c > 0xa0 && c < 0xe0) {	/* kana */
			CK((*filter->output_function)(0xfec0 + c, filter->data));
		} else if (c > 0x80 && c < 0xfd && c != 0xa0) {	/* kanji first char */
			filter->status = 1;
			filter->cache = c;
		} else {
			w = c & MBFL_WCSGROUP_MASK;
			w |= MBFL_WCSGROUP_THROUGH;
			CK((*filter->output_function)(w, filter->data));
		}
		break;

	case 1:		/* kanji second char */
		filter->status = 0;
		c1 = filter->cache;
		if (c >= 0x40 && c < 0xfd && c != 0x7f) {
			w = 0;
			SJIS_DECODE(c1, c, s1, s2);
			/* linear index: 94 cells per row, both counted from 0x21 */
			s = (s1 - 0x21)*94 + s2 - 0x21;
			if (s <= 137) {
				/* code points where CP932 differs from the plain JIS table */
				if (s == 31) {
					w = 0xff3c;			/* FULLWIDTH REVERSE SOLIDUS */
				} else if (s == 32) {
					w = 0xff5e;			/* FULLWIDTH TILDE */
				} else if (s == 33) {
					w = 0x2225;			/* PARALLEL TO */
				} else if (s == 60) {
					w = 0xff0d;			/* FULLWIDTH HYPHEN-MINUS */
				} else if (s == 80) {
					w = 0xffe0;			/* FULLWIDTH CENT SIGN */
				} else if (s == 81) {
					w = 0xffe1;			/* FULLWIDTH POUND SIGN */
				} else if (s == 137) {
					w = 0xffe2;			/* FULLWIDTH NOT SIGN */
				}
			}
			if (w == 0) {
				if (s >= cp932ext1_ucs_table_min && s < cp932ext1_ucs_table_max) {		/* vendor ext1 (13ku) */
					w = cp932ext1_ucs_table[s - cp932ext1_ucs_table_min];
				} else if (s >= 0 && s < jisx0208_ucs_table_size) {		/* X 0208 */
					w = jisx0208_ucs_table[s];
				} else if (s >= cp932ext2_ucs_table_min && s < cp932ext2_ucs_table_max) {		/* vendor ext2 (89ku - 92ku) */
					w = cp932ext2_ucs_table[s - cp932ext2_ucs_table_min];
				} else if (s >= cp932ext3_ucs_table_min && s < cp932ext3_ucs_table_max) {		/* vendor ext3 (115ku - 119ku) */
					w = cp932ext3_ucs_table[s - cp932ext3_ucs_table_min];
				} else if (s >= (94*94) && s < (114*94)) {		/* user (95ku - 114ku) */
					w = s - (94*94) + 0xe000;
				}
			}
			if (w <= 0) {
				/* unmapped: carry the JIS code through in the CP932 plane */
				w = (s1 << 8) | s2;
				w &= MBFL_WCSPLANE_MASK;
				w |= MBFL_WCSPLANE_WINCP932;
			}
			CK((*filter->output_function)(w, filter->data));
		} else if ((c >= 0 && c < 0x21) || c == 0x7f) {		/* CTLs */
			CK((*filter->output_function)(c, filter->data));
		} else {
			/* invalid trail byte: pass both bytes through untouched */
			w = (c1 << 8) | c;
			w &= MBFL_WCSGROUP_MASK;
			w |= MBFL_WCSGROUP_THROUGH;
			CK((*filter->output_function)(w, filter->data));
		}
		break;

	default:
		filter->status = 0;
		break;
	}

	return c;
}
/*
 * wchar => SJIS-win
 *
 * Converts one wide character c to SJIS-win and passes the resulting
 * byte(s) to filter->output_function.  A code below 0x100 is emitted as
 * one byte; anything larger is split into two bytes and run through
 * SJIS_ENCODE.  Characters with no mapping go to
 * mbfl_filt_conv_illegal_output() unless illegal_mode is
 * MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE.
 *
 * Returns c, or -1 when an output call reports failure (via CK).
 */
int
mbfl_filt_conv_wchar_sjiswin(int c, mbfl_convert_filter *filter)
{
int c1, c2, s1, s2;
/*
 * s1 holds the intermediate 16-bit code.  s2 is used as a flag until
 * SJIS_ENCODE overwrites it: 1 marks a code taken from the user area or
 * the WINCP932 plane, which the "X 0212" test below must not reject.
 */
s1 = 0;
s2 = 0;
/* Step 1: direct lookup in the Unicode -> JIS range tables. */
if (c >= ucs_a1_jis_table_min && c < ucs_a1_jis_table_max) {
s1 = ucs_a1_jis_table[c - ucs_a1_jis_table_min];
} else if (c >= ucs_a2_jis_table_min && c < ucs_a2_jis_table_max) {
s1 = ucs_a2_jis_table[c - ucs_a2_jis_table_min];
} else if (c >= ucs_i_jis_table_min && c < ucs_i_jis_table_max) {
s1 = ucs_i_jis_table[c - ucs_i_jis_table_min];
} else if (c >= ucs_r_jis_table_min && c < ucs_r_jis_table_max) {
s1 = ucs_r_jis_table[c - ucs_r_jis_table_min];
} else if (c >= 0xe000 && c < (0xe000 + 20*94)) { /* user (95ku - 114ku) */
/* 94 cells per row; rows start at 0x7f, cells at 0x21 */
s1 = c - 0xe000;
c1 = s1/94 + 0x7f;
c2 = s1%94 + 0x21;
s1 = (c1 << 8) | c2;
s2 = 1;
}
/* Step 2: nothing found - try pass-through planes and CP932-specific characters. */
if (s1 <= 0) {
c1 = c & ~MBFL_WCSPLANE_MASK;
if (c1 == MBFL_WCSPLANE_WINCP932) {
s1 = c & MBFL_WCSPLANE_MASK;
s2 = 1;
} else if (c1 == MBFL_WCSPLANE_JIS0208) {
s1 = c & MBFL_WCSPLANE_MASK;
} else if (c1 == MBFL_WCSPLANE_JIS0212) {
s1 = c & MBFL_WCSPLANE_MASK;
/* 0x8080 marks the code as X 0212 so that step 3 discards it */
s1 |= 0x8080;
} else if (c == 0xa5) { /* YEN SIGN */
s1 = 0x216f; /* FULLWIDTH YEN SIGN */
} else if (c == 0x203e) { /* OVER LINE */
s1 = 0x2131; /* FULLWIDTH MACRON */
} else if (c == 0xff3c) { /* FULLWIDTH REVERSE SOLIDUS */
s1 = 0x2140;
} else if (c == 0xff5e) { /* FULLWIDTH TILDE */
s1 = 0x2141;
} else if (c == 0x2225) { /* PARALLEL TO */
s1 = 0x2142;
} else if (c == 0xff0d) { /* FULLWIDTH HYPHEN-MINUS */
s1 = 0x215d;
} else if (c == 0xffe0) { /* FULLWIDTH CENT SIGN */
s1 = 0x2171;
} else if (c == 0xffe1) { /* FULLWIDTH POUND SIGN */
s1 = 0x2172;
} else if (c == 0xffe2) { /* FULLWIDTH NOT SIGN */
s1 = 0x224c;
}
}
/* Step 3: still nothing usable - linear search of the vendor extension tables. */
if ((s1 <= 0) || (s1 >= 0x8080 && s2 == 0)) { /* not found or X 0212 */
s1 = -1;
c1 = 0;
c2 = cp932ext1_ucs_table_max - cp932ext1_ucs_table_min;
while (c1 < c2) { /* CP932 vendor ext1 (13ku) */
if (c == cp932ext1_ucs_table[c1]) {
/* table index -> row byte (from 0x2d) and cell byte (from 0x21) */
s1 = ((c1/94 + 0x2d) << 8) + (c1%94 + 0x21);
break;
}
c1++;
}
if (s1 <= 0) {
c1 = 0;
c2 = cp932ext3_ucs_table_max - cp932ext3_ucs_table_min;
while (c1 < c2) { /* CP932 vendor ext3 (115ku - 119ku) */
if (c == cp932ext3_ucs_table[c1]) {
/* table index -> row byte (from 0x93) and cell byte (from 0x21) */
s1 = ((c1/94 + 0x93) << 8) + (c1%94 + 0x21);
break;
}
c1++;
}
}
/* U+0000 maps to byte 0; every other miss is marked illegal (-1) */
if (c == 0) {
s1 = 0;
} else if (s1 <= 0) {
s1 = -1;
}
}
/* Step 4: emit the result, or report an illegal character. */
if (s1 >= 0) {
if (s1 < 0x100) { /* latin or kana */
CK((*filter->output_function)(s1, filter->data));
} else { /* kanji */
c1 = (s1 >> 8) & 0xff;
c2 = s1 & 0xff;
/* from here on s1/s2 are the two Shift_JIS output bytes */
SJIS_ENCODE(c1, c2, s1, s2);
CK((*filter->output_function)(s1, filter->data));
CK((*filter->output_function)(s2, filter->data));
}
} else {
if (filter->illegal_mode != MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE) {
CK(mbfl_filt_conv_illegal_output(c, filter));
}
}
return c;
}
/*
 * Identify filter for SJIS-win.
 *
 * filter->status is non-zero while a trail byte is expected.  The flag
 * is set to 1 when a byte cannot belong to SJIS-win; this function never
 * clears it.  Always returns c.
 */
static int mbfl_filt_ident_sjiswin(int c, mbfl_identify_filter *filter)
{
	if (filter->status) {
		/* kanji second char: must be 0x40..0xfc and not 0x7f */
		if (c < 0x40 || c > 0xfc || c == 0x7f) {
			filter->flag = 1;
		}
		filter->status = 0;
		return c;
	}

	if ((c >= 0 && c < 0x80) || (c > 0xa0 && c < 0xe0)) {
		/* latin or kana: a complete one-byte character */
		return c;
	}

	if (c > 0x80 && c < 0xfd && c != 0xa0) {
		/* kanji first char */
		filter->status = 1;
	} else {
		/* bad */
		filter->flag = 1;
	}
	return c;
}

View file

@ -0,0 +1,43 @@
/*
* "streamable kanji code filter and converter"
* Copyright (c) 1998-2002 HappySize, Inc. All rights reserved.
*
* LICENSE NOTICES
*
* This file is part of "streamable kanji code filter and converter",
* which is distributed under the terms of GNU Lesser General Public
* License (version 2) as published by the Free Software Foundation.
*
* This software is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with "streamable kanji code filter and converter";
* if not, write to the Free Software Foundation, Inc., 59 Temple Place,
* Suite 330, Boston, MA 02111-1307 USA
*
* The author of this file:
*
*/
/*
* the source code included in this file was separated from mbfilter_ja.c
* by moriyoshi koizumi <moriyoshi@php.net> on 4 dec 2002.
*
*/
/* Include guard for the SJIS-win (CP932) filter declarations. */
#ifndef MBFL_MBFILTER_CP932_H
#define MBFL_MBFILTER_CP932_H
/* Pulled in for the mbfl_* types used in the declarations below. */
#include "mbfilter.h"
/* Encoding descriptor and filter vtables defined in the CP932 source file. */
extern const mbfl_encoding mbfl_encoding_sjis_win;
extern const struct mbfl_identify_vtbl vtbl_identify_sjiswin;
extern const struct mbfl_convert_vtbl vtbl_sjiswin_wchar;
extern const struct mbfl_convert_vtbl vtbl_wchar_sjiswin;
/* Per-character conversion callbacks: SJIS-win -> wchar and wchar -> SJIS-win. */
int mbfl_filt_conv_sjiswin_wchar(int c, mbfl_convert_filter *filter);
int mbfl_filt_conv_wchar_sjiswin(int c, mbfl_convert_filter *filter);
#endif /* MBFL_MBFILTER_CP932_H */

View file

@ -0,0 +1,228 @@
/*
* "streamable kanji code filter and converter"
* Copyright (c) 1998-2002 HappySize, Inc. All rights reserved.
*
* LICENSE NOTICES
*
* This file is part of "streamable kanji code filter and converter",
* which is distributed under the terms of GNU Lesser General Public
* License (version 2) as published by the Free Software Foundation.
*
* This software is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with "streamable kanji code filter and converter";
* if not, write to the Free Software Foundation, Inc., 59 Temple Place,
* Suite 330, Boston, MA 02111-1307 USA
*
* The author of this file:
*
*/
/*
* the source code included in this file was separated from mbfilter_cn.c
* by moriyoshi koizumi <moriyoshi@php.net> on 4 dec 2002.
*
*/
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
#include "mbfilter.h"
#include "mbfilter_cp936.h"
#include "unicode_table_cp936.h"
/* Forward declaration: the identify callback is defined at the end of this file. */
static int mbfl_filt_ident_cp936(int c, mbfl_identify_filter *filter);
/*
 * 256-entry table indexed by byte value: entries for 0x81-0xfe are 2,
 * all others are 1.
 * NOTE(review): presumably the character length implied by a lead byte;
 * confirm against the users of mbfl_encoding.
 */
static const unsigned char mblen_table_cp936[] = { /* 0x81-0xFE */
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1
};
/* NULL-terminated list of alternative names accepted for this encoding. */
static const char *mbfl_encoding_cp936_aliases[] = {"CP-936", "GBK", NULL};
/*
 * Encoding descriptor for CP936.
 * NOTE(review): the per-field meanings noted below are inferred from the
 * values; confirm against the mbfl_encoding definition.
 */
const mbfl_encoding mbfl_encoding_cp936 = {
mbfl_no_encoding_cp936, /* encoding id */
"CP936", /* presumably the canonical name */
"CP936", /* presumably the MIME name */
(const char *(*)[])&mbfl_encoding_cp936_aliases, /* alias list defined above */
mblen_table_cp936, /* byte-length table defined above */
MBFL_ENCTYPE_MBCS /* multi-byte character set */
};
/* Identify-filter vtable: encoding id, shared ctor/dtor, per-byte check. */
const struct mbfl_identify_vtbl vtbl_identify_cp936 = {
mbfl_no_encoding_cp936,
mbfl_filt_ident_common_ctor,
mbfl_filt_ident_common_dtor,
mbfl_filt_ident_cp936
};
/* Conversion vtable, CP936 -> wchar: from id, to id, ctor, dtor, filter, flush. */
const struct mbfl_convert_vtbl vtbl_cp936_wchar = {
mbfl_no_encoding_cp936,
mbfl_no_encoding_wchar,
mbfl_filt_conv_common_ctor,
mbfl_filt_conv_common_dtor,
mbfl_filt_conv_cp936_wchar,
mbfl_filt_conv_common_flush
};
/* Conversion vtable, wchar -> CP936: from id, to id, ctor, dtor, filter, flush. */
const struct mbfl_convert_vtbl vtbl_wchar_cp936 = {
mbfl_no_encoding_wchar,
mbfl_no_encoding_cp936,
mbfl_filt_conv_common_ctor,
mbfl_filt_conv_common_dtor,
mbfl_filt_conv_wchar_cp936,
mbfl_filt_conv_common_flush
};
/*
 * CK: evaluate `statement`; if its result is negative, return -1 from the
 * enclosing function. Used to propagate output-function failures.
 */
#define CK(statement) do { if ((statement) < 0) return (-1); } while (0)
/*
 * CP936 => wchar
 *
 * Byte-at-a-time decoder.  filter->status is 0 when a new character is
 * expected and 1 when a lead byte is held in filter->cache.
 *
 * status 0:
 *   - 0x00..0x7f is emitted unchanged;
 *   - 0x80 is emitted as U+20AC (euro sign);
 *   - 0x81..0xfe is stored as a lead byte;
 *   - anything else is emitted tagged with MBFL_WCSGROUP_THROUGH.
 * status 1:
 *   - a valid trail byte (0x40..0xfe except 0x7f) is combined with the
 *     lead byte and looked up in cp936_ucs_table (192 columns per lead
 *     byte, first column = trail byte 0x40); an unmapped code is emitted
 *     tagged with MBFL_WCSPLANE_WINCP936;
 *   - a control byte (< 0x21 or 0x7f) is emitted on its own and the
 *     pending lead byte is dropped;
 *   - any other byte is emitted together with the lead byte, tagged with
 *     MBFL_WCSGROUP_THROUGH.
 *
 * The trail-byte lower bound is 0x40, the same bound the identify filter
 * for this encoding uses.  Accepting 0x3a..0x3f (as an earlier `c > 0x39`
 * test did) gives a negative column offset, which indexes the tail of the
 * previous lead byte's row and yields a wrong character.
 *
 * Returns c, or -1 when an output call reports failure (via CK).
 */
int
mbfl_filt_conv_cp936_wchar(int c, mbfl_convert_filter *filter)
{
	int c1, w;

	switch (filter->status) {
	case 0:
		if (c >= 0 && c < 0x80) {	/* latin */
			CK((*filter->output_function)(c, filter->data));
		} else if (c == 0x80) {	/* euro sign */
			CK((*filter->output_function)(0x20ac, filter->data));
		} else if (c > 0x80 && c < 0xff) {	/* dbcs lead byte */
			filter->status = 1;
			filter->cache = c;
		} else {
			w = c & MBFL_WCSGROUP_MASK;
			w |= MBFL_WCSGROUP_THROUGH;
			CK((*filter->output_function)(w, filter->data));
		}
		break;

	case 1:		/* dbcs second byte */
		filter->status = 0;
		c1 = filter->cache;
		if (c1 < 0xff && c1 > 0x80 && c >= 0x40 && c < 0xff && c != 0x7f) {
			w = (c1 - 0x81)*192 + (c - 0x40);
			if (w >= 0 && w < cp936_ucs_table_size) {
				w = cp936_ucs_table[w];
			} else {
				w = 0;
			}
			if (w <= 0) {
				/* unmapped: carry the byte pair through in the CP936 plane */
				w = (c1 << 8) | c;
				w &= MBFL_WCSPLANE_MASK;
				w |= MBFL_WCSPLANE_WINCP936;
			}
			CK((*filter->output_function)(w, filter->data));
		} else if ((c >= 0 && c < 0x21) || c == 0x7f) {		/* CTLs */
			CK((*filter->output_function)(c, filter->data));
		} else {
			/* invalid trail byte: pass both bytes through untouched */
			w = (c1 << 8) | c;
			w &= MBFL_WCSGROUP_MASK;
			w |= MBFL_WCSGROUP_THROUGH;
			CK((*filter->output_function)(w, filter->data));
		}
		break;

	default:
		filter->status = 0;
		break;
	}

	return c;
}
/*
 * wchar => CP936
 *
 * Looks the wide character `c` up in the Unicode -> CP936 range tables.
 * When no table yields a positive code, a value in
 * MBFL_WCSPLANE_WINCP936 contributes the bits under MBFL_WCSPLANE_MASK,
 * U+0000 maps to 0, and everything else is treated as illegal.
 * A code below 0x80 is emitted as one byte, a larger one as two bytes
 * (high byte first).  Illegal characters go to
 * mbfl_filt_conv_illegal_output() unless illegal_mode is
 * MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE.
 *
 * Returns c, or -1 when an output call reports failure (via CK).
 */
int
mbfl_filt_conv_wchar_cp936(int c, mbfl_convert_filter *filter)
{
	int code = 0;

	if (c >= ucs_a1_cp936_table_min && c < ucs_a1_cp936_table_max) {
		code = ucs_a1_cp936_table[c - ucs_a1_cp936_table_min];
	} else if (c >= ucs_a2_cp936_table_min && c < ucs_a2_cp936_table_max) {
		code = ucs_a2_cp936_table[c - ucs_a2_cp936_table_min];
	} else if (c >= ucs_a3_cp936_table_min && c < ucs_a3_cp936_table_max) {
		code = ucs_a3_cp936_table[c - ucs_a3_cp936_table_min];
	} else if (c >= ucs_i_cp936_table_min && c < ucs_i_cp936_table_max) {
		code = ucs_i_cp936_table[c - ucs_i_cp936_table_min];
	} else if (c >= ucs_ci_cp936_table_min && c < ucs_ci_cp936_table_max) {
		code = ucs_ci_cp936_table[c - ucs_ci_cp936_table_min];
	} else if (c >= ucs_cf_cp936_table_min && c < ucs_cf_cp936_table_max) {
		code = ucs_cf_cp936_table[c - ucs_cf_cp936_table_min];
	} else if (c >= ucs_sfv_cp936_table_min && c < ucs_sfv_cp936_table_max) {
		code = ucs_sfv_cp936_table[c - ucs_sfv_cp936_table_min];
	} else if (c >= ucs_hff_cp936_table_min && c < ucs_hff_cp936_table_max) {
		code = ucs_hff_cp936_table[c - ucs_hff_cp936_table_min];
	}

	if (code <= 0) {
		/* no table hit: accept pass-through plane values and U+0000 only */
		if ((c & ~MBFL_WCSPLANE_MASK) == MBFL_WCSPLANE_WINCP936) {
			code = c & MBFL_WCSPLANE_MASK;
		}
		if (c == 0) {
			code = 0;
		} else if (code <= 0) {
			code = -1;
		}
	}

	if (code < 0) {
		if (filter->illegal_mode != MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE) {
			CK(mbfl_filt_conv_illegal_output(c, filter));
		}
		return c;
	}

	if (code < 0x80) {	/* latin */
		CK((*filter->output_function)(code, filter->data));
		return c;
	}

	CK((*filter->output_function)((code >> 8) & 0xff, filter->data));
	CK((*filter->output_function)(code & 0xff, filter->data));
	return c;
}
/*
 * Identify filter for CP936.
 *
 * filter->status is non-zero while a trail byte is expected.  The flag
 * is set to 1 when a byte cannot belong to CP936; this function never
 * clears it.  Always returns c.
 */
static int mbfl_filt_ident_cp936(int c, mbfl_identify_filter *filter)
{
	if (filter->status) {
		/* second byte: must be 0x40..0xfe and not 0x7f */
		if (c < 0x40 || c > 0xfe || c == 0x7f) {
			filter->flag = 1;
		}
		filter->status = 0;
		return c;
	}

	if (c >= 0 && c < 0x80) {
		/* latin ok */
		return c;
	}

	if (c > 0x80 && c < 0xff) {
		/* DBCS lead byte */
		filter->status = 1;
	} else {
		/* bad */
		filter->flag = 1;
	}
	return c;
}

View file

@ -0,0 +1,43 @@
/*
* "streamable kanji code filter and converter"
* Copyright (c) 1998-2002 HappySize, Inc. All rights reserved.
*
* LICENSE NOTICES
*
* This file is part of "streamable kanji code filter and converter",
* which is distributed under the terms of GNU Lesser General Public
* License (version 2) as published by the Free Software Foundation.
*
* This software is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with "streamable kanji code filter and converter";
* if not, write to the Free Software Foundation, Inc., 59 Temple Place,
* Suite 330, Boston, MA 02111-1307 USA
*
* The author of this file:
*
*/
/*
* the source code included in this file was separated from mbfilter_cn.h
* by moriyoshi koizumi <moriyoshi@php.net> on 4 dec 2002.
*
*/
/* Include guard for the CP936 filter declarations. */
#ifndef MBFL_MBFILTER_CP936_H
#define MBFL_MBFILTER_CP936_H
/* Pulled in for the mbfl_* types used in the declarations below. */
#include "mbfilter.h"
/* Encoding descriptor and filter vtables defined in the CP936 source file. */
extern const mbfl_encoding mbfl_encoding_cp936;
extern const struct mbfl_identify_vtbl vtbl_identify_cp936;
extern const struct mbfl_convert_vtbl vtbl_cp936_wchar;
extern const struct mbfl_convert_vtbl vtbl_wchar_cp936;
/* Per-character conversion callbacks: CP936 -> wchar and wchar -> CP936. */
int mbfl_filt_conv_cp936_wchar(int c, mbfl_convert_filter *filter);
int mbfl_filt_conv_wchar_cp936(int c, mbfl_convert_filter *filter);
#endif /* MBFL_MBFILTER_CP936_H */

View file

@ -0,0 +1,236 @@
/*
* "streamable kanji code filter and converter"
* Copyright (c) 1998-2002 HappySize, Inc. All rights reserved.
*
* LICENSE NOTICES
*
* This file is part of "streamable kanji code filter and converter",
* which is distributed under the terms of GNU Lesser General Public
* License (version 2) as published by the Free Software Foundation.
*
* This software is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with "streamable kanji code filter and converter";
* if not, write to the Free Software Foundation, Inc., 59 Temple Place,
* Suite 330, Boston, MA 02111-1307 USA
*
* The author of this file:
*
*/
/*
* The source code included in this file was separated from mbfilter_cn.c
* by Moriyoshi Koizumi <moriyoshi@php.net> on 4 Dec 2002.
*
*/
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
#include "mbfilter.h"
#include "mbfilter_euc_cn.h"
#include "unicode_table_cp936.h"
/* Forward declaration: the identify callback is defined at the end of this file. */
static int mbfl_filt_ident_euccn(int c, mbfl_identify_filter *filter);
/*
 * 256-entry table indexed by byte value: entries for 0xa1-0xfe are 2,
 * all others are 1.
 * NOTE(review): presumably the character length implied by a lead byte;
 * confirm against the users of mbfl_encoding.
 */
static const unsigned char mblen_table_euccn[] = { /* 0xA1-0xFE */
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1
};
/* NULL-terminated list of alternative names accepted for this encoding. */
static const char *mbfl_encoding_euc_cn_aliases[] = {"CN-GB", "EUC_CN", "eucCN", "x-euc-cn", "gb2312", NULL};
/*
 * Encoding descriptor for EUC-CN.
 * NOTE(review): the per-field meanings noted below are inferred from the
 * values; confirm against the mbfl_encoding definition.
 */
const mbfl_encoding mbfl_encoding_euc_cn = {
mbfl_no_encoding_euc_cn, /* encoding id */
"EUC-CN", /* presumably the canonical name */
"CN-GB", /* presumably the MIME name */
(const char *(*)[])&mbfl_encoding_euc_cn_aliases, /* alias list defined above */
mblen_table_euccn, /* byte-length table defined above */
MBFL_ENCTYPE_MBCS /* multi-byte character set */
};
/* Identify-filter vtable: encoding id, shared ctor/dtor, per-byte check. */
const struct mbfl_identify_vtbl vtbl_identify_euccn = {
mbfl_no_encoding_euc_cn,
mbfl_filt_ident_common_ctor,
mbfl_filt_ident_common_dtor,
mbfl_filt_ident_euccn
};
/* Conversion vtable, EUC-CN -> wchar: from id, to id, ctor, dtor, filter, flush. */
const struct mbfl_convert_vtbl vtbl_euccn_wchar = {
mbfl_no_encoding_euc_cn,
mbfl_no_encoding_wchar,
mbfl_filt_conv_common_ctor,
mbfl_filt_conv_common_dtor,
mbfl_filt_conv_euccn_wchar,
mbfl_filt_conv_common_flush
};
/* Conversion vtable, wchar -> EUC-CN: from id, to id, ctor, dtor, filter, flush. */
const struct mbfl_convert_vtbl vtbl_wchar_euccn = {
mbfl_no_encoding_wchar,
mbfl_no_encoding_euc_cn,
mbfl_filt_conv_common_ctor,
mbfl_filt_conv_common_dtor,
mbfl_filt_conv_wchar_euccn,
mbfl_filt_conv_common_flush
};
/*
 * CK: evaluate `statement`; if its result is negative, return -1 from the
 * enclosing function. Used to propagate output-function failures.
 */
#define CK(statement) do { if ((statement) < 0) return (-1); } while (0)
/*
 * EUC-CN => wchar
 *
 * Byte-at-a-time decoder.  filter->status is 0 when a new character is
 * expected and 1 when a lead byte is held in filter->cache.
 *
 * status 0: 0x00..0x7f is emitted unchanged, 0xa1..0xfe is stored as a
 *           lead byte, anything else is emitted tagged with
 *           MBFL_WCSGROUP_THROUGH.
 * status 1: when lead and trail byte are both in 0xa1..0xfe the pair is
 *           looked up in cp936_ucs_table (192 columns per lead byte,
 *           first lead 0x81, first column 0x40); an unmapped pair is
 *           emitted tagged with MBFL_WCSPLANE_GB2312.  A control byte
 *           (< 0x21 or 0x7f) is emitted alone; any other byte is emitted
 *           with the lead byte, tagged with MBFL_WCSGROUP_THROUGH.
 *
 * Returns c, or -1 when an output call reports failure (via CK).
 */
int
mbfl_filt_conv_euccn_wchar(int c, mbfl_convert_filter *filter)
{
	int lead, idx, wc;

	if (filter->status == 0) {
		if (c >= 0 && c < 0x80) {	/* latin */
			CK((*filter->output_function)(c, filter->data));
		} else if (c > 0xa0 && c < 0xff) {	/* dbcs lead byte */
			filter->status = 1;
			filter->cache = c;
		} else {
			wc = (c & MBFL_WCSGROUP_MASK) | MBFL_WCSGROUP_THROUGH;
			CK((*filter->output_function)(wc, filter->data));
		}
	} else if (filter->status == 1) {	/* dbcs second byte */
		filter->status = 0;
		lead = filter->cache;
		if (lead > 0xa0 && lead < 0xff && c > 0xa0 && c < 0xff) {
			idx = (lead - 0x81)*192 + (c - 0x40);
			if (idx >= 0 && idx < cp936_ucs_table_size) {
				wc = cp936_ucs_table[idx];
			} else {
				wc = 0;
			}
			if (wc <= 0) {
				wc = ((lead << 8) | c) & MBFL_WCSPLANE_MASK;
				wc |= MBFL_WCSPLANE_GB2312;
			}
			CK((*filter->output_function)(wc, filter->data));
		} else if ((c >= 0 && c < 0x21) || c == 0x7f) {	/* CTLs */
			CK((*filter->output_function)(c, filter->data));
		} else {
			wc = ((lead << 8) | c) & MBFL_WCSGROUP_MASK;
			wc |= MBFL_WCSGROUP_THROUGH;
			CK((*filter->output_function)(wc, filter->data));
		}
	} else {
		/* unknown state: resynchronise */
		filter->status = 0;
	}

	return c;
}
/*
 * wchar => EUC-CN
 *
 * Looks the wide character `c` up in the Unicode -> CP936 range tables
 * and keeps the result only when both bytes are >= 0xa1, i.e. when it is
 * a GB2312 code and not a CP936 extension.
 *
 * When the lookup gives no GB2312 code:
 *   - 0x00..0x7f is emitted as a single byte;
 *   - a value in MBFL_WCSPLANE_GB2312 is emitted as the two bytes held
 *     under MBFL_WCSPLANE_MASK;
 *   - everything else goes to mbfl_filt_conv_illegal_output() unless
 *     illegal_mode is MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE.
 *
 * An earlier version assigned `s = c` for every rejected lookup, so an
 * unmappable character >= 0x80 skipped the illegal-character path and
 * had its low 16 bits written out as two garbage bytes.
 * NOTE(review): plane values produce the same two bytes as before
 * provided MBFL_WCSPLANE_MASK covers the low 16 bits - confirm against
 * its definition.
 *
 * Returns c, or -1 when an output call reports failure (via CK).
 */
int
mbfl_filt_conv_wchar_euccn(int c, mbfl_convert_filter *filter)
{
	int c1, c2, s;

	s = 0;
	if (c >= ucs_a1_cp936_table_min && c < ucs_a1_cp936_table_max) {
		s = ucs_a1_cp936_table[c - ucs_a1_cp936_table_min];
	} else if (c >= ucs_a2_cp936_table_min && c < ucs_a2_cp936_table_max) {
		s = ucs_a2_cp936_table[c - ucs_a2_cp936_table_min];
	} else if (c >= ucs_a3_cp936_table_min && c < ucs_a3_cp936_table_max) {
		s = ucs_a3_cp936_table[c - ucs_a3_cp936_table_min];
	} else if (c >= ucs_i_cp936_table_min && c < ucs_i_cp936_table_max) {
		s = ucs_i_cp936_table[c - ucs_i_cp936_table_min];
	} else if (c >= ucs_hff_cp936_table_min && c < ucs_hff_cp936_table_max) {
		s = ucs_hff_cp936_table[c - ucs_hff_cp936_table_min];
	}

	c1 = (s >> 8) & 0xff;
	c2 = s & 0xff;
	if (c1 < 0xa1 || c2 < 0xa1) {	/* exclude CP936 extension */
		/*
		 * Only 7-bit characters map to themselves; everything else is
		 * reset to "not found" so the checks below can decide.
		 */
		s = (c >= 0 && c < 0x80) ? c : 0;
	}

	if (s <= 0) {
		c1 = c & ~MBFL_WCSPLANE_MASK;
		if (c1 == MBFL_WCSPLANE_GB2312) {
			s = c & MBFL_WCSPLANE_MASK;
		}
		if (c == 0) {
			s = 0;
		} else if (s <= 0) {
			s = -1;
		}
	}

	if (s >= 0) {
		if (s < 0x80) {	/* latin */
			CK((*filter->output_function)(s, filter->data));
		} else {
			CK((*filter->output_function)((s >> 8) & 0xff, filter->data));
			CK((*filter->output_function)(s & 0xff, filter->data));
		}
	} else {
		if (filter->illegal_mode != MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE) {
			CK(mbfl_filt_conv_illegal_output(c, filter));
		}
	}

	return c;
}
/*
 * Identify filter for EUC-CN.
 *
 * filter->status is 0 at a character boundary and 1 after a lead byte.
 * The flag is set to 1 when a byte cannot belong to EUC-CN; this
 * function never clears it.  Always returns c.
 */
static int mbfl_filt_ident_euccn(int c, mbfl_identify_filter *filter)
{
	if (filter->status == 0) {
		if (c >= 0 && c < 0x80) {
			/* latin: ok */
		} else if (c > 0xa0 && c < 0xff) {
			/* DBCS lead byte */
			filter->status = 1;
		} else {
			/* bad */
			filter->flag = 1;
		}
	} else if (filter->status == 1) {
		/* got lead byte: the trail byte must be 0xa1..0xfe */
		if (c < 0xa1 || c > 0xfe) {
			filter->flag = 1;
		}
		filter->status = 0;
	} else {
		/* unknown state: resynchronise */
		filter->status = 0;
	}

	return c;
}

View file

@ -0,0 +1,43 @@
/*
* "streamable kanji code filter and converter"
* Copyright (c) 1998-2002 HappySize, Inc. All rights reserved.
*
* LICENSE NOTICES
*
* This file is part of "streamable kanji code filter and converter",
* which is distributed under the terms of GNU Lesser General Public
* License (version 2) as published by the Free Software Foundation.
*
* This software is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with "streamable kanji code filter and converter";
* if not, write to the Free Software Foundation, Inc., 59 Temple Place,
* Suite 330, Boston, MA 02111-1307 USA
*
* The author of this file:
*
*/
/*
* The source code included in this file was separated from mbfilter_euc_cn.h
* by Moriyoshi Koizumi <moriyoshi@php.net> on 4 Dec 2002.
*
*/
/* Include guard for the EUC-CN filter declarations. */
#ifndef MBFL_MBFILTER_EUC_CN_H
#define MBFL_MBFILTER_EUC_CN_H
#include "mbfilter.h"
/* Encoding descriptor object for EUC-CN. */
extern const mbfl_encoding mbfl_encoding_euc_cn;
/* Identify-filter vtable and the two conversion vtables (EUC-CN -> wchar, wchar -> EUC-CN). */
extern const struct mbfl_identify_vtbl vtbl_identify_euccn;
extern const struct mbfl_convert_vtbl vtbl_euccn_wchar;
extern const struct mbfl_convert_vtbl vtbl_wchar_euccn;
/* Conversion callbacks: each takes one input value `c` and the filter it runs in. */
int mbfl_filt_conv_euccn_wchar(int c, mbfl_convert_filter *filter);
int mbfl_filt_conv_wchar_euccn(int c, mbfl_convert_filter *filter);
#endif /* MBFL_MBFILTER_EUC_CN_H */

View file

@ -0,0 +1,331 @@
/*
* "streamable kanji code filter and converter"
* Copyright (c) 1998-2002 HappySize, Inc. All rights reserved.
*
* LICENSE NOTICES
*
* This file is part of "streamable kanji code filter and converter",
* which is distributed under the terms of GNU Lesser General Public
* License (version 2) as published by the Free Software Foundation.
*
* This software is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with "streamable kanji code filter and converter";
* if not, write to the Free Software Foundation, Inc., 59 Temple Place,
* Suite 330, Boston, MA 02111-1307 USA
*
* The author of this file:
*
*/
/*
* The source code included in this file was separated from mbfilter_ja.c
* by moriyoshi koizumi <moriyoshi@php.net> on 4 dec 2002.
*
*/
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
#include "mbfilter.h"
#include "mbfilter_euc_jp.h"
#include "unicode_table_cp932_ext.h"
#include "unicode_table_jis.h"
/* Forward declaration: defined further down, referenced by vtbl_identify_eucjp. */
static int mbfl_filt_ident_eucjp(int c, mbfl_identify_filter *filter);
/*
 * 256-entry character-length table: entry 0x8e is 2, entry 0x8f is 3,
 * entries 0xA1-0xFE are 2, every other entry is 1.
 * NOTE(review): presumably indexed by the first byte of a character and
 * giving its total length in bytes -- confirm against the code that reads
 * the mblen table.
 */
static const unsigned char mblen_table_eucjp[] = { /* 0xA1-0xFE */
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 3,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1
};
/* NULL-terminated list of alternative names for EUC-JP. */
static const char *mbfl_encoding_euc_jp_aliases[] = {"EUC", "EUC_JP", "eucJP", "x-euc-jp", NULL};
/*
 * Encoding descriptor for EUC-JP.
 * NOTE(review): positional initializer; the fields look like encoding id,
 * name, MIME name, alias list, length table and type flags -- confirm the
 * order against mbfl_encoding in mbfilter.h.
 */
const mbfl_encoding mbfl_encoding_euc_jp = {
mbfl_no_encoding_euc_jp,
"EUC-JP",
"EUC-JP",
(const char *(*)[])&mbfl_encoding_euc_jp_aliases,
mblen_table_eucjp,
MBFL_ENCTYPE_MBCS
};
/* Identify vtable: encoding id, common ctor/dtor, and the EUC-JP identify callback. */
const struct mbfl_identify_vtbl vtbl_identify_eucjp = {
mbfl_no_encoding_euc_jp,
mbfl_filt_ident_common_ctor,
mbfl_filt_ident_common_dtor,
mbfl_filt_ident_eucjp
};
/* Conversion vtable EUC-JP -> wchar: common ctor/dtor/flush around the decoder callback. */
const struct mbfl_convert_vtbl vtbl_eucjp_wchar = {
mbfl_no_encoding_euc_jp,
mbfl_no_encoding_wchar,
mbfl_filt_conv_common_ctor,
mbfl_filt_conv_common_dtor,
mbfl_filt_conv_eucjp_wchar,
mbfl_filt_conv_common_flush
};
/* Conversion vtable wchar -> EUC-JP: common ctor/dtor/flush around the encoder callback. */
const struct mbfl_convert_vtbl vtbl_wchar_eucjp = {
mbfl_no_encoding_wchar,
mbfl_no_encoding_euc_jp,
mbfl_filt_conv_common_ctor,
mbfl_filt_conv_common_dtor,
mbfl_filt_conv_wchar_eucjp,
mbfl_filt_conv_common_flush
};
/* Evaluate `statement`; if its value is negative, return -1 from the enclosing function. */
#define CK(statement) do { if ((statement) < 0) return (-1); } while (0)
/*
 * EUC-JP => wchar
 *
 * Decodes one input byte per call.  filter->status records the position
 * inside a multi-byte sequence:
 *   0  between characters
 *   1  X 0208 lead byte stored in filter->cache
 *   2  0x8e seen, kana byte expected
 *   3  0x8f seen, X 0212 lead byte expected
 *   4  X 0212 lead byte stored in filter->cache
 * Completed characters are written through filter->output_function.
 * Returns c, or -1 as soon as an output call returns a negative value (CK).
 */
int
mbfl_filt_conv_eucjp_wchar(int c, mbfl_convert_filter *filter)
{
	int lead, idx, wc;
	int is_ctl, is_gr;

	is_ctl = (c >= 0 && c < 0x21) || c == 0x7f;	/* CTLs */
	is_gr = (c > 0xa0 && c < 0xff);			/* 0xa1..0xfe */

	switch (filter->status) {
	case 0:
		if (c >= 0 && c < 0x80) {
			/* latin */
			CK((*filter->output_function)(c, filter->data));
		} else if (is_gr) {
			/* X 0208 first char */
			filter->cache = c;
			filter->status = 1;
		} else if (c == 0x8e) {
			/* kana first char */
			filter->status = 2;
		} else if (c == 0x8f) {
			/* X 0212 first char */
			filter->status = 3;
		} else {
			wc = (c & MBFL_WCSGROUP_MASK) | MBFL_WCSGROUP_THROUGH;
			CK((*filter->output_function)(wc, filter->data));
		}
		break;

	case 1:
		/* second byte of an X 0208 character */
		filter->status = 0;
		lead = filter->cache;
		if (is_gr) {
			idx = (lead - 0xa1) * 94 + c - 0xa1;
			wc = (idx >= 0 && idx < jisx0208_ucs_table_size) ? jisx0208_ucs_table[idx] : 0;
			if (wc <= 0) {
				/* no table entry: keep the 7-bit code, tagged with its plane */
				wc = ((((lead & 0x7f) << 8) | (c & 0x7f)) & MBFL_WCSPLANE_MASK) | MBFL_WCSPLANE_JIS0208;
			}
			CK((*filter->output_function)(wc, filter->data));
		} else if (is_ctl) {
			CK((*filter->output_function)(c, filter->data));
		} else {
			wc = (((lead << 8) | c) & MBFL_WCSGROUP_MASK) | MBFL_WCSGROUP_THROUGH;
			CK((*filter->output_function)(wc, filter->data));
		}
		break;

	case 2:
		/* byte following 0x8e */
		filter->status = 0;
		if (c > 0xa0 && c < 0xe0) {
			CK((*filter->output_function)(0xfec0 + c, filter->data));
		} else if (is_ctl) {
			CK((*filter->output_function)(c, filter->data));
		} else {
			wc = ((0x8e00 | c) & MBFL_WCSGROUP_MASK) | MBFL_WCSGROUP_THROUGH;
			CK((*filter->output_function)(wc, filter->data));
		}
		break;

	case 3:
		/* byte following 0x8f: X 0212 first char */
		if (is_ctl) {
			/* status is reset only after the output call succeeded */
			CK((*filter->output_function)(c, filter->data));
			filter->status = 0;
		} else {
			filter->status = 4;
			filter->cache = c;
		}
		break;

	case 4:
		/* X 0212 second char */
		filter->status = 0;
		lead = filter->cache;
		if (lead > 0xa0 && lead < 0xff && is_gr) {
			idx = (lead - 0xa1) * 94 + c - 0xa1;
			wc = (idx >= 0 && idx < jisx0212_ucs_table_size) ? jisx0212_ucs_table[idx] : 0;
			if (wc <= 0) {
				wc = ((((lead & 0x7f) << 8) | (c & 0x7f)) & MBFL_WCSPLANE_MASK) | MBFL_WCSPLANE_JIS0212;
			}
			CK((*filter->output_function)(wc, filter->data));
		} else if (is_ctl) {
			CK((*filter->output_function)(c, filter->data));
		} else {
			wc = (((lead << 8) | c | 0x8f0000) & MBFL_WCSGROUP_MASK) | MBFL_WCSGROUP_THROUGH;
			CK((*filter->output_function)(wc, filter->data));
		}
		break;

	default:
		filter->status = 0;
		break;
	}

	return c;
}
/*
 * wchar => EUC-JP
 *
 * Encodes one wide character `c` and writes one to three bytes through
 * filter->output_function.  The code is looked up in the ucs_*_jis tables;
 * when that yields nothing, plane-tagged values (MBFL_WCSPLANE_JIS0208 /
 * MBFL_WCSPLANE_JIS0212) and a few fullwidth symbols are handled explicitly.
 * Characters that still have no code go to mbfl_filt_conv_illegal_output()
 * unless filter->illegal_mode is MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE.
 * Returns c, or -1 as soon as an output call returns a negative value (CK).
 */
int
mbfl_filt_conv_wchar_eucjp(int c, mbfl_convert_filter *filter)
{
	int plane;
	int code = 0;

	if (c >= ucs_a1_jis_table_min && c < ucs_a1_jis_table_max) {
		code = ucs_a1_jis_table[c - ucs_a1_jis_table_min];
	} else if (c >= ucs_a2_jis_table_min && c < ucs_a2_jis_table_max) {
		code = ucs_a2_jis_table[c - ucs_a2_jis_table_min];
	} else if (c >= ucs_i_jis_table_min && c < ucs_i_jis_table_max) {
		code = ucs_i_jis_table[c - ucs_i_jis_table_min];
	} else if (c >= ucs_r_jis_table_min && c < ucs_r_jis_table_max) {
		code = ucs_r_jis_table[c - ucs_r_jis_table_min];
	}

	if (code <= 0) {
		plane = c & ~MBFL_WCSPLANE_MASK;
		if (plane == MBFL_WCSPLANE_JIS0208) {
			code = c & MBFL_WCSPLANE_MASK;
		} else if (plane == MBFL_WCSPLANE_JIS0212) {
			code = (c & MBFL_WCSPLANE_MASK) | 0x8080;
		} else {
			switch (c) {
			case 0xff3c:	/* FULLWIDTH REVERSE SOLIDUS */
				code = 0x2140;
				break;
			case 0xff5e:	/* FULLWIDTH TILDE */
				code = 0x2141;
				break;
			case 0x2225:	/* PARALLEL TO */
				code = 0x2142;
				break;
			case 0xff0d:	/* FULLWIDTH HYPHEN-MINUS */
				code = 0x215d;
				break;
			case 0xffe0:	/* FULLWIDTH CENT SIGN */
				code = 0x2171;
				break;
			case 0xffe1:	/* FULLWIDTH POUND SIGN */
				code = 0x2172;
				break;
			case 0xffe2:	/* FULLWIDTH NOT SIGN */
				code = 0x224c;
				break;
			default:
				break;
			}
		}

		/* only the NUL character may legitimately end up with code 0 */
		if (c == 0) {
			code = 0;
		} else if (code <= 0) {
			code = -1;
		}
	}

	if (code < 0) {
		if (filter->illegal_mode != MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE) {
			CK(mbfl_filt_conv_illegal_output(c, filter));
		}
		return c;
	}

	if (code < 0x80) {
		/* latin */
		CK((*filter->output_function)(code, filter->data));
	} else if (code < 0x100) {
		/* kana: 0x8e prefix */
		CK((*filter->output_function)(0x8e, filter->data));
		CK((*filter->output_function)(code, filter->data));
	} else if (code < 0x8080) {
		/* X 0208 */
		CK((*filter->output_function)(((code >> 8) & 0xff) | 0x80, filter->data));
		CK((*filter->output_function)((code & 0xff) | 0x80, filter->data));
	} else {
		/* X 0212: 0x8f prefix */
		CK((*filter->output_function)(0x8f, filter->data));
		CK((*filter->output_function)(((code >> 8) & 0xff) | 0x80, filter->data));
		CK((*filter->output_function)((code & 0xff) | 0x80, filter->data));
	}

	return c;
}
/*
 * Identify filter for EUC-JP: consumes one byte per call and sets
 * filter->flag to 1 when the byte cannot occur at the current position.
 * filter->status: 0 between characters, 1 after a kanji lead byte,
 * 2 after 0x8e, 3 after 0x8f, 4 after the first byte following 0x8f.
 * Always returns c.
 */
static int mbfl_filt_ident_eucjp(int c, mbfl_identify_filter *filter)
{
	switch (filter->status) {
	case 0:
		if (c == 0x8e) {
			/* kana first char */
			filter->status = 2;
		} else if (c == 0x8f) {
			/* X 0212 first char */
			filter->status = 3;
		} else if (c > 0xa0 && c < 0xff) {
			/* kanji first char */
			filter->status = 1;
		} else if (c < 0 || c >= 0x80) {
			/* not 7-bit either: bad */
			filter->flag = 1;
		}
		break;

	case 1:	/* got first half */
	case 3:	/* got 0x8f */
	case 4:	/* got 0x8f and one more byte */
		if (c < 0xa1 || c > 0xfe) {
			filter->flag = 1;
		}
		/* state 3 still needs one more byte; 1 and 4 complete the character */
		filter->status = (filter->status == 3) ? 4 : 0;
		break;

	case 2:	/* got 0x8e */
		if (c < 0xa1 || c > 0xdf) {
			filter->flag = 1;
		}
		filter->status = 0;
		break;

	default:
		filter->status = 0;
		break;
	}

	return c;
}

View file

@ -0,0 +1,43 @@
/*
* "streamable kanji code filter and converter"
* Copyright (c) 1998-2002 HappySize, Inc. All rights reserved.
*
* LICENSE NOTICES
*
* This file is part of "streamable kanji code filter and converter",
* which is distributed under the terms of GNU Lesser General Public
* License (version 2) as published by the Free Software Foundation.
*
* This software is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with "streamable kanji code filter and converter";
* if not, write to the Free Software Foundation, Inc., 59 Temple Place,
* Suite 330, Boston, MA 02111-1307 USA
*
* The author of this file:
*
*/
/*
* The source code included in this file was separated from mbfilter_ja.h
* by moriyoshi koizumi <moriyoshi@php.net> on 4 dec 2002.
*
*/
/* Include guard for the EUC-JP filter declarations. */
#ifndef MBFL_MBFILTER_EUC_JP_H
#define MBFL_MBFILTER_EUC_JP_H
#include "mbfilter.h"
/* Encoding descriptor object for EUC-JP. */
extern const mbfl_encoding mbfl_encoding_euc_jp;
/* Identify-filter vtable and the two conversion vtables (EUC-JP -> wchar, wchar -> EUC-JP). */
extern const struct mbfl_identify_vtbl vtbl_identify_eucjp;
extern const struct mbfl_convert_vtbl vtbl_eucjp_wchar;
extern const struct mbfl_convert_vtbl vtbl_wchar_eucjp;
/* Conversion callbacks: each takes one input value `c` and the filter it runs in. */
int mbfl_filt_conv_eucjp_wchar(int c, mbfl_convert_filter *filter);
int mbfl_filt_conv_wchar_eucjp(int c, mbfl_convert_filter *filter);
#endif /* MBFL_MBFILTER_EUC_JP_H */

View file

@ -0,0 +1,420 @@
/*
* "streamable kanji code filter and converter"
* Copyright (c) 1998-2002 HappySize, Inc. All rights reserved.
*
* LICENSE NOTICES
*
* This file is part of "streamable kanji code filter and converter",
* which is distributed under the terms of GNU Lesser General Public
* License (version 2) as published by the Free Software Foundation.
*
* This software is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with "streamable kanji code filter and converter";
* if not, write to the Free Software Foundation, Inc., 59 Temple Place,
* Suite 330, Boston, MA 02111-1307 USA
*
* The author of this file:
*
*/
/*
* The source code included in this file was separated from mbfilter_ja.c
* by moriyoshi koizumi <moriyoshi@php.net> on 4 dec 2002.
*
*/
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
#include "mbfilter.h"
#include "mbfilter_euc_jp_win.h"
#include "unicode_table_cp932_ext.h"
#include "unicode_table_jis.h"
#include "cp932_table.h"
/* Forward declaration: defined further down, referenced by vtbl_identify_eucjpwin. */
static int mbfl_filt_ident_eucjp_win(int c, mbfl_identify_filter *filter);
/*
 * 256-entry character-length table: entry 0x8e is 2, entry 0x8f is 3,
 * entries 0xA1-0xFE are 2, every other entry is 1.
 * NOTE(review): presumably indexed by the first byte of a character and
 * giving its total length in bytes -- confirm against the code that reads
 * the mblen table.
 */
static const unsigned char mblen_table_eucjp[] = { /* 0xA1-0xFE */
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 3,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1
};
/* NULL-terminated list of alternative names for eucJP-win. */
static const char *mbfl_encoding_eucjp_win_aliases[] = {"eucJP-open", NULL};
/* Identify vtable: encoding id, common ctor/dtor, and the eucJP-win identify callback. */
const struct mbfl_identify_vtbl vtbl_identify_eucjpwin = {
mbfl_no_encoding_eucjp_win,
mbfl_filt_ident_common_ctor,
mbfl_filt_ident_common_dtor,
mbfl_filt_ident_eucjp_win
};
/*
 * Encoding descriptor for eucJP-win.  The second string is "EUC-JP", not
 * "eucJP-win".
 * NOTE(review): positional initializer; the fields look like encoding id,
 * name, MIME name, alias list, length table and type flags -- confirm the
 * order against mbfl_encoding in mbfilter.h.
 */
const mbfl_encoding mbfl_encoding_eucjp_win = {
mbfl_no_encoding_eucjp_win,
"eucJP-win",
"EUC-JP",
(const char *(*)[])&mbfl_encoding_eucjp_win_aliases,
mblen_table_eucjp,
MBFL_ENCTYPE_MBCS
};
/* Conversion vtable eucJP-win -> wchar: common ctor/dtor/flush around the decoder callback. */
const struct mbfl_convert_vtbl vtbl_eucjpwin_wchar = {
mbfl_no_encoding_eucjp_win,
mbfl_no_encoding_wchar,
mbfl_filt_conv_common_ctor,
mbfl_filt_conv_common_dtor,
mbfl_filt_conv_eucjpwin_wchar,
mbfl_filt_conv_common_flush
};
/* Conversion vtable wchar -> eucJP-win: common ctor/dtor/flush around the encoder callback. */
const struct mbfl_convert_vtbl vtbl_wchar_eucjpwin = {
mbfl_no_encoding_wchar,
mbfl_no_encoding_eucjp_win,
mbfl_filt_conv_common_ctor,
mbfl_filt_conv_common_dtor,
mbfl_filt_conv_wchar_eucjpwin,
mbfl_filt_conv_common_flush
};
/* Evaluate `statement`; if its value is negative, return -1 from the enclosing function. */
#define CK(statement) do { if ((statement) < 0) return (-1); } while (0)
/*
* eucJP-win => wchar
*
* Decodes one input byte per call. filter->status records the position
* inside a multi-byte sequence:
*   0  between characters
*   1  two-byte lead stored in filter->cache
*   2  0x8e seen, kana byte expected
*   3  0x8f seen, first byte of a three-byte character expected
*   4  that first byte stored in filter->cache, last byte expected
* Completed characters are written through filter->output_function.
* Returns c, or -1 as soon as an output call returns a negative value (CK).
*/
int
mbfl_filt_conv_eucjpwin_wchar(int c, mbfl_convert_filter *filter)
{
int c1, s, w, n;
switch (filter->status) {
case 0:
if (c >= 0 && c < 0x80) { /* latin */
CK((*filter->output_function)(c, filter->data));
} else if (c > 0xa0 && c < 0xff) { /* CP932 first char */
filter->status = 1;
filter->cache = c;
} else if (c == 0x8e) { /* kana first char */
filter->status = 2;
} else if (c == 0x8f) { /* X 0212 first char */
filter->status = 3;
} else {
/* any other byte is forwarded tagged with MBFL_WCSGROUP_THROUGH */
w = c & MBFL_WCSGROUP_MASK;
w |= MBFL_WCSGROUP_THROUGH;
CK((*filter->output_function)(w, filter->data));
}
break;
case 1: /* got first half */
filter->status = 0;
c1 = filter->cache;
if (c > 0xa0 && c < 0xff) {
w = 0;
/* zero-based linear index: (row - 1) * 94 + (cell - 1) */
s = (c1 - 0xa1)*94 + c - 0xa1;
/* a few symbols get fixed fullwidth code points before any table lookup */
if (s <= 137) {
if (s == 31) {
w = 0xff3c; /* FULLWIDTH REVERSE SOLIDUS */
} else if (s == 32) {
w = 0xff5e; /* FULLWIDTH TILDE */
} else if (s == 33) {
w = 0x2225; /* PARALLEL TO */
} else if (s == 60) {
w = 0xff0d; /* FULLWIDTH HYPHEN-MINUS */
} else if (s == 80) {
w = 0xffe0; /* FULLWIDTH CENT SIGN */
} else if (s == 81) {
w = 0xffe1; /* FULLWIDTH POUND SIGN */
} else if (s == 137) {
w = 0xffe2; /* FULLWIDTH NOT SIGN */
}
}
if (w == 0) {
/* the vendor extension range is tested first, then the X 0208 table */
if (s >= cp932ext1_ucs_table_min && s < cp932ext1_ucs_table_max) { /* vendor ext1 (13ku) */
w = cp932ext1_ucs_table[s - cp932ext1_ucs_table_min];
} else if (s >= 0 && s < jisx0208_ucs_table_size) { /* X 0208 */
w = jisx0208_ucs_table[s];
} else if (s >= (84*94)) { /* user (85ku - 94ku) */
/* user-defined rows map linearly onto 0xe000 and up */
w = s - (84*94) + 0xe000;
}
}
if (w <= 0) {
/* no mapping: keep the 7-bit code, tagged with the CP932 plane */
w = ((c1 & 0x7f) << 8) | (c & 0x7f);
w &= MBFL_WCSPLANE_MASK;
w |= MBFL_WCSPLANE_WINCP932;
}
CK((*filter->output_function)(w, filter->data));
} else if ((c >= 0 && c < 0x21) || c == 0x7f) { /* CTLs */
CK((*filter->output_function)(c, filter->data));
} else {
/* invalid second byte: forward both bytes tagged with MBFL_WCSGROUP_THROUGH */
w = (c1 << 8) | c;
w &= MBFL_WCSGROUP_MASK;
w |= MBFL_WCSGROUP_THROUGH;
CK((*filter->output_function)(w, filter->data));
}
break;
case 2: /* got 0x8e, X0201 kana */
filter->status = 0;
if (c > 0xa0 && c < 0xe0) {
/* 0xa1..0xdf become 0xff61..0xff9f */
w = 0xfec0 + c;
CK((*filter->output_function)(w, filter->data));
} else if ((c >= 0 && c < 0x21) || c == 0x7f) { /* CTLs */
CK((*filter->output_function)(c, filter->data));
} else {
w = 0x8e00 | c;
w &= MBFL_WCSGROUP_MASK;
w |= MBFL_WCSGROUP_THROUGH;
CK((*filter->output_function)(w, filter->data));
}
break;
case 3: /* got 0x8f, X 0212 first char */
if ((c >= 0 && c < 0x21) || c == 0x7f) { /* CTLs */
/* status is reset only after the output call succeeded */
CK((*filter->output_function)(c, filter->data));
filter->status = 0;
} else {
/* the byte is stored unchecked; its range is validated in state 4 */
filter->status++;
filter->cache = c;
}
break;
case 4: /* got 0x8f, X 0212 second char */
filter->status = 0;
c1 = filter->cache;
if (c1 > 0xa0 && c1 < 0xff && c > 0xa0 && c < 0xff) {
s = (c1 - 0xa1)*94 + c - 0xa1;
if (s >= 0 && s < jisx0212_ucs_table_size) {
w = jisx0212_ucs_table[s];
} else if (s >= (82*94) && s < (84*94)) { /* vendor ext3 (83ku - 84ku) <-> CP932 (115ku -120ku) */
/* s is reused as the raw two-byte code and searched linearly in cp932ext3_eucjp_table */
s = (c1<< 8) | c;
w = 0;
n = 0;
while (n < cp932ext3_eucjp_table_size) {
if (s == cp932ext3_eucjp_table[n]) {
/* the matching position n is used as the index into cp932ext3_ucs_table, bounds-checked */
if (n < (cp932ext3_ucs_table_max - cp932ext3_ucs_table_min)) {
w = cp932ext3_ucs_table[n];
}
break;
}
n++;
}
} else if (s >= (84*94)) { /* user (85ku - 94ku) */
/* placed directly after the 94*10 code points used for the two-byte user area */
w = s - (84*94) + (0xe000 + (94*10));
} else {
w = 0;
}
if (w <= 0) {
/* no mapping: keep the 7-bit code, tagged with the JIS X 0212 plane */
w = ((c1 & 0x7f) << 8) | (c & 0x7f);
w &= MBFL_WCSPLANE_MASK;
w |= MBFL_WCSPLANE_JIS0212;
}
CK((*filter->output_function)(w, filter->data));
} else if ((c >= 0 && c < 0x21) || c == 0x7f) { /* CTLs */
CK((*filter->output_function)(c, filter->data));
} else {
/* invalid sequence: forward 0x8f and both bytes tagged with MBFL_WCSGROUP_THROUGH */
w = (c1 << 8) | c | 0x8f0000;
w &= MBFL_WCSGROUP_MASK;
w |= MBFL_WCSGROUP_THROUGH;
CK((*filter->output_function)(w, filter->data));
}
break;
default:
filter->status = 0;
break;
}
return c;
}
/*
* wchar => eucJP-win
*
* Encodes one wide character `c` and writes one to three bytes through
* filter->output_function. The code is taken from the ucs_*_jis tables or
* computed for the two user-defined ranges starting at 0xe000; when that
* yields nothing, plane-tagged values, a few individual symbols and the
* CP932 vendor extension tables are tried. Characters that still have no
* code go to mbfl_filt_conv_illegal_output() unless filter->illegal_mode is
* MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE.
* Returns c, or -1 as soon as an output call returns a negative value (CK).
*/
int
mbfl_filt_conv_wchar_eucjpwin(int c, mbfl_convert_filter *filter)
{
int c1, c2, s1;
s1 = 0;
if (c >= ucs_a1_jis_table_min && c < ucs_a1_jis_table_max) {
s1 = ucs_a1_jis_table[c - ucs_a1_jis_table_min];
} else if (c >= ucs_a2_jis_table_min && c < ucs_a2_jis_table_max) {
s1 = ucs_a2_jis_table[c - ucs_a2_jis_table_min];
} else if (c >= ucs_i_jis_table_min && c < ucs_i_jis_table_max) {
s1 = ucs_i_jis_table[c - ucs_i_jis_table_min];
} else if (c >= ucs_r_jis_table_min && c < ucs_r_jis_table_max) {
s1 = ucs_r_jis_table[c - ucs_r_jis_table_min];
} else if (c >= 0xe000 && c < (0xe000 + 10*94)) { /* user (X0208 85ku - 94ku) */
/* 7-bit code starting at 0x7521; emitted through the two-byte path below */
s1 = c - 0xe000;
c1 = s1/94 + 0x75;
c2 = s1%94 + 0x21;
s1 = (c1 << 8) | c2;
} else if (c >= (0xe000 + 10*94) && c < (0xe000 + 20*94)) { /* user (X0212 85ku - 94ku) */
/* code starting at 0xf5a1, i.e. >= 0x8080; emitted through the 0x8f path below */
s1 = c - (0xe000 + 10*94);
c1 = s1/94 + 0xf5;
c2 = s1%94 + 0xa1;
s1 = (c1 << 8) | c2;
}
if (s1 <= 0) {
/* bits outside MBFL_WCSPLANE_MASK identify a plane-tagged value */
c1 = c & ~MBFL_WCSPLANE_MASK;
if (c1 == MBFL_WCSPLANE_WINCP932) {
s1 = c & MBFL_WCSPLANE_MASK;
if (s1 >= ((85 + 0x20) << 8)) { /* 85ku - 120ku */
s1 = -1;
}
} else if (c1 == MBFL_WCSPLANE_JIS0208) {
s1 = c & MBFL_WCSPLANE_MASK;
if (s1 >= ((85 + 0x20) << 8)) { /* 85ku - 94ku */
s1 = -1;
}
} else if (c1 == MBFL_WCSPLANE_JIS0212) {
s1 = c & MBFL_WCSPLANE_MASK;
if (s1 >= ((83 + 0x20) << 8)) { /* 83ku - 94ku */
s1 = -1;
} else {
/* setting both high bits selects the three-byte output path below */
s1 |= 0x8080;
}
} else if (c == 0xa5) { /* YEN SIGN */
s1 = 0x216f; /* FULLWIDTH YEN SIGN */
} else if (c == 0x203e) { /* OVER LINE */
s1 = 0x2131; /* FULLWIDTH MACRON */
} else if (c == 0xff3c) { /* FULLWIDTH REVERSE SOLIDUS */
s1 = 0x2140;
} else if (c == 0xff5e) { /* FULLWIDTH TILDE */
s1 = 0x2141;
} else if (c == 0x2225) { /* PARALLEL TO */
s1 = 0x2142;
} else if (c == 0xff0d) { /* FULLWIDTH HYPHEN-MINUS */
s1 = 0x215d;
} else if (c == 0xffe0) { /* FULLWIDTH CENT SIGN */
s1 = 0x2171;
} else if (c == 0xffe1) { /* FULLWIDTH POUND SIGN */
s1 = 0x2172;
} else if (c == 0xffe2) { /* FULLWIDTH NOT SIGN */
s1 = 0x224c;
} else {
/* last resort: linear searches of the vendor tables; c1 is the index, c2 the entry count */
s1 = -1;
c1 = 0;
c2 = cp932ext1_ucs_table_max - cp932ext1_ucs_table_min;
while (c1 < c2) { /* CP932 vendor ext1 (13ku) */
if (c == cp932ext1_ucs_table[c1]) {
/* table position -> 7-bit code starting at 0x2d21 */
s1 = ((c1/94 + 0x2d) << 8) + (c1%94 + 0x21);
break;
}
c1++;
}
if (s1 < 0) {
c1 = 0;
c2 = cp932ext3_ucs_table_max - cp932ext3_ucs_table_min;
while (c1 < c2) { /* CP932 vendor ext3 (115ku - 119ku) */
if (c == cp932ext3_ucs_table[c1]) {
/* the matching position is used as the index into cp932ext3_eucjp_table, bounds-checked */
if (c1 < cp932ext3_eucjp_table_size) {
s1 = cp932ext3_eucjp_table[c1];
}
break;
}
c1++;
}
}
}
/* only the NUL character may legitimately end up with code 0 */
if (c == 0) {
s1 = 0;
} else if (s1 <= 0) {
s1 = -1;
}
}
if (s1 >= 0) {
if (s1 < 0x80) { /* latin */
CK((*filter->output_function)(s1, filter->data));
} else if (s1 < 0x100) { /* kana */
CK((*filter->output_function)(0x8e, filter->data));
CK((*filter->output_function)(s1, filter->data));
} else if (s1 < 0x8080) { /* X 0208 */
CK((*filter->output_function)(((s1 >> 8) & 0xff) | 0x80, filter->data));
CK((*filter->output_function)((s1 & 0xff) | 0x80, filter->data));
} else { /* X 0212 */
CK((*filter->output_function)(0x8f, filter->data));
CK((*filter->output_function)(((s1 >> 8) & 0xff) | 0x80, filter->data));
CK((*filter->output_function)((s1 & 0xff) | 0x80, filter->data));
}
} else {
if (filter->illegal_mode != MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE) {
CK(mbfl_filt_conv_illegal_output(c, filter));
}
}
return c;
}
/*
 * Identify filter for eucJP-win: consumes one byte per call and sets
 * filter->flag to 1 when the byte cannot occur at the current position.
 * filter->status: 0 between characters, 1 after a kanji lead byte,
 * 2 after 0x8e, 3 after 0x8f, 4 after the first byte following 0x8f.
 * Always returns c.
 */
static int mbfl_filt_ident_eucjp_win(int c, mbfl_identify_filter *filter)
{
	switch (filter->status) {
	case 0:
		if (c == 0x8e) {
			/* kana first char */
			filter->status = 2;
		} else if (c == 0x8f) {
			/* X 0212 first char */
			filter->status = 3;
		} else if (c > 0xa0 && c < 0xff) {
			/* kanji first char */
			filter->status = 1;
		} else if (c < 0 || c >= 0x80) {
			/* not 7-bit either: bad */
			filter->flag = 1;
		}
		break;

	case 2:	/* got 0x8e */
		if (c < 0xa1 || c > 0xdf) {
			filter->flag = 1;
		}
		filter->status = 0;
		break;

	case 1:	/* got first half */
	case 3:	/* got 0x8f */
	case 4:	/* got 0x8f and one more byte */
		if (c < 0xa1 || c > 0xfe) {
			filter->flag = 1;
		}
		/* state 3 still needs one more byte; 1 and 4 complete the character */
		filter->status = (filter->status == 3) ? 4 : 0;
		break;

	default:
		filter->status = 0;
		break;
	}

	return c;
}

View file

@ -0,0 +1,43 @@
/*
* "streamable kanji code filter and converter"
* Copyright (c) 1998-2002 HappySize, Inc. All rights reserved.
*
* LICENSE NOTICES
*
* This file is part of "streamable kanji code filter and converter",
* which is distributed under the terms of GNU Lesser General Public
* License (version 2) as published by the Free Software Foundation.
*
* This software is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with "streamable kanji code filter and converter";
* if not, write to the Free Software Foundation, Inc., 59 Temple Place,
* Suite 330, Boston, MA 02111-1307 USA
*
* The author of this file:
*
*/
/*
* The source code included in this file was separated from mbfilter_ja.h
* by moriyoshi koizumi <moriyoshi@php.net> on 4 dec 2002.
*
*/
/* Include guard for the eucJP-win filter declarations. */
#ifndef MBFL_MBFILTER_EUC_JP_WIN_H
#define MBFL_MBFILTER_EUC_JP_WIN_H
#include "mbfilter.h"
/* Encoding descriptor object for eucJP-win. */
extern const mbfl_encoding mbfl_encoding_eucjp_win;
/* Identify-filter vtable and the two conversion vtables (eucJP-win -> wchar, wchar -> eucJP-win). */
extern const struct mbfl_identify_vtbl vtbl_identify_eucjpwin;
extern const struct mbfl_convert_vtbl vtbl_eucjpwin_wchar;
extern const struct mbfl_convert_vtbl vtbl_wchar_eucjpwin;
/* Conversion callbacks: each takes one input value `c` and the filter it runs in. */
int mbfl_filt_conv_eucjpwin_wchar(int c, mbfl_convert_filter *filter);
int mbfl_filt_conv_wchar_eucjpwin(int c, mbfl_convert_filter *filter);
#endif /* MBFL_MBFILTER_EUC_JP_WIN_H */

View file

@ -0,0 +1,256 @@
/*
* "streamable kanji code filter and converter"
* Copyright (c) 1998-2002 HappySize, Inc. All rights reserved.
*
* LICENSE NOTICES
*
* This file is part of "streamable kanji code filter and converter",
* which is distributed under the terms of GNU Lesser General Public
* License (version 2) as published by the Free Software Foundation.
*
* This software is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with "streamable kanji code filter and converter";
* if not, write to the Free Software Foundation, Inc., 59 Temple Place,
* Suite 330, Boston, MA 02111-1307 USA
*
* The author of this file:
*
*/
/*
* The source code included in this file was separated from mbfilter_kr.c
* by moriyoshi koizumi <moriyoshi@php.net> on 4 dec 2002.
*
*/
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
#include "mbfilter.h"
#include "mbfilter_euc_kr.h"
#include "unicode_table_uhc.h"
/* Forward declaration: defined further down, referenced by vtbl_identify_euckr. */
static int mbfl_filt_ident_euckr(int c, mbfl_identify_filter *filter);
/*
 * 256-entry character-length table: entries 0xA1-0xFE are 2, every other
 * entry is 1.
 * NOTE(review): presumably indexed by the first byte of a character and
 * giving its total length in bytes -- confirm against the code that reads
 * the mblen table.
 */
static const unsigned char mblen_table_euckr[] = { /* 0xA1-0xFE */
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1
};
/* NULL-terminated list of alternative names for EUC-KR. */
static const char *mbfl_encoding_euc_kr_aliases[] = {"EUC_KR", "eucKR", "x-euc-kr", NULL};
/*
 * Encoding descriptor for EUC-KR.
 * NOTE(review): positional initializer; the fields look like encoding id,
 * name, MIME name, alias list, length table and type flags -- confirm the
 * order against mbfl_encoding in mbfilter.h.
 */
const mbfl_encoding mbfl_encoding_euc_kr = {
mbfl_no_encoding_euc_kr,
"EUC-KR",
"EUC-KR",
(const char *(*)[])&mbfl_encoding_euc_kr_aliases,
mblen_table_euckr,
MBFL_ENCTYPE_MBCS
};
/* Identify vtable: encoding id, common ctor/dtor, and the EUC-KR identify callback. */
const struct mbfl_identify_vtbl vtbl_identify_euckr = {
mbfl_no_encoding_euc_kr,
mbfl_filt_ident_common_ctor,
mbfl_filt_ident_common_dtor,
mbfl_filt_ident_euckr
};
/* Conversion vtable EUC-KR -> wchar: common ctor/dtor/flush around the decoder callback. */
const struct mbfl_convert_vtbl vtbl_euckr_wchar = {
mbfl_no_encoding_euc_kr,
mbfl_no_encoding_wchar,
mbfl_filt_conv_common_ctor,
mbfl_filt_conv_common_dtor,
mbfl_filt_conv_euckr_wchar,
mbfl_filt_conv_common_flush
};
/* Conversion vtable wchar -> EUC-KR: common ctor/dtor/flush around the encoder callback. */
const struct mbfl_convert_vtbl vtbl_wchar_euckr = {
mbfl_no_encoding_wchar,
mbfl_no_encoding_euc_kr,
mbfl_filt_conv_common_ctor,
mbfl_filt_conv_common_dtor,
mbfl_filt_conv_wchar_euckr,
mbfl_filt_conv_common_flush
};
/* Evaluate `statement`; if its value is negative, return -1 from the enclosing function. */
#define CK(statement) do { if ((statement) < 0) return (-1); } while (0)
/*
 * EUC-KR => wchar
 *
 * Decodes one input byte per call.  filter->status is 0 between characters
 * and 1 while a lead byte is held in filter->cache.  Completed characters
 * are written through filter->output_function.
 * Returns c, or -1 as soon as an output call returns a negative value (CK).
 */
int
mbfl_filt_conv_euckr_wchar(int c, mbfl_convert_filter *filter)
{
	int lead, idx, wc, area;

	switch (filter->status) {
	case 0:
		if (c >= 0 && c < 0x80) {
			/* latin */
			CK((*filter->output_function)(c, filter->data));
		} else if (c > 0xa0 && c < 0xff && c != 0xc9) {
			/* dbcs lead byte */
			filter->cache = c;
			filter->status = 1;
		} else {
			wc = (c & MBFL_WCSGROUP_MASK) | MBFL_WCSGROUP_THROUGH;
			CK((*filter->output_function)(wc, filter->data));
		}
		break;

	case 1:
		/* dbcs second byte */
		filter->status = 0;
		lead = filter->cache;

		/* pick the lookup table from the lead byte */
		if (lead >= 0xa1 && lead <= 0xc6) {
			area = 1;
		} else if (lead >= 0xc7 && lead <= 0xfe && lead != 0xc9) {
			area = 2;
		} else {
			area = 0;
		}

		if (area != 0 && c >= 0xa1 && c <= 0xfe) {
			if (area == 1) {
				/* uhc2 table: 190 entries per lead byte, trail offset from 0x41 */
				idx = (lead - 0xa1) * 190 + (c - 0x41);
				wc = (idx >= 0 && idx < uhc2_ucs_table_size) ? uhc2_ucs_table[idx] : 0;
			} else {
				/* uhc3 table: 94 entries per lead byte, trail offset from 0xa1 */
				idx = (lead - 0xc7) * 94 + (c - 0xa1);
				wc = (idx >= 0 && idx < uhc3_ucs_table_size) ? uhc3_ucs_table[idx] : 0;
			}
			if (wc <= 0) {
				/* no table entry: keep the two bytes, tagged with the plane */
				wc = (((lead << 8) | c) & MBFL_WCSPLANE_MASK) | MBFL_WCSPLANE_KSC5601;
			}
			CK((*filter->output_function)(wc, filter->data));
		} else if ((c >= 0 && c < 0x21) || c == 0x7f) {
			/* CTLs */
			CK((*filter->output_function)(c, filter->data));
		} else {
			wc = (((lead << 8) | c) & MBFL_WCSGROUP_MASK) | MBFL_WCSGROUP_THROUGH;
			CK((*filter->output_function)(wc, filter->data));
		}
		break;

	default:
		filter->status = 0;
		break;
	}

	return c;
}
/*
 * wchar => EUC-KR
 *
 * Encodes one wide character `c` as EUC-KR and passes the resulting byte(s)
 * to filter->output_function.
 *
 * Resolution order:
 *   1. the ucs_*_uhc tables; a table result is kept only when both of its
 *      bytes are >= 0xa1 (anything else lies in the UHC extension area);
 *   2. 7-bit values (0x00-0x7f), emitted unchanged as a single byte;
 *   3. values tagged with MBFL_WCSPLANE_KSC5601, whose MBFL_WCSPLANE_MASK
 *      bits are emitted as the two-byte code.
 * Everything else is handed to mbfl_filt_conv_illegal_output(), unless
 * filter->illegal_mode is MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE, in which case
 * nothing is written.
 *
 * Returns c, or -1 as soon as an output call returns a negative value
 * (early return performed by the CK macro).
 */
int
mbfl_filt_conv_wchar_euckr(int c, mbfl_convert_filter *filter)
{
	int c1, c2, s;

	s = 0;
	if (c >= ucs_a1_uhc_table_min && c < ucs_a1_uhc_table_max) {
		s = ucs_a1_uhc_table[c - ucs_a1_uhc_table_min];
	} else if (c >= ucs_a2_uhc_table_min && c < ucs_a2_uhc_table_max) {
		s = ucs_a2_uhc_table[c - ucs_a2_uhc_table_min];
	} else if (c >= ucs_a3_uhc_table_min && c < ucs_a3_uhc_table_max) {
		s = ucs_a3_uhc_table[c - ucs_a3_uhc_table_min];
	} else if (c >= ucs_i_uhc_table_min && c < ucs_i_uhc_table_max) {
		s = ucs_i_uhc_table[c - ucs_i_uhc_table_min];
	} else if (c >= ucs_s_uhc_table_min && c < ucs_s_uhc_table_max) {
		s = ucs_s_uhc_table[c - ucs_s_uhc_table_min];
	} else if (c >= ucs_r1_uhc_table_min && c < ucs_r1_uhc_table_max) {
		s = ucs_r1_uhc_table[c - ucs_r1_uhc_table_min];
	} else if (c >= ucs_r2_uhc_table_min && c < ucs_r2_uhc_table_max) {
		s = ucs_r2_uhc_table[c - ucs_r2_uhc_table_min];
	}

	c1 = (s >> 8) & 0xff;
	c2 = s & 0xff;
	/* exclude UHC extension area */
	if (c1 < 0xa1 || c2 < 0xa1) {
		/*
		 * No usable double-byte code.  Only 7-bit values may pass through
		 * as themselves.  Copying c unconditionally would leave s > 0 for
		 * every c >= 0x80, skip the fallback below and emit two bytes of
		 * the raw code point instead of reporting an illegal character.
		 */
		s = (c >= 0 && c < 0x80) ? c : 0;
	}

	if (s <= 0) {
		c1 = c & ~MBFL_WCSPLANE_MASK;
		if (c1 == MBFL_WCSPLANE_KSC5601) {
			/* round-trip of a code the decoder could not map */
			s = c & MBFL_WCSPLANE_MASK;
		}
		if (c == 0) {
			s = 0;
		} else if (s <= 0) {
			s = -1;
		}
	}

	if (s >= 0) {
		if (s < 0x80) {	/* latin */
			CK((*filter->output_function)(s, filter->data));
		} else {
			CK((*filter->output_function)((s >> 8) & 0xff, filter->data));
			CK((*filter->output_function)(s & 0xff, filter->data));
		}
	} else {
		if (filter->illegal_mode != MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE) {
			CK(mbfl_filt_conv_illegal_output(c, filter));
		}
	}

	return c;
}
/*
 * Identify filter for EUC-KR: consumes one byte per call and sets
 * filter->flag to 1 when the byte cannot occur at the current position.
 * filter->status is 0 between characters and 1 after a lead byte.
 * Always returns c.
 */
static int mbfl_filt_ident_euckr(int c, mbfl_identify_filter *filter)
{
	if (filter->status == 0) {
		if (c > 0xa0 && c < 0xff) {
			/* DBCS lead byte: expect a trail byte next */
			filter->status = 1;
		} else if (c < 0 || c >= 0x80) {
			/* neither 7-bit nor a lead byte */
			filter->flag = 1;
		}
	} else {
		if (filter->status == 1 && (c < 0xa1 || c > 0xfe)) {
			/* invalid trail byte */
			filter->flag = 1;
		}
		/* after a trail byte, or from any unexpected state, start over */
		filter->status = 0;
	}

	return c;
}

View file

@ -0,0 +1,43 @@
/*
* "streamable kanji code filter and converter"
* Copyright (c) 1998-2002 HappySize, Inc. All rights reserved.
*
* LICENSE NOTICES
*
* This file is part of "streamable kanji code filter and converter",
* which is distributed under the terms of GNU Lesser General Public
* License (version 2) as published by the Free Software Foundation.
*
* This software is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with "streamable kanji code filter and converter";
* if not, write to the Free Software Foundation, Inc., 59 Temple Place,
* Suite 330, Boston, MA 02111-1307 USA
*
* The author of this file:
*
*/
/*
* The source code included in this file was separated from mbfilter_kr.h
* by moriyoshi koizumi <moriyoshi@php.net> on 4 dec 2002.
*
*/
/* Include guard for the EUC-KR filter declarations. */
#ifndef MBFL_MBFILTER_EUC_KR_H
#define MBFL_MBFILTER_EUC_KR_H
#include "mbfilter.h"
/* Encoding descriptor object for EUC-KR. */
extern const mbfl_encoding mbfl_encoding_euc_kr;
/* Identify-filter vtable and the two conversion vtables (EUC-KR -> wchar, wchar -> EUC-KR). */
extern const struct mbfl_identify_vtbl vtbl_identify_euckr;
extern const struct mbfl_convert_vtbl vtbl_euckr_wchar;
extern const struct mbfl_convert_vtbl vtbl_wchar_euckr;
/* Conversion callbacks: each takes one input value `c` and the filter it runs in. */
int mbfl_filt_conv_euckr_wchar(int c, mbfl_convert_filter *filter);
int mbfl_filt_conv_wchar_euckr(int c, mbfl_convert_filter *filter);
#endif /* MBFL_MBFILTER_EUC_KR_H */

View file

@ -0,0 +1,329 @@
/*
* "streamable kanji code filter and converter"
* Copyright (c) 1998-2002 HappySize, Inc. All rights reserved.
*
* LICENSE NOTICES
*
* This file is part of "streamable kanji code filter and converter",
* which is distributed under the terms of GNU Lesser General Public
* License (version 2) as published by the Free Software Foundation.
*
* This software is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with "streamable kanji code filter and converter";
* if not, write to the Free Software Foundation, Inc., 59 Temple Place,
* Suite 330, Boston, MA 02111-1307 USA
*
* The author of this file: Rui Hirokawa <hirokawa@php.net>
*
*/
/*
* The source code included in this file was separated from mbfilter_tw.c
* by moriyoshi koizumi <moriyoshi@php.net> on 4 dec 2002.
*
*/
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
#include "mbfilter.h"
#include "mbfilter_euc_tw.h"
#include "unicode_table_cns11643.h"
/* Forward declaration: defined later in this file, referenced by vtbl_identify_euctw. */
static int mbfl_filt_ident_euctw(int c, mbfl_identify_filter *filter);
/*
 * Sequence length keyed by the first byte of a character.
 * The table has 256 entries (16 per row), one for every byte value 0x00-0xFF:
 * 0xA1-0xFE start a 2-byte sequence, 0x8E starts a 4-byte sequence and every
 * other byte value has length 1.
 */
static const unsigned char mblen_table_euctw[] = { /* indexed 0x00-0xFF; 2-byte leads are 0xA1-0xFE */
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 4, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1
};
/* NULL-terminated list of alternative names for EUC-TW. */
static const char *mbfl_encoding_euc_tw_aliases[] = {"EUC_TW", "eucTW", "x-euc-tw", NULL};
/*
 * Encoding descriptor for EUC-TW. The initializer supplies, in order: the
 * encoding id, two name strings, the alias list, the per-lead-byte length
 * table and the encoding type flag.
 * NOTE(review): field names live in the mbfl_encoding declaration (mbfilter.h),
 * which is not visible here - confirm which string is the MIME name.
 */
const mbfl_encoding mbfl_encoding_euc_tw = {
mbfl_no_encoding_euc_tw,
"EUC-TW",
"EUC-TW",
(const char *(*)[])&mbfl_encoding_euc_tw_aliases,
mblen_table_euctw,
MBFL_ENCTYPE_MBCS
};
/*
 * Identification vtable for EUC-TW: encoding id, the shared common
 * constructor/destructor, and the per-byte identify callback from this file.
 */
const struct mbfl_identify_vtbl vtbl_identify_euctw = {
mbfl_no_encoding_euc_tw,
mbfl_filt_ident_common_ctor,
mbfl_filt_ident_common_dtor,
mbfl_filt_ident_euctw
};
/*
 * Conversion vtable EUC-TW -> wchar: source id, destination id, shared
 * constructor/destructor, the per-byte decoder and the shared flush routine.
 */
const struct mbfl_convert_vtbl vtbl_euctw_wchar = {
mbfl_no_encoding_euc_tw,
mbfl_no_encoding_wchar,
mbfl_filt_conv_common_ctor,
mbfl_filt_conv_common_dtor,
mbfl_filt_conv_euctw_wchar,
mbfl_filt_conv_common_flush
};
/*
 * Conversion vtable wchar -> EUC-TW: source id, destination id, shared
 * constructor/destructor, the per-character encoder and the shared flush routine.
 */
const struct mbfl_convert_vtbl vtbl_wchar_euctw = {
mbfl_no_encoding_wchar,
mbfl_no_encoding_euc_tw,
mbfl_filt_conv_common_ctor,
mbfl_filt_conv_common_dtor,
mbfl_filt_conv_wchar_euctw,
mbfl_filt_conv_common_flush
};
/*
 * Evaluate `statement`; if its value is negative, return -1 from the
 * enclosing function. Only meaningful inside functions that return int.
 */
#define CK(statement) do { if ((statement) < 0) return (-1); } while (0)
/*
 * EUC-TW => wchar
 *
 * Byte-at-a-time decoder. filter->status tracks the position in a sequence:
 *   0: between characters
 *   1: lead byte of a 2-byte sequence is held in filter->cache
 *   2: the 4-byte introducer 0x8e was seen
 *   3: plane byte seen; filter->cache holds (plane byte - 0xa1)
 *   4: third byte seen; filter->cache holds (plane index << 8) + (byte - 0xa1)
 *
 * Bytes that cannot be decoded are forwarded tagged with MBFL_WCSGROUP_THROUGH;
 * well-formed sequences missing from the tables are forwarded tagged with
 * MBFL_WCSPLANE_CNS11643.
 *
 * Returns c, or -1 (through CK) as soon as the output function fails.
 */
int
mbfl_filt_conv_euctw_wchar(int c, mbfl_convert_filter *filter)
{
	int c1, s, w, plane;

	switch (filter->status) {
	case 0:
		if (c >= 0 && c < 0x80) {	/* latin */
			CK((*filter->output_function)(c, filter->data));
		} else if (c > 0xa0 && c < 0xff) {	/* dbcs first byte */
			filter->status = 1;
			filter->cache = c;
		} else if (c == 0x8e) {	/* mbcs first byte */
			filter->status = 2;
			filter->cache = c;
		} else {
			w = c & MBFL_WCSGROUP_MASK;
			w |= MBFL_WCSGROUP_THROUGH;
			CK((*filter->output_function)(w, filter->data));
		}
		break;

	case 1:	/* dbcs second byte */
		filter->status = 0;
		c1 = filter->cache;
		if (c > 0xa0 && c < 0xff) {
			w = (c1 - 0xa1)*94 + (c - 0xa1);
			if (w >= 0 && w < cns11643_1_ucs_table_size) {
				w = cns11643_1_ucs_table[w];
			} else {
				w = 0;
			}
			if (w <= 0) {
				/* no mapping: keep the raw code in the CNS11643 private plane */
				w = (c1 << 8) | c;
				w &= MBFL_WCSPLANE_MASK;
				w |= MBFL_WCSPLANE_CNS11643;
			}
			CK((*filter->output_function)(w, filter->data));
		} else if ((c >= 0 && c < 0x21) || c == 0x7f) {	/* CTLs */
			CK((*filter->output_function)(c, filter->data));
		} else {
			w = (c1 << 8) | c;
			w &= MBFL_WCSGROUP_MASK;
			w |= MBFL_WCSGROUP_THROUGH;
			CK((*filter->output_function)(w, filter->data));
		}
		break;

	case 2:	/* got 0x8e, expecting the plane byte */
		c1 = filter->cache;
		if ((c >= 0 && c < 0x21) || c == 0x7f) {	/* CTLs */
			filter->status = 0;
			CK((*filter->output_function)(c, filter->data));
		} else if (c > 0xa0 && c < 0xaf) {
			filter->status = 3;
			filter->cache = c - 0xa1;
		} else {
			/*
			 * Invalid plane byte: abandon the sequence. Without this reset
			 * the filter stayed in state 2 and kept interpreting every
			 * following byte as a plane byte.
			 */
			filter->status = 0;
			w = (c1 << 8) | c;
			w &= MBFL_WCSGROUP_MASK;
			w |= MBFL_WCSGROUP_THROUGH;
			CK((*filter->output_function)(w, filter->data));
		}
		break;

	case 3:	/* got 0x8e and plane, expecting the third byte */
		filter->status = 0;
		c1 = filter->cache;
		if ((c >= 0 && c < 0x21) || c == 0x7f) {	/* CTLs */
			CK((*filter->output_function)(c, filter->data));
		} else if (c > 0xa0 && c < 0xff) {
			filter->status = 4;
			filter->cache = (c1 << 8) + c - 0xa1;
		} else {
			w = (c1 << 8) | c;
			w &= MBFL_WCSGROUP_MASK;
			w |= MBFL_WCSGROUP_THROUGH;
			CK((*filter->output_function)(w, filter->data));
		}
		break;

	case 4:	/* mbcs fourth byte */
		filter->status = 0;
		c1 = filter->cache;
		if (c1 >= 0x100 && c1 <= 0xdff && c > 0xa0 && c < 0xff) {
			plane = (c1 & 0xf00) >> 8;	/* zero-based: 1 is plane 2, 13 is plane 14 */
			s = (c1 & 0xff)*94 + c - 0xa1;
			w = 0;
			if (s >= 0) {
				if (plane == 1 && s < cns11643_2_ucs_table_size) {
					w = cns11643_2_ucs_table[s];
				}
				if (plane == 13 && s < cns11643_14_ucs_table_size) {
					w = cns11643_14_ucs_table[s];
				}
			}
			if (w <= 0) {
				w = ((c1 & 0x7f) << 8) | (c & 0x7f);
				w &= MBFL_WCSPLANE_MASK;
				w |= MBFL_WCSPLANE_CNS11643;
			}
			CK((*filter->output_function)(w, filter->data));
		} else if ((c >= 0 && c < 0x21) || c == 0x7f) {	/* CTLs */
			CK((*filter->output_function)(c, filter->data));
		} else {
			w = (c1 << 8) | c | 0x8e0000;
			w &= MBFL_WCSGROUP_MASK;
			w |= MBFL_WCSGROUP_THROUGH;
			CK((*filter->output_function)(w, filter->data));
		}
		break;

	default:
		filter->status = 0;
		break;
	}

	return c;
}
/*
 * wchar => EUC-TW
 *
 * Looks c up in the UCS -> CNS 11643 tables. The table value carries the
 * plane number in bits 16-20 and the two-byte code in the low 16 bits.
 * Planes 0 and 1 are written as one byte (value below 0x80) or two bytes;
 * higher planes are written as the four-byte form 0x8e, 0xa0+plane, hi, lo.
 * Characters tagged MBFL_WCSPLANE_CNS11643 are written back from their low
 * 16 bits. Anything else goes to mbfl_filt_conv_illegal_output unless the
 * illegal mode is MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE.
 *
 * Returns c, or -1 (through CK) when an output call fails.
 */
int
mbfl_filt_conv_wchar_euctw(int c, mbfl_convert_filter *filter)
{
	int c1, s, plane;

	s = 0;
	if (c >= ucs_a1_cns11643_table_min && c < ucs_a1_cns11643_table_max) {
		s = ucs_a1_cns11643_table[c - ucs_a1_cns11643_table_min];
	} else if (c >= ucs_a2_cns11643_table_min && c < ucs_a2_cns11643_table_max) {
		s = ucs_a2_cns11643_table[c - ucs_a2_cns11643_table_min];
	} else if (c >= ucs_a3_cns11643_table_min && c < ucs_a3_cns11643_table_max) {
		s = ucs_a3_cns11643_table[c - ucs_a3_cns11643_table_min];
	} else if (c >= ucs_i_cns11643_table_min && c < ucs_i_cns11643_table_max) {
		s = ucs_i_cns11643_table[c - ucs_i_cns11643_table_min];
	} else if (c >= ucs_r_cns11643_table_min && c < ucs_r_cns11643_table_max) {
		s = ucs_r_cns11643_table[c - ucs_r_cns11643_table_min];
	}

	if (s <= 0) {
		c1 = c & ~MBFL_WCSPLANE_MASK;
		if (c1 == MBFL_WCSPLANE_CNS11643) {
			s = c & MBFL_WCSPLANE_MASK;
		}
		if (c == 0) {
			s = 0;
		} else if (s <= 0) {
			s = -1;
		}
	}

	if (s >= 0) {
		plane = (s & 0x1f0000) >> 16;
		if (plane <= 1) {
			if (s < 0x80) {	/* latin */
				CK((*filter->output_function)(s, filter->data));
			} else {
				s = (s & 0xffff) | 0x8080;
				CK((*filter->output_function)((s >> 8) & 0xff, filter->data));
				CK((*filter->output_function)(s & 0xff, filter->data));
			}
		} else {
			/*
			 * Four-byte form. The bytes are computed individually rather
			 * than packed into 0x8ea00000-based int: that constant does not
			 * fit in a signed int, so the old code depended on
			 * implementation-defined conversion and shifting of a negative
			 * value. The emitted bytes are unchanged.
			 */
			s = (s & 0xffff) | 0x8080;
			CK((*filter->output_function)(0x8e, filter->data));
			CK((*filter->output_function)((0xa0 + plane) & 0xff, filter->data));
			CK((*filter->output_function)((s >> 8) & 0xff, filter->data));
			CK((*filter->output_function)(s & 0xff, filter->data));
		}
	} else {
		if (filter->illegal_mode != MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE) {
			CK(mbfl_filt_conv_illegal_output(c, filter));
		}
	}

	return c;
}
/*
 * EUC-TW identification filter.
 *
 * Fed one byte at a time; sets filter->flag to 1 when a byte cannot belong
 * to EUC-TW. filter->status: 0 between characters, 1 awaiting the trail byte
 * of a 2-byte sequence, 2/3/4 awaiting bytes two/three/four of a 4-byte
 * sequence introduced by 0x8e. Always returns c.
 */
static int mbfl_filt_ident_euctw(int c, mbfl_identify_filter *filter)
{
	const int in_a1_fe = (c >= 0xa1 && c <= 0xfe);

	switch (filter->status) {
	case 0:
		if (in_a1_fe) {
			/* lead byte of a 2-byte sequence */
			filter->status = 1;
		} else if (c == 0x8e) {
			/* introducer of a 4-byte sequence */
			filter->status = 2;
		} else if (c < 0 || c >= 0x80) {
			/* not ASCII either */
			filter->flag = 1;
		}
		break;

	case 2:
		/* plane byte must be 0xa1-0xae */
		if (c >= 0xa1 && c < 0xaf) {
			filter->status = 3;
		} else {
			filter->flag = 1;
		}
		break;

	case 3:
		/* third byte of a 4-byte sequence */
		if (!in_a1_fe) {
			filter->flag = 1;
		}
		filter->status = 4;
		break;

	case 1:
	case 4:
		/* final byte of a 2-byte or 4-byte sequence */
		if (!in_a1_fe) {
			filter->flag = 1;
		}
		filter->status = 0;
		break;

	default:
		filter->status = 0;
		break;
	}

	return c;
}

View file

@ -0,0 +1,43 @@
/*
* "streamable kanji code filter and converter"
* Copyright (c) 1998-2002 HappySize, Inc. All rights reserved.
*
* LICENSE NOTICES
*
* This file is part of "streamable kanji code filter and converter",
* which is distributed under the terms of GNU Lesser General Public
* License (version 2) as published by the Free Software Foundation.
*
* This software is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with "streamable kanji code filter and converter";
* if not, write to the Free Software Foundation, Inc., 59 Temple Place,
* Suite 330, Boston, MA 02111-1307 USA
*
* The author of this file: Rui Hirokawa <hirokawa@php.net>
*
*/
/*
* The source code included in this file was separated from mbfilter_tw.h
* by moriyoshi koizumi <moriyoshi@php.net> on 4 dec 2002.
*
*/
/* Public declarations for the EUC-TW filter; guarded against multiple inclusion. */
#ifndef MBFL_MBFILTER_EUC_TW_H
#define MBFL_MBFILTER_EUC_TW_H
#include "mbfilter.h"
/* Encoding descriptor object for EUC-TW. */
extern const mbfl_encoding mbfl_encoding_euc_tw;
/* Identification vtable for EUC-TW. */
extern const struct mbfl_identify_vtbl vtbl_identify_euctw;
/* Conversion vtables, one per direction (EUC-TW to wchar and wchar to EUC-TW, going by the names). */
extern const struct mbfl_convert_vtbl vtbl_euctw_wchar;
extern const struct mbfl_convert_vtbl vtbl_wchar_euctw;
/* Per-unit conversion callbacks: c is the input unit, filter carries the conversion state. */
int mbfl_filt_conv_euctw_wchar(int c, mbfl_convert_filter *filter);
int mbfl_filt_conv_wchar_euctw(int c, mbfl_convert_filter *filter);
#endif /* MBFL_MBFILTER_EUC_TW_H */

View file

@ -0,0 +1,255 @@
/*
* "streamable kanji code filter and converter"
* Copyright (c) 1998-2002 HappySize, Inc. All rights reserved.
*
* LICENSE NOTICES
*
* This file is part of "streamable kanji code filter and converter",
* which is distributed under the terms of GNU Lesser General Public
* License (version 2) as published by the Free Software Foundation.
*
* This software is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with "streamable kanji code filter and converter";
* if not, write to the Free Software Foundation, Inc., 59 Temple Place,
* Suite 330, Boston, MA 02111-1307 USA
*
* The author of this part: Marcus Boerger <marcus.boerger@t-online.de>
*
*/
/*
* The source code included in this file was separated from mbfilter.c
* by moriyoshi koizumi <moriyoshi@php.net> on 4 dec 2002.
*
*/
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
#ifdef HAVE_STRING_H
#include <string.h>
#endif
#ifdef HAVE_STRINGS_H
#include <strings.h>
#endif
#include "mbfilter.h"
#include "mbfilter_htmlent.h"
#include "html_entities.h"
/*
 * Per-value length table with 256 entries (16 per row), one for every value
 * 0x00-0xFF: 1 for 0x00-0x7F and 6 for 0x80-0xFF. mbfl_filt_conv_html_enc
 * uses the value 1 to decide that a character is emitted unchanged.
 */
static const unsigned char mblen_table_html[] = { /* 0x00, 0x80 - 0xFF, only valid for numeric entities */
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6
};
/* NULL-terminated list of alternative names for the HTML entity pseudo-encoding. */
static const char *mbfl_encoding_html_ent_aliases[] = {"HTML", "html", NULL};
/*
 * Encoding descriptor for HTML entities. The initializer supplies, in order:
 * the encoding id, two name strings, the alias list, the length-table slot
 * (left NULL on purpose) and the encoding type flag.
 * NOTE(review): field names live in the mbfl_encoding declaration (mbfilter.h),
 * which is not visible here.
 */
const mbfl_encoding mbfl_encoding_html_ent = {
mbfl_no_encoding_html_ent,
"HTML-ENTITIES",
"US-ASCII",
(const char *(*)[])&mbfl_encoding_html_ent_aliases,
NULL, /* mblen_table_html is deliberately not installed: the length is calculated from the entities actually used */
MBFL_ENCTYPE_HTML_ENT
};
/*
 * Conversion vtable wchar -> HTML entities: source id, destination id, shared
 * constructor/destructor, the entity encoder and its flush routine.
 */
const struct mbfl_convert_vtbl vtbl_wchar_html = {
mbfl_no_encoding_wchar,
mbfl_no_encoding_html_ent,
mbfl_filt_conv_common_ctor,
mbfl_filt_conv_common_dtor,
mbfl_filt_conv_html_enc,
mbfl_filt_conv_html_enc_flush
};
/*
 * Conversion vtable HTML entities -> wchar. Unlike the other direction it
 * uses its own constructor/destructor, which allocate and release the entity
 * buffer kept in filter->cache.
 */
const struct mbfl_convert_vtbl vtbl_html_wchar = {
mbfl_no_encoding_html_ent,
mbfl_no_encoding_wchar,
mbfl_filt_conv_html_dec_ctor,
mbfl_filt_conv_html_dec_dtor,
mbfl_filt_conv_html_dec,
mbfl_filt_conv_html_dec_flush };
/*
 * Evaluate `statement`; if its value is negative, return -1 from the
 * enclosing function. Only meaningful inside functions that return int.
 */
#define CK(statement) do { if ((statement) < 0) return (-1); } while (0)
/*
 * any => HTML
 *
 * Writes c unchanged when mblen_table_html marks it as length 1 (values
 * below 0x80). Every other value is written as an entity: "&name;" when
 * mbfl_html_entity_list has an entry for c, otherwise the decimal numeric
 * form "&#NNN;". The list search stops at the first entry whose code is
 * greater than c, so it relies on the list being ordered by code and
 * terminated by a code of -1.
 *
 * Returns c, or -1 (through CK) when an output call fails.
 */
int mbfl_filt_conv_html_enc(int c, mbfl_convert_filter *filter)
{
	int tmp[10];	/* decimal digits of an unsigned 32-bit value, least significant first */
	int i = 0, p = 0, e;
	unsigned int uc;

	/* c >= 0 guards the table lookup: a negative c used to index out of bounds */
	if (c >= 0 && c < 256 && mblen_table_html[c] == 1) {
		CK((*filter->output_function)(c, filter->data));
	} else {
		CK((*filter->output_function)('&', filter->data));
		while (1) {
			e = mbfl_html_entity_list[i].code;
			if (c < e || e == -1) {
				break;
			}
			if (c == e) {
				while (mbfl_html_entity_list[i].name[p]) {
					CK((*filter->output_function)((int)mbfl_html_entity_list[i].name[p++], filter->data));
				}
				break;
			}
			i++;
		}
		i = 0;
		if (!p) {
			/* no named entity was written: fall back to the numeric form */
			CK((*filter->output_function)('#', filter->data));
			uc = (unsigned int)c;
			do {
				tmp[i++] = '0' + uc % 10;
				uc /= 10;
			} while (uc);
			do {
				CK((*filter->output_function)(tmp[--i], filter->data));
			} while (i);
		}
		CK((*filter->output_function)(';', filter->data));
	}
	return c;
}
/*
 * Flush routine of the wchar => HTML encoder. The encoder keeps no pending
 * output, so this only clears the filter's cache and status. Returns 0.
 */
int mbfl_filt_conv_html_enc_flush(mbfl_convert_filter *filter)
{
	filter->cache = 0;
	filter->status = 0;

	return 0;
}
/*
 * HTML => any
 */
/*
 * Capacity used for the entity collection buffer; the decoder constructor
 * allocates html_enc_buffer_size+1 bytes for it.
 */
#define html_enc_buffer_size 16
/* Characters the decoder accepts inside an entity after the leading '&'. */
static const char html_entity_chars[] = "#0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ";
/*
 * Constructor of the HTML => wchar decoder: allocates the buffer in which an
 * entity is collected and stores its address in filter->cache.
 *
 * NOTE(review): the pointer is stored through an (int) cast, which loses
 * bits wherever pointers are wider than int, and the allocation result is
 * not checked. Fixing this needs a pointer-sized field in the filter
 * structure, which is outside this function.
 */
void mbfl_filt_conv_html_dec_ctor(mbfl_convert_filter *filter)
{
	filter->cache = (int)mbfl_malloc(html_enc_buffer_size+1);
	filter->status = 0;
}
/*
 * Destructor of the HTML => wchar decoder: releases the entity buffer whose
 * address is kept in filter->cache (if any) and clears cache and status.
 */
void mbfl_filt_conv_html_dec_dtor(mbfl_convert_filter *filter)
{
	if (filter->cache != 0) {
		mbfl_free((void*)filter->cache);
		filter->cache = 0;
	}
	filter->status = 0;
}
/*
 * HTML => wchar, one input character per call.
 *
 * filter->cache holds the address of the collection buffer allocated by the
 * constructor; filter->status is the number of characters collected so far
 * (0 means "not inside an entity", and buffer[0] is always '&' otherwise).
 *
 * On ';' the collected text is decoded:
 *   - "&#NNN;" (decimal) and "&#xHHH;" / "&#XHHH;" (hexadecimal) produce the
 *     numeric value, provided every character is a valid digit, at least one
 *     digit is present and the value does not exceed 0x10ffff;
 *   - anything else is looked up by name in mbfl_html_entity_list.
 * Text that cannot be decoded is written out literally. A character that is
 * not allowed in an entity, a '#' in any position but the first, or a full
 * buffer also ends collection and writes the collected text literally; a
 * '&' in that position starts a new entity.
 *
 * Returns c, or -1 (through CK) when an output call fails.
 */
int mbfl_filt_conv_html_dec(int c, mbfl_convert_filter *filter)
{
	int pos, base, digit, ent = 0;
	mbfl_html_entity *entity;
	char *buffer = (char*)filter->cache;

	if (!filter->status) {
		if (c == '&') {
			filter->status = 1;
			buffer[0] = '&';
		} else {
			CK((*filter->output_function)(c, filter->data));
		}
	} else {
		if (c == ';') {
			buffer[filter->status] = 0;
			if (buffer[1] == '#') {
				/* numeric entity */
				base = 10;
				pos = 2;
				if (buffer[pos] == 'x' || buffer[pos] == 'X') {
					base = 16;
					pos++;
				}
				if (pos >= filter->status) {
					ent = -1;	/* no digits at all */
				}
				for (; ent >= 0 && pos < filter->status; pos++) {
					int ch = buffer[pos];

					if (ch >= '0' && ch <= '9') {
						digit = ch - '0';
					} else if (base == 16 && ch >= 'a' && ch <= 'f') {
						digit = ch - 'a' + 10;
					} else if (base == 16 && ch >= 'A' && ch <= 'F') {
						digit = ch - 'A' + 10;
					} else {
						ent = -1;	/* not a digit in this base */
						break;
					}
					ent = ent * base + digit;
					if (ent > 0x10ffff) {
						/* out of range; stopping here also keeps ent from overflowing */
						ent = -1;
						break;
					}
				}
				if (ent >= 0) {
					CK((*filter->output_function)(ent, filter->data));
					filter->status = 0;
				} else {
					/* malformed: write the collected text back unchanged */
					buffer[filter->status++] = ';';
					buffer[filter->status] = 0;
					CK(mbfl_filt_conv_html_dec_flush(filter));
				}
			} else {
				/* named entity */
				entity = (mbfl_html_entity *)mbfl_html_entity_list;
				while (entity->name) {
					if (!strcmp(buffer+1, entity->name)) {
						ent = entity->code;
						break;
					}
					entity++;
				}
				if (ent) {
					/* decoded */
					CK((*filter->output_function)(ent, filter->data));
					filter->status = 0;
				} else {
					/* unknown name: write the collected text back unchanged */
					buffer[filter->status++] = ';';
					buffer[filter->status] = 0;
					CK(mbfl_filt_conv_html_dec_flush(filter));
				}
			}
		} else {
			/* add character and check */
			buffer[filter->status++] = c;
			if (!strchr(html_entity_chars, c) || filter->status+1 == html_enc_buffer_size || (c == '#' && filter->status > 2)) {
				/* illegal character or end of buffer */
				if (c == '&') {
					/* the '&' starts a new entity, so it is not part of the flushed text */
					filter->status--;
				}
				buffer[filter->status] = 0;
				CK(mbfl_filt_conv_html_dec_flush(filter));
				if (c == '&') {
					filter->status = 1;
					buffer[0] = '&';
				}
			}
		}
	}
	return c;
}
/*
 * Flush routine of the HTML => wchar decoder: writes out, unchanged, the
 * filter->status characters currently collected in the entity buffer and
 * then resets status to 0. The buffer itself stays allocated.
 *
 * Returns 0, or -1 (through CK) when an output call fails; in that case
 * status is left untouched.
 */
int mbfl_filt_conv_html_dec_flush(mbfl_convert_filter *filter)
{
	char *pending = (char*)filter->cache;
	int remaining = filter->status;

	/* flush fragments */
	while (remaining != 0) {
		CK((*filter->output_function)(*pending++, filter->data));
		remaining--;
	}
	filter->status = 0;

	return 0;
}

View file

@ -0,0 +1,48 @@
/*
* "streamable kanji code filter and converter"
* Copyright (c) 1998-2002 HappySize, Inc. All rights reserved.
*
* LICENSE NOTICES
*
* This file is part of "streamable kanji code filter and converter",
* which is distributed under the terms of GNU Lesser General Public
* License (version 2) as published by the Free Software Foundation.
*
* This software is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with "streamable kanji code filter and converter";
* if not, write to the Free Software Foundation, Inc., 59 Temple Place,
* Suite 330, Boston, MA 02111-1307 USA
*
* The author of this part: Marcus Boerger <marcus.boerger@t-online.de>
*
*/
/*
* The source code included in this file was separated from mbfilter.c
* by moriyoshi koizumi <moriyoshi@php.net> on 4 dec 2002.
*
*/
/* Public declarations for the HTML entity filter; guarded against multiple inclusion. */
#ifndef MBFL_MBFILTER_HTMLENT_H
#define MBFL_MBFILTER_HTMLENT_H
#include "mbfilter.h"
/* Encoding descriptor object for the HTML entity pseudo-encoding. */
extern const mbfl_encoding mbfl_encoding_html_ent;
/* Conversion vtables, one per direction. */
extern const struct mbfl_convert_vtbl vtbl_wchar_html;
extern const struct mbfl_convert_vtbl vtbl_html_wchar;
/* Decoder constructor/destructor (each was previously declared twice in this header). */
void mbfl_filt_conv_html_dec_ctor(mbfl_convert_filter *filter);
void mbfl_filt_conv_html_dec_dtor(mbfl_convert_filter *filter);
/* Encoder (wchar => HTML) callback and its flush routine. */
int mbfl_filt_conv_html_enc(int c, mbfl_convert_filter *filter);
int mbfl_filt_conv_html_enc_flush(mbfl_convert_filter *filter);
/* Decoder (HTML => wchar) callback and its flush routine. */
int mbfl_filt_conv_html_dec(int c, mbfl_convert_filter *filter);
int mbfl_filt_conv_html_dec_flush(mbfl_convert_filter *filter);
#endif /* MBFL_MBFILTER_HTMLENT_H */

View file

@ -0,0 +1,276 @@
/*
* "streamable kanji code filter and converter"
* Copyright (c) 1998-2002 HappySize, Inc. All rights reserved.
*
* LICENSE NOTICES
*
* This file is part of "streamable kanji code filter and converter",
* which is distributed under the terms of GNU Lesser General Public
* License (version 2) as published by the Free Software Foundation.
*
* This software is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with "streamable kanji code filter and converter";
* if not, write to the Free Software Foundation, Inc., 59 Temple Place,
* Suite 330, Boston, MA 02111-1307 USA
*
* The author of this file:
*
*/
/*
* The source code included in this file was separated from mbfilter_cn.c
* by moriyoshi koizumi <moriyoshi@php.net> on 4 dec 2002.
*
*/
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
#include "mbfilter.h"
#include "mbfilter_hz.h"
#include "unicode_table_cp936.h"
/* Forward declaration: defined later in this file, referenced by vtbl_identify_hz. */
static int mbfl_filt_ident_hz(int c, mbfl_identify_filter *filter);
/*
 * Encoding descriptor for HZ. The initializer supplies, in order: the
 * encoding id, two name strings, two NULL slots (the positions that hold the
 * alias list and length table in the other descriptors of this library) and
 * the type flags, which mark HZ as a multibyte encoding with shift codes.
 */
const mbfl_encoding mbfl_encoding_hz = {
mbfl_no_encoding_hz,
"HZ",
"HZ-GB-2312",
NULL,
NULL,
MBFL_ENCTYPE_MBCS | MBFL_ENCTYPE_SHFTCODE
};
/*
 * Identification vtable for HZ: encoding id, the shared common
 * constructor/destructor, and the per-byte identify callback from this file.
 */
const struct mbfl_identify_vtbl vtbl_identify_hz = {
mbfl_no_encoding_hz,
mbfl_filt_ident_common_ctor,
mbfl_filt_ident_common_dtor,
mbfl_filt_ident_hz
};
/*
 * Conversion vtable HZ -> wchar: source id, destination id, shared
 * constructor/destructor, the per-byte decoder and the shared flush routine.
 */
const struct mbfl_convert_vtbl vtbl_hz_wchar = {
mbfl_no_encoding_hz,
mbfl_no_encoding_wchar,
mbfl_filt_conv_common_ctor,
mbfl_filt_conv_common_dtor,
mbfl_filt_conv_hz_wchar,
mbfl_filt_conv_common_flush
};
/*
 * Conversion vtable wchar -> HZ: source id, destination id, shared
 * constructor/destructor, the per-character encoder and an HZ-specific flush
 * routine that writes the closing "~}" when the output is still in GB mode.
 */
const struct mbfl_convert_vtbl vtbl_wchar_hz = {
mbfl_no_encoding_wchar,
mbfl_no_encoding_hz,
mbfl_filt_conv_common_ctor,
mbfl_filt_conv_common_dtor,
mbfl_filt_conv_wchar_hz,
mbfl_filt_conv_any_hz_flush
};
/*
 * Evaluate `statement`; if its value is negative, return -1 from the
 * enclosing function. Only meaningful inside functions that return int.
 */
#define CK(statement) do { if ((statement) < 0) return (-1); } while (0)
/*
 * HZ => wchar
 *
 * Byte-at-a-time decoder. filter->status combines the shift mode (0x00 =
 * ASCII, 0x10 = GB2312) with a low-nibble step:
 *   0: between characters
 *   1: first byte of a GB2312 pair is held in filter->cache
 *   2: a '~' was seen and the escape is not complete yet
 *
 * After '~', '{' enters GB mode, '}' returns to ASCII mode and '~' yields a
 * literal '~'. "~\n" is consumed without output (line continuation). Any
 * other byte after '~' is forwarded as an undecodable sequence tagged with
 * MBFL_WCSGROUP_THROUGH and the decoder returns to the mode it was in;
 * previously the decoder stayed in step 2 and silently swallowed input until
 * the next '{', '}' or '~'.
 *
 * Returns c, or -1 (through CK) when an output call fails.
 */
int
mbfl_filt_conv_hz_wchar(int c, mbfl_convert_filter *filter)
{
	int c1, s, w;

	switch (filter->status & 0xf) {
/*	case 0x00:	 ASCII */
/*	case 0x10:	 GB2312 */
	case 0:
		if (c == 0x7e) {
			filter->status += 2;
		} else if (filter->status == 0x10 && c > 0x20 && c < 0x7f) {	/* DBCS first char */
			filter->cache = c;
			filter->status += 1;
		} else if (c >= 0 && c < 0x80) {	/* latin, CTLs */
			CK((*filter->output_function)(c, filter->data));
		} else {
			w = c & MBFL_WCSGROUP_MASK;
			w |= MBFL_WCSGROUP_THROUGH;
			CK((*filter->output_function)(w, filter->data));
		}
		break;

/*	case 0x11:	 GB2312 second char */
	case 1:
		filter->status &= ~0xf;
		c1 = filter->cache;
		if (c1 > 0x20 && c1 < 0x7f && c > 0x20 && c < 0x7f) {
			/* index as if both bytes had their high bit set (EUC form) */
			s = (c1 - 1)*192 + c + 0x40;	/* GB2312 */
			if (s >= 0 && s < cp936_ucs_table_size) {
				w = cp936_ucs_table[s];
			} else {
				w = 0;
			}
			if (w <= 0) {
				w = (c1 << 8) | c;
				w &= MBFL_WCSPLANE_MASK;
				w |= MBFL_WCSPLANE_GB2312;
			}
			CK((*filter->output_function)(w, filter->data));
		} else if ((c >= 0 && c < 0x21) || c == 0x7f) {	/* CTLs */
			CK((*filter->output_function)(c, filter->data));
		} else {
			w = (c1 << 8) | c;
			w &= MBFL_WCSGROUP_MASK;
			w |= MBFL_WCSGROUP_THROUGH;
			CK((*filter->output_function)(w, filter->data));
		}
		break;

	/* '~' */
	case 2:
		if (c == 0x7d) {	/* '}' */
			filter->status = 0x0;
		} else if (c == 0x7b) {	/* '{' */
			filter->status = 0x10;
		} else if (c == 0x7e) {	/* '~' */
			filter->status = 0x0;
			CK((*filter->output_function)(0x007e, filter->data));
		} else {
			/* not an escape: leave step 2 but keep the current shift mode */
			filter->status &= ~0xf;
			if (c != 0x0a) {
				/* "~\n" is a line continuation and produces nothing */
				w = (0x7e << 8) | c;
				w &= MBFL_WCSGROUP_MASK;
				w |= MBFL_WCSGROUP_THROUGH;
				CK((*filter->output_function)(w, filter->data));
			}
		}
		break;

	default:
		filter->status = 0;
		break;
	}

	return c;
}
/*
 * wchar => HZ
 *
 * Looks c up in the UCS -> CP936 tables and writes it in HZ form. The high
 * byte of filter->status remembers the output mode: 0 while in ASCII, 0x200
 * after "~{" has been written. Mode switches ("~{" / "~}") are written
 * only when the mode actually changes, and a literal '~' is written as "~~".
 *
 * HZ can only carry GB 2312 codes, i.e. table values whose two bytes are
 * both 0xa1 or above. Other two-byte table values (CP936 extensions) are
 * rejected before the 0x8080 offset is removed; previously a value with a
 * trail byte below 0xa1 could slip through the range check after the
 * subtraction and be written as a different, valid-looking GB 2312 pair.
 * Characters that cannot be written go to mbfl_filt_conv_illegal_output
 * unless the illegal mode is MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE.
 *
 * Returns c, or -1 (through CK) when an output call fails.
 */
int
mbfl_filt_conv_wchar_hz(int c, mbfl_convert_filter *filter)
{
	int s;

	s = 0;
	if (c >= ucs_a1_cp936_table_min && c < ucs_a1_cp936_table_max) {
		s = ucs_a1_cp936_table[c - ucs_a1_cp936_table_min];
	} else if (c >= ucs_a2_cp936_table_min && c < ucs_a2_cp936_table_max) {
		s = ucs_a2_cp936_table[c - ucs_a2_cp936_table_min];
	} else if (c >= ucs_a3_cp936_table_min && c < ucs_a3_cp936_table_max) {
		s = ucs_a3_cp936_table[c - ucs_a3_cp936_table_min];
	} else if (c >= ucs_i_cp936_table_min && c < ucs_i_cp936_table_max) {
		s = ucs_i_cp936_table[c - ucs_i_cp936_table_min];
	} else if (c >= ucs_hff_cp936_table_min && c < ucs_hff_cp936_table_max) {
		s = ucs_hff_cp936_table[c - ucs_hff_cp936_table_min];
	}

	if (s & 0x8000) {
		if (((s >> 8) & 0xff) < 0xa1 || (s & 0xff) < 0xa1) {
			/* CP936 extension area: has no GB 2312 equivalent */
			s = -1;
		} else {
			s -= 0x8080;
		}
	}

	if (s <= 0) {
		if (c == 0) {
			s = 0;
		} else {
			s = -1;
		}
	} else if ((s >= 0x80 && s < 0x2121) || (s > 0x8080)) {
		s = -1;
	}

	if (s >= 0) {
		if (s < 0x80) {	/* ASCII */
			if ((filter->status & 0xff00) != 0) {
				CK((*filter->output_function)(0x7e, filter->data));	/* '~' */
				CK((*filter->output_function)(0x7d, filter->data));	/* '}' */
			}
			filter->status = 0;
			if (s == 0x7e) {
				/* a literal '~' is doubled */
				CK((*filter->output_function)(0x7e, filter->data));
			}
			CK((*filter->output_function)(s, filter->data));
		} else {	/* GB 2312-80 */
			if ((filter->status & 0xff00) != 0x200) {
				CK((*filter->output_function)(0x7e, filter->data));	/* '~' */
				CK((*filter->output_function)(0x7b, filter->data));	/* '{' */
			}
			filter->status = 0x200;
			CK((*filter->output_function)((s >> 8) & 0x7f, filter->data));
			CK((*filter->output_function)(s & 0x7f, filter->data));
		}
	} else {
		if (filter->illegal_mode != MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE) {
			CK(mbfl_filt_conv_illegal_output(c, filter));
		}
	}

	return c;
}
/*
 * Flush routine of the wchar => HZ encoder. If the high byte of
 * filter->status shows that the output is not in ASCII mode, writes "~}"
 * to return to it, then clears that high byte. Returns 0, or -1 (through CK)
 * when an output call fails.
 */
int
mbfl_filt_conv_any_hz_flush(mbfl_convert_filter *filter)
{
	const int shifted_out = (filter->status & 0xff00) != 0;

	if (shifted_out) {
		/* back to latin */
		CK((*filter->output_function)(0x7e, filter->data));	/* '~' */
		CK((*filter->output_function)(0x7d, filter->data));	/* '}' */
	}
	filter->status &= 0xff;

	return 0;
}
/*
 * HZ identification filter.
 *
 * Fed one byte at a time; sets filter->flag to 1 when a byte cannot belong
 * to HZ. filter->status combines the shift mode (0x00 = ASCII, 0x10 =
 * GB2312) with a low-nibble step: 0 between characters, 1 awaiting the
 * second byte of a GB2312 pair, 2 after a '~'. Always returns c.
 */
static int mbfl_filt_ident_hz(int c, mbfl_identify_filter *filter)
{
	const int step = filter->status & 0xf;

	if (step == 0) {
		if (c == 0x7e) {
			/* '~' opens an escape */
			filter->status += 2;
		} else if (filter->status == 0x10 && c > 0x20 && c < 0x7f) {
			/* DBCS first char */
			filter->status += 1;
		} else if (c < 0 || c >= 0x80) {
			/* anything outside latin / CTLs is bad */
			filter->flag = 1;
		}
	} else if (step == 1) {
		/* GB2312 second char */
		if (c < 0x21 || c > 0x7e) {
			filter->flag = 1;
		}
		filter->status &= ~0xf;
	} else if (step == 2) {
		switch (c) {
		case 0x7b:	/* '{' */
			filter->status = 0x10;
			break;
		case 0x7d:	/* '}' */
		case 0x7e:	/* '~' */
			filter->status = 0;
			break;
		default:	/* bad */
			filter->flag = 1;
			filter->status &= ~0xf;
			break;
		}
	} else {
		filter->status = 0;
	}

	return c;
}
/*
* Local variables:
* tab-width: 4
* c-basic-offset: 4
* End:
*/

View file

@ -0,0 +1,44 @@
/*
* "streamable kanji code filter and converter"
* Copyright (c) 1998-2002 HappySize, Inc. All rights reserved.
*
* LICENSE NOTICES
*
* This file is part of "streamable kanji code filter and converter",
* which is distributed under the terms of GNU Lesser General Public
* License (version 2) as published by the Free Software Foundation.
*
* This software is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with "streamable kanji code filter and converter";
* if not, write to the Free Software Foundation, Inc., 59 Temple Place,
* Suite 330, Boston, MA 02111-1307 USA
*
* The author of this file:
*
*/
/*
* The source code included in this file was separated from mbfilter_cn.h
* by moriyoshi koizumi <moriyoshi@php.net> on 4 dec 2002.
*
*/
/* Public declarations for the HZ filter; guarded against multiple inclusion. */
#ifndef MBFL_MBFILTER_HZ_H
#define MBFL_MBFILTER_HZ_H
#include "mbfilter.h"
/* Encoding descriptor object for HZ. */
extern const mbfl_encoding mbfl_encoding_hz;
/* Identification vtable for HZ. */
extern const struct mbfl_identify_vtbl vtbl_identify_hz;
/* Conversion vtables, one per direction (HZ to wchar and wchar to HZ, going by the names). */
extern const struct mbfl_convert_vtbl vtbl_hz_wchar;
extern const struct mbfl_convert_vtbl vtbl_wchar_hz;
/* Per-unit conversion callbacks: c is the input unit, filter carries the conversion state. */
int mbfl_filt_conv_hz_wchar(int c, mbfl_convert_filter *filter);
int mbfl_filt_conv_wchar_hz(int c, mbfl_convert_filter *filter);
/* Flush routine for output produced in HZ. */
int mbfl_filt_conv_any_hz_flush(mbfl_convert_filter *filter);
#endif /* MBFL_MBFILTER_HZ_H */

View file

@ -0,0 +1,353 @@
/*
* "streamable kanji code filter and converter"
* Copyright (c) 1998-2002 HappySize, Inc. All rights reserved.
*
* LICENSE NOTICES
*
* This file is part of "streamable kanji code filter and converter",
* which is distributed under the terms of GNU Lesser General Public
* License (version 2) as published by the Free Software Foundation.
*
* This software is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with "streamable kanji code filter and converter";
* if not, write to the Free Software Foundation, Inc., 59 Temple Place,
* Suite 330, Boston, MA 02111-1307 USA
*
* The author of this file:
*
*/
/*
* The source code included in this file was separated from mbfilter_kr.c
* by moriyoshi koizumi <moriyoshi@php.net> on 4 dec 2002.
*
*/
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
#include "mbfilter.h"
#include "mbfilter_iso2022_kr.h"
#include "unicode_table_uhc.h"
/* Forward declaration: referenced by vtbl_identify_2022kr below. */
static int mbfl_filt_ident_2022kr(int c, mbfl_identify_filter *filter);
/*
 * Encoding descriptor for ISO-2022-KR. The initializer supplies, in order:
 * the encoding id, two name strings, two NULL slots (the positions that hold
 * the alias list and length table in the other descriptors of this library)
 * and the type flags, which mark it as a multibyte encoding with shift codes.
 */
const mbfl_encoding mbfl_encoding_2022kr = {
mbfl_no_encoding_2022kr,
"ISO-2022-KR",
"ISO-2022-KR",
NULL,
NULL,
MBFL_ENCTYPE_MBCS | MBFL_ENCTYPE_SHFTCODE
};
/*
 * Identification vtable for ISO-2022-KR: encoding id, the shared common
 * constructor/destructor, and the per-byte identify callback.
 */
const struct mbfl_identify_vtbl vtbl_identify_2022kr = {
mbfl_no_encoding_2022kr,
mbfl_filt_ident_common_ctor,
mbfl_filt_ident_common_dtor,
mbfl_filt_ident_2022kr
};
/*
 * Conversion vtable wchar -> ISO-2022-KR: source id, destination id, shared
 * constructor/destructor, the per-character encoder and an encoding-specific
 * flush routine.
 */
const struct mbfl_convert_vtbl vtbl_wchar_2022kr = {
mbfl_no_encoding_wchar,
mbfl_no_encoding_2022kr,
mbfl_filt_conv_common_ctor,
mbfl_filt_conv_common_dtor,
mbfl_filt_conv_wchar_2022kr,
mbfl_filt_conv_any_2022kr_flush
};
/*
 * Conversion vtable ISO-2022-KR -> wchar: source id, destination id, shared
 * constructor/destructor, the per-byte decoder and the shared flush routine.
 */
const struct mbfl_convert_vtbl vtbl_2022kr_wchar = {
mbfl_no_encoding_2022kr,
mbfl_no_encoding_wchar,
mbfl_filt_conv_common_ctor,
mbfl_filt_conv_common_dtor,
mbfl_filt_conv_2022kr_wchar,
mbfl_filt_conv_common_flush
};
/*
 * Evaluate `statement`; if its value is negative, return -1 from the
 * enclosing function. Only meaningful inside functions that return int.
 */
#define CK(statement) do { if ((statement) < 0) return (-1); } while (0)
/*
 * ISO-2022-KR => wchar
 *
 * Byte-at-a-time decoder. filter->status is a bit field:
 *   0x100     the designator "ESC $ ) C" has been seen
 *   0x010     shifted out (SO): bytes are read as KSC5601 pairs
 *   low nibble, the step inside a sequence:
 *     0  between characters
 *     1  lead byte of a KSC5601 pair is held in filter->cache
 *     2  ESC seen
 *     3  ESC $ seen
 *     4  ESC $ ) seen
 * An escape sequence that does not complete is written out literally and the
 * offending byte is then reprocessed from step 0.
 *
 * An ESC arriving where the second byte of a pair is expected now enters
 * step 2. The previous code incremented status after its low nibble had
 * already been cleared, which put the decoder back into step 1 so that the
 * byte after ESC was consumed as a trail byte.
 *
 * Returns c, or -1 (through CK) when an output call fails.
 */
int
mbfl_filt_conv_2022kr_wchar(int c, mbfl_convert_filter *filter)
{
	int c1, w, flag;

retry:
	switch (filter->status & 0xf) {
		/* case 0x00: ASCII */
		/* case 0x10: KSC5601 */
	case 0:
		if (c == 0x1b) {	/* ESC */
			filter->status += 2;
		} else if (c == 0x0f) {	/* SI (ASCII) */
			filter->status &= ~0xff;
		} else if (c == 0x0e) {	/* SO (KSC5601) */
			filter->status |= 0x10;
		} else if ((filter->status & 0x10) != 0 && c > 0x20 && c < 0x7f) {
			/* KSC5601 lead byte */
			filter->cache = c;
			filter->status += 1;
		} else if ((filter->status & 0x10) == 0 && c >= 0 && c < 0x80) {
			/* latin, CTLs */
			CK((*filter->output_function)(c, filter->data));
		} else {
			w = c & MBFL_WCSGROUP_MASK;
			w |= MBFL_WCSGROUP_THROUGH;
			CK((*filter->output_function)(w, filter->data));
		}
		break;

	case 1:	/* dbcs second byte */
		filter->status &= ~0xf;
		c1 = filter->cache;
		/* the lead byte selects which lookup table applies */
		flag = 0;
		if (c1 > 0x20 && c1 < 0x47) {
			flag = 1;
		} else if (c1 >= 0x47 && c1 <= 0x7e && c1 != 0x49) {
			flag = 2;
		}
		if (flag > 0 && c > 0x20 && c < 0x7f) {
			if (flag == 1) {
				w = (c1 - 0x21)*190 + (c - 0x41) + 0x80;
				if (w >= 0 && w < uhc2_ucs_table_size) {
					w = uhc2_ucs_table[w];
				} else {
					w = 0;
				}
			} else {
				w = (c1 - 0x47)*94 + (c - 0x21);
				if (w >= 0 && w < uhc3_ucs_table_size) {
					w = uhc3_ucs_table[w];
				} else {
					w = 0;
				}
			}
			if (w <= 0) {
				w = (c1 << 8) | c;
				w &= MBFL_WCSPLANE_MASK;
				w |= MBFL_WCSPLANE_KSC5601;
			}
			CK((*filter->output_function)(w, filter->data));
		} else if (c == 0x1b) {	/* ESC */
			/* low nibble is 0 here, so this enters step 2 (ESC seen) */
			filter->status += 2;
		} else if ((c >= 0 && c < 0x21) || c == 0x7f) {	/* CTLs */
			CK((*filter->output_function)(c, filter->data));
		} else {
			w = (c1 << 8) | c;
			w &= MBFL_WCSGROUP_MASK;
			w |= MBFL_WCSGROUP_THROUGH;
			CK((*filter->output_function)(w, filter->data));
		}
		break;

	case 2:	/* ESC */
		if (c == 0x24) {	/* '$' */
			filter->status++;
		} else {
			filter->status &= ~0xf;
			CK((*filter->output_function)(0x1b, filter->data));
			goto retry;
		}
		break;

	case 3:	/* ESC $ */
		if (c == 0x29) {	/* ')' */
			filter->status++;
		} else {
			filter->status &= ~0xf;
			CK((*filter->output_function)(0x1b, filter->data));
			CK((*filter->output_function)(0x24, filter->data));
			goto retry;
		}
		break;

	case 4:	/* ESC $ ) */
		if (c == 0x43) {	/* 'C' */
			filter->status &= ~0xf;
			filter->status |= 0x100;
		} else {
			filter->status &= ~0xf;
			CK((*filter->output_function)(0x1b, filter->data));
			CK((*filter->output_function)(0x24, filter->data));
			CK((*filter->output_function)(0x29, filter->data));
			goto retry;
		}
		break;

	default:
		filter->status = 0;
		break;
	}

	return c;
}
/*
 * wchar => ISO-2022-KR
 *
 * Encodes the wide character c and writes the resulting bytes through
 * filter->output_function. filter->status carries the encoder state:
 *   0x100 - the "ESC $ ) C" designator has already been written
 *   0x10  - SO has been written and no SI has followed it yet
 * A character that cannot be encoded is handed to
 * mbfl_filt_conv_illegal_output() unless filter->illegal_mode is
 * MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE.
 *
 * Returns c; a failing output call is propagated by the CK macro.
 */
int
mbfl_filt_conv_wchar_2022kr(int c, mbfl_convert_filter *filter)
{
	int c1, c2, s;

	/* s stays 0 when c lies outside every Unicode => UHC table range */
	s = 0;
	if (c >= ucs_a1_uhc_table_min && c < ucs_a1_uhc_table_max) {
		s = ucs_a1_uhc_table[c - ucs_a1_uhc_table_min];
	} else if (c >= ucs_a2_uhc_table_min && c < ucs_a2_uhc_table_max) {
		s = ucs_a2_uhc_table[c - ucs_a2_uhc_table_min];
	} else if (c >= ucs_a3_uhc_table_min && c < ucs_a3_uhc_table_max) {
		s = ucs_a3_uhc_table[c - ucs_a3_uhc_table_min];
	} else if (c >= ucs_i_uhc_table_min && c < ucs_i_uhc_table_max) {
		s = ucs_i_uhc_table[c - ucs_i_uhc_table_min];
	} else if (c >= ucs_s_uhc_table_min && c < ucs_s_uhc_table_max) {
		s = ucs_s_uhc_table[c - ucs_s_uhc_table_min];
	} else if (c >= ucs_r1_uhc_table_min && c < ucs_r1_uhc_table_max) {
		s = ucs_r1_uhc_table[c - ucs_r1_uhc_table_min];
	} else if (c >= ucs_r2_uhc_table_min && c < ucs_r2_uhc_table_max) {
		s = ucs_r2_uhc_table[c - ucs_r2_uhc_table_min];
	}

	c1 = (s >> 8) & 0xff;
	c2 = s & 0xff;
	/* exclude UHC extension area */
	if (c1 < 0xa1 || c2 < 0xa1) {
		s = c;
	}
	/* strip the high bit of both bytes: the code is written in 7-bit form */
	if (s & 0x8000) {
		s -= 0x8080;
	}
	/*
	 * NOTE(review): after "s = c" above, s is a raw code point. Values such
	 * as unmapped code points in 0x2121..0x7fff are not rejected by the
	 * range check below and reach the double-byte output path - confirm
	 * whether that is intended.
	 */

	if (s <= 0) {
		/* accept a KSC5601 code that was tagged with its plane marker */
		c1 = c & ~MBFL_WCSPLANE_MASK;
		if (c1 == MBFL_WCSPLANE_KSC5601) {
			s = c & MBFL_WCSPLANE_MASK;
		}
		if (c == 0) {
			s = 0;
		} else if (s <= 0) {
			s = -1;
		}
	} else if ((s >= 0x80 && s < 0x2121) || (s > 0x8080)) {
		s = -1;
	}

	if (s >= 0) {
		/*
		 * Single-byte output. s == 0 (U+0000) belongs here as well: the
		 * previous "s > 0" test pushed NUL into the double-byte branch,
		 * which wrote the designator, SO and the pair 0x00 0x00.
		 */
		if (s < 0x80) { /* ASCII */
			if ((filter->status & 0x10) != 0) {
				CK((*filter->output_function)(0x0f, filter->data)); /* SI */
				filter->status &= ~0x10;
			}
			CK((*filter->output_function)(s, filter->data));
		} else {
			/* the designator is written once, before the first double-byte code */
			if ((filter->status & 0x100) == 0) {
				CK((*filter->output_function)(0x1b, filter->data)); /* ESC */
				CK((*filter->output_function)(0x24, filter->data)); /* '$' */
				CK((*filter->output_function)(0x29, filter->data)); /* ')' */
				CK((*filter->output_function)(0x43, filter->data)); /* 'C' */
				filter->status |= 0x100;
			}
			if ((filter->status & 0x10) == 0) {
				CK((*filter->output_function)(0x0e, filter->data)); /* SO */
				filter->status |= 0x10;
			}
			CK((*filter->output_function)((s >> 8) & 0xff, filter->data));
			CK((*filter->output_function)(s & 0xff, filter->data));
		}
	} else {
		if (filter->illegal_mode != MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE) {
			CK(mbfl_filt_conv_illegal_output(c, filter));
		}
	}

	return c;
}
/*
 * Flush for the ISO-2022-KR encoder.
 *
 * When any of the bits 0xff00 of filter->status is set (the encoder sets
 * 0x100 after writing the designator), SI is written so that the output
 * ends in ASCII. The bits above 0xff are then cleared.
 *
 * Returns 0; a failing output call is propagated by the CK macro.
 */
int
mbfl_filt_conv_any_2022kr_flush(mbfl_convert_filter *filter)
{
	/* back to ascii */
	if ((filter->status & 0xff00) != 0) {
		CK((*filter->output_function)(0x0f, filter->data)); /* SI */
		/*
		 * SI ends shift-out mode, so the SO flag must go too. Leaving it
		 * set would make a later double-byte character skip its SO.
		 */
		filter->status &= ~0x10;
	}
	filter->status &= 0xff;

	return 0;
}
/*
 * Identify filter for ISO-2022-KR. Consumes one character per call and sets
 * filter->flag to 1 when the input cannot be ISO-2022-KR.
 *
 * The low nibble of filter->status is the parser state:
 *   0 - idle
 *   1 - second byte of a double-byte character expected
 *   2 - ESC seen
 *   3 - "ESC $" seen
 *   4 - "ESC $ )" seen
 * Bit 0x10 is set once the complete designator "ESC $ ) C" has been read.
 *
 * Always returns c.
 */
static int mbfl_filt_ident_2022kr(int c, mbfl_identify_filter *filter)
{
retry:
	switch (filter->status & 0xf) {
/*	case 0x00:	 ASCII */
/*	case 0x10:	 KSC5601 mode */
/*	case 0x20:	 KSC5601 DBCS */
/*	case 0x40:	 KSC5601 SBCS */
	case 0:
		if (!(filter->status & 0x10)) {
			/* before the designator only ESC is acted upon */
			if (c == 0x1b)
				filter->status += 2;
		} else if (filter->status == 0x20 && c > 0x20 && c < 0x7f) { /* kanji first char */
			/*
			 * NOTE(review): nothing in this function stores 0x20 in
			 * status, so this branch looks unreachable - confirm.
			 */
			filter->status += 1;
		} else if (c >= 0 && c < 0x80) { /* latin, CTLs */
			;
		} else {
			filter->flag = 1; /* bad */
		}
		break;

/*	case 0x21:	 KSC5601 second char */
	case 1:
		filter->status &= ~0xf;
		if (c < 0x21 || c > 0x7e) { /* bad */
			filter->flag = 1;
		}
		break;

	/* ESC */
	case 2:
		if (c == 0x24) { /* '$' */
			filter->status++;
		} else {
			filter->flag = 1; /* bad */
			filter->status &= ~0xf;
			goto retry;
		}
		break;

	/* ESC $ */
	case 3:
		if (c == 0x29) { /* ')' */
			filter->status++;
		} else {
			filter->flag = 1; /* bad */
			filter->status &= ~0xf;
			goto retry;
		}
		break;

	/*
	 * ESC $ )
	 * The previous state (3) is incremented to 4. This label used to be 5,
	 * so the final 'C' fell into "default" and the designator was never
	 * recognised.
	 */
	case 4:
		if (c == 0x43) { /* 'C' */
			filter->status = 0x10;
		} else {
			filter->flag = 1; /* bad */
			filter->status &= ~0xf;
			goto retry;
		}
		break;

	default:
		filter->status = 0;
		break;
	}

	return c;
}

View file

@ -0,0 +1,44 @@
/*
* "streamable kanji code filter and converter"
* Copyright (c) 1998-2002 HappySize, Inc. All rights reserved.
*
* LICENSE NOTICES
*
* This file is part of "streamable kanji code filter and converter",
* which is distributed under the terms of GNU Lesser General Public
* License (version 2) as published by the Free Software Foundation.
*
* This software is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with "streamable kanji code filter and converter";
* if not, write to the Free Software Foundation, Inc., 59 Temple Place,
* Suite 330, Boston, MA 02111-1307 USA
*
* The author of this file:
*
*/
/*
* The source code included in this file was separated from mbfilter_kr.h
* by moriyoshi koizumi <moriyoshi@php.net> on 4 dec 2002.
*
*/
/* Include guard for the ISO-2022-KR filter declarations. */
#ifndef MBFL_MBFILTER_ISO2022_KR_H
#define MBFL_MBFILTER_ISO2022_KR_H
#include "mbfilter.h"
/* Encoding descriptor and filter tables for ISO-2022-KR. */
extern const mbfl_encoding mbfl_encoding_2022kr;
extern const struct mbfl_identify_vtbl vtbl_identify_2022kr;
extern const struct mbfl_convert_vtbl vtbl_wchar_2022kr;
extern const struct mbfl_convert_vtbl vtbl_2022kr_wchar;
/* Per-character conversion callbacks (ISO-2022-KR => wchar and wchar => ISO-2022-KR). */
int mbfl_filt_conv_2022kr_wchar(int c, mbfl_convert_filter *filter);
int mbfl_filt_conv_wchar_2022kr(int c, mbfl_convert_filter *filter);
/* Flush callback that writes SI so the output ends in ASCII. */
int mbfl_filt_conv_any_2022kr_flush(mbfl_convert_filter *filter);
#endif /* MBFL_MBFILTER_ISO2022_KR_H */

View file

@ -0,0 +1,100 @@
/*
* "streamable kanji code filter and converter"
* Copyright (c) 1998-2002 HappySize, Inc. All rights reserved.
*
* LICENSE NOTICES
*
* This file is part of "streamable kanji code filter and converter",
* which is distributed under the terms of GNU Lesser General Public
* License (version 2) as published by the Free Software Foundation.
*
* This software is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with "streamable kanji code filter and converter";
* if not, write to the Free Software Foundation, Inc., 59 Temple Place,
* Suite 330, Boston, MA 02111-1307 USA
*
* The author of this file:
*
*/
/*
* The source code included in this file was separated from mbfilter.c
* by moriyoshi koizumi <moriyoshi@php.net> on 4 dec 2002.
*
*/
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
#include "mbfilter.h"
#include "mbfilter_iso8859_1.h"
/* Alternative names for ISO-8859-1; the list ends with NULL. */
static const char *mbfl_encoding_8859_1_aliases[] = {"ISO_8859-1", "latin1", NULL};
/* Encoding descriptor for ISO-8859-1 (type MBFL_ENCTYPE_SBCS). */
const mbfl_encoding mbfl_encoding_8859_1 = {
mbfl_no_encoding_8859_1,
"ISO-8859-1",
"ISO-8859-1",
(const char *(*)[])&mbfl_encoding_8859_1_aliases,
NULL,
MBFL_ENCTYPE_SBCS
};
/* Identification table: shared constructor/destructor and mbfl_filt_ident_true as the filter function. */
const struct mbfl_identify_vtbl vtbl_identify_8859_1 = {
mbfl_no_encoding_8859_1,
mbfl_filt_ident_common_ctor,
mbfl_filt_ident_common_dtor,
mbfl_filt_ident_true
};
/* Conversion table ISO-8859-1 => wchar: shared ctor/dtor/flush around mbfl_filt_conv_8859_1_wchar. */
const struct mbfl_convert_vtbl vtbl_8859_1_wchar = {
mbfl_no_encoding_8859_1,
mbfl_no_encoding_wchar,
mbfl_filt_conv_common_ctor,
mbfl_filt_conv_common_dtor,
mbfl_filt_conv_8859_1_wchar,
mbfl_filt_conv_common_flush
};
/* Conversion table wchar => ISO-8859-1: shared ctor/dtor/flush around mbfl_filt_conv_wchar_8859_1. */
const struct mbfl_convert_vtbl vtbl_wchar_8859_1 = {
mbfl_no_encoding_wchar,
mbfl_no_encoding_8859_1,
mbfl_filt_conv_common_ctor,
mbfl_filt_conv_common_dtor,
mbfl_filt_conv_wchar_8859_1,
mbfl_filt_conv_common_flush
};
/* Return -1 from the enclosing function when statement evaluates to a negative value. */
#define CK(statement) do { if ((statement) < 0) return (-1); } while (0)
/*
 * ISO-8859-1 => wchar
 *
 * The input value is forwarded unchanged to the output function, and the
 * result of that call is returned to the caller.
 */
int mbfl_filt_conv_8859_1_wchar(int c, mbfl_convert_filter *filter)
{
	int rc;

	rc = (*filter->output_function)(c, filter->data);
	return rc;
}
/*
 * wchar => ISO-8859-1
 *
 * Values 0x00-0xff are written as they are. Any other value goes to
 * mbfl_filt_conv_illegal_output() unless filter->illegal_mode is
 * MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE. Returns c, or -1 when a callee
 * returns a negative value.
 */
int mbfl_filt_conv_wchar_8859_1(int c, mbfl_convert_filter *filter)
{
	const int representable = (c >= 0 && c < 0x100);

	if (representable) {
		CK((*filter->output_function)(c, filter->data));
	} else if (filter->illegal_mode != MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE) {
		CK(mbfl_filt_conv_illegal_output(c, filter));
	}

	return c;
}

View file

@ -0,0 +1,43 @@
/*
* "streamable kanji code filter and converter"
* Copyright (c) 1998-2002 HappySize, Inc. All rights reserved.
*
* LICENSE NOTICES
*
* This file is part of "streamable kanji code filter and converter",
* which is distributed under the terms of GNU Lesser General Public
* License (version 2) as published by the Free Software Foundation.
*
* This software is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with "streamable kanji code filter and converter";
* if not, write to the Free Software Foundation, Inc., 59 Temple Place,
* Suite 330, Boston, MA 02111-1307 USA
*
* The author of this file:
*
*/
/*
* The source code included in this file was separated from mbfilter.h
* by moriyoshi koizumi <moriyoshi@php.net> on 4 dec 2002.
*
*/
/* Include guard for the ISO-8859-1 filter declarations. */
#ifndef MBFL_MBFILTER_ISO8859_1_H
#define MBFL_MBFILTER_ISO8859_1_H
#include "mbfilter.h"
/* Encoding descriptor and filter tables for ISO-8859-1. */
extern const mbfl_encoding mbfl_encoding_8859_1;
extern const struct mbfl_identify_vtbl vtbl_identify_8859_1;
extern const struct mbfl_convert_vtbl vtbl_8859_1_wchar;
extern const struct mbfl_convert_vtbl vtbl_wchar_8859_1;
/* Per-character conversion callbacks (ISO-8859-1 => wchar and wchar => ISO-8859-1). */
int mbfl_filt_conv_8859_1_wchar(int c, mbfl_convert_filter *filter);
int mbfl_filt_conv_wchar_8859_1(int c, mbfl_convert_filter *filter);
#endif /* MBFL_MBFILTER_ISO8859_1_H */

View file

@ -0,0 +1,138 @@
/*
* "streamable kanji code filter and converter"
* Copyright (c) 1998-2002 HappySize, Inc. All rights reserved.
*
* LICENSE NOTICES
*
* This file is part of "streamable kanji code filter and converter",
* which is distributed under the terms of GNU Lesser General Public
* License (version 2) as published by the Free Software Foundation.
*
* This software is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with "streamable kanji code filter and converter";
* if not, write to the Free Software Foundation, Inc., 59 Temple Place,
* Suite 330, Boston, MA 02111-1307 USA
*
* The author of this file:
*
*/
/*
* The source code included in this file was separated from mbfilter.c
* by moriyoshi koizumi <moriyoshi@php.net> on 4 dec 2002.
*
*/
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
#include "mbfilter.h"
#include "mbfilter_iso8859_10.h"
#include "unicode_table_iso8859_10.h"
/* Alternative names for ISO-8859-10; the list ends with NULL. */
static const char *mbfl_encoding_8859_10_aliases[] = {"ISO_8859-10", "latin6", NULL};
/* Encoding descriptor for ISO-8859-10 (type MBFL_ENCTYPE_SBCS). */
const mbfl_encoding mbfl_encoding_8859_10 = {
mbfl_no_encoding_8859_10,
"ISO-8859-10",
"ISO-8859-10",
(const char *(*)[])&mbfl_encoding_8859_10_aliases,
NULL,
MBFL_ENCTYPE_SBCS
};
/* Identification table: shared constructor/destructor and mbfl_filt_ident_true as the filter function. */
const struct mbfl_identify_vtbl vtbl_identify_8859_10 = {
mbfl_no_encoding_8859_10,
mbfl_filt_ident_common_ctor,
mbfl_filt_ident_common_dtor,
mbfl_filt_ident_true
};
/* Conversion table ISO-8859-10 => wchar: shared ctor/dtor/flush around mbfl_filt_conv_8859_10_wchar. */
const struct mbfl_convert_vtbl vtbl_8859_10_wchar = {
mbfl_no_encoding_8859_10,
mbfl_no_encoding_wchar,
mbfl_filt_conv_common_ctor,
mbfl_filt_conv_common_dtor,
mbfl_filt_conv_8859_10_wchar,
mbfl_filt_conv_common_flush
};
/* Conversion table wchar => ISO-8859-10: shared ctor/dtor/flush around mbfl_filt_conv_wchar_8859_10. */
const struct mbfl_convert_vtbl vtbl_wchar_8859_10 = {
mbfl_no_encoding_wchar,
mbfl_no_encoding_8859_10,
mbfl_filt_conv_common_ctor,
mbfl_filt_conv_common_dtor,
mbfl_filt_conv_wchar_8859_10,
mbfl_filt_conv_common_flush
};
/* Return -1 from the enclosing function when statement evaluates to a negative value. */
#define CK(statement) do { if ((statement) < 0) return (-1); } while (0)
/*
 * ISO-8859-10 => wchar
 *
 * Values below 0xa0 are forwarded unchanged. Values 0xa0-0xff are looked up
 * in iso8859_10_ucs_table; an entry <= 0 is replaced by the input value
 * tagged with MBFL_WCSPLANE_8859_10. Anything outside 0x00-0xff is tagged
 * with MBFL_WCSGROUP_THROUGH. Returns c, or -1 when the output function
 * returns a negative value.
 */
int mbfl_filt_conv_8859_10_wchar(int c, mbfl_convert_filter *filter)
{
	int wc;

	if (c < 0 || c >= 0x100) {
		/* not a single byte: mark it and pass it through */
		wc = (c & MBFL_WCSGROUP_MASK) | MBFL_WCSGROUP_THROUGH;
	} else if (c < 0xa0) {
		wc = c;
	} else {
		wc = iso8859_10_ucs_table[c - 0xa0];
		if (wc <= 0) {
			/* no table entry: keep the byte, tagged with its plane */
			wc = (c & MBFL_WCSPLANE_MASK) | MBFL_WCSPLANE_8859_10;
		}
	}

	CK((*filter->output_function)(wc, filter->data));
	return c;
}
/*
 * wchar => ISO-8859-10
 *
 * Values below 0xa0 are written unchanged. Other values are searched for in
 * iso8859_10_ucs_table (from index 95 down to 0) and written as 0xa0 + index.
 * A value tagged with MBFL_WCSPLANE_8859_10 is written with the tag removed.
 * Everything else goes to mbfl_filt_conv_illegal_output() unless
 * filter->illegal_mode is MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE.
 * Returns c, or -1 when a callee returns a negative value.
 */
int mbfl_filt_conv_wchar_8859_10(int c, mbfl_convert_filter *filter)
{
	int out = -1;
	int idx;

	if (c >= 0 && c < 0xa0) {
		out = c;
	} else {
		for (idx = 95; idx >= 0; idx--) {
			if (c == iso8859_10_ucs_table[idx]) {
				out = 0xa0 + idx;
				break;
			}
		}
		/* out is either -1 or at least 0xa0 at this point */
		if (out < 0 && (c & ~MBFL_WCSPLANE_MASK) == MBFL_WCSPLANE_8859_10) {
			out = c & MBFL_WCSPLANE_MASK;
		}
	}

	if (out < 0) {
		if (filter->illegal_mode != MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE) {
			CK(mbfl_filt_conv_illegal_output(c, filter));
		}
		return c;
	}

	CK((*filter->output_function)(out, filter->data));
	return c;
}

View file

@ -0,0 +1,43 @@
/*
* "streamable kanji code filter and converter"
* Copyright (c) 1998-2002 HappySize, Inc. All rights reserved.
*
* LICENSE NOTICES
*
* This file is part of "streamable kanji code filter and converter",
* which is distributed under the terms of GNU Lesser General Public
* License (version 2) as published by the Free Software Foundation.
*
* This software is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with "streamable kanji code filter and converter";
* if not, write to the Free Software Foundation, Inc., 59 Temple Place,
* Suite 330, Boston, MA 02111-1307 USA
*
* The author of this file:
*
*/
/*
* The source code included in this file was separated from mbfilter.c
* by moriyoshi koizumi <moriyoshi@php.net> on 4 dec 2002.
*
*/
/* Include guard for the ISO-8859-10 filter declarations. */
#ifndef MBFL_MBFILTER_ISO8859_10_H
#define MBFL_MBFILTER_ISO8859_10_H
#include "mbfilter.h"
/* Encoding descriptor and filter tables for ISO-8859-10. */
extern const mbfl_encoding mbfl_encoding_8859_10;
extern const struct mbfl_identify_vtbl vtbl_identify_8859_10;
extern const struct mbfl_convert_vtbl vtbl_8859_10_wchar;
extern const struct mbfl_convert_vtbl vtbl_wchar_8859_10;
/* Per-character conversion callbacks (ISO-8859-10 => wchar and wchar => ISO-8859-10). */
int mbfl_filt_conv_8859_10_wchar(int c, mbfl_convert_filter *filter);
int mbfl_filt_conv_wchar_8859_10(int c, mbfl_convert_filter *filter);
#endif /* MBFL_MBFILTER_ISO8859_10_H */

View file

@ -0,0 +1,138 @@
/*
* "streamable kanji code filter and converter"
* Copyright (c) 1998-2002 HappySize, Inc. All rights reserved.
*
* LICENSE NOTICES
*
* This file is part of "streamable kanji code filter and converter",
* which is distributed under the terms of GNU Lesser General Public
* License (version 2) as published by the Free Software Foundation.
*
* This software is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with "streamable kanji code filter and converter";
* if not, write to the Free Software Foundation, Inc., 59 Temple Place,
* Suite 330, Boston, MA 02111-1307 USA
*
* The author of this file:
*
*/
/*
* The source code included in this file was separated from mbfilter.c
* by moriyoshi koizumi <moriyoshi@php.net> on 4 dec 2002.
*
*/
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
#include "mbfilter.h"
#include "mbfilter_iso8859_13.h"
#include "unicode_table_iso8859_13.h"
/* Alternative names for ISO-8859-13; the list ends with NULL. */
static const char *mbfl_encoding_8859_13_aliases[] = {"ISO_8859-13", NULL};
/* Encoding descriptor for ISO-8859-13 (type MBFL_ENCTYPE_SBCS). */
const mbfl_encoding mbfl_encoding_8859_13 = {
mbfl_no_encoding_8859_13,
"ISO-8859-13",
"ISO-8859-13",
(const char *(*)[])&mbfl_encoding_8859_13_aliases,
NULL,
MBFL_ENCTYPE_SBCS
};
/* Identification table: shared constructor/destructor and mbfl_filt_ident_true as the filter function. */
const struct mbfl_identify_vtbl vtbl_identify_8859_13 = {
mbfl_no_encoding_8859_13,
mbfl_filt_ident_common_ctor,
mbfl_filt_ident_common_dtor,
mbfl_filt_ident_true
};
/* Conversion table ISO-8859-13 => wchar: shared ctor/dtor/flush around mbfl_filt_conv_8859_13_wchar. */
const struct mbfl_convert_vtbl vtbl_8859_13_wchar = {
mbfl_no_encoding_8859_13,
mbfl_no_encoding_wchar,
mbfl_filt_conv_common_ctor,
mbfl_filt_conv_common_dtor,
mbfl_filt_conv_8859_13_wchar,
mbfl_filt_conv_common_flush
};
/* Conversion table wchar => ISO-8859-13: shared ctor/dtor/flush around mbfl_filt_conv_wchar_8859_13. */
const struct mbfl_convert_vtbl vtbl_wchar_8859_13 = {
mbfl_no_encoding_wchar,
mbfl_no_encoding_8859_13,
mbfl_filt_conv_common_ctor,
mbfl_filt_conv_common_dtor,
mbfl_filt_conv_wchar_8859_13,
mbfl_filt_conv_common_flush
};
/* Return -1 from the enclosing function when statement evaluates to a negative value. */
#define CK(statement) do { if ((statement) < 0) return (-1); } while (0)
/*
 * ISO-8859-13 => wchar
 *
 * Values below 0xa0 are forwarded unchanged. Values 0xa0-0xff are looked up
 * in iso8859_13_ucs_table; an entry <= 0 is replaced by the input value
 * tagged with MBFL_WCSPLANE_8859_13. Anything outside 0x00-0xff is tagged
 * with MBFL_WCSGROUP_THROUGH. Returns c, or -1 when the output function
 * returns a negative value.
 */
int mbfl_filt_conv_8859_13_wchar(int c, mbfl_convert_filter *filter)
{
	int wc;

	if (c < 0 || c >= 0x100) {
		/* not a single byte: mark it and pass it through */
		wc = (c & MBFL_WCSGROUP_MASK) | MBFL_WCSGROUP_THROUGH;
	} else if (c < 0xa0) {
		wc = c;
	} else {
		wc = iso8859_13_ucs_table[c - 0xa0];
		if (wc <= 0) {
			/* no table entry: keep the byte, tagged with its plane */
			wc = (c & MBFL_WCSPLANE_MASK) | MBFL_WCSPLANE_8859_13;
		}
	}

	CK((*filter->output_function)(wc, filter->data));
	return c;
}
/*
 * wchar => ISO-8859-13
 *
 * Values below 0xa0 are written unchanged. Other values are searched for in
 * iso8859_13_ucs_table (from index 95 down to 0) and written as 0xa0 + index.
 * A value tagged with MBFL_WCSPLANE_8859_13 is written with the tag removed.
 * Everything else goes to mbfl_filt_conv_illegal_output() unless
 * filter->illegal_mode is MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE.
 * Returns c, or -1 when a callee returns a negative value.
 */
int mbfl_filt_conv_wchar_8859_13(int c, mbfl_convert_filter *filter)
{
	int out = -1;
	int idx;

	if (c >= 0 && c < 0xa0) {
		out = c;
	} else {
		for (idx = 95; idx >= 0; idx--) {
			if (c == iso8859_13_ucs_table[idx]) {
				out = 0xa0 + idx;
				break;
			}
		}
		/* out is either -1 or at least 0xa0 at this point */
		if (out < 0 && (c & ~MBFL_WCSPLANE_MASK) == MBFL_WCSPLANE_8859_13) {
			out = c & MBFL_WCSPLANE_MASK;
		}
	}

	if (out < 0) {
		if (filter->illegal_mode != MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE) {
			CK(mbfl_filt_conv_illegal_output(c, filter));
		}
		return c;
	}

	CK((*filter->output_function)(out, filter->data));
	return c;
}

View file

@ -0,0 +1,43 @@
/*
* "streamable kanji code filter and converter"
* Copyright (c) 1998-2002 HappySize, Inc. All rights reserved.
*
* LICENSE NOTICES
*
* This file is part of "streamable kanji code filter and converter",
* which is distributed under the terms of GNU Lesser General Public
* License (version 2) as published by the Free Software Foundation.
*
* This software is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with "streamable kanji code filter and converter";
* if not, write to the Free Software Foundation, Inc., 59 Temple Place,
* Suite 330, Boston, MA 02111-1307 USA
*
* The author of this file:
*
*/
/*
* The source code included in this file was separated from mbfilter.c
* by moriyoshi koizumi <moriyoshi@php.net> on 4 dec 2002.
*
*/
/* Include guard for the ISO-8859-13 filter declarations. */
#ifndef MBFL_MBFILTER_ISO8859_13_H
#define MBFL_MBFILTER_ISO8859_13_H
#include "mbfilter.h"
/* Encoding descriptor and filter tables for ISO-8859-13. */
extern const mbfl_encoding mbfl_encoding_8859_13;
extern const struct mbfl_identify_vtbl vtbl_identify_8859_13;
extern const struct mbfl_convert_vtbl vtbl_8859_13_wchar;
extern const struct mbfl_convert_vtbl vtbl_wchar_8859_13;
/* Per-character conversion callbacks (ISO-8859-13 => wchar and wchar => ISO-8859-13). */
int mbfl_filt_conv_8859_13_wchar(int c, mbfl_convert_filter *filter);
int mbfl_filt_conv_wchar_8859_13(int c, mbfl_convert_filter *filter);
#endif /* MBFL_MBFILTER_ISO8859_13_H */

View file

@ -0,0 +1,138 @@
/*
* "streamable kanji code filter and converter"
* Copyright (c) 1998-2002 HappySize, Inc. All rights reserved.
*
* LICENSE NOTICES
*
* This file is part of "streamable kanji code filter and converter",
* which is distributed under the terms of GNU Lesser General Public
* License (version 2) as published by the Free Software Foundation.
*
* This software is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with "streamable kanji code filter and converter";
* if not, write to the Free Software Foundation, Inc., 59 Temple Place,
* Suite 330, Boston, MA 02111-1307 USA
*
* The author of this file:
*
*/
/*
* The source code included in this file was separated from mbfilter.c
* by moriyoshi koizumi <moriyoshi@php.net> on 4 dec 2002.
*
*/
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
#include "mbfilter.h"
#include "mbfilter_iso8859_14.h"
#include "unicode_table_iso8859_14.h"
/* Alternative names for ISO-8859-14; the list ends with NULL. */
static const char *mbfl_encoding_8859_14_aliases[] = {"ISO_8859-14", "latin8", NULL};
/* Encoding descriptor for ISO-8859-14 (type MBFL_ENCTYPE_SBCS). */
const mbfl_encoding mbfl_encoding_8859_14 = {
mbfl_no_encoding_8859_14,
"ISO-8859-14",
"ISO-8859-14",
(const char *(*)[])&mbfl_encoding_8859_14_aliases,
NULL,
MBFL_ENCTYPE_SBCS
};
/* Identification table: shared constructor/destructor and mbfl_filt_ident_true as the filter function. */
const struct mbfl_identify_vtbl vtbl_identify_8859_14 = {
mbfl_no_encoding_8859_14,
mbfl_filt_ident_common_ctor,
mbfl_filt_ident_common_dtor,
mbfl_filt_ident_true
};
/* Conversion table ISO-8859-14 => wchar: shared ctor/dtor/flush around mbfl_filt_conv_8859_14_wchar. */
const struct mbfl_convert_vtbl vtbl_8859_14_wchar = {
mbfl_no_encoding_8859_14,
mbfl_no_encoding_wchar,
mbfl_filt_conv_common_ctor,
mbfl_filt_conv_common_dtor,
mbfl_filt_conv_8859_14_wchar,
mbfl_filt_conv_common_flush
};
/* Conversion table wchar => ISO-8859-14: shared ctor/dtor/flush around mbfl_filt_conv_wchar_8859_14. */
const struct mbfl_convert_vtbl vtbl_wchar_8859_14 = {
mbfl_no_encoding_wchar,
mbfl_no_encoding_8859_14,
mbfl_filt_conv_common_ctor,
mbfl_filt_conv_common_dtor,
mbfl_filt_conv_wchar_8859_14,
mbfl_filt_conv_common_flush
};
/* Return -1 from the enclosing function when statement evaluates to a negative value. */
#define CK(statement) do { if ((statement) < 0) return (-1); } while (0)
/*
 * ISO-8859-14 => wchar
 *
 * Values below 0xa0 are forwarded unchanged. Values 0xa0-0xff are looked up
 * in iso8859_14_ucs_table; an entry <= 0 is replaced by the input value
 * tagged with MBFL_WCSPLANE_8859_14. Anything outside 0x00-0xff is tagged
 * with MBFL_WCSGROUP_THROUGH. Returns c, or -1 when the output function
 * returns a negative value.
 */
int mbfl_filt_conv_8859_14_wchar(int c, mbfl_convert_filter *filter)
{
	int wc;

	if (c < 0 || c >= 0x100) {
		/* not a single byte: mark it and pass it through */
		wc = (c & MBFL_WCSGROUP_MASK) | MBFL_WCSGROUP_THROUGH;
	} else if (c < 0xa0) {
		wc = c;
	} else {
		wc = iso8859_14_ucs_table[c - 0xa0];
		if (wc <= 0) {
			/* no table entry: keep the byte, tagged with its plane */
			wc = (c & MBFL_WCSPLANE_MASK) | MBFL_WCSPLANE_8859_14;
		}
	}

	CK((*filter->output_function)(wc, filter->data));
	return c;
}
/*
 * wchar => ISO-8859-14
 *
 * Values below 0xa0 are written unchanged. Other values are searched for in
 * iso8859_14_ucs_table (from index 95 down to 0) and written as 0xa0 + index.
 * A value tagged with MBFL_WCSPLANE_8859_14 is written with the tag removed.
 * Everything else goes to mbfl_filt_conv_illegal_output() unless
 * filter->illegal_mode is MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE.
 * Returns c, or -1 when a callee returns a negative value.
 */
int mbfl_filt_conv_wchar_8859_14(int c, mbfl_convert_filter *filter)
{
	int out = -1;
	int idx;

	if (c >= 0 && c < 0xa0) {
		out = c;
	} else {
		for (idx = 95; idx >= 0; idx--) {
			if (c == iso8859_14_ucs_table[idx]) {
				out = 0xa0 + idx;
				break;
			}
		}
		/* out is either -1 or at least 0xa0 at this point */
		if (out < 0 && (c & ~MBFL_WCSPLANE_MASK) == MBFL_WCSPLANE_8859_14) {
			out = c & MBFL_WCSPLANE_MASK;
		}
	}

	if (out < 0) {
		if (filter->illegal_mode != MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE) {
			CK(mbfl_filt_conv_illegal_output(c, filter));
		}
		return c;
	}

	CK((*filter->output_function)(out, filter->data));
	return c;
}

View file

@ -0,0 +1,26 @@
/*
* COPYRIGHT NOTICE
*
* This file is a portion of "streamable kanji code filter and converter"
* library, which is distributed under GNU Lesser General Public License
* version 2.1.
*
* The source code included in this file was separated from mbfilter.c
* by Moriyoshi Koizumi <moriyoshi@php.net> on 4 Dec 2002.
*
*/
/* Include guard for the ISO-8859-14 filter declarations. */
#ifndef MBFL_MBFILTER_ISO8859_14_H
#define MBFL_MBFILTER_ISO8859_14_H
#include "mbfilter.h"
/* Encoding descriptor and filter tables for ISO-8859-14. */
extern const mbfl_encoding mbfl_encoding_8859_14;
extern const struct mbfl_identify_vtbl vtbl_identify_8859_14;
extern const struct mbfl_convert_vtbl vtbl_8859_14_wchar;
extern const struct mbfl_convert_vtbl vtbl_wchar_8859_14;
/* Per-character conversion callbacks (ISO-8859-14 => wchar and wchar => ISO-8859-14). */
int mbfl_filt_conv_8859_14_wchar(int c, mbfl_convert_filter *filter);
int mbfl_filt_conv_wchar_8859_14(int c, mbfl_convert_filter *filter);
#endif /* MBFL_MBFILTER_ISO8859_14_H */

View file

@ -0,0 +1,136 @@
/*
* "streamable kanji code filter and converter"
* Copyright (c) 1998-2002 HappySize, Inc. All rights reserved.
*
* LICENSE NOTICES
*
* This file is part of "streamable kanji code filter and converter",
* which is distributed under the terms of GNU Lesser General Public
* License (version 2) as published by the Free Software Foundation.
*
* This software is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with "streamable kanji code filter and converter";
* if not, write to the Free Software Foundation, Inc., 59 Temple Place,
* Suite 330, Boston, MA 02111-1307 USA
*
* The author of this file:
*
*/
/*
* The source code included in this file was separated from mbfilter.c
* by moriyoshi koizumi <moriyoshi@php.net> on 4 dec 2002.
*
*/
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
#include "mbfilter.h"
#include "mbfilter_iso8859_15.h"
#include "unicode_table_iso8859_15.h"
/* Alternative names for ISO-8859-15; the list ends with NULL. */
static const char *mbfl_encoding_8859_15_aliases[] = {"ISO_8859-15", NULL};
/* Encoding descriptor for ISO-8859-15 (type MBFL_ENCTYPE_SBCS). */
const mbfl_encoding mbfl_encoding_8859_15 = {
mbfl_no_encoding_8859_15,
"ISO-8859-15",
"ISO-8859-15",
(const char *(*)[])&mbfl_encoding_8859_15_aliases,
NULL,
MBFL_ENCTYPE_SBCS
};
/* Identification table: shared constructor/destructor and mbfl_filt_ident_true as the filter function. */
const struct mbfl_identify_vtbl vtbl_identify_8859_15 = {
mbfl_no_encoding_8859_15,
mbfl_filt_ident_common_ctor,
mbfl_filt_ident_common_dtor,
mbfl_filt_ident_true
};
/* Conversion table ISO-8859-15 => wchar: shared ctor/dtor/flush around mbfl_filt_conv_8859_15_wchar. */
const struct mbfl_convert_vtbl vtbl_8859_15_wchar = {
mbfl_no_encoding_8859_15,
mbfl_no_encoding_wchar,
mbfl_filt_conv_common_ctor,
mbfl_filt_conv_common_dtor,
mbfl_filt_conv_8859_15_wchar,
mbfl_filt_conv_common_flush
};
/* Conversion table wchar => ISO-8859-15: shared ctor/dtor/flush around mbfl_filt_conv_wchar_8859_15. */
const struct mbfl_convert_vtbl vtbl_wchar_8859_15 = {
mbfl_no_encoding_wchar,
mbfl_no_encoding_8859_15,
mbfl_filt_conv_common_ctor,
mbfl_filt_conv_common_dtor,
mbfl_filt_conv_wchar_8859_15,
mbfl_filt_conv_common_flush
};
/* Return -1 from the enclosing function when statement evaluates to a negative value. */
#define CK(statement) do { if ((statement) < 0) return (-1); } while (0)
/*
 * ISO-8859-15 => wchar
 *
 * Values below 0xa0 are forwarded unchanged. Values 0xa0-0xff are looked up
 * in iso8859_15_ucs_table; an entry <= 0 is replaced by the input value
 * tagged with MBFL_WCSPLANE_8859_15. Anything outside 0x00-0xff is tagged
 * with MBFL_WCSGROUP_THROUGH. Returns c, or -1 when the output function
 * returns a negative value.
 */
int mbfl_filt_conv_8859_15_wchar(int c, mbfl_convert_filter *filter)
{
	int wc;

	if (c < 0 || c >= 0x100) {
		/* not a single byte: mark it and pass it through */
		wc = (c & MBFL_WCSGROUP_MASK) | MBFL_WCSGROUP_THROUGH;
	} else if (c < 0xa0) {
		wc = c;
	} else {
		wc = iso8859_15_ucs_table[c - 0xa0];
		if (wc <= 0) {
			/* no table entry: keep the byte, tagged with its plane */
			wc = (c & MBFL_WCSPLANE_MASK) | MBFL_WCSPLANE_8859_15;
		}
	}

	CK((*filter->output_function)(wc, filter->data));
	return c;
}
/*
 * wchar => ISO-8859-15
 *
 * Values below 0xa0 are written unchanged. Other values are searched for in
 * iso8859_15_ucs_table (from index 95 down to 0) and written as 0xa0 + index.
 * A value tagged with MBFL_WCSPLANE_8859_15 is written with the tag removed.
 * Everything else goes to mbfl_filt_conv_illegal_output() unless
 * filter->illegal_mode is MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE.
 * Returns c, or -1 when a callee returns a negative value.
 */
int mbfl_filt_conv_wchar_8859_15(int c, mbfl_convert_filter *filter)
{
	int out = -1;
	int idx;

	if (c >= 0 && c < 0xa0) {
		out = c;
	} else {
		for (idx = 95; idx >= 0; idx--) {
			if (c == iso8859_15_ucs_table[idx]) {
				out = 0xa0 + idx;
				break;
			}
		}
		/* out is either -1 or at least 0xa0 at this point */
		if (out < 0 && (c & ~MBFL_WCSPLANE_MASK) == MBFL_WCSPLANE_8859_15) {
			out = c & MBFL_WCSPLANE_MASK;
		}
	}

	if (out < 0) {
		if (filter->illegal_mode != MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE) {
			CK(mbfl_filt_conv_illegal_output(c, filter));
		}
		return c;
	}

	CK((*filter->output_function)(out, filter->data));
	return c;
}

View file

@ -0,0 +1,26 @@
/*
* COPYRIGHT NOTICE
*
* This file is a portion of "streamable kanji code filter and converter"
* library, which is distributed under GNU Lesser General Public License
* version 2.1.
*
* The source code included in this file was separated from mbfilter.c
* by Moriyoshi Koizumi <moriyoshi@php.net> on 4 Dec 2002.
*
*/
/* Include guard for the ISO-8859-15 filter declarations. */
#ifndef MBFL_MBFILTER_ISO8859_15_H
#define MBFL_MBFILTER_ISO8859_15_H
#include "mbfilter.h"
/* Encoding descriptor and filter tables for ISO-8859-15. */
extern const mbfl_encoding mbfl_encoding_8859_15;
extern const struct mbfl_identify_vtbl vtbl_identify_8859_15;
extern const struct mbfl_convert_vtbl vtbl_8859_15_wchar;
extern const struct mbfl_convert_vtbl vtbl_wchar_8859_15;
/* Per-character conversion callbacks (ISO-8859-15 => wchar and wchar => ISO-8859-15). */
int mbfl_filt_conv_8859_15_wchar(int c, mbfl_convert_filter *filter);
int mbfl_filt_conv_wchar_8859_15(int c, mbfl_convert_filter *filter);
#endif /* MBFL_MBFILTER_ISO8859_15_H */

View file

@ -0,0 +1,139 @@
/*
* "streamable kanji code filter and converter"
* Copyright (c) 1998-2002 HappySize, Inc. All rights reserved.
*
* LICENSE NOTICES
*
* This file is part of "streamable kanji code filter and converter",
* which is distributed under the terms of GNU Lesser General Public
* License (version 2) as published by the Free Software Foundation.
*
* This software is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with "streamable kanji code filter and converter";
* if not, write to the Free Software Foundation, Inc., 59 Temple Place,
* Suite 330, Boston, MA 02111-1307 USA
*
* The author of this file:
*
*/
/*
* The source code included in this file was separated from mbfilter.c
* by moriyoshi koizumi <moriyoshi@php.net> on 4 dec 2002.
*
*/
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
#include "mbfilter.h"
#include "mbfilter_iso8859_2.h"
#include "unicode_table_iso8859_2.h"
/* Alternative names for ISO-8859-2; the list ends with NULL. */
static const char *mbfl_encoding_8859_2_aliases[] = {"ISO_8859-2", "latin2", NULL};
/* Encoding descriptor for ISO-8859-2 (type MBFL_ENCTYPE_SBCS). */
const mbfl_encoding mbfl_encoding_8859_2 = {
mbfl_no_encoding_8859_2,
"ISO-8859-2",
"ISO-8859-2",
(const char *(*)[])&mbfl_encoding_8859_2_aliases,
NULL,
MBFL_ENCTYPE_SBCS
};
/* Identification table: shared constructor/destructor and mbfl_filt_ident_true as the filter function. */
const struct mbfl_identify_vtbl vtbl_identify_8859_2 = {
mbfl_no_encoding_8859_2,
mbfl_filt_ident_common_ctor,
mbfl_filt_ident_common_dtor,
mbfl_filt_ident_true
};
/* Conversion table ISO-8859-2 => wchar: shared ctor/dtor/flush around mbfl_filt_conv_8859_2_wchar. */
const struct mbfl_convert_vtbl vtbl_8859_2_wchar = {
mbfl_no_encoding_8859_2,
mbfl_no_encoding_wchar,
mbfl_filt_conv_common_ctor,
mbfl_filt_conv_common_dtor,
mbfl_filt_conv_8859_2_wchar,
mbfl_filt_conv_common_flush
};
/* Conversion table wchar => ISO-8859-2: shared ctor/dtor/flush around mbfl_filt_conv_wchar_8859_2. */
const struct mbfl_convert_vtbl vtbl_wchar_8859_2 = {
mbfl_no_encoding_wchar,
mbfl_no_encoding_8859_2,
mbfl_filt_conv_common_ctor,
mbfl_filt_conv_common_dtor,
mbfl_filt_conv_wchar_8859_2,
mbfl_filt_conv_common_flush
};
/* Return -1 from the enclosing function when statement evaluates to a negative value. */
#define CK(statement) do { if ((statement) < 0) return (-1); } while (0)
/*
 * ISO-8859-2 => wchar
 *
 * Values below 0xa0 are forwarded unchanged. Values 0xa0-0xff are looked up
 * in iso8859_2_ucs_table; an entry <= 0 is replaced by the input value
 * tagged with MBFL_WCSPLANE_8859_2. Anything outside 0x00-0xff is tagged
 * with MBFL_WCSGROUP_THROUGH. Returns c, or -1 when the output function
 * returns a negative value.
 */
int mbfl_filt_conv_8859_2_wchar(int c, mbfl_convert_filter *filter)
{
	int wc;

	if (c < 0 || c >= 0x100) {
		/* not a single byte: mark it and pass it through */
		wc = (c & MBFL_WCSGROUP_MASK) | MBFL_WCSGROUP_THROUGH;
	} else if (c < 0xa0) {
		wc = c;
	} else {
		wc = iso8859_2_ucs_table[c - 0xa0];
		if (wc <= 0) {
			/* no table entry: keep the byte, tagged with its plane */
			wc = (c & MBFL_WCSPLANE_MASK) | MBFL_WCSPLANE_8859_2;
		}
	}

	CK((*filter->output_function)(wc, filter->data));
	return c;
}
/*
 * wchar => ISO-8859-2
 *
 * Values below 0xa0 are written unchanged. Other values are searched for in
 * iso8859_2_ucs_table (from index 95 down to 0) and written as 0xa0 + index.
 * A value tagged with MBFL_WCSPLANE_8859_2 is written with the tag removed.
 * Everything else goes to mbfl_filt_conv_illegal_output() unless
 * filter->illegal_mode is MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE.
 * Returns c, or -1 when a callee returns a negative value.
 */
int mbfl_filt_conv_wchar_8859_2(int c, mbfl_convert_filter *filter)
{
	int out = -1;
	int idx;

	if (c >= 0 && c < 0xa0) {
		out = c;
	} else {
		for (idx = 95; idx >= 0; idx--) {
			if (c == iso8859_2_ucs_table[idx]) {
				out = 0xa0 + idx;
				break;
			}
		}
		/* out is either -1 or at least 0xa0 at this point */
		if (out < 0 && (c & ~MBFL_WCSPLANE_MASK) == MBFL_WCSPLANE_8859_2) {
			out = c & MBFL_WCSPLANE_MASK;
		}
	}

	if (out < 0) {
		if (filter->illegal_mode != MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE) {
			CK(mbfl_filt_conv_illegal_output(c, filter));
		}
		return c;
	}

	CK((*filter->output_function)(out, filter->data));
	return c;
}

View file

@ -0,0 +1,43 @@
/*
* "streamable kanji code filter and converter"
* Copyright (c) 1998-2002 HappySize, Inc. All rights reserved.
*
* LICENSE NOTICES
*
* This file is part of "streamable kanji code filter and converter",
* which is distributed under the terms of GNU Lesser General Public
* License (version 2) as published by the Free Software Foundation.
*
* This software is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with "streamable kanji code filter and converter";
* if not, write to the Free Software Foundation, Inc., 59 Temple Place,
* Suite 330, Boston, MA 02111-1307 USA
*
* The author of this file:
*
*/
/*
* The source code included in this file was separated from mbfilter.c
* by moriyoshi koizumi <moriyoshi@php.net> on 4 dec 2002.
*
*/
/* Include guard for the ISO-8859-2 filter declarations. */
#ifndef MBFL_MBFILTER_ISO8859_2_H
#define MBFL_MBFILTER_ISO8859_2_H
#include "mbfilter.h"
/* Encoding descriptor and filter tables for ISO-8859-2. */
extern const mbfl_encoding mbfl_encoding_8859_2;
extern const struct mbfl_identify_vtbl vtbl_identify_8859_2;
extern const struct mbfl_convert_vtbl vtbl_8859_2_wchar;
extern const struct mbfl_convert_vtbl vtbl_wchar_8859_2;
/* Per-character conversion callbacks (ISO-8859-2 => wchar and wchar => ISO-8859-2). */
int mbfl_filt_conv_8859_2_wchar(int c, mbfl_convert_filter *filter);
int mbfl_filt_conv_wchar_8859_2(int c, mbfl_convert_filter *filter);
#endif /* MBFL_MBFILTER_ISO8859_2_H */

View file

@ -0,0 +1,139 @@
/*
* "streamable kanji code filter and converter"
* Copyright (c) 1998-2002 HappySize, Inc. All rights reserved.
*
* LICENSE NOTICES
*
* This file is part of "streamable kanji code filter and converter",
* which is distributed under the terms of GNU Lesser General Public
* License (version 2) as published by the Free Software Foundation.
*
* This software is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with "streamable kanji code filter and converter";
* if not, write to the Free Software Foundation, Inc., 59 Temple Place,
* Suite 330, Boston, MA 02111-1307 USA
*
* The author of this file:
*
*/
/*
* The source code included in this file was separated from mbfilter.c
* by moriyoshi koizumi <moriyoshi@php.net> on 4 dec 2002.
*
*/
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
#include "mbfilter.h"
#include "mbfilter_iso8859_3.h"
#include "unicode_table_iso8859_3.h"
/* Alias names for ISO-8859-3; the array is NULL-terminated. */
static const char *mbfl_encoding_8859_3_aliases[] = {"ISO_8859-3", "latin3", NULL};
/*
 * Encoding descriptor for ISO-8859-3. The initializers are positional:
 * the encoding id, two name strings (presumably the canonical name and the
 * MIME name -- confirm against the mbfl_encoding declaration), the alias
 * list above, a NULL member whose purpose is not visible in this file, and
 * the MBFL_ENCTYPE_SBCS type flag.
 */
const mbfl_encoding mbfl_encoding_8859_3 = {
mbfl_no_encoding_8859_3,
"ISO-8859-3",
"ISO-8859-3",
(const char *(*)[])&mbfl_encoding_8859_3_aliases,
NULL,
MBFL_ENCTYPE_SBCS
};
/*
 * Identify-filter table for ISO-8859-3: the common ctor/dtor helpers plus
 * mbfl_filt_ident_true as the filter function.
 */
const struct mbfl_identify_vtbl vtbl_identify_8859_3 = {
mbfl_no_encoding_8859_3,
mbfl_filt_ident_common_ctor,
mbfl_filt_ident_common_dtor,
mbfl_filt_ident_true
};
/*
 * Convert-filter table, ISO-8859-3 => wchar: source id, destination id,
 * common ctor/dtor, the per-character function defined below, common flush.
 */
const struct mbfl_convert_vtbl vtbl_8859_3_wchar = {
mbfl_no_encoding_8859_3,
mbfl_no_encoding_wchar,
mbfl_filt_conv_common_ctor,
mbfl_filt_conv_common_dtor,
mbfl_filt_conv_8859_3_wchar,
mbfl_filt_conv_common_flush
};
/* Convert-filter table for the opposite direction, wchar => ISO-8859-3. */
const struct mbfl_convert_vtbl vtbl_wchar_8859_3 = {
mbfl_no_encoding_wchar,
mbfl_no_encoding_8859_3,
mbfl_filt_conv_common_ctor,
mbfl_filt_conv_common_dtor,
mbfl_filt_conv_wchar_8859_3,
mbfl_filt_conv_common_flush
};
/* Evaluate statement; when it yields a negative value, return -1 from the enclosing function. */
#define CK(statement) do { if ((statement) < 0) return (-1); } while (0)
/*
 * ISO-8859-3 => wchar
 *
 * Converts one input value and hands the result to filter->output_function:
 *   - 0x00..0x9f is forwarded unchanged;
 *   - 0xa0..0xff is looked up in iso8859_3_ucs_table; an entry <= 0 is
 *     replaced by the low bits of c tagged with MBFL_WCSPLANE_8859_3;
 *   - any other value is masked with MBFL_WCSGROUP_MASK and tagged with
 *     MBFL_WCSGROUP_THROUGH.
 * Returns c, or -1 when the output function returns a negative value.
 */
int mbfl_filt_conv_8859_3_wchar(int c, mbfl_convert_filter *filter)
{
	int wc;

	if (c < 0 || c >= 0x100) {
		wc = (c & MBFL_WCSGROUP_MASK) | MBFL_WCSGROUP_THROUGH;
	} else if (c < 0xa0) {
		wc = c;
	} else {
		wc = iso8859_3_ucs_table[c - 0xa0];
		if (wc <= 0) {
			/* entry <= 0 is treated as "no mapping for this byte" */
			wc = (c & MBFL_WCSPLANE_MASK) | MBFL_WCSPLANE_8859_3;
		}
	}

	CK((*filter->output_function)(wc, filter->data));
	return c;
}
/*
 * wchar => ISO-8859-3
 *
 * Values 0x00..0x9f are emitted unchanged. Anything else is searched for in
 * iso8859_3_ucs_table, scanning indices 95 down to 0; a hit at index i is
 * emitted as 0xa0 + i. When nothing matches but c carries the
 * MBFL_WCSPLANE_8859_3 tag, the bits under MBFL_WCSPLANE_MASK are emitted.
 * Otherwise mbfl_filt_conv_illegal_output() is called, unless illegal_mode
 * is MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE.
 * Returns c, or -1 when a callee returns a negative value.
 */
int mbfl_filt_conv_wchar_8859_3(int c, mbfl_convert_filter *filter)
{
	int out = -1;
	int i;

	if (c >= 0 && c < 0xa0) {
		out = c;
	} else {
		for (i = 95; i >= 0; i--) {
			if (iso8859_3_ucs_table[i] == c) {
				out = 0xa0 + i;
				break;
			}
		}
		if (out <= 0 && (c & ~MBFL_WCSPLANE_MASK) == MBFL_WCSPLANE_8859_3) {
			out = c & MBFL_WCSPLANE_MASK;
		}
	}

	if (out < 0) {
		if (filter->illegal_mode != MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE) {
			CK(mbfl_filt_conv_illegal_output(c, filter));
		}
		return c;
	}

	CK((*filter->output_function)(out, filter->data));
	return c;
}

View file

@ -0,0 +1,43 @@
/*
* "streamable kanji code filter and converter"
* Copyright (c) 1998-2002 HappySize, Inc. All rights reserved.
*
* LICENSE NOTICES
*
* This file is part of "streamable kanji code filter and converter",
* which is distributed under the terms of GNU Lesser General Public
* License (version 2) as published by the Free Software Foundation.
*
* This software is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with "streamable kanji code filter and converter";
* if not, write to the Free Software Foundation, Inc., 59 Temple Place,
* Suite 330, Boston, MA 02111-1307 USA
*
* The author of this file:
*
*/
/*
* The source code included in this file was separated from mbfilter.c
* by moriyoshi koizumi <moriyoshi@php.net> on 4 dec 2002.
*
*/
#ifndef MBFL_MBFILTER_ISO8859_3_H
#define MBFL_MBFILTER_ISO8859_3_H

#include "mbfilter.h"

/* Per-character conversion functions: ISO-8859-3 => wchar and wchar => ISO-8859-3. */
int mbfl_filt_conv_8859_3_wchar(int c, mbfl_convert_filter *filter);
int mbfl_filt_conv_wchar_8859_3(int c, mbfl_convert_filter *filter);

/* Encoding descriptor and filter tables for ISO-8859-3. */
extern const mbfl_encoding mbfl_encoding_8859_3;
extern const struct mbfl_identify_vtbl vtbl_identify_8859_3;
extern const struct mbfl_convert_vtbl vtbl_8859_3_wchar;
extern const struct mbfl_convert_vtbl vtbl_wchar_8859_3;

#endif /* MBFL_MBFILTER_ISO8859_3_H */

View file

@ -0,0 +1,137 @@
/*
* "streamable kanji code filter and converter"
* Copyright (c) 1998-2002 HappySize, Inc. All rights reserved.
*
* LICENSE NOTICES
*
* This file is part of "streamable kanji code filter and converter",
* which is distributed under the terms of GNU Lesser General Public
* License (version 2) as published by the Free Software Foundation.
*
* This software is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with "streamable kanji code filter and converter";
* if not, write to the Free Software Foundation, Inc., 59 Temple Place,
* Suite 330, Boston, MA 02111-1307 USA
*
* The author of this file:
*
*/
/*
* The source code included in this file was separated from mbfilter.c
* by moriyoshi koizumi <moriyoshi@php.net> on 4 dec 2002.
*
*/
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
#include "mbfilter.h"
#include "mbfilter_iso8859_4.h"
#include "unicode_table_iso8859_4.h"
/* Alias names for ISO-8859-4; the array is NULL-terminated. */
static const char *mbfl_encoding_8859_4_aliases[] = {"ISO_8859-4", "latin4", NULL};
/*
 * Encoding descriptor for ISO-8859-4. The initializers are positional:
 * the encoding id, two name strings (presumably the canonical name and the
 * MIME name -- confirm against the mbfl_encoding declaration), the alias
 * list above, a NULL member whose purpose is not visible in this file, and
 * the MBFL_ENCTYPE_SBCS type flag.
 */
const mbfl_encoding mbfl_encoding_8859_4 = {
mbfl_no_encoding_8859_4,
"ISO-8859-4",
"ISO-8859-4",
(const char *(*)[])&mbfl_encoding_8859_4_aliases,
NULL,
MBFL_ENCTYPE_SBCS
};
/*
 * Identify-filter table for ISO-8859-4: the common ctor/dtor helpers plus
 * mbfl_filt_ident_true as the filter function.
 */
const struct mbfl_identify_vtbl vtbl_identify_8859_4 = {
mbfl_no_encoding_8859_4,
mbfl_filt_ident_common_ctor,
mbfl_filt_ident_common_dtor,
mbfl_filt_ident_true };
/*
 * Convert-filter table, ISO-8859-4 => wchar: source id, destination id,
 * common ctor/dtor, the per-character function defined below, common flush.
 */
const struct mbfl_convert_vtbl vtbl_8859_4_wchar = {
mbfl_no_encoding_8859_4,
mbfl_no_encoding_wchar,
mbfl_filt_conv_common_ctor,
mbfl_filt_conv_common_dtor,
mbfl_filt_conv_8859_4_wchar,
mbfl_filt_conv_common_flush
};
/* Convert-filter table for the opposite direction, wchar => ISO-8859-4. */
const struct mbfl_convert_vtbl vtbl_wchar_8859_4 = {
mbfl_no_encoding_wchar,
mbfl_no_encoding_8859_4,
mbfl_filt_conv_common_ctor,
mbfl_filt_conv_common_dtor,
mbfl_filt_conv_wchar_8859_4,
mbfl_filt_conv_common_flush
};
/* Evaluate statement; when it yields a negative value, return -1 from the enclosing function. */
#define CK(statement) do { if ((statement) < 0) return (-1); } while (0)
/*
 * ISO-8859-4 => wchar
 *
 * Converts one input value and hands the result to filter->output_function:
 *   - 0x00..0x9f is forwarded unchanged;
 *   - 0xa0..0xff is looked up in iso8859_4_ucs_table; an entry <= 0 is
 *     replaced by the low bits of c tagged with MBFL_WCSPLANE_8859_4;
 *   - any other value is masked with MBFL_WCSGROUP_MASK and tagged with
 *     MBFL_WCSGROUP_THROUGH.
 * Returns c, or -1 when the output function returns a negative value.
 */
int mbfl_filt_conv_8859_4_wchar(int c, mbfl_convert_filter *filter)
{
	int wc;

	if (c < 0 || c >= 0x100) {
		wc = (c & MBFL_WCSGROUP_MASK) | MBFL_WCSGROUP_THROUGH;
	} else if (c < 0xa0) {
		wc = c;
	} else {
		wc = iso8859_4_ucs_table[c - 0xa0];
		if (wc <= 0) {
			/* entry <= 0 is treated as "no mapping for this byte" */
			wc = (c & MBFL_WCSPLANE_MASK) | MBFL_WCSPLANE_8859_4;
		}
	}

	CK((*filter->output_function)(wc, filter->data));
	return c;
}
/*
 * wchar => ISO-8859-4
 *
 * Values 0x00..0x9f are emitted unchanged. Anything else is searched for in
 * iso8859_4_ucs_table, scanning indices 95 down to 0; a hit at index i is
 * emitted as 0xa0 + i. When nothing matches but c carries the
 * MBFL_WCSPLANE_8859_4 tag, the bits under MBFL_WCSPLANE_MASK are emitted.
 * Otherwise mbfl_filt_conv_illegal_output() is called, unless illegal_mode
 * is MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE.
 * Returns c, or -1 when a callee returns a negative value.
 */
int mbfl_filt_conv_wchar_8859_4(int c, mbfl_convert_filter *filter)
{
	int out = -1;
	int i;

	if (c >= 0 && c < 0xa0) {
		out = c;
	} else {
		for (i = 95; i >= 0; i--) {
			if (iso8859_4_ucs_table[i] == c) {
				out = 0xa0 + i;
				break;
			}
		}
		if (out <= 0 && (c & ~MBFL_WCSPLANE_MASK) == MBFL_WCSPLANE_8859_4) {
			out = c & MBFL_WCSPLANE_MASK;
		}
	}

	if (out < 0) {
		if (filter->illegal_mode != MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE) {
			CK(mbfl_filt_conv_illegal_output(c, filter));
		}
		return c;
	}

	CK((*filter->output_function)(out, filter->data));
	return c;
}

View file

@ -0,0 +1,41 @@
/*
* "streamable kanji code filter and converter"
* Copyright (c) 1998-2002 HappySize, Inc. All rights reserved.
*
* LICENSE NOTICES
*
* This file is part of "streamable kanji code filter and converter",
* which is distributed under the terms of GNU Lesser General Public
* License (version 2) as published by the Free Software Foundation.
*
* This software is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with "streamable kanji code filter and converter";
* if not, write to the Free Software Foundation, Inc., 59 Temple Place,
* Suite 330, Boston, MA 02111-1307 USA
*
* The author of this file:
*
*/
/*
* The source code included in this file was separated from mbfilter.c
* by moriyoshi koizumi <moriyoshi@php.net> on 4 dec 2002.
*
*/
#ifndef MBFL_MBFILTER_ISO8859_4_H
#define MBFL_MBFILTER_ISO8859_4_H
extern const mbfl_encoding mbfl_encoding_8859_4;
extern const struct mbfl_identify_vtbl vtbl_identify_8859_4;
extern const struct mbfl_convert_vtbl vtbl_8859_4_wchar;
extern const struct mbfl_convert_vtbl vtbl_wchar_8859_4;
int mbfl_filt_conv_8859_4_wchar(int c, mbfl_convert_filter *filter);
int mbfl_filt_conv_wchar_8859_4(int c, mbfl_convert_filter *filter);
#endif /* MBFL_MBFILTER_ISO8859_4_H */

View file

@ -0,0 +1,139 @@
/*
* "streamable kanji code filter and converter"
* Copyright (c) 1998-2002 HappySize, Inc. All rights reserved.
*
* LICENSE NOTICES
*
* This file is part of "streamable kanji code filter and converter",
* which is distributed under the terms of GNU Lesser General Public
* License (version 2) as published by the Free Software Foundation.
*
* This software is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with "streamable kanji code filter and converter";
* if not, write to the Free Software Foundation, Inc., 59 Temple Place,
* Suite 330, Boston, MA 02111-1307 USA
*
* The author of this file:
*
*/
/*
* The source code included in this file was separated from mbfilter.c
* by moriyoshi koizumi <moriyoshi@php.net> on 4 dec 2002.
*
*/
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
#include "mbfilter.h"
#include "mbfilter_iso8859_5.h"
#include "unicode_table_iso8859_5.h"
/* Alias names for ISO-8859-5; the array is NULL-terminated. */
static const char *mbfl_encoding_8859_5_aliases[] = {"ISO_8859-5", "cyrillic", NULL};
/*
 * Encoding descriptor for ISO-8859-5. The initializers are positional:
 * the encoding id, two name strings (presumably the canonical name and the
 * MIME name -- confirm against the mbfl_encoding declaration), the alias
 * list above, a NULL member whose purpose is not visible in this file, and
 * the MBFL_ENCTYPE_SBCS type flag.
 */
const mbfl_encoding mbfl_encoding_8859_5 = {
mbfl_no_encoding_8859_5,
"ISO-8859-5",
"ISO-8859-5",
(const char *(*)[])&mbfl_encoding_8859_5_aliases,
NULL,
MBFL_ENCTYPE_SBCS
};
/*
 * Identify-filter table for ISO-8859-5: the common ctor/dtor helpers plus
 * mbfl_filt_ident_true as the filter function.
 */
const struct mbfl_identify_vtbl vtbl_identify_8859_5 = {
mbfl_no_encoding_8859_5,
mbfl_filt_ident_common_ctor,
mbfl_filt_ident_common_dtor,
mbfl_filt_ident_true
};
/*
 * Convert-filter table, ISO-8859-5 => wchar: source id, destination id,
 * common ctor/dtor, the per-character function defined below, common flush.
 */
const struct mbfl_convert_vtbl vtbl_8859_5_wchar = {
mbfl_no_encoding_8859_5,
mbfl_no_encoding_wchar,
mbfl_filt_conv_common_ctor,
mbfl_filt_conv_common_dtor,
mbfl_filt_conv_8859_5_wchar,
mbfl_filt_conv_common_flush
};
/* Convert-filter table for the opposite direction, wchar => ISO-8859-5. */
const struct mbfl_convert_vtbl vtbl_wchar_8859_5 = {
mbfl_no_encoding_wchar,
mbfl_no_encoding_8859_5,
mbfl_filt_conv_common_ctor,
mbfl_filt_conv_common_dtor,
mbfl_filt_conv_wchar_8859_5,
mbfl_filt_conv_common_flush
};
/* Evaluate statement; when it yields a negative value, return -1 from the enclosing function. */
#define CK(statement) do { if ((statement) < 0) return (-1); } while (0)
/*
 * ISO-8859-5 => wchar
 *
 * Converts one input value and hands the result to filter->output_function:
 *   - 0x00..0x9f is forwarded unchanged;
 *   - 0xa0..0xff is looked up in iso8859_5_ucs_table; an entry <= 0 is
 *     replaced by the low bits of c tagged with MBFL_WCSPLANE_8859_5;
 *   - any other value is masked with MBFL_WCSGROUP_MASK and tagged with
 *     MBFL_WCSGROUP_THROUGH.
 * Returns c, or -1 when the output function returns a negative value.
 */
int mbfl_filt_conv_8859_5_wchar(int c, mbfl_convert_filter *filter)
{
	int wc;

	if (c < 0 || c >= 0x100) {
		wc = (c & MBFL_WCSGROUP_MASK) | MBFL_WCSGROUP_THROUGH;
	} else if (c < 0xa0) {
		wc = c;
	} else {
		wc = iso8859_5_ucs_table[c - 0xa0];
		if (wc <= 0) {
			/* entry <= 0 is treated as "no mapping for this byte" */
			wc = (c & MBFL_WCSPLANE_MASK) | MBFL_WCSPLANE_8859_5;
		}
	}

	CK((*filter->output_function)(wc, filter->data));
	return c;
}
/*
 * wchar => ISO-8859-5
 *
 * Values 0x00..0x9f are emitted unchanged. Anything else is searched for in
 * iso8859_5_ucs_table, scanning indices 95 down to 0; a hit at index i is
 * emitted as 0xa0 + i. When nothing matches but c carries the
 * MBFL_WCSPLANE_8859_5 tag, the bits under MBFL_WCSPLANE_MASK are emitted.
 * Otherwise mbfl_filt_conv_illegal_output() is called, unless illegal_mode
 * is MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE.
 * Returns c, or -1 when a callee returns a negative value.
 */
int mbfl_filt_conv_wchar_8859_5(int c, mbfl_convert_filter *filter)
{
	int out = -1;
	int i;

	if (c >= 0 && c < 0xa0) {
		out = c;
	} else {
		for (i = 95; i >= 0; i--) {
			if (iso8859_5_ucs_table[i] == c) {
				out = 0xa0 + i;
				break;
			}
		}
		if (out <= 0 && (c & ~MBFL_WCSPLANE_MASK) == MBFL_WCSPLANE_8859_5) {
			out = c & MBFL_WCSPLANE_MASK;
		}
	}

	if (out < 0) {
		if (filter->illegal_mode != MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE) {
			CK(mbfl_filt_conv_illegal_output(c, filter));
		}
		return c;
	}

	CK((*filter->output_function)(out, filter->data));
	return c;
}

View file

@ -0,0 +1,43 @@
/*
* "streamable kanji code filter and converter"
* Copyright (c) 1998-2002 HappySize, Inc. All rights reserved.
*
* LICENSE NOTICES
*
* This file is part of "streamable kanji code filter and converter",
* which is distributed under the terms of GNU Lesser General Public
* License (version 2) as published by the Free Software Foundation.
*
* This software is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with "streamable kanji code filter and converter";
* if not, write to the Free Software Foundation, Inc., 59 Temple Place,
* Suite 330, Boston, MA 02111-1307 USA
*
* The author of this file:
*
*/
/*
* The source code included in this file was separated from mbfilter.c
* by moriyoshi koizumi <moriyoshi@php.net> on 4 dec 2002.
*
*/
#ifndef MBFL_MBFILTER_ISO8859_5_H
#define MBFL_MBFILTER_ISO8859_5_H

#include "mbfilter.h"

/* Per-character conversion functions: ISO-8859-5 => wchar and wchar => ISO-8859-5. */
int mbfl_filt_conv_8859_5_wchar(int c, mbfl_convert_filter *filter);
int mbfl_filt_conv_wchar_8859_5(int c, mbfl_convert_filter *filter);

/* Encoding descriptor and filter tables for ISO-8859-5. */
extern const mbfl_encoding mbfl_encoding_8859_5;
extern const struct mbfl_identify_vtbl vtbl_identify_8859_5;
extern const struct mbfl_convert_vtbl vtbl_8859_5_wchar;
extern const struct mbfl_convert_vtbl vtbl_wchar_8859_5;

#endif /* MBFL_MBFILTER_ISO8859_5_H */

View file

@ -0,0 +1,139 @@
/*
* "streamable kanji code filter and converter"
* Copyright (c) 1998-2002 HappySize, Inc. All rights reserved.
*
* LICENSE NOTICES
*
* This file is part of "streamable kanji code filter and converter",
* which is distributed under the terms of GNU Lesser General Public
* License (version 2) as published by the Free Software Foundation.
*
* This software is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with "streamable kanji code filter and converter";
* if not, write to the Free Software Foundation, Inc., 59 Temple Place,
* Suite 330, Boston, MA 02111-1307 USA
*
* The author of this file:
*
*/
/*
* The source code included in this file was separated from mbfilter.c
* by moriyoshi koizumi <moriyoshi@php.net> on 4 dec 2002.
*
*/
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
#include "mbfilter.h"
#include "mbfilter_iso8859_6.h"
#include "unicode_table_iso8859_6.h"
/* Alias names for ISO-8859-6; the array is NULL-terminated. */
static const char *mbfl_encoding_8859_6_aliases[] = {"ISO_8859-6", "arabic", NULL};
/*
 * Encoding descriptor for ISO-8859-6. The initializers are positional:
 * the encoding id, two name strings (presumably the canonical name and the
 * MIME name -- confirm against the mbfl_encoding declaration), the alias
 * list above, a NULL member whose purpose is not visible in this file, and
 * the MBFL_ENCTYPE_SBCS type flag.
 */
const mbfl_encoding mbfl_encoding_8859_6 = {
mbfl_no_encoding_8859_6,
"ISO-8859-6",
"ISO-8859-6",
(const char *(*)[])&mbfl_encoding_8859_6_aliases,
NULL,
MBFL_ENCTYPE_SBCS
};
/*
 * Identify-filter table for ISO-8859-6: the common ctor/dtor helpers plus
 * mbfl_filt_ident_true as the filter function.
 */
const struct mbfl_identify_vtbl vtbl_identify_8859_6 = {
mbfl_no_encoding_8859_6,
mbfl_filt_ident_common_ctor,
mbfl_filt_ident_common_dtor,
mbfl_filt_ident_true
};
/*
 * Convert-filter table, ISO-8859-6 => wchar: source id, destination id,
 * common ctor/dtor, the per-character function defined below, common flush.
 */
const struct mbfl_convert_vtbl vtbl_8859_6_wchar = {
mbfl_no_encoding_8859_6,
mbfl_no_encoding_wchar,
mbfl_filt_conv_common_ctor,
mbfl_filt_conv_common_dtor,
mbfl_filt_conv_8859_6_wchar,
mbfl_filt_conv_common_flush
};
/* Convert-filter table for the opposite direction, wchar => ISO-8859-6. */
const struct mbfl_convert_vtbl vtbl_wchar_8859_6 = {
mbfl_no_encoding_wchar,
mbfl_no_encoding_8859_6,
mbfl_filt_conv_common_ctor,
mbfl_filt_conv_common_dtor,
mbfl_filt_conv_wchar_8859_6,
mbfl_filt_conv_common_flush
};
/* Evaluate statement; when it yields a negative value, return -1 from the enclosing function. */
#define CK(statement) do { if ((statement) < 0) return (-1); } while (0)
/*
 * ISO-8859-6 => wchar
 *
 * Converts one input value and hands the result to filter->output_function:
 *   - 0x00..0x9f is forwarded unchanged;
 *   - 0xa0..0xff is looked up in iso8859_6_ucs_table; an entry <= 0 is
 *     replaced by the low bits of c tagged with MBFL_WCSPLANE_8859_6;
 *   - any other value is masked with MBFL_WCSGROUP_MASK and tagged with
 *     MBFL_WCSGROUP_THROUGH.
 * Returns c, or -1 when the output function returns a negative value.
 */
int mbfl_filt_conv_8859_6_wchar(int c, mbfl_convert_filter *filter)
{
	int wc;

	if (c < 0 || c >= 0x100) {
		wc = (c & MBFL_WCSGROUP_MASK) | MBFL_WCSGROUP_THROUGH;
	} else if (c < 0xa0) {
		wc = c;
	} else {
		wc = iso8859_6_ucs_table[c - 0xa0];
		if (wc <= 0) {
			/* entry <= 0 is treated as "no mapping for this byte" */
			wc = (c & MBFL_WCSPLANE_MASK) | MBFL_WCSPLANE_8859_6;
		}
	}

	CK((*filter->output_function)(wc, filter->data));
	return c;
}
/*
 * wchar => ISO-8859-6
 *
 * Values 0x00..0x9f are emitted unchanged. Anything else is searched for in
 * iso8859_6_ucs_table, scanning indices 95 down to 0; a hit at index i is
 * emitted as 0xa0 + i. When nothing matches but c carries the
 * MBFL_WCSPLANE_8859_6 tag, the bits under MBFL_WCSPLANE_MASK are emitted.
 * Otherwise mbfl_filt_conv_illegal_output() is called, unless illegal_mode
 * is MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE.
 * Returns c, or -1 when a callee returns a negative value.
 */
int mbfl_filt_conv_wchar_8859_6(int c, mbfl_convert_filter *filter)
{
	int out = -1;
	int i;

	if (c >= 0 && c < 0xa0) {
		out = c;
	} else {
		for (i = 95; i >= 0; i--) {
			if (iso8859_6_ucs_table[i] == c) {
				out = 0xa0 + i;
				break;
			}
		}
		if (out <= 0 && (c & ~MBFL_WCSPLANE_MASK) == MBFL_WCSPLANE_8859_6) {
			out = c & MBFL_WCSPLANE_MASK;
		}
	}

	if (out < 0) {
		if (filter->illegal_mode != MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE) {
			CK(mbfl_filt_conv_illegal_output(c, filter));
		}
		return c;
	}

	CK((*filter->output_function)(out, filter->data));
	return c;
}

View file

@ -0,0 +1,43 @@
/*
* "streamable kanji code filter and converter"
* Copyright (c) 1998-2002 HappySize, Inc. All rights reserved.
*
* LICENSE NOTICES
*
* This file is part of "streamable kanji code filter and converter",
* which is distributed under the terms of GNU Lesser General Public
* License (version 2) as published by the Free Software Foundation.
*
* This software is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with "streamable kanji code filter and converter";
* if not, write to the Free Software Foundation, Inc., 59 Temple Place,
* Suite 330, Boston, MA 02111-1307 USA
*
* The author of this file:
*
*/
/*
* The source code included in this file was separated from mbfilter.c
* by moriyoshi koizumi <moriyoshi@php.net> on 4 dec 2002.
*
*/
#ifndef MBFL_MBFILTER_ISO8859_6_H
#define MBFL_MBFILTER_ISO8859_6_H

#include "mbfilter.h"

/* Per-character conversion functions: ISO-8859-6 => wchar and wchar => ISO-8859-6. */
int mbfl_filt_conv_8859_6_wchar(int c, mbfl_convert_filter *filter);
int mbfl_filt_conv_wchar_8859_6(int c, mbfl_convert_filter *filter);

/* Encoding descriptor and filter tables for ISO-8859-6. */
extern const mbfl_encoding mbfl_encoding_8859_6;
extern const struct mbfl_identify_vtbl vtbl_identify_8859_6;
extern const struct mbfl_convert_vtbl vtbl_8859_6_wchar;
extern const struct mbfl_convert_vtbl vtbl_wchar_8859_6;

#endif /* MBFL_MBFILTER_ISO8859_6_H */

View file

@ -0,0 +1,139 @@
/*
* "streamable kanji code filter and converter"
* Copyright (c) 1998-2002 HappySize, Inc. All rights reserved.
*
* LICENSE NOTICES
*
* This file is part of "streamable kanji code filter and converter",
* which is distributed under the terms of GNU Lesser General Public
* License (version 2) as published by the Free Software Foundation.
*
* This software is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with "streamable kanji code filter and converter";
* if not, write to the Free Software Foundation, Inc., 59 Temple Place,
* Suite 330, Boston, MA 02111-1307 USA
*
* The author of this file:
*
*/
/*
* The source code included in this file was separated from mbfilter.c
* by moriyoshi koizumi <moriyoshi@php.net> on 4 dec 2002.
*
*/
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
#include "mbfilter.h"
#include "mbfilter_iso8859_7.h"
#include "unicode_table_iso8859_7.h"
/* Alias names for ISO-8859-7; the array is NULL-terminated. */
static const char *mbfl_encoding_8859_7_aliases[] = {"ISO_8859-7", "greek", NULL};
/*
 * Encoding descriptor for ISO-8859-7. The initializers are positional:
 * the encoding id, two name strings (presumably the canonical name and the
 * MIME name -- confirm against the mbfl_encoding declaration), the alias
 * list above, a NULL member whose purpose is not visible in this file, and
 * the MBFL_ENCTYPE_SBCS type flag.
 */
const mbfl_encoding mbfl_encoding_8859_7 = {
mbfl_no_encoding_8859_7,
"ISO-8859-7",
"ISO-8859-7",
(const char *(*)[])&mbfl_encoding_8859_7_aliases,
NULL,
MBFL_ENCTYPE_SBCS
};
/*
 * Identify-filter table for ISO-8859-7: the common ctor/dtor helpers plus
 * mbfl_filt_ident_true as the filter function.
 */
const struct mbfl_identify_vtbl vtbl_identify_8859_7 = {
mbfl_no_encoding_8859_7,
mbfl_filt_ident_common_ctor,
mbfl_filt_ident_common_dtor,
mbfl_filt_ident_true
};
/*
 * Convert-filter table, ISO-8859-7 => wchar: source id, destination id,
 * common ctor/dtor, the per-character function defined below, common flush.
 */
const struct mbfl_convert_vtbl vtbl_8859_7_wchar = {
mbfl_no_encoding_8859_7,
mbfl_no_encoding_wchar,
mbfl_filt_conv_common_ctor,
mbfl_filt_conv_common_dtor,
mbfl_filt_conv_8859_7_wchar,
mbfl_filt_conv_common_flush
};
/* Convert-filter table for the opposite direction, wchar => ISO-8859-7. */
const struct mbfl_convert_vtbl vtbl_wchar_8859_7 = {
mbfl_no_encoding_wchar,
mbfl_no_encoding_8859_7,
mbfl_filt_conv_common_ctor,
mbfl_filt_conv_common_dtor,
mbfl_filt_conv_wchar_8859_7,
mbfl_filt_conv_common_flush
};
/* Evaluate statement; when it yields a negative value, return -1 from the enclosing function. */
#define CK(statement) do { if ((statement) < 0) return (-1); } while (0)
/*
 * ISO-8859-7 => wchar
 *
 * Converts one input value and hands the result to filter->output_function:
 *   - 0x00..0x9f is forwarded unchanged;
 *   - 0xa0..0xff is looked up in iso8859_7_ucs_table; an entry <= 0 is
 *     replaced by the low bits of c tagged with MBFL_WCSPLANE_8859_7;
 *   - any other value is masked with MBFL_WCSGROUP_MASK and tagged with
 *     MBFL_WCSGROUP_THROUGH.
 * Returns c, or -1 when the output function returns a negative value.
 */
int mbfl_filt_conv_8859_7_wchar(int c, mbfl_convert_filter *filter)
{
	int wc;

	if (c < 0 || c >= 0x100) {
		wc = (c & MBFL_WCSGROUP_MASK) | MBFL_WCSGROUP_THROUGH;
	} else if (c < 0xa0) {
		wc = c;
	} else {
		wc = iso8859_7_ucs_table[c - 0xa0];
		if (wc <= 0) {
			/* entry <= 0 is treated as "no mapping for this byte" */
			wc = (c & MBFL_WCSPLANE_MASK) | MBFL_WCSPLANE_8859_7;
		}
	}

	CK((*filter->output_function)(wc, filter->data));
	return c;
}
/*
 * wchar => ISO-8859-7
 *
 * Values 0x00..0x9f are emitted unchanged. Anything else is searched for in
 * iso8859_7_ucs_table, scanning indices 95 down to 0; a hit at index i is
 * emitted as 0xa0 + i. When nothing matches but c carries the
 * MBFL_WCSPLANE_8859_7 tag, the bits under MBFL_WCSPLANE_MASK are emitted.
 * Otherwise mbfl_filt_conv_illegal_output() is called, unless illegal_mode
 * is MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE.
 * Returns c, or -1 when a callee returns a negative value.
 */
int mbfl_filt_conv_wchar_8859_7(int c, mbfl_convert_filter *filter)
{
	int out = -1;
	int i;

	if (c >= 0 && c < 0xa0) {
		out = c;
	} else {
		for (i = 95; i >= 0; i--) {
			if (iso8859_7_ucs_table[i] == c) {
				out = 0xa0 + i;
				break;
			}
		}
		if (out <= 0 && (c & ~MBFL_WCSPLANE_MASK) == MBFL_WCSPLANE_8859_7) {
			out = c & MBFL_WCSPLANE_MASK;
		}
	}

	if (out < 0) {
		if (filter->illegal_mode != MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE) {
			CK(mbfl_filt_conv_illegal_output(c, filter));
		}
		return c;
	}

	CK((*filter->output_function)(out, filter->data));
	return c;
}

View file

@ -0,0 +1,43 @@
/*
* "streamable kanji code filter and converter"
* Copyright (c) 1998-2002 HappySize, Inc. All rights reserved.
*
* LICENSE NOTICES
*
* This file is part of "streamable kanji code filter and converter",
* which is distributed under the terms of GNU Lesser General Public
* License (version 2) as published by the Free Software Foundation.
*
* This software is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with "streamable kanji code filter and converter";
* if not, write to the Free Software Foundation, Inc., 59 Temple Place,
* Suite 330, Boston, MA 02111-1307 USA
*
* The author of this file:
*
*/
/*
* The source code included in this file was separated from mbfilter.c
* by moriyoshi koizumi <moriyoshi@php.net> on 4 dec 2002.
*
*/
#ifndef MBFL_MBFILTER_ISO8859_7_H
#define MBFL_MBFILTER_ISO8859_7_H

#include "mbfilter.h"

/* Per-character conversion functions: ISO-8859-7 => wchar and wchar => ISO-8859-7. */
int mbfl_filt_conv_8859_7_wchar(int c, mbfl_convert_filter *filter);
int mbfl_filt_conv_wchar_8859_7(int c, mbfl_convert_filter *filter);

/* Encoding descriptor and filter tables for ISO-8859-7. */
extern const mbfl_encoding mbfl_encoding_8859_7;
extern const struct mbfl_identify_vtbl vtbl_identify_8859_7;
extern const struct mbfl_convert_vtbl vtbl_8859_7_wchar;
extern const struct mbfl_convert_vtbl vtbl_wchar_8859_7;

#endif /* MBFL_MBFILTER_ISO8859_7_H */

View file

@ -0,0 +1,137 @@
/*
* "streamable kanji code filter and converter"
* Copyright (c) 1998-2002 HappySize, Inc. All rights reserved.
*
* LICENSE NOTICES
*
* This file is part of "streamable kanji code filter and converter",
* which is distributed under the terms of GNU Lesser General Public
* License (version 2) as published by the Free Software Foundation.
*
* This software is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with "streamable kanji code filter and converter";
* if not, write to the Free Software Foundation, Inc., 59 Temple Place,
* Suite 330, Boston, MA 02111-1307 USA
*
* The author of this file:
*
*/
/*
* The source code included in this file was separated from mbfilter.c
* by moriyoshi koizumi <moriyoshi@php.net> on 4 dec 2002.
*
*/
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
#include "mbfilter.h"
#include "mbfilter_iso8859_8.h"
#include "unicode_table_iso8859_8.h"
/* Alias names for ISO-8859-8; the array is NULL-terminated. */
static const char *mbfl_encoding_8859_8_aliases[] = {"ISO_8859-8", "hebrew", NULL};
/*
 * Encoding descriptor for ISO-8859-8. The initializers are positional:
 * the encoding id, two name strings (presumably the canonical name and the
 * MIME name -- confirm against the mbfl_encoding declaration), the alias
 * list above, a NULL member whose purpose is not visible in this file, and
 * the MBFL_ENCTYPE_SBCS type flag.
 */
const mbfl_encoding mbfl_encoding_8859_8 = {
mbfl_no_encoding_8859_8,
"ISO-8859-8",
"ISO-8859-8",
(const char *(*)[])&mbfl_encoding_8859_8_aliases,
NULL,
MBFL_ENCTYPE_SBCS
};
/*
 * Identify-filter table for ISO-8859-8: the common ctor/dtor helpers plus
 * mbfl_filt_ident_true as the filter function.
 */
const struct mbfl_identify_vtbl vtbl_identify_8859_8 = {
mbfl_no_encoding_8859_8,
mbfl_filt_ident_common_ctor,
mbfl_filt_ident_common_dtor,
mbfl_filt_ident_true
};
/*
 * Convert-filter table, ISO-8859-8 => wchar: source id, destination id,
 * common ctor/dtor, the per-character function defined below, common flush.
 */
const struct mbfl_convert_vtbl vtbl_8859_8_wchar = {
mbfl_no_encoding_8859_8,
mbfl_no_encoding_wchar,
mbfl_filt_conv_common_ctor,
mbfl_filt_conv_common_dtor,
mbfl_filt_conv_8859_8_wchar,
mbfl_filt_conv_common_flush
};
/* Convert-filter table for the opposite direction, wchar => ISO-8859-8. */
const struct mbfl_convert_vtbl vtbl_wchar_8859_8 = {
mbfl_no_encoding_wchar,
mbfl_no_encoding_8859_8,
mbfl_filt_conv_common_ctor,
mbfl_filt_conv_common_dtor,
mbfl_filt_conv_wchar_8859_8,
mbfl_filt_conv_common_flush
};
/* Evaluate statement; when it yields a negative value, return -1 from the enclosing function. */
#define CK(statement) do { if ((statement) < 0) return (-1); } while (0)
/*
 * ISO-8859-8 => wchar
 *
 * Converts one input value and hands the result to filter->output_function:
 *   - 0x00..0x9f is forwarded unchanged;
 *   - 0xa0..0xff is looked up in iso8859_8_ucs_table; an entry <= 0 is
 *     replaced by the low bits of c tagged with MBFL_WCSPLANE_8859_8;
 *   - any other value is masked with MBFL_WCSGROUP_MASK and tagged with
 *     MBFL_WCSGROUP_THROUGH.
 * Returns c, or -1 when the output function returns a negative value.
 */
int mbfl_filt_conv_8859_8_wchar(int c, mbfl_convert_filter *filter)
{
	int wc;

	if (c < 0 || c >= 0x100) {
		wc = (c & MBFL_WCSGROUP_MASK) | MBFL_WCSGROUP_THROUGH;
	} else if (c < 0xa0) {
		wc = c;
	} else {
		wc = iso8859_8_ucs_table[c - 0xa0];
		if (wc <= 0) {
			/* entry <= 0 is treated as "no mapping for this byte" */
			wc = (c & MBFL_WCSPLANE_MASK) | MBFL_WCSPLANE_8859_8;
		}
	}

	CK((*filter->output_function)(wc, filter->data));
	return c;
}
/*
 * wchar => ISO-8859-8
 *
 * Values 0x00..0x9f are emitted unchanged. Anything else is searched for in
 * iso8859_8_ucs_table, scanning indices 95 down to 0; a hit at index i is
 * emitted as 0xa0 + i. When nothing matches but c carries the
 * MBFL_WCSPLANE_8859_8 tag, the bits under MBFL_WCSPLANE_MASK are emitted.
 * Otherwise mbfl_filt_conv_illegal_output() is called, unless illegal_mode
 * is MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE.
 * Returns c, or -1 when a callee returns a negative value.
 */
int mbfl_filt_conv_wchar_8859_8(int c, mbfl_convert_filter *filter)
{
	int out = -1;
	int i;

	if (c >= 0 && c < 0xa0) {
		out = c;
	} else {
		for (i = 95; i >= 0; i--) {
			if (iso8859_8_ucs_table[i] == c) {
				out = 0xa0 + i;
				break;
			}
		}
		if (out <= 0 && (c & ~MBFL_WCSPLANE_MASK) == MBFL_WCSPLANE_8859_8) {
			out = c & MBFL_WCSPLANE_MASK;
		}
	}

	if (out < 0) {
		if (filter->illegal_mode != MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE) {
			CK(mbfl_filt_conv_illegal_output(c, filter));
		}
		return c;
	}

	CK((*filter->output_function)(out, filter->data));
	return c;
}

View file

@ -0,0 +1,43 @@
/*
* "streamable kanji code filter and converter"
* Copyright (c) 1998-2002 HappySize, Inc. All rights reserved.
*
* LICENSE NOTICES
*
* This file is part of "streamable kanji code filter and converter",
* which is distributed under the terms of GNU Lesser General Public
* License (version 2) as published by the Free Software Foundation.
*
* This software is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with "streamable kanji code filter and converter";
* if not, write to the Free Software Foundation, Inc., 59 Temple Place,
* Suite 330, Boston, MA 02111-1307 USA
*
* The author of this file:
*
*/
/*
* The source code included in this file was separated from mbfilter.c
* by moriyoshi koizumi <moriyoshi@php.net> on 4 dec 2002.
*
*/
#ifndef MBFL_MBFILTER_ISO8859_8_H
#define MBFL_MBFILTER_ISO8859_8_H

#include "mbfilter.h"

/* Per-character conversion functions: ISO-8859-8 => wchar and wchar => ISO-8859-8. */
int mbfl_filt_conv_8859_8_wchar(int c, mbfl_convert_filter *filter);
int mbfl_filt_conv_wchar_8859_8(int c, mbfl_convert_filter *filter);

/* Encoding descriptor and filter tables for ISO-8859-8. */
extern const mbfl_encoding mbfl_encoding_8859_8;
extern const struct mbfl_identify_vtbl vtbl_identify_8859_8;
extern const struct mbfl_convert_vtbl vtbl_8859_8_wchar;
extern const struct mbfl_convert_vtbl vtbl_wchar_8859_8;

#endif /* MBFL_MBFILTER_ISO8859_8_H */

View file

@ -0,0 +1,138 @@
/*
* "streamable kanji code filter and converter"
* Copyright (c) 1998-2002 HappySize, Inc. All rights reserved.
*
* LICENSE NOTICES
*
* This file is part of "streamable kanji code filter and converter",
* which is distributed under the terms of GNU Lesser General Public
* License (version 2) as published by the Free Software Foundation.
*
* This software is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with "streamable kanji code filter and converter";
* if not, write to the Free Software Foundation, Inc., 59 Temple Place,
* Suite 330, Boston, MA 02111-1307 USA
*
* The author of this file:
*
*/
/*
* The source code included in this file was separated from mbfilter.c
* by moriyoshi koizumi <moriyoshi@php.net> on 4 dec 2002.
*
*/
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
#include "mbfilter.h"
#include "mbfilter_iso8859_9.h"
#include "unicode_table_iso8859_9.h"
/* Alias names for ISO-8859-9; the array is NULL-terminated. */
static const char *mbfl_encoding_8859_9_aliases[] = {"ISO_8859-9", "latin5", NULL};
/*
 * Encoding descriptor for ISO-8859-9. The initializers are positional:
 * the encoding id, two name strings (presumably the canonical name and the
 * MIME name -- confirm against the mbfl_encoding declaration), the alias
 * list above, a NULL member whose purpose is not visible in this file, and
 * the MBFL_ENCTYPE_SBCS type flag.
 */
const mbfl_encoding mbfl_encoding_8859_9 = {
mbfl_no_encoding_8859_9,
"ISO-8859-9",
"ISO-8859-9",
(const char *(*)[])&mbfl_encoding_8859_9_aliases,
NULL,
MBFL_ENCTYPE_SBCS
};
/*
 * Identify-filter table for ISO-8859-9: the common ctor/dtor helpers plus
 * mbfl_filt_ident_true as the filter function.
 */
const struct mbfl_identify_vtbl vtbl_identify_8859_9 = {
mbfl_no_encoding_8859_9,
mbfl_filt_ident_common_ctor,
mbfl_filt_ident_common_dtor,
mbfl_filt_ident_true
};
/*
 * Convert-filter table, ISO-8859-9 => wchar: source id, destination id,
 * common ctor/dtor, the per-character function defined below, common flush.
 */
const struct mbfl_convert_vtbl vtbl_8859_9_wchar = {
mbfl_no_encoding_8859_9,
mbfl_no_encoding_wchar,
mbfl_filt_conv_common_ctor,
mbfl_filt_conv_common_dtor,
mbfl_filt_conv_8859_9_wchar,
mbfl_filt_conv_common_flush
};
/* Convert-filter table for the opposite direction, wchar => ISO-8859-9. */
const struct mbfl_convert_vtbl vtbl_wchar_8859_9 = {
mbfl_no_encoding_wchar,
mbfl_no_encoding_8859_9,
mbfl_filt_conv_common_ctor,
mbfl_filt_conv_common_dtor,
mbfl_filt_conv_wchar_8859_9,
mbfl_filt_conv_common_flush
};
/* Evaluate statement; when it yields a negative value, return -1 from the enclosing function. */
#define CK(statement) do { if ((statement) < 0) return (-1); } while (0)
/*
 * ISO-8859-9 => wchar
 *
 * Converts one input value and hands the result to filter->output_function:
 *   - 0x00..0x9f is forwarded unchanged;
 *   - 0xa0..0xff is looked up in iso8859_9_ucs_table; an entry <= 0 is
 *     replaced by the low bits of c tagged with MBFL_WCSPLANE_8859_9;
 *   - any other value is masked with MBFL_WCSGROUP_MASK and tagged with
 *     MBFL_WCSGROUP_THROUGH.
 * Returns c, or -1 when the output function returns a negative value.
 */
int mbfl_filt_conv_8859_9_wchar(int c, mbfl_convert_filter *filter)
{
	int wc;

	if (c < 0 || c >= 0x100) {
		wc = (c & MBFL_WCSGROUP_MASK) | MBFL_WCSGROUP_THROUGH;
	} else if (c < 0xa0) {
		wc = c;
	} else {
		wc = iso8859_9_ucs_table[c - 0xa0];
		if (wc <= 0) {
			/* entry <= 0 is treated as "no mapping for this byte" */
			wc = (c & MBFL_WCSPLANE_MASK) | MBFL_WCSPLANE_8859_9;
		}
	}

	CK((*filter->output_function)(wc, filter->data));
	return c;
}
/*
 * wchar => ISO-8859-9
 *
 * Values 0x00..0x9f are emitted unchanged. Anything else is searched for in
 * iso8859_9_ucs_table, scanning indices 95 down to 0; a hit at index i is
 * emitted as 0xa0 + i. When nothing matches but c carries the
 * MBFL_WCSPLANE_8859_9 tag, the bits under MBFL_WCSPLANE_MASK are emitted.
 * Otherwise mbfl_filt_conv_illegal_output() is called, unless illegal_mode
 * is MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE.
 * Returns c, or -1 when a callee returns a negative value.
 */
int mbfl_filt_conv_wchar_8859_9(int c, mbfl_convert_filter *filter)
{
	int out = -1;
	int i;

	if (c >= 0 && c < 0xa0) {
		out = c;
	} else {
		for (i = 95; i >= 0; i--) {
			if (iso8859_9_ucs_table[i] == c) {
				out = 0xa0 + i;
				break;
			}
		}
		if (out <= 0 && (c & ~MBFL_WCSPLANE_MASK) == MBFL_WCSPLANE_8859_9) {
			out = c & MBFL_WCSPLANE_MASK;
		}
	}

	if (out < 0) {
		if (filter->illegal_mode != MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE) {
			CK(mbfl_filt_conv_illegal_output(c, filter));
		}
		return c;
	}

	CK((*filter->output_function)(out, filter->data));
	return c;
}

View file

@ -0,0 +1,43 @@
/*
* "streamable kanji code filter and converter"
* Copyright (c) 1998-2002 HappySize, Inc. All rights reserved.
*
* LICENSE NOTICES
*
* This file is part of "streamable kanji code filter and converter",
* which is distributed under the terms of GNU Lesser General Public
* License (version 2) as published by the Free Software Foundation.
*
* This software is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with "streamable kanji code filter and converter";
* if not, write to the Free Software Foundation, Inc., 59 Temple Place,
* Suite 330, Boston, MA 02111-1307 USA
*
* The author of this file:
*
*/
/*
* The source code included in this file was separated from mbfilter.c
* by moriyoshi koizumi <moriyoshi@php.net> on 4 dec 2002.
*
*/
#ifndef MBFL_MBFILTER_ISO8859_9_H
#define MBFL_MBFILTER_ISO8859_9_H

#include "mbfilter.h"

/* Per-character conversion functions: ISO-8859-9 => wchar and wchar => ISO-8859-9. */
int mbfl_filt_conv_8859_9_wchar(int c, mbfl_convert_filter *filter);
int mbfl_filt_conv_wchar_8859_9(int c, mbfl_convert_filter *filter);

/* Encoding descriptor and filter tables for ISO-8859-9. */
extern const mbfl_encoding mbfl_encoding_8859_9;
extern const struct mbfl_identify_vtbl vtbl_identify_8859_9;
extern const struct mbfl_convert_vtbl vtbl_8859_9_wchar;
extern const struct mbfl_convert_vtbl vtbl_wchar_8859_9;

#endif /* MBFL_MBFILTER_ISO8859_9_H */

View file

@ -0,0 +1,658 @@
/*
* "streamable kanji code filter and converter"
* Copyright (c) 1998-2002 HappySize, Inc. All rights reserved.
*
* LICENSE NOTICES
*
* This file is part of "streamable kanji code filter and converter",
* which is distributed under the terms of GNU Lesser General Public
* License (version 2) as published by the Free Software Foundation.
*
* This software is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with "streamable kanji code filter and converter";
* if not, write to the Free Software Foundation, Inc., 59 Temple Place,
* Suite 330, Boston, MA 02111-1307 USA
*
* The author of this file:
*
*/
/*
* The source code included in this file was separated from mbfilter_ja.c
* by moriyoshi koizumi <moriyoshi@php.net> on 4 dec 2002.
*
*/
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
#include "mbfilter.h"
#include "mbfilter_jis.h"
#include "unicode_table_cp932_ext.h"
#include "unicode_table_jis.h"
static int mbfl_filt_ident_jis(int c, mbfl_identify_filter *filter);
static int mbfl_filt_ident_2022jp(int c, mbfl_identify_filter *filter);
/*
 * Encoding descriptor for "JIS".
 * Slots, in order: encoding id, name, a second name string (presumably the
 * MIME charset name), two NULL slots (other libmbfl encodings such as
 * Shift_JIS fill these with an alias list and a byte-length table), and the
 * encoding-type flags.
 */
const mbfl_encoding mbfl_encoding_jis = {
mbfl_no_encoding_jis,
"JIS",
"ISO-2022-JP",
NULL,
NULL,
MBFL_ENCTYPE_MBCS | MBFL_ENCTYPE_SHFTCODE
};
/*
 * Encoding descriptor for "ISO-2022-JP".
 * Same layout as the "JIS" descriptor; both name strings are "ISO-2022-JP"
 * and the two pointer slots are left NULL.
 */
const mbfl_encoding mbfl_encoding_2022jp = {
mbfl_no_encoding_2022jp,
"ISO-2022-JP",
"ISO-2022-JP",
NULL,
NULL,
MBFL_ENCTYPE_MBCS | MBFL_ENCTYPE_SHFTCODE
};
/*
 * Identify-filter vtables for JIS and ISO-2022-JP.
 * Slots, in order: encoding id, constructor, destructor, per-character
 * identify function. Both use the common ctor/dtor and differ only in the
 * identify function.
 */
const struct mbfl_identify_vtbl vtbl_identify_jis = {
mbfl_no_encoding_jis,
mbfl_filt_ident_common_ctor,
mbfl_filt_ident_common_dtor,
mbfl_filt_ident_jis
};
const struct mbfl_identify_vtbl vtbl_identify_2022jp = {
mbfl_no_encoding_2022jp,
mbfl_filt_ident_common_ctor,
mbfl_filt_ident_common_dtor,
mbfl_filt_ident_2022jp
};
/*
 * Convert-filter vtables for JIS and ISO-2022-JP.
 * Slots, in order: source encoding id, destination encoding id, constructor,
 * destructor, per-character filter function, flush function.
 */
/* JIS => wchar */
const struct mbfl_convert_vtbl vtbl_jis_wchar = {
mbfl_no_encoding_jis,
mbfl_no_encoding_wchar,
mbfl_filt_conv_common_ctor,
mbfl_filt_conv_common_dtor,
mbfl_filt_conv_jis_wchar,
mbfl_filt_conv_common_flush
};
/* wchar => JIS; the flush function is the JIS-specific one that returns the output to ASCII */
const struct mbfl_convert_vtbl vtbl_wchar_jis = {
mbfl_no_encoding_wchar,
mbfl_no_encoding_jis,
mbfl_filt_conv_common_ctor,
mbfl_filt_conv_common_dtor,
mbfl_filt_conv_wchar_jis,
mbfl_filt_conv_any_jis_flush
};
/* ISO-2022-JP => wchar; decoding reuses the JIS decoder */
const struct mbfl_convert_vtbl vtbl_2022jp_wchar = {
mbfl_no_encoding_2022jp,
mbfl_no_encoding_wchar,
mbfl_filt_conv_common_ctor,
mbfl_filt_conv_common_dtor,
mbfl_filt_conv_jis_wchar,
mbfl_filt_conv_common_flush
};
/* wchar => ISO-2022-JP; shares the JIS flush function */
const struct mbfl_convert_vtbl vtbl_wchar_2022jp = {
mbfl_no_encoding_wchar,
mbfl_no_encoding_2022jp,
mbfl_filt_conv_common_ctor,
mbfl_filt_conv_common_dtor,
mbfl_filt_conv_wchar_2022jp,
mbfl_filt_conv_any_jis_flush
};
/* Evaluate `statement`; if its value is negative, return -1 from the enclosing function. */
#define CK(statement) do { if ((statement) < 0) return (-1); } while (0)
/*
 * JIS => wchar
 *
 * Consumes one input value `c` and passes zero or more decoded values to
 * filter->output_function.
 *
 * filter->status layout:
 *   high nibble (status & ~0xf) - currently selected character set
 *       0x00 ASCII, 0x10 X 0201 latin, 0x20 X 0201 kana,
 *       0x80 X 0208, 0x90 X 0212
 *   low nibble (status & 0xf)   - position inside a sequence
 *       0 idle, 1 second byte of a two-byte character pending,
 *       2 after ESC, 3 after ESC '$', 4 after ESC '$' '(', 5 after ESC '('
 * filter->cache holds the first byte of a two-byte character.
 *
 * Returns c, or -1 as soon as the output function reports a negative value
 * (see CK).
 */
int
mbfl_filt_conv_jis_wchar(int c, mbfl_convert_filter *filter)
{
int c1, s, w;
/* re-entry point: an unrecognised escape sequence is emitted literally and
   the current character is then re-dispatched in the idle state */
retry:
switch (filter->status & 0xf) {
/* case 0x00: ASCII */
/* case 0x10: X 0201 latin */
/* case 0x20: X 0201 kana */
/* case 0x80: X 0208 */
/* case 0x90: X 0212 */
case 0:
if (c == 0x1b) {
/* ESC: keep the character-set nibble, move to sub-state 2 */
filter->status += 2;
} else if (c == 0x0e) { /* "kana in" */
filter->status = 0x20;
} else if (c == 0x0f) { /* "kana out" */
filter->status = 0;
} else if (filter->status == 0x10 && c == 0x5c) { /* YEN SIGN */
CK((*filter->output_function)(0xa5, filter->data));
} else if (filter->status == 0x10 && c == 0x7e) { /* OVER LINE */
CK((*filter->output_function)(0x203e, filter->data));
} else if (filter->status == 0x20 && c > 0x20 && c < 0x60) { /* kana */
CK((*filter->output_function)(0xff40 + c, filter->data));
} else if ((filter->status == 0x80 || filter->status == 0x90) && c > 0x20 && c < 0x7f) { /* kanji first char */
filter->cache = c;
filter->status += 1;
} else if (c >= 0 && c < 0x80) { /* latin, CTLs */
CK((*filter->output_function)(c, filter->data));
} else if (c > 0xa0 && c < 0xe0) { /* GR kana */
CK((*filter->output_function)(0xfec0 + c, filter->data));
} else {
/* anything else is passed on tagged with MBFL_WCSGROUP_THROUGH */
w = c & MBFL_WCSGROUP_MASK;
w |= MBFL_WCSGROUP_THROUGH;
CK((*filter->output_function)(w, filter->data));
}
break;
/* case 0x81: X 0208 second char */
/* case 0x91: X 0212 second char */
case 1:
/* leave the "second byte pending" sub-state, keep the character set */
filter->status &= ~0xf;
c1 = filter->cache;
if (c > 0x20 && c < 0x7f) {
/* index into a 94-column table */
s = (c1 - 0x21)*94 + c - 0x21;
if (filter->status == 0x80) {
if (s >= 0 && s < jisx0208_ucs_table_size) {
w = jisx0208_ucs_table[s];
} else {
w = 0;
}
if (w <= 0) {
/* no table entry: emit the raw two-byte code tagged with the JIS0208 plane */
w = (c1 << 8) | c;
w &= MBFL_WCSPLANE_MASK;
w |= MBFL_WCSPLANE_JIS0208;
}
} else {
if (s >= 0 && s < jisx0212_ucs_table_size) {
w = jisx0212_ucs_table[s];
} else {
w = 0;
}
if (w <= 0) {
/* no table entry: emit the raw two-byte code tagged with the JIS0212 plane */
w = (c1 << 8) | c;
w &= MBFL_WCSPLANE_MASK;
w |= MBFL_WCSPLANE_JIS0212;
}
}
CK((*filter->output_function)(w, filter->data));
} else if (c == 0x1b) {
/* ESC in place of the second byte: the cached first byte is dropped */
filter->status += 2;
} else if ((c >= 0 && c < 0x21) || c == 0x7f) { /* CTLs */
CK((*filter->output_function)(c, filter->data));
} else {
w = (c1 << 8) | c;
w &= MBFL_WCSGROUP_MASK;
w |= MBFL_WCSGROUP_THROUGH;
CK((*filter->output_function)(w, filter->data));
}
break;
/* ESC */
/* case 0x02: */
/* case 0x12: */
/* case 0x22: */
/* case 0x82: */
/* case 0x92: */
case 2:
if (c == 0x24) { /* '$' */
filter->status++;
} else if (c == 0x28) { /* '(' */
filter->status += 3;
} else {
/* not an escape sequence handled here: emit ESC and reprocess c */
filter->status &= ~0xf;
CK((*filter->output_function)(0x1b, filter->data));
goto retry;
}
break;
/* ESC $ */
/* case 0x03: */
/* case 0x13: */
/* case 0x23: */
/* case 0x83: */
/* case 0x93: */
case 3:
if (c == 0x40 || c == 0x42) { /* '@' or 'B' */
filter->status = 0x80;
} else if (c == 0x28) { /* '(' */
filter->status++;
} else {
/* emit the consumed "ESC $" literally and reprocess c */
filter->status &= ~0xf;
CK((*filter->output_function)(0x1b, filter->data));
CK((*filter->output_function)(0x24, filter->data));
goto retry;
}
break;
/* ESC $ ( */
/* case 0x04: */
/* case 0x14: */
/* case 0x24: */
/* case 0x84: */
/* case 0x94: */
case 4:
if (c == 0x40 || c == 0x42) { /* '@' or 'B' */
filter->status = 0x80;
} else if (c == 0x44) { /* 'D' */
filter->status = 0x90;
} else {
/* emit the consumed "ESC $ (" literally and reprocess c */
filter->status &= ~0xf;
CK((*filter->output_function)(0x1b, filter->data));
CK((*filter->output_function)(0x24, filter->data));
CK((*filter->output_function)(0x28, filter->data));
goto retry;
}
break;
/* ESC ( */
/* case 0x05: */
/* case 0x15: */
/* case 0x25: */
/* case 0x85: */
/* case 0x95: */
case 5:
if (c == 0x42 || c == 0x48) { /* 'B' or 'H' */
filter->status = 0;
} else if (c == 0x4a) { /* 'J' */
filter->status = 0x10;
} else if (c == 0x49) { /* 'I' */
filter->status = 0x20;
} else {
/* emit the consumed "ESC (" literally and reprocess c */
filter->status &= ~0xf;
CK((*filter->output_function)(0x1b, filter->data));
CK((*filter->output_function)(0x28, filter->data));
goto retry;
}
break;
default:
/* unknown sub-state: reset to ASCII / idle; c is not emitted */
filter->status = 0;
break;
}
return c;
}
/*
 * wchar => JIS
 *
 * Looks `c` up in the ucs_*_jis tables, falls back to a few hard-coded
 * mappings, and writes the resulting code through filter->output_function,
 * preceded by an escape sequence whenever the character set changes.
 *
 * Intermediate code `s`:
 *   < 0x80      ASCII
 *   < 0x100     X 0201 kana
 *   < 0x8080    X 0208
 *   < 0x10000   X 0212
 *   otherwise   X 0201 latin (0x1005c, 0x1007e)
 *   negative    no mapping
 *
 * The currently selected output character set is kept in
 * filter->status & 0xff00: 0 ASCII, 0x100 kana, 0x200 X 0208, 0x300 X 0212,
 * 0x400 X 0201 latin.
 *
 * Returns c, or -1 as soon as an output call reports a negative value
 * (see CK).
 */
int
mbfl_filt_conv_wchar_jis(int c, mbfl_convert_filter *filter)
{
int c1, s;
s = 0;
/* table lookup; the first range that contains c is used */
if (c >= ucs_a1_jis_table_min && c < ucs_a1_jis_table_max) {
s = ucs_a1_jis_table[c - ucs_a1_jis_table_min];
} else if (c >= ucs_a2_jis_table_min && c < ucs_a2_jis_table_max) {
s = ucs_a2_jis_table[c - ucs_a2_jis_table_min];
} else if (c >= ucs_i_jis_table_min && c < ucs_i_jis_table_max) {
s = ucs_i_jis_table[c - ucs_i_jis_table_min];
} else if (c >= ucs_r_jis_table_min && c < ucs_r_jis_table_max) {
s = ucs_r_jis_table[c - ucs_r_jis_table_min];
}
if (s <= 0) {
/* no table entry: try plane-tagged raw codes, then the special cases */
c1 = c & ~MBFL_WCSPLANE_MASK;
if (c1 == MBFL_WCSPLANE_JIS0208) {
s = c & MBFL_WCSPLANE_MASK;
} else if (c1 == MBFL_WCSPLANE_JIS0212) {
s = c & MBFL_WCSPLANE_MASK;
/* setting 0x8080 moves the code into the X 0212 range tested below */
s |= 0x8080;
} else if (c == 0xa5) { /* YEN SIGN */
s = 0x1005c;
} else if (c == 0x203e) { /* OVER LINE */
s = 0x1007e;
} else if (c == 0xff3c) { /* FULLWIDTH REVERSE SOLIDUS */
s = 0x2140;
} else if (c == 0xff5e) { /* FULLWIDTH TILDE */
s = 0x2141;
} else if (c == 0x2225) { /* PARALLEL TO */
s = 0x2142;
} else if (c == 0xff0d) { /* FULLWIDTH HYPHEN-MINUS */
s = 0x215d;
} else if (c == 0xffe0) { /* FULLWIDTH CENT SIGN */
s = 0x2171;
} else if (c == 0xffe1) { /* FULLWIDTH POUND SIGN */
s = 0x2172;
} else if (c == 0xffe2) { /* FULLWIDTH NOT SIGN */
s = 0x224c;
}
/* c == 0 is written as a 0 byte; every other unmapped value becomes -1 */
if (c == 0) {
s = 0;
} else if (s <= 0) {
s = -1;
}
}
if (s >= 0) {
if (s < 0x80) { /* ASCII */
if ((filter->status & 0xff00) != 0) {
CK((*filter->output_function)(0x1b, filter->data)); /* ESC */
CK((*filter->output_function)(0x28, filter->data)); /* '(' */
CK((*filter->output_function)(0x42, filter->data)); /* 'B' */
}
filter->status = 0;
CK((*filter->output_function)(s, filter->data));
} else if (s < 0x100) { /* kana */
if ((filter->status & 0xff00) != 0x100) {
CK((*filter->output_function)(0x1b, filter->data)); /* ESC */
CK((*filter->output_function)(0x28, filter->data)); /* '(' */
CK((*filter->output_function)(0x49, filter->data)); /* 'I' */
}
filter->status = 0x100;
CK((*filter->output_function)(s & 0x7f, filter->data));
} else if (s < 0x8080) { /* X 0208 */
if ((filter->status & 0xff00) != 0x200) {
CK((*filter->output_function)(0x1b, filter->data)); /* ESC */
CK((*filter->output_function)(0x24, filter->data)); /* '$' */
CK((*filter->output_function)(0x42, filter->data)); /* 'B' */
}
filter->status = 0x200;
CK((*filter->output_function)((s >> 8) & 0x7f, filter->data));
CK((*filter->output_function)(s & 0x7f, filter->data));
} else if (s < 0x10000) { /* X 0212 */
if ((filter->status & 0xff00) != 0x300) {
CK((*filter->output_function)(0x1b, filter->data)); /* ESC */
CK((*filter->output_function)(0x24, filter->data)); /* '$' */
CK((*filter->output_function)(0x28, filter->data)); /* '(' */
CK((*filter->output_function)(0x44, filter->data)); /* 'D' */
}
filter->status = 0x300;
CK((*filter->output_function)((s >> 8) & 0x7f, filter->data));
CK((*filter->output_function)(s & 0x7f, filter->data));
} else { /* X 0201 latin */
if ((filter->status & 0xff00) != 0x400) {
CK((*filter->output_function)(0x1b, filter->data)); /* ESC */
CK((*filter->output_function)(0x28, filter->data)); /* '(' */
CK((*filter->output_function)(0x4a, filter->data)); /* 'J' */
}
filter->status = 0x400;
CK((*filter->output_function)(s & 0x7f, filter->data));
}
} else {
/* no mapping: hand c to the illegal-character handler unless that is disabled */
if (filter->illegal_mode != MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE) {
CK(mbfl_filt_conv_illegal_output(c, filter));
}
}
return c;
}
/*
 * wchar => ISO-2022-JP
 *
 * Maps `c` to ASCII, JIS X 0208 or JIS X 0201 latin and writes it through
 * filter->output_function, emitting an escape sequence whenever the output
 * character set changes. Table results in the ranges 0x80..0x2120 and above
 * 0x8080 are rejected. Values with no mapping go to
 * mbfl_filt_conv_illegal_output() unless illegal_mode is
 * MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE.
 *
 * filter->status & 0xff00 records the selected output character set:
 * 0 ASCII, 0x200 X 0208, 0x400 X 0201 latin.
 *
 * Returns c, or -1 as soon as an output call reports a negative value.
 */
int
mbfl_filt_conv_wchar_2022jp(int c, mbfl_convert_filter *filter)
{
	int code = 0;

	/* table lookup; the first range that contains c is used */
	if (c >= ucs_a1_jis_table_min && c < ucs_a1_jis_table_max) {
		code = ucs_a1_jis_table[c - ucs_a1_jis_table_min];
	} else if (c >= ucs_a2_jis_table_min && c < ucs_a2_jis_table_max) {
		code = ucs_a2_jis_table[c - ucs_a2_jis_table_min];
	} else if (c >= ucs_i_jis_table_min && c < ucs_i_jis_table_max) {
		code = ucs_i_jis_table[c - ucs_i_jis_table_min];
	} else if (c >= ucs_r_jis_table_min && c < ucs_r_jis_table_max) {
		code = ucs_r_jis_table[c - ucs_r_jis_table_min];
	}

	if (code > 0) {
		/* the table produced a code this encoding does not emit */
		if ((code >= 0x80 && code < 0x2121) || (code > 0x8080)) {
			code = -1;
		}
	} else {
		/* hard-coded fallbacks for characters missing from the tables */
		switch (c) {
		case 0xa5:   /* YEN SIGN */
			code = 0x1005c;
			break;
		case 0x203e: /* OVER LINE */
			code = 0x1007e;
			break;
		case 0xff3c: /* FULLWIDTH REVERSE SOLIDUS */
			code = 0x2140;
			break;
		case 0xff5e: /* FULLWIDTH TILDE */
			code = 0x2141;
			break;
		case 0x2225: /* PARALLEL TO */
			code = 0x2142;
			break;
		case 0xff0d: /* FULLWIDTH HYPHEN-MINUS */
			code = 0x215d;
			break;
		case 0xffe0: /* FULLWIDTH CENT SIGN */
			code = 0x2171;
			break;
		case 0xffe1: /* FULLWIDTH POUND SIGN */
			code = 0x2172;
			break;
		case 0xffe2: /* FULLWIDTH NOT SIGN */
			code = 0x224c;
			break;
		default:
			break;
		}
		if (c == 0) {
			code = 0;
		} else if (code <= 0) {
			code = -1;
		}
	}

	if (code < 0) {
		/* no mapping */
		if (filter->illegal_mode != MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE) {
			CK(mbfl_filt_conv_illegal_output(c, filter));
		}
		return c;
	}

	if (code < 0x80) {
		/* ASCII: ESC ( B */
		if ((filter->status & 0xff00) != 0) {
			CK((*filter->output_function)(0x1b, filter->data));
			CK((*filter->output_function)(0x28, filter->data));
			CK((*filter->output_function)(0x42, filter->data));
		}
		filter->status = 0;
		CK((*filter->output_function)(code, filter->data));
	} else if (code < 0x10000) {
		/* X 0208: ESC $ B */
		if ((filter->status & 0xff00) != 0x200) {
			CK((*filter->output_function)(0x1b, filter->data));
			CK((*filter->output_function)(0x24, filter->data));
			CK((*filter->output_function)(0x42, filter->data));
		}
		filter->status = 0x200;
		CK((*filter->output_function)((code >> 8) & 0x7f, filter->data));
		CK((*filter->output_function)(code & 0x7f, filter->data));
	} else {
		/* X 0201 latin: ESC ( J */
		if ((filter->status & 0xff00) != 0x400) {
			CK((*filter->output_function)(0x1b, filter->data));
			CK((*filter->output_function)(0x28, filter->data));
			CK((*filter->output_function)(0x4a, filter->data));
		}
		filter->status = 0x400;
		CK((*filter->output_function)(code & 0x7f, filter->data));
	}

	return c;
}
/*
 * Flush for the wchar => JIS / ISO-2022-JP encoders.
 *
 * If a non-ASCII character set is selected (filter->status & 0xff00 is
 * non-zero), writes ESC ( B to return to ASCII, then clears everything in
 * status above the low byte.
 *
 * Returns 0, or -1 as soon as an output call reports a negative value.
 */
int
mbfl_filt_conv_any_jis_flush(mbfl_convert_filter *filter)
{
	static const int back_to_ascii[3] = { 0x1b, 0x28, 0x42 }; /* ESC ( B */
	int i;

	if ((filter->status & 0xff00) != 0) {
		for (i = 0; i < 3; i++) {
			CK((*filter->output_function)(back_to_ascii[i], filter->data));
		}
	}
	filter->status = filter->status & 0xff;

	return 0;
}
/*
 * Identify filter for JIS.
 *
 * Follows the same escape-sequence state machine as the JIS decoder without
 * producing output: filter->status keeps the selected character set in the
 * high nibble and the sequence position in the low nibble. filter->flag is
 * set to 1 when the input cannot be JIS and is never cleared here.
 *
 * Always returns c.
 */
static int mbfl_filt_ident_jis(int c, mbfl_identify_filter *filter)
{
/* re-entry point: after a broken escape sequence the current character is re-examined in the idle state */
retry:
switch (filter->status & 0xf) {
/* case 0x00: ASCII */
/* case 0x10: X 0201 latin */
/* case 0x20: X 0201 kana */
/* case 0x80: X 0208 */
/* case 0x90: X 0212 */
case 0:
if (c == 0x1b) {
filter->status += 2;
} else if (c == 0x0e) { /* "kana in" */
filter->status = 0x20;
} else if (c == 0x0f) { /* "kana out" */
filter->status = 0;
} else if ((filter->status == 0x80 || filter->status == 0x90) && c > 0x20 && c < 0x7f) { /* kanji first char */
filter->status += 1;
} else if (c >= 0 && c < 0x80) { /* latin, CTLs */
;
} else {
filter->flag = 1; /* bad */
}
break;
/* case 0x81: X 0208 second char */
/* case 0x91: X 0212 second char */
case 1:
filter->status &= ~0xf;
if (c == 0x1b) {
/* ESC in place of the second byte: handle it as an ESC in the idle state */
goto retry;
} else if (c < 0x21 || c > 0x7e) { /* bad */
filter->flag = 1;
}
break;
/* ESC */
case 2:
if (c == 0x24) { /* '$' */
filter->status++;
} else if (c == 0x28) { /* '(' */
filter->status += 3;
} else {
filter->flag = 1; /* bad */
filter->status &= ~0xf;
goto retry;
}
break;
/* ESC $ */
case 3:
if (c == 0x40 || c == 0x42) { /* '@' or 'B' */
filter->status = 0x80;
} else if (c == 0x28) { /* '(' */
filter->status++;
} else {
filter->flag = 1; /* bad */
filter->status &= ~0xf;
goto retry;
}
break;
/* ESC $ ( */
case 4:
if (c == 0x40 || c == 0x42) { /* '@' or 'B' */
filter->status = 0x80;
} else if (c == 0x44) { /* 'D' */
filter->status = 0x90;
} else {
filter->flag = 1; /* bad */
filter->status &= ~0xf;
goto retry;
}
break;
/* ESC ( */
case 5:
if (c == 0x42 || c == 0x48) { /* 'B' or 'H' */
filter->status = 0;
} else if (c == 0x4a) { /* 'J' */
filter->status = 0x10;
} else if (c == 0x49) { /* 'I' */
filter->status = 0x20;
} else {
filter->flag = 1; /* bad */
filter->status &= ~0xf;
goto retry;
}
break;
default:
/* unknown sub-state: reset without flagging */
filter->status = 0;
break;
}
return c;
}
/*
 * Identify filter for ISO-2022-JP.
 *
 * A narrower variant of the JIS identify filter: only the escape sequences
 * ESC $ @, ESC $ B, ESC ( B and ESC ( J are accepted, there is no kana
 * shift-in/shift-out handling and no "ESC $ (" state. filter->flag is set
 * to 1 when the input cannot be ISO-2022-JP and is never cleared here.
 *
 * Always returns c.
 */
static int mbfl_filt_ident_2022jp(int c, mbfl_identify_filter *filter)
{
/* re-entry point: after a broken escape sequence the current character is re-examined in the idle state */
retry:
switch (filter->status & 0xf) {
/* case 0x00: ASCII */
/* case 0x10: X 0201 latin */
/* case 0x80: X 0208 */
case 0:
if (c == 0x1b) {
filter->status += 2;
} else if (filter->status == 0x80 && c > 0x20 && c < 0x7f) { /* kanji first char */
filter->status += 1;
} else if (c >= 0 && c < 0x80) { /* latin, CTLs */
;
} else {
filter->flag = 1; /* bad */
}
break;
/* case 0x81: X 0208 second char */
case 1:
if (c == 0x1b) {
/* ESC in place of the second byte: go straight to the "after ESC" sub-state */
filter->status++;
} else {
filter->status &= ~0xf;
if (c < 0x21 || c > 0x7e) { /* bad */
filter->flag = 1;
}
}
break;
/* ESC */
case 2:
if (c == 0x24) { /* '$' */
filter->status++;
} else if (c == 0x28) { /* '(' */
filter->status += 3;
} else {
filter->flag = 1; /* bad */
filter->status &= ~0xf;
goto retry;
}
break;
/* ESC $ */
case 3:
if (c == 0x40 || c == 0x42) { /* '@' or 'B' */
filter->status = 0x80;
} else {
filter->flag = 1; /* bad */
filter->status &= ~0xf;
goto retry;
}
break;
/* ESC ( */
case 5:
if (c == 0x42) { /* 'B' */
filter->status = 0;
} else if (c == 0x4a) { /* 'J' */
filter->status = 0x10;
} else {
filter->flag = 1; /* bad */
filter->status &= ~0xf;
goto retry;
}
break;
default:
/* unknown sub-state: reset without flagging */
filter->status = 0;
break;
}
return c;
}

View file

@ -0,0 +1,49 @@
/*
* "streamable kanji code filter and converter"
* Copyright (c) 1998-2002 HappySize, Inc. All rights reserved.
*
* LICENSE NOTICES
*
* This file is part of "streamable kanji code filter and converter",
* which is distributed under the terms of GNU Lesser General Public
* License (version 2) as published by the Free Software Foundation.
*
* This software is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with "streamable kanji code filter and converter";
* if not, write to the Free Software Foundation, Inc., 59 Temple Place,
* Suite 330, Boston, MA 02111-1307 USA
*
* The author of this file:
*
*/
/*
* The source code included in this file was separated from mbfilter_ja.c
* by moriyoshi koizumi <moriyoshi@php.net> on 4 dec 2002.
*
*/
/* Public declarations of the JIS / ISO-2022-JP filter module. */
#ifndef MBFL_MBFILTER_JIS_H
#define MBFL_MBFILTER_JIS_H
#include "mbfilter.h"
/* Encoding descriptors. */
extern const mbfl_encoding mbfl_encoding_jis;
extern const mbfl_encoding mbfl_encoding_2022jp;
/* Identify-filter vtables. */
extern const struct mbfl_identify_vtbl vtbl_identify_2022jp;
extern const struct mbfl_identify_vtbl vtbl_identify_jis;
/* Convert-filter vtables, one per direction and encoding. */
extern const struct mbfl_convert_vtbl vtbl_jis_wchar;
extern const struct mbfl_convert_vtbl vtbl_wchar_jis;
extern const struct mbfl_convert_vtbl vtbl_2022jp_wchar;
extern const struct mbfl_convert_vtbl vtbl_wchar_2022jp;
/* Per-character conversion callbacks; no separate ISO-2022-JP decoder is declared. */
int mbfl_filt_conv_jis_wchar(int c, mbfl_convert_filter *filter);
int mbfl_filt_conv_wchar_jis(int c, mbfl_convert_filter *filter);
int mbfl_filt_conv_wchar_2022jp(int c, mbfl_convert_filter *filter);
/* Flush callback declared alongside the encoders. */
int mbfl_filt_conv_any_jis_flush(mbfl_convert_filter *filter);
#endif /* MBFL_MBFILTER_JIS_H */

View file

@ -0,0 +1,149 @@
/*
* "streamable kanji code filter and converter"
* Copyright (c) 1998-2002 HappySize, Inc. All rights reserved.
*
* LICENSE NOTICES
*
* This file is part of "streamable kanji code filter and converter",
* which is distributed under the terms of GNU Lesser General Public
* License (version 2) as published by the Free Software Foundation.
*
* This software is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with "streamable kanji code filter and converter";
* if not, write to the Free Software Foundation, Inc., 59 Temple Place,
* Suite 330, Boston, MA 02111-1307 USA
*
* The author of this part: Den V. Tsopa <tdv@edisoft.ru>
*
*/
/*
* The source code included in this file was separated from mbfilter_ru.c
* by moriyoshi koizumi <moriyoshi@php.net> on 4 dec 2002.
*
*/
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
#include "mbfilter.h"
#include "mbfilter_koi8r.h"
#include "unicode_table_koi8r.h"
static int mbfl_filt_ident_koi8r(int c, mbfl_identify_filter *filter);
/* NULL-terminated list of alternative names for KOI8-R. */
static const char *mbfl_encoding_koi8r_aliases[] = {"KOI8-R", "KOI8R", NULL};
/*
 * Encoding descriptor for KOI8-R.
 * Slots, in order: encoding id, name, a second name string (presumably the
 * MIME charset name), pointer to the alias list, a NULL slot (Shift_JIS puts
 * its byte-length table here), and the encoding-type flags.
 */
const mbfl_encoding mbfl_encoding_koi8r = {
mbfl_no_encoding_koi8r,
"KOI8-R",
"KOI8-R",
(const char *(*)[])&mbfl_encoding_koi8r_aliases,
NULL,
MBFL_ENCTYPE_SBCS
};
/*
 * Identify-filter vtable for KOI8-R.
 * Slots, in order: encoding id, constructor, destructor, per-character
 * identify function.
 */
const struct mbfl_identify_vtbl vtbl_identify_koi8r = {
mbfl_no_encoding_koi8r,
mbfl_filt_ident_common_ctor,
mbfl_filt_ident_common_dtor,
mbfl_filt_ident_koi8r
};
/*
 * Convert-filter vtables for KOI8-R.
 * Slots, in order: source encoding id, destination encoding id, constructor,
 * destructor, per-character filter function, flush function.
 */
/* wchar => KOI8-R */
const struct mbfl_convert_vtbl vtbl_wchar_koi8r = {
mbfl_no_encoding_wchar,
mbfl_no_encoding_koi8r,
mbfl_filt_conv_common_ctor,
mbfl_filt_conv_common_dtor,
mbfl_filt_conv_wchar_koi8r,
mbfl_filt_conv_common_flush
};
/* KOI8-R => wchar */
const struct mbfl_convert_vtbl vtbl_koi8r_wchar = {
mbfl_no_encoding_koi8r,
mbfl_no_encoding_wchar,
mbfl_filt_conv_common_ctor,
mbfl_filt_conv_common_dtor,
mbfl_filt_conv_koi8r_wchar,
mbfl_filt_conv_common_flush
};
/* Evaluate `statement`; if its value is negative, return -1 from the enclosing function. */
#define CK(statement) do { if ((statement) < 0) return (-1); } while (0)
/*
 * koi8r => wchar
 *
 * Values below koi8r_ucs_table_min are passed through unchanged. Values from
 * koi8r_ucs_table_min up to 0xff are looked up in koi8r_ucs_table; an entry
 * <= 0 yields the input value tagged with MBFL_WCSPLANE_KOI8R. Any other
 * value is tagged with MBFL_WCSGROUP_THROUGH.
 *
 * Returns c, or -1 if the output function reports a negative value.
 */
int
mbfl_filt_conv_koi8r_wchar(int c, mbfl_convert_filter *filter)
{
	const int below_table = (c >= 0 && c < koi8r_ucs_table_min);
	const int in_table = !below_table && (c >= koi8r_ucs_table_min && c < 0x100);
	int wc;

	if (below_table) {
		wc = c;
	} else if (in_table) {
		wc = koi8r_ucs_table[c - koi8r_ucs_table_min];
		if (wc <= 0) {
			wc = (c & MBFL_WCSPLANE_MASK) | MBFL_WCSPLANE_KOI8R;
		}
	} else {
		wc = (c & MBFL_WCSGROUP_MASK) | MBFL_WCSGROUP_THROUGH;
	}

	CK((*filter->output_function)(wc, filter->data));

	return c;
}
/*
 * wchar => koi8r
 *
 * Values below 0x80 are written unchanged. Other values are searched for in
 * koi8r_ucs_table, scanning from the last entry down to the first; a match at
 * index n is written as koi8r_ucs_table_min + n. If nothing matches and the
 * value carries the MBFL_WCSPLANE_KOI8R tag, its low bits are written
 * instead. Anything still unmapped (including negative input) goes to
 * mbfl_filt_conv_illegal_output() unless illegal_mode is
 * MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE.
 *
 * Returns c, or -1 as soon as an output call reports a negative value.
 */
int
mbfl_filt_conv_wchar_koi8r(int c, mbfl_convert_filter *filter)
{
	int byte, idx;

	if (c < 0x80) {
		byte = c;
	} else {
		byte = -1;
		/* reverse scan of the table */
		for (idx = koi8r_ucs_table_len-1; idx >= 0; idx--) {
			if (koi8r_ucs_table[idx] == c) {
				byte = koi8r_ucs_table_min + idx;
				break;
			}
		}
		if (byte <= 0 && (c & ~MBFL_WCSPLANE_MASK) == MBFL_WCSPLANE_KOI8R) {
			byte = c & MBFL_WCSPLANE_MASK;
		}
	}

	if (byte < 0) {
		if (filter->illegal_mode != MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE) {
			CK(mbfl_filt_conv_illegal_output(c, filter));
		}
		return c;
	}

	CK((*filter->output_function)(byte, filter->data));

	return c;
}
/*
 * Identify filter for KOI8-R.
 *
 * Sets filter->flag from the current character alone: 0 when c is in
 * 0x80..0xfe, 1 ("not it") otherwise. The flag is overwritten on every call.
 *
 * NOTE(review): plain ASCII and the value 0xff are flagged as "not it" here -
 * confirm against the callers whether that is intended.
 *
 * Always returns c.
 */
static int mbfl_filt_ident_koi8r(int c, mbfl_identify_filter *filter)
{
	const int is_high_byte = (c >= 0x80 && c < 0xff);

	filter->flag = is_high_byte ? 0 : 1; /* 1 = not it */

	return c;
}

View file

@ -0,0 +1,50 @@
/*
* "streamable kanji code filter and converter"
* Copyright (c) 1998-2002 HappySize, Inc. All rights reserved.
*
* LICENSE NOTICES
*
* This file is part of "streamable kanji code filter and converter",
* which is distributed under the terms of GNU Lesser General Public
* License (version 2) as published by the Free Software Foundation.
*
* This software is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with "streamable kanji code filter and converter";
* if not, write to the Free Software Foundation, Inc., 59 Temple Place,
* Suite 330, Boston, MA 02111-1307 USA
*
* The author of this part: Den V. Tsopa <tdv@edisoft.ru>
*
*/
/*
* The source code included in this file was separated from mbfilter_ru.h
* by moriyoshi koizumi <moriyoshi@php.net> on 4 dec 2002.
*
*/
/* Public declarations of the KOI8-R filter module. */
#ifndef MBFL_MBFILTER_KOI8R_H
#define MBFL_MBFILTER_KOI8R_H
#include "mbfilter.h"
/* Encoding descriptor and filter vtables. */
extern const mbfl_encoding mbfl_encoding_koi8r;
extern const struct mbfl_identify_vtbl vtbl_identify_koi8r;
extern const struct mbfl_convert_vtbl vtbl_wchar_koi8r;
extern const struct mbfl_convert_vtbl vtbl_koi8r_wchar;
/* Per-character conversion callbacks. */
int mbfl_filt_conv_koi8r_wchar(int c, mbfl_convert_filter *filter);
int mbfl_filt_conv_wchar_koi8r(int c, mbfl_convert_filter *filter);
#endif /* MBFL_MBFILTER_KOI8R_H */
/*
* Local variables:
* tab-width: 4
* c-basic-offset: 4
* End:
*/

View file

@ -0,0 +1,233 @@
/*
* "streamable kanji code filter and converter"
* Copyright (c) 1998-2002 HappySize, Inc. All rights reserved.
*
* LICENSE NOTICES
*
* This file is part of "streamable kanji code filter and converter",
* which is distributed under the terms of GNU Lesser General Public
* License (version 2) as published by the Free Software Foundation.
*
* This software is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with "streamable kanji code filter and converter";
* if not, write to the Free Software Foundation, Inc., 59 Temple Place,
* Suite 330, Boston, MA 02111-1307 USA
*
* The author of this file:
*
*/
/*
* The source code included in this file was separated from mbfilter.c
* by moriyoshi koizumi <moriyoshi@php.net> on 4 dec 2002.
*
*/
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
#include "mbfilter.h"
#include "mbfilter_qprint.h"
#include "unicode_prop.h"
/* NULL-terminated list of alternative names for Quoted-Printable. */
static const char *mbfl_encoding_qprint_aliases[] = {"qprint", NULL};
/*
 * Encoding descriptor for Quoted-Printable.
 * Slots, in order: encoding id, name, a second name string (presumably the
 * MIME name), pointer to the alias list, a NULL slot (Shift_JIS puts its
 * byte-length table here), and the encoding-type flags.
 */
const mbfl_encoding mbfl_encoding_qprint = {
mbfl_no_encoding_qprint,
"Quoted-Printable",
"Quoted-Printable",
(const char *(*)[])&mbfl_encoding_qprint_aliases,
NULL,
MBFL_ENCTYPE_SBCS
};
/*
 * Convert-filter vtables for Quoted-Printable.
 * Slots, in order: source encoding id, destination encoding id, constructor,
 * destructor, per-character filter function, flush function. Both directions
 * use their own flush function rather than the common one.
 */
/* 8bit => Quoted-Printable (encoder) */
const struct mbfl_convert_vtbl vtbl_8bit_qprint = {
mbfl_no_encoding_8bit,
mbfl_no_encoding_qprint,
mbfl_filt_conv_common_ctor,
mbfl_filt_conv_common_dtor,
mbfl_filt_conv_qprintenc,
mbfl_filt_conv_qprintenc_flush };
/* Quoted-Printable => 8bit (decoder) */
const struct mbfl_convert_vtbl vtbl_qprint_8bit = {
mbfl_no_encoding_qprint,
mbfl_no_encoding_8bit,
mbfl_filt_conv_common_ctor,
mbfl_filt_conv_common_dtor,
mbfl_filt_conv_qprintdec,
mbfl_filt_conv_qprintdec_flush };
/* Evaluate `statement`; if its value is negative, return -1 from the enclosing function. */
#define CK(statement) do { if ((statement) < 0) return (-1); } while (0)
/*
 * any => Quoted-Printable
 *
 * The encoder runs one character behind its input: each call stores `c` in
 * filter->cache and encodes the previously cached character `s`, so that a
 * CR can be examined together with the character that follows it.
 *
 * filter->status layout:
 *   bits 0-7   zero until the first character has been cached
 *   bits 8-15  number of output characters written on the current line
 *   MBFL_QPRINT_STS_MIME_HEADER  when set, line-break handling, soft line
 *              breaks and line-length counting are skipped, and characters
 *              flagged MBFL_CHP_MMHQENC in mbfl_charprop_table are also
 *              hex-encoded
 *
 * Returns c, or -1 as soon as the output function reports a negative value
 * (see CK).
 */
int mbfl_filt_conv_qprintenc(int c, mbfl_convert_filter *filter)
{
int s, n;
switch (filter->status & 0xff) {
case 0:
/* first character: only cache it */
filter->cache = c;
filter->status++;
break;
default:
s = filter->cache;
filter->cache = c;
/* n = characters already written on the current output line */
n = (filter->status & 0xff00) >> 8;
if (s == 0) { /* null */
CK((*filter->output_function)(s, filter->data));
filter->status &= ~0xff00;
break;
}
if ((filter->status & MBFL_QPRINT_STS_MIME_HEADER) == 0) {
/* a lone LF, or a CR not followed by LF, is written as CR LF */
if (s == 0x0a || (s == 0x0d && c != 0x0a)) { /* line feed */
CK((*filter->output_function)(0x0d, filter->data)); /* CR */
CK((*filter->output_function)(0x0a, filter->data)); /* LF */
filter->status &= ~0xff00;
break;
} else if (s == 0x0d) {
/* CR followed by LF: drop the CR; the LF produces CR LF on the next call */
break;
}
}
if ((filter->status & MBFL_QPRINT_STS_MIME_HEADER) == 0 && n >= 72) { /* soft line feed */
CK((*filter->output_function)(0x3d, filter->data)); /* '=' */
CK((*filter->output_function)(0x0d, filter->data)); /* CR */
CK((*filter->output_function)(0x0a, filter->data)); /* LF */
filter->status &= ~0xff00;
}
if (s <= 0 || s >= 0x80 || s == 0x3d /* not ASCII or '=' */
|| ((filter->status & MBFL_QPRINT_STS_MIME_HEADER) != 0 &&
(mbfl_charprop_table[s] & MBFL_CHP_MMHQENC) != 0)) {
/* hex-octet */
CK((*filter->output_function)(0x3d, filter->data)); /* '=' */
/* high nibble as an upper-case hex digit */
n = (s >> 4) & 0xf;
if (n < 10) {
n += 48; /* '0' */
} else {
n += 55; /* 'A' - 10 */
}
CK((*filter->output_function)(n, filter->data));
/* low nibble as an upper-case hex digit */
n = s & 0xf;
if (n < 10) {
n += 48;
} else {
n += 55;
}
CK((*filter->output_function)(n, filter->data));
if ((filter->status & MBFL_QPRINT_STS_MIME_HEADER) == 0) {
/* three output characters were written */
filter->status += 0x300;
}
} else {
CK((*filter->output_function)(s, filter->data));
if ((filter->status & MBFL_QPRINT_STS_MIME_HEADER) == 0) {
/* one output character was written */
filter->status += 0x100;
}
}
break;
}
return c;
}
/*
 * Flush for the Quoted-Printable encoder.
 *
 * Feeds a 0 through the filter's own filter_function (its result is
 * ignored), then clears the low 16 bits of filter->status and the cache.
 *
 * Always returns 0.
 */
int mbfl_filt_conv_qprintenc_flush(mbfl_convert_filter *filter)
{
	/* push the pending character out */
	(*filter->filter_function)(0, filter);

	filter->cache = 0;
	filter->status = filter->status & ~0xffff;

	return 0;
}
/*
 * Value of a hexadecimal digit character, or -1 if `c` is not one.
 * Both upper-case (A-F) and lower-case (a-f) letters are accepted.
 */
static int mbfl_filt_conv_qprintdec_hexval(int c)
{
	if (c >= 0x30 && c <= 0x39) {		/* '0' - '9' */
		return c - 0x30;
	}
	if (c >= 0x41 && c <= 0x46) {		/* 'A' - 'F' */
		return c - 0x41 + 10;
	}
	if (c >= 0x61 && c <= 0x66) {		/* 'a' - 'f' */
		return c - 0x61 + 10;
	}
	return -1;
}

/*
 * Quoted-Printable => any
 *
 * Decodes one input character `c` and writes the result through
 * filter->output_function.
 *
 * filter->status:
 *   0  plain text
 *   1  '=' seen
 *   2  '=' and one hex digit seen; the raw digit character is in filter->cache
 *   3  '=' CR seen (soft line break, an LF may follow)
 *
 * RFC 2045 requires upper-case hex digits in encoded text but allows a
 * robust decoder to accept lower case too; this decoder takes both, so
 * "=c3" and "=C3" give the same octet. Malformed sequences are written
 * literally.
 *
 * Returns c, or -1 as soon as the output function reports a negative value
 * (see CK).
 */
int mbfl_filt_conv_qprintdec(int c, mbfl_convert_filter *filter)
{
	int n, hi, lo;

	switch (filter->status) {
	case 1:
		if (mbfl_filt_conv_qprintdec_hexval(c) >= 0) {	/* first hex digit */
			/* keep the raw character so it can be written back unchanged
			   if the sequence turns out to be malformed */
			filter->cache = c;
			filter->status = 2;
		} else if (c == 0x0d) {	/* soft line feed */
			filter->status = 3;
		} else if (c == 0x0a) {	/* soft line feed */
			filter->status = 0;
		} else {
			CK((*filter->output_function)(0x3d, filter->data));	/* '=' */
			CK((*filter->output_function)(c, filter->data));
			filter->status = 0;
		}
		break;
	case 2:
		hi = mbfl_filt_conv_qprintdec_hexval(filter->cache);
		lo = mbfl_filt_conv_qprintdec_hexval(c);
		if (hi >= 0 && lo >= 0) {
			n = hi * 16 + lo;
		} else {
			/* not a hex pair: write '=' and the cached digit literally,
			   then the current character */
			CK((*filter->output_function)(0x3d, filter->data));	/* '=' */
			CK((*filter->output_function)(filter->cache, filter->data));
			n = c;
		}
		CK((*filter->output_function)(n, filter->data));
		filter->status = 0;
		break;
	case 3:
		if (c != 0x0a) {	/* LF */
			CK((*filter->output_function)(c, filter->data));
		}
		filter->status = 0;
		break;
	default:
		if (c == 0x3d) {	/* '=' */
			filter->status = 1;
		} else {
			CK((*filter->output_function)(c, filter->data));
		}
		break;
	}

	return c;
}
/*
 * Flush for the Quoted-Printable decoder.
 *
 * Resets status and cache, then writes out literally whatever part of an
 * unfinished "=XX" sequence had been consumed: '=' for status 1, '=' plus
 * the cached character for status 2.
 *
 * Returns 0, or -1 as soon as the output function reports a negative value.
 */
int mbfl_filt_conv_qprintdec_flush(mbfl_convert_filter *filter)
{
	const int pending_state = filter->status;
	const int pending_char = filter->cache;

	filter->status = 0;
	filter->cache = 0;

	/* flush fragments */
	switch (pending_state) {
	case 1:
		CK((*filter->output_function)(0x3d, filter->data));	/* '=' */
		break;
	case 2:
		CK((*filter->output_function)(0x3d, filter->data));	/* '=' */
		CK((*filter->output_function)(pending_char, filter->data));
		break;
	default:
		break;
	}

	return 0;
}

View file

@ -0,0 +1,44 @@
/*
* "streamable kanji code filter and converter"
* Copyright (c) 1998-2002 HappySize, Inc. All rights reserved.
*
* LICENSE NOTICES
*
* This file is part of "streamable kanji code filter and converter",
* which is distributed under the terms of GNU Lesser General Public
* License (version 2) as published by the Free Software Foundation.
*
* This software is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with "streamable kanji code filter and converter";
* if not, write to the Free Software Foundation, Inc., 59 Temple Place,
* Suite 330, Boston, MA 02111-1307 USA
*
* The author of this file:
*
*/
/*
* The source code included in this file was separated from mbfilter.c
* by moriyoshi koizumi <moriyoshi@php.net> on 4 dec 2002.
*
*/
/* Public declarations of the Quoted-Printable filter module. */
#ifndef MBFL_MBFILTER_QPRINT_H
#define MBFL_MBFILTER_QPRINT_H
#include "mbfilter.h"
/* Encoding descriptor and convert-filter vtables (encoder and decoder). */
extern const mbfl_encoding mbfl_encoding_qprint;
extern const struct mbfl_convert_vtbl vtbl_8bit_qprint;
extern const struct mbfl_convert_vtbl vtbl_qprint_8bit;
/* Encoder: per-character callback and flush. */
int mbfl_filt_conv_qprintenc(int c, mbfl_convert_filter *filter);
int mbfl_filt_conv_qprintenc_flush(mbfl_convert_filter *filter);
/* Decoder: per-character callback and flush. */
int mbfl_filt_conv_qprintdec(int c, mbfl_convert_filter *filter);
int mbfl_filt_conv_qprintdec_flush(mbfl_convert_filter *filter);
#endif /* MBFL_MBFILTER_QPRINT_H */

View file

@ -4,10 +4,10 @@
+----------------------------------------------------------------------+
| Copyright (c) 2001 The PHP Group |
+----------------------------------------------------------------------+
| This source file is subject to version 3.0 of the PHP license, |
| This source file is subject to version 2.02 of the PHP license, |
| that is bundled with this package in the file LICENSE, and is |
| available through the world-wide-web at the following url: |
| http://www.php.net/license/3_0.txt. |
| available at through the world-wide-web at |
| http://www.php.net/license/2_02.txt. |
| If you did not receive a copy of the PHP license and are unable to |
| obtain it through the world-wide-web, please send a note to |
| license@php.net so we can mail you a copy immediately. |

View file

@ -4,10 +4,10 @@
+----------------------------------------------------------------------+
| Copyright (c) 2001 The PHP Group |
+----------------------------------------------------------------------+
| This source file is subject to version 3.0 of the PHP license, |
| This source file is subject to version 2.02 of the PHP license, |
| that is bundled with this package in the file LICENSE, and is |
| available through the world-wide-web at the following url: |
| http://www.php.net/license/3_0.txt. |
| available at through the world-wide-web at |
| http://www.php.net/license/2_02.txt. |
| If you did not receive a copy of the PHP license and are unable to |
| obtain it through the world-wide-web, please send a note to |
| license@php.net so we can mail you a copy immediately. |

View file

@ -0,0 +1,295 @@
/*
* "streamable kanji code filter and converter"
* Copyright (c) 1998-2002 HappySize, Inc. All rights reserved.
*
* LICENSE NOTICES
*
* This file is part of "streamable kanji code filter and converter",
* which is distributed under the terms of GNU Lesser General Public
* License (version 2) as published by the Free Software Foundation.
*
* This software is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with "streamable kanji code filter and converter";
* if not, write to the Free Software Foundation, Inc., 59 Temple Place,
* Suite 330, Boston, MA 02111-1307 USA
*
* The author of this file:
*
*/
/*
* The source code included in this file was separated from mbfilter_ja.c
* by moriyoshi koizumi <moriyoshi@php.net> on 4 dec 2002.
*
*/
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
#include "mbfilter.h"
#include "mbfilter_sjis.h"
#include "unicode_table_cp932_ext.h"
#include "unicode_table_jis.h"
/* Forward declaration; the definition is the last function in this file. */
static int mbfl_filt_ident_sjis(int c, mbfl_identify_filter *filter);
/*
 * 256-entry table, one entry per byte value 0x00-0xFF (16 rows of 16).
 * Entries for 0x80-0x9F and 0xE0-0xFF are 2, every other entry is 1.
 * It is referenced from mbfl_encoding_sjis; presumably it is indexed with the
 * first byte of a character to obtain that character's length in bytes
 * (the consumer is not visible in this file -- TODO confirm).
 */
static const unsigned char mblen_table_sjis[] = { /* 0x80-0x9f,0xE0-0xFF */
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
};
/* NULL-terminated list of alias names for the Shift_JIS encoding. */
static const char *mbfl_encoding_sjis_aliases[] = {"x-sjis", "SHIFT-JIS", NULL};
/*
 * Encoding descriptor for Shift_JIS. The initializers are positional; the
 * field names are declared in mbfilter.h (not visible here). In order the
 * values are: encoding id, two name strings (the second is presumably the
 * MIME name -- TODO confirm), the alias list, the byte-length table and the
 * encoding-type flag.
 */
const mbfl_encoding mbfl_encoding_sjis = {
mbfl_no_encoding_sjis,
"SJIS",
"Shift_JIS",
(const char *(*)[])&mbfl_encoding_sjis_aliases,
mblen_table_sjis,
MBFL_ENCTYPE_MBCS
};
/*
 * Identification filter table for Shift_JIS: encoding id, the shared
 * constructor/destructor, and the per-byte check function of this file.
 */
const struct mbfl_identify_vtbl vtbl_identify_sjis = {
mbfl_no_encoding_sjis,
mbfl_filt_ident_common_ctor,
mbfl_filt_ident_common_dtor,
mbfl_filt_ident_sjis
};
/*
 * Conversion filter table Shift_JIS => wchar: source id, destination id,
 * shared constructor/destructor, per-byte conversion function, shared flush.
 */
const struct mbfl_convert_vtbl vtbl_sjis_wchar = {
mbfl_no_encoding_sjis,
mbfl_no_encoding_wchar,
mbfl_filt_conv_common_ctor,
mbfl_filt_conv_common_dtor,
mbfl_filt_conv_sjis_wchar,
mbfl_filt_conv_common_flush
};
/* Conversion filter table wchar => Shift_JIS (same layout as above). */
const struct mbfl_convert_vtbl vtbl_wchar_sjis = {
mbfl_no_encoding_wchar,
mbfl_no_encoding_sjis,
mbfl_filt_conv_common_ctor,
mbfl_filt_conv_common_dtor,
mbfl_filt_conv_wchar_sjis,
mbfl_filt_conv_common_flush
};
/*
 * Evaluate `statement`; if its value is negative, return -1 from the
 * enclosing function. Used around every call that emits output.
 */
#define CK(statement) do { if ((statement) < 0) return (-1); } while (0)
/*
 * Convert a JIS row/cell pair (c1, c2) into the two Shift_JIS bytes (s1, s2).
 * s1 and s2 must be modifiable integer lvalues; c1 and c2 are evaluated more
 * than once, so they must be free of side effects.
 *
 * Example: (0x30, 0x21) -> (0x88, 0x9f).
 */
#define SJIS_ENCODE(c1,c2,s1,s2)	\
	do {	\
		s1 = c1;	\
		--s1;	\
		s1 >>= 1;	\
		s1 += ((c1) < 0x5f) ? 0x71 : 0xb1;	\
		s2 = c2;	\
		if (((c1) & 1) == 0) {	\
			s2 += 0x7e;	\
		} else {	\
			if ((c2) < 0x60) {	\
				--s2;	\
			}	\
			s2 += 0x20;	\
		}	\
	} while (0)
/*
 * Convert the two Shift_JIS bytes (c1, c2) into a JIS row/cell pair (s1, s2).
 * s1 and s2 must be modifiable integer lvalues. No validation is done here;
 * the caller is responsible for passing bytes in the valid ranges.
 *
 * Example: (0x88, 0x9f) -> (0x30, 0x21).
 */
#define SJIS_DECODE(c1,c2,s1,s2)	\
	do {	\
		s1 = c1;	\
		s1 -= (s1 < 0xa0) ? 0x81 : 0xc1;	\
		s1 <<= 1;	\
		s1 += 0x21;	\
		s2 = c2;	\
		if (s2 >= 0x9f) {	\
			++s1;	\
			s2 -= 0x7e;	\
		} else {	\
			if (s2 < 0x7f) {	\
				++s2;	\
			}	\
			s2 -= 0x20;	\
		}	\
	} while (0)
/*
 * SJIS => wchar
 *
 * Consumes one input byte per call and emits the decoded wide character
 * through filter->output_function.
 *
 *   c       next input byte
 *   filter  conversion state; status 0 means a first byte is expected,
 *           status 1 means the second byte of a two-byte character is
 *           expected (the first byte is held in filter->cache)
 *
 * Returns c, or -1 as soon as the output function returns a negative value.
 */
int
mbfl_filt_conv_sjis_wchar(int c, mbfl_convert_filter *filter)
{
	int c1, s1, s2, w;

	switch (filter->status) {
	case 0:
		if (c >= 0 && c < 0x80) {	/* latin */
			CK((*filter->output_function)(c, filter->data));
		} else if (c > 0xa0 && c < 0xe0) {	/* kana */
			CK((*filter->output_function)(0xfec0 + c, filter->data));
		} else if (c > 0x80 && c < 0xfd && c != 0xa0) {	/* kanji first char */
			filter->status = 1;
			filter->cache = c;
		} else {
			/* not a valid first byte: pass it on tagged as "through" */
			w = c & MBFL_WCSGROUP_MASK;
			w |= MBFL_WCSGROUP_THROUGH;
			CK((*filter->output_function)(w, filter->data));
		}
		break;

	case 1:		/* kanji second char */
		filter->status = 0;
		c1 = filter->cache;
		/*
		 * A valid second byte is 0x40-0xFC except 0x7F, the same range that
		 * mbfl_filt_ident_sjis accepts. Bytes 0x3A-0x3F used to be let
		 * through here; SJIS_DECODE would then map them onto a cell of the
		 * preceding JIS row and a wrong character was emitted.
		 */
		if (c >= 0x40 && c < 0xfd && c != 0x7f) {
			SJIS_DECODE(c1, c, s1, s2);
			w = (s1 - 0x21)*94 + s2 - 0x21;
			if (w >= 0 && w < jisx0208_ucs_table_size) {
				w = jisx0208_ucs_table[w];
			} else {
				w = 0;
			}
			if (w <= 0) {
				/* no Unicode mapping: keep the code in a private plane */
				if (s1 < 0x7f && s2 < 0x7f) {
					w = (s1 << 8) | s2;
					w &= MBFL_WCSPLANE_MASK;
					w |= MBFL_WCSPLANE_JIS0208;
				} else {
					w = (c1 << 8) | c;
					w &= MBFL_WCSGROUP_MASK;
					w |= MBFL_WCSGROUP_THROUGH;
				}
			}
			CK((*filter->output_function)(w, filter->data));
		} else if ((c >= 0 && c < 0x21) || c == 0x7f) {		/* CTLs */
			CK((*filter->output_function)(c, filter->data));
		} else {
			/* invalid second byte: emit both bytes tagged as "through" */
			w = (c1 << 8) | c;
			w &= MBFL_WCSGROUP_MASK;
			w |= MBFL_WCSGROUP_THROUGH;
			CK((*filter->output_function)(w, filter->data));
		}
		break;

	default:
		filter->status = 0;
		break;
	}

	return c;
}
/*
 * wchar => SJIS
 *
 * Converts one wide character to Shift_JIS and writes the resulting byte(s)
 * through filter->output_function.
 *
 *   c       wide character to convert
 *   filter  conversion filter providing the output function and illegal_mode
 *
 * Returns c, or -1 as soon as an output call returns a negative value.
 */
int
mbfl_filt_conv_wchar_sjis(int c, mbfl_convert_filter *filter)
{
	int plane, row, cell, lead, trail;
	int jis = 0;

	/* Look the code point up in whichever Unicode => JIS table covers it. */
	if (c >= ucs_a1_jis_table_min && c < ucs_a1_jis_table_max) {
		jis = ucs_a1_jis_table[c - ucs_a1_jis_table_min];
	} else if (c >= ucs_a2_jis_table_min && c < ucs_a2_jis_table_max) {
		jis = ucs_a2_jis_table[c - ucs_a2_jis_table_min];
	} else if (c >= ucs_i_jis_table_min && c < ucs_i_jis_table_max) {
		jis = ucs_i_jis_table[c - ucs_i_jis_table_min];
	} else if (c >= ucs_r_jis_table_min && c < ucs_r_jis_table_max) {
		jis = ucs_r_jis_table[c - ucs_r_jis_table_min];
	}

	if (jis > 0) {
		/* table values of 0x8080 and above are treated as unconvertible */
		if (jis >= 0x8080) {
			jis = -1;
		}
	} else {
		plane = c & ~MBFL_WCSPLANE_MASK;
		if (plane == MBFL_WCSPLANE_JIS0208) {
			/* private-plane value that carries a raw JIS code */
			jis = c & MBFL_WCSPLANE_MASK;
		} else {
			switch (c) {
			case 0xa5:		/* YEN SIGN */
				jis = 0x216f;	/* FULLWIDTH YEN SIGN */
				break;
			case 0x203e:	/* OVER LINE */
				jis = 0x2131;	/* FULLWIDTH MACRON */
				break;
			case 0xff3c:	/* FULLWIDTH REVERSE SOLIDUS */
				jis = 0x2140;
				break;
			case 0xff5e:	/* FULLWIDTH TILDE */
				jis = 0x2141;
				break;
			case 0x2225:	/* PARALLEL TO */
				jis = 0x2142;
				break;
			case 0xff0d:	/* FULLWIDTH HYPHEN-MINUS */
				jis = 0x215d;
				break;
			case 0xffe0:	/* FULLWIDTH CENT SIGN */
				jis = 0x2171;
				break;
			case 0xffe1:	/* FULLWIDTH POUND SIGN */
				jis = 0x2172;
				break;
			case 0xffe2:	/* FULLWIDTH NOT SIGN */
				jis = 0x224c;
				break;
			default:
				break;
			}
		}
		if (c == 0) {
			jis = 0;
		} else if (jis <= 0) {
			jis = -1;
		}
	}

	if (jis < 0) {
		/* unconvertible character */
		if (filter->illegal_mode != MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE) {
			CK(mbfl_filt_conv_illegal_output(c, filter));
		}
		return c;
	}

	if (jis < 0x100) {	/* latin or kana */
		CK((*filter->output_function)(jis, filter->data));
		return c;
	}

	/* kanji */
	row = (jis >> 8) & 0xff;
	cell = jis & 0xff;
	SJIS_ENCODE(row, cell, lead, trail);
	CK((*filter->output_function)(lead, filter->data));
	CK((*filter->output_function)(trail, filter->data));

	return c;
}
/*
 * Identification filter for Shift_JIS: inspects one byte per call and sets
 * filter->flag to 1 when the byte cannot occur at this position.
 * filter->status is non-zero while the second byte of a two-byte character
 * is expected. Always returns c.
 */
static int mbfl_filt_ident_sjis(int c, mbfl_identify_filter *filter)
{
	if (filter->status != 0) {
		/* kanji second char: valid range is 0x40-0xfc except 0x7f */
		if (!(c >= 0x40 && c <= 0xfc && c != 0x7f)) {
			filter->flag = 1;	/* bad */
		}
		filter->status = 0;
		return c;
	}

	if ((c >= 0 && c < 0x80) || (c > 0xa0 && c < 0xe0)) {
		/* latin or kana: ok, nothing to record */
		return c;
	}

	if (c > 0x80 && c < 0xf0 && c != 0xa0) {
		filter->status = 1;		/* kanji first char */
	} else {
		filter->flag = 1;		/* bad */
	}

	return c;
}

View file

@ -0,0 +1,47 @@
/*
* "streamable kanji code filter and converter"
* Copyright (c) 1998-2002 HappySize, Inc. All rights reserved.
*
* LICENSE NOTICES
*
* This file is part of "streamable kanji code filter and converter",
* which is distributed under the terms of GNU Lesser General Public
* License (version 2) as published by the Free Software Foundation.
*
* This software is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with "streamable kanji code filter and converter";
* if not, write to the Free Software Foundation, Inc., 59 Temple Place,
* Suite 330, Boston, MA 02111-1307 USA
*
* The author of this file:
*
*/
/*
* The source code included in this file was separated from mbfilter_ja.c
* by moriyoshi koizumi <moriyoshi@php.net> on 4 dec 2002.
*
*/
/* Public declarations for the Shift_JIS encoding filters. */
#ifndef MBFL_MBFILTER_SJIS_H
#define MBFL_MBFILTER_SJIS_H
#include "mbfilter.h"
/* Shift_JIS encoding descriptor. */
extern const mbfl_encoding mbfl_encoding_sjis;
/* Identification filter table for Shift_JIS. */
extern const struct mbfl_identify_vtbl vtbl_identify_sjis;
/* Conversion filter tables: Shift_JIS => wchar and wchar => Shift_JIS. */
extern const struct mbfl_convert_vtbl vtbl_sjis_wchar;
extern const struct mbfl_convert_vtbl vtbl_wchar_sjis;
/*
 * Per-character conversion functions referenced by the tables above.
 * `c` is the next input unit and `filter` carries the conversion state.
 */
int mbfl_filt_conv_sjis_wchar(int c, mbfl_convert_filter *filter);
int mbfl_filt_conv_wchar_sjis(int c, mbfl_convert_filter *filter);
#endif /* MBFL_MBFILTER_SJIS_H */
/*
* charset=UTF-8
*/

View file

@ -0,0 +1,238 @@
/*
* "streamable kanji code filter and converter"
* Copyright (c) 1998-2002 HappySize, Inc. All rights reserved.
*
* LICENSE NOTICES
*
* This file is part of "streamable kanji code filter and converter",
* which is distributed under the terms of GNU Lesser General Public
* License (version 2) as published by the Free Software Foundation.
*
* This software is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with "streamable kanji code filter and converter";
* if not, write to the Free Software Foundation, Inc., 59 Temple Place,
* Suite 330, Boston, MA 02111-1307 USA
*
* The author of this file:
*
*/
/*
* The source code included in this file was separated from mbfilter.c
* by moriyoshi koizumi <moriyoshi@php.net> on 4 dec 2002.
*
*/
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
#include "mbfilter.h"
#include "mbfilter_ucs2.h"
/* NULL-terminated list of alias names for the UCS-2 encoding. */
static const char *mbfl_encoding_ucs2_aliases[] = {"ISO-10646-UCS-2", "UCS2" , "UNICODE", NULL};
/*
 * Encoding descriptors for UCS-2 and its explicit byte-order variants.
 * Initializers are positional (field names are declared in mbfilter.h, not
 * visible here): encoding id, two name strings, alias list or NULL, a table
 * pointer that is NULL for these encodings, and the encoding-type flag.
 * Plain UCS-2 carries the big-endian type flag.
 */
const mbfl_encoding mbfl_encoding_ucs2 = {
mbfl_no_encoding_ucs2,
"UCS-2",
"UCS-2",
(const char *(*)[])&mbfl_encoding_ucs2_aliases,
NULL,
MBFL_ENCTYPE_WCS2BE
};
/* UCS-2, big-endian; no aliases. */
const mbfl_encoding mbfl_encoding_ucs2be = {
mbfl_no_encoding_ucs2be,
"UCS-2BE",
"UCS-2BE",
NULL,
NULL,
MBFL_ENCTYPE_WCS2BE
};
/* UCS-2, little-endian; no aliases. */
const mbfl_encoding mbfl_encoding_ucs2le = {
mbfl_no_encoding_ucs2le,
"UCS-2LE",
"UCS-2LE",
NULL,
NULL,
MBFL_ENCTYPE_WCS2LE
};
/*
 * Conversion filter tables. Each entry lists, in order: source encoding id,
 * destination encoding id, shared constructor, shared destructor, the
 * per-character conversion function, and the shared flush function.
 */
/* UCS-2 => wchar (the reader switches byte order on a swapped BOM). */
const struct mbfl_convert_vtbl vtbl_ucs2_wchar = {
mbfl_no_encoding_ucs2,
mbfl_no_encoding_wchar,
mbfl_filt_conv_common_ctor,
mbfl_filt_conv_common_dtor,
mbfl_filt_conv_ucs2_wchar,
mbfl_filt_conv_common_flush
};
/* wchar => UCS-2; uses the big-endian writer. */
const struct mbfl_convert_vtbl vtbl_wchar_ucs2 = {
mbfl_no_encoding_wchar,
mbfl_no_encoding_ucs2,
mbfl_filt_conv_common_ctor,
mbfl_filt_conv_common_dtor,
mbfl_filt_conv_wchar_ucs2be,
mbfl_filt_conv_common_flush
};
/* UCS-2BE => wchar */
const struct mbfl_convert_vtbl vtbl_ucs2be_wchar = {
mbfl_no_encoding_ucs2be,
mbfl_no_encoding_wchar,
mbfl_filt_conv_common_ctor,
mbfl_filt_conv_common_dtor,
mbfl_filt_conv_ucs2be_wchar,
mbfl_filt_conv_common_flush
};
/* wchar => UCS-2BE */
const struct mbfl_convert_vtbl vtbl_wchar_ucs2be = {
mbfl_no_encoding_wchar,
mbfl_no_encoding_ucs2be,
mbfl_filt_conv_common_ctor,
mbfl_filt_conv_common_dtor,
mbfl_filt_conv_wchar_ucs2be,
mbfl_filt_conv_common_flush
};
/* UCS-2LE => wchar */
const struct mbfl_convert_vtbl vtbl_ucs2le_wchar = {
mbfl_no_encoding_ucs2le,
mbfl_no_encoding_wchar,
mbfl_filt_conv_common_ctor,
mbfl_filt_conv_common_dtor,
mbfl_filt_conv_ucs2le_wchar,
mbfl_filt_conv_common_flush
};
/* wchar => UCS-2LE */
const struct mbfl_convert_vtbl vtbl_wchar_ucs2le = {
mbfl_no_encoding_wchar,
mbfl_no_encoding_ucs2le,
mbfl_filt_conv_common_ctor,
mbfl_filt_conv_common_dtor,
mbfl_filt_conv_wchar_ucs2le,
mbfl_filt_conv_common_flush
};
/*
 * Evaluate `statement`; if its value is negative, return -1 from the
 * enclosing function. Used around every call that emits output.
 */
#define CK(statement) do { if ((statement) < 0) return (-1); } while (0)
/*
 * UCS-2 => wchar
 *
 * Assembles one 16-bit unit from two consecutive input bytes. The low byte of
 * filter->status counts the bytes received so far; the bits in 0xff00 select
 * little-endian interpretation (zero means big-endian). When an assembled
 * unit reads 0xfffe the byte order is flipped and 0xfeff is emitted instead.
 *
 * Returns c, or -1 as soon as the output function returns a negative value.
 */
int mbfl_filt_conv_ucs2_wchar(int c, mbfl_convert_filter *filter)
{
	int little_endian = filter->status & 0xff00;
	int byte = c & 0xff;
	int unit;

	if ((filter->status & 0xff) == 0) {
		/* first byte: park it at its final bit position */
		if (little_endian) {
			unit = byte;
		} else {
			unit = byte << 8;
		}
		filter->cache = unit;
		filter->status++;
		return c;
	}

	/* second byte: complete the unit */
	unit = little_endian ? (byte << 8) : byte;
	unit |= filter->cache;

	if (unit == 0xfffe) {
		/* byte-swapped BOM: switch to the other byte order */
		filter->status = little_endian ? 0 /* big-endian */ : 0x100 /* little-endian */;
		CK((*filter->output_function)(0xfeff, filter->data));
	} else {
		filter->status &= ~0xff;
		CK((*filter->output_function)(unit, filter->data));
	}

	return c;
}
/*
 * UCS-2BE => wchar
 *
 * The first byte of each pair is stored shifted into the high half of
 * filter->cache; the second byte completes the unit, which is then emitted.
 * Returns c, or -1 if the output function returns a negative value.
 */
int mbfl_filt_conv_ucs2be_wchar(int c, mbfl_convert_filter *filter)
{
	int unit;

	if (filter->status != 0) {
		/* low byte: combine with the cached high byte and emit */
		filter->status = 0;
		unit = filter->cache | (c & 0xff);
		CK((*filter->output_function)(unit, filter->data));
		return c;
	}

	/* high byte */
	filter->status = 1;
	filter->cache = (c & 0xff) << 8;

	return c;
}
/*
 * wchar => UCS-2BE
 *
 * Emits c as two bytes, high byte first, when 0 <= c < MBFL_WCSPLANE_UCS2MAX.
 * Any other value is handed to mbfl_filt_conv_illegal_output() unless
 * illegal_mode is MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE.
 * Returns c, or -1 if an output call returns a negative value.
 */
int mbfl_filt_conv_wchar_ucs2be(int c, mbfl_convert_filter *filter)
{
	if (c < 0 || c >= MBFL_WCSPLANE_UCS2MAX) {
		if (filter->illegal_mode != MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE) {
			CK(mbfl_filt_conv_illegal_output(c, filter));
		}
		return c;
	}

	CK((*filter->output_function)((c >> 8) & 0xff, filter->data));
	CK((*filter->output_function)(c & 0xff, filter->data));

	return c;
}
/*
 * UCS-2LE => wchar
 *
 * The first byte of each pair is stored as the low half in filter->cache;
 * the second byte supplies the high half and the unit is emitted.
 * Returns c, or -1 if the output function returns a negative value.
 */
int mbfl_filt_conv_ucs2le_wchar(int c, mbfl_convert_filter *filter)
{
	int unit;

	if (filter->status != 0) {
		/* high byte: combine with the cached low byte and emit */
		filter->status = 0;
		unit = filter->cache | ((c & 0xff) << 8);
		CK((*filter->output_function)(unit, filter->data));
		return c;
	}

	/* low byte */
	filter->status = 1;
	filter->cache = c & 0xff;

	return c;
}
/*
 * wchar => UCS-2LE
 *
 * Emits c as two bytes, low byte first, when 0 <= c < MBFL_WCSPLANE_UCS2MAX.
 * Any other value is handed to mbfl_filt_conv_illegal_output() unless
 * illegal_mode is MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE.
 * Returns c, or -1 if an output call returns a negative value.
 */
int mbfl_filt_conv_wchar_ucs2le(int c, mbfl_convert_filter *filter)
{
	if (c < 0 || c >= MBFL_WCSPLANE_UCS2MAX) {
		if (filter->illegal_mode != MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE) {
			CK(mbfl_filt_conv_illegal_output(c, filter));
		}
		return c;
	}

	CK((*filter->output_function)(c & 0xff, filter->data));
	CK((*filter->output_function)((c >> 8) & 0xff, filter->data));

	return c;
}

View file

@ -0,0 +1,51 @@
/*
* "streamable kanji code filter and converter"
* Copyright (c) 1998-2002 HappySize, Inc. All rights reserved.
*
* LICENSE NOTICES
*
* This file is part of "streamable kanji code filter and converter",
* which is distributed under the terms of GNU Lesser General Public
* License (version 2) as published by the Free Software Foundation.
*
* This software is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with "streamable kanji code filter and converter";
* if not, write to the Free Software Foundation, Inc., 59 Temple Place,
* Suite 330, Boston, MA 02111-1307 USA
*
* The author of this file:
*
*/
/*
* The source code included in this file was separated from mbfilter.c
* by moriyoshi koizumi <moriyoshi@php.net> on 4 dec 2002.
*
*/
/* Public declarations for the UCS-2 / UCS-2BE / UCS-2LE encoding filters. */
#ifndef MBFL_MBFILTER_UCS2_H
#define MBFL_MBFILTER_UCS2_H
#include "mbfilter.h"
/* Encoding descriptors. */
extern const mbfl_encoding mbfl_encoding_ucs2;
extern const mbfl_encoding mbfl_encoding_ucs2be;
extern const mbfl_encoding mbfl_encoding_ucs2le;
/* Conversion filter tables, one per direction and byte order. */
extern const struct mbfl_convert_vtbl vtbl_ucs2_wchar;
extern const struct mbfl_convert_vtbl vtbl_wchar_ucs2;
extern const struct mbfl_convert_vtbl vtbl_ucs2be_wchar;
extern const struct mbfl_convert_vtbl vtbl_wchar_ucs2be;
extern const struct mbfl_convert_vtbl vtbl_ucs2le_wchar;
extern const struct mbfl_convert_vtbl vtbl_wchar_ucs2le;
/*
 * Per-character conversion functions referenced by the tables above.
 * `c` is the next input unit and `filter` carries the conversion state.
 * There is no separate wchar => UCS-2 function declared here.
 */
int mbfl_filt_conv_ucs2_wchar(int c, mbfl_convert_filter *filter);
int mbfl_filt_conv_ucs2be_wchar(int c, mbfl_convert_filter *filter);
int mbfl_filt_conv_wchar_ucs2be(int c, mbfl_convert_filter *filter);
int mbfl_filt_conv_ucs2le_wchar(int c, mbfl_convert_filter *filter);
int mbfl_filt_conv_wchar_ucs2le(int c, mbfl_convert_filter *filter);
#endif /* MBFL_MBFILTER_UCS2_H */

View file

@ -0,0 +1,275 @@
/*
* "streamable kanji code filter and converter"
* Copyright (c) 1998-2002 HappySize, Inc. All rights reserved.
*
* LICENSE NOTICES
*
* This file is part of "streamable kanji code filter and converter",
* which is distributed under the terms of GNU Lesser General Public
* License (version 2) as published by the Free Software Foundation.
*
* This software is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with "streamable kanji code filter and converter";
* if not, write to the Free Software Foundation, Inc., 59 Temple Place,
* Suite 330, Boston, MA 02111-1307 USA
*
* The author of this file:
*
*/
/*
* The source code included in this file was separated from mbfilter.c
* by moriyoshi koizumi <moriyoshi@php.net> on 4 dec 2002.
*
*/
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
#include "mbfilter.h"
#include "mbfilter_ucs4.h"
/* NULL-terminated list of alias names for the UCS-4 encoding. */
static const char *mbfl_encoding_ucs4_aliases[] = {"ISO-10646-UCS-4", "UCS4", NULL};
/*
 * Encoding descriptors for UCS-4 and its explicit byte-order variants.
 * Initializers are positional (field names are declared in mbfilter.h, not
 * visible here): encoding id, two name strings, alias list or NULL, a table
 * pointer that is NULL for these encodings, and the encoding-type flag.
 * Plain UCS-4 carries the big-endian type flag.
 */
const mbfl_encoding mbfl_encoding_ucs4 = {
mbfl_no_encoding_ucs4,
"UCS-4",
"UCS-4",
(const char *(*)[])&mbfl_encoding_ucs4_aliases,
NULL,
MBFL_ENCTYPE_WCS4BE
};
/* UCS-4, big-endian; no aliases. */
const mbfl_encoding mbfl_encoding_ucs4be = {
mbfl_no_encoding_ucs4be,
"UCS-4BE",
"UCS-4BE",
NULL,
NULL,
MBFL_ENCTYPE_WCS4BE
};
/* UCS-4, little-endian; no aliases. */
const mbfl_encoding mbfl_encoding_ucs4le = {
mbfl_no_encoding_ucs4le,
"UCS-4LE",
"UCS-4LE",
NULL,
NULL,
MBFL_ENCTYPE_WCS4LE
};
/*
 * Conversion filter tables. Each entry lists, in order: source encoding id,
 * destination encoding id, shared constructor, shared destructor, the
 * per-character conversion function, and the shared flush function.
 */
/* UCS-4 => wchar (the reader switches byte order on a swapped BOM). */
const struct mbfl_convert_vtbl vtbl_ucs4_wchar = {
mbfl_no_encoding_ucs4,
mbfl_no_encoding_wchar,
mbfl_filt_conv_common_ctor,
mbfl_filt_conv_common_dtor,
mbfl_filt_conv_ucs4_wchar,
mbfl_filt_conv_common_flush
};
/* wchar => UCS-4; uses the big-endian writer. */
const struct mbfl_convert_vtbl vtbl_wchar_ucs4 = {
mbfl_no_encoding_wchar,
mbfl_no_encoding_ucs4,
mbfl_filt_conv_common_ctor,
mbfl_filt_conv_common_dtor,
mbfl_filt_conv_wchar_ucs4be,
mbfl_filt_conv_common_flush
};
/* UCS-4BE => wchar */
const struct mbfl_convert_vtbl vtbl_ucs4be_wchar = {
mbfl_no_encoding_ucs4be,
mbfl_no_encoding_wchar,
mbfl_filt_conv_common_ctor,
mbfl_filt_conv_common_dtor,
mbfl_filt_conv_ucs4be_wchar,
mbfl_filt_conv_common_flush
};
/* wchar => UCS-4BE */
const struct mbfl_convert_vtbl vtbl_wchar_ucs4be = {
mbfl_no_encoding_wchar,
mbfl_no_encoding_ucs4be,
mbfl_filt_conv_common_ctor,
mbfl_filt_conv_common_dtor,
mbfl_filt_conv_wchar_ucs4be,
mbfl_filt_conv_common_flush
};
/* UCS-4LE => wchar */
const struct mbfl_convert_vtbl vtbl_ucs4le_wchar = {
mbfl_no_encoding_ucs4le,
mbfl_no_encoding_wchar,
mbfl_filt_conv_common_ctor,
mbfl_filt_conv_common_dtor,
mbfl_filt_conv_ucs4le_wchar,
mbfl_filt_conv_common_flush
};
/* wchar => UCS-4LE */
const struct mbfl_convert_vtbl vtbl_wchar_ucs4le = {
mbfl_no_encoding_wchar,
mbfl_no_encoding_ucs4le,
mbfl_filt_conv_common_ctor,
mbfl_filt_conv_common_dtor,
mbfl_filt_conv_wchar_ucs4le,
mbfl_filt_conv_common_flush
};
/*
 * Evaluate `statement`; if its value is negative, return -1 from the
 * enclosing function. Used around every call that emits output.
 */
#define CK(statement) do { if ((statement) < 0) return (-1); } while (0)
/*
 * UCS-4 => wchar
 *
 * Assembles one 32-bit value from four consecutive input bytes. The low byte
 * of filter->status counts the bytes received so far; the bits in 0xff00
 * select little-endian interpretation (zero means big-endian). When the
 * assembled value is the byte-swapped BOM (0xfffe0000) the byte order is
 * flipped and 0xfeff is emitted instead.
 *
 * Returns c, or -1 as soon as the output function returns a negative value.
 */
int mbfl_filt_conv_ucs4_wchar(int c, mbfl_convert_filter *filter)
{
	int n, endian;

	endian = filter->status & 0xff00;
	switch (filter->status & 0xff) {
	case 0:
		if (endian) {
			n = c & 0xff;
		} else {
			/*
			 * Shift as unsigned: a byte >= 0x80 shifted left by 24 does not
			 * fit in a signed int, which is undefined behaviour.
			 */
			n = (int)((c & 0xffu) << 24);
		}
		filter->cache = n;
		filter->status++;
		break;
	case 1:
		if (endian) {
			n = (c & 0xff) << 8;
		} else {
			n = (c & 0xff) << 16;
		}
		filter->cache |= n;
		filter->status++;
		break;
	case 2:
		if (endian) {
			n = (c & 0xff) << 16;
		} else {
			n = (c & 0xff) << 8;
		}
		filter->cache |= n;
		filter->status++;
		break;
	default:
		if (endian) {
			/* unsigned shift for the same reason as in case 0 */
			n = (int)((c & 0xffu) << 24);
		} else {
			n = c & 0xff;
		}
		n |= filter->cache;
		if ((n & 0xffff) == 0 && ((n >> 16) & 0xffff) == 0xfffe) {
			/* byte-swapped BOM: switch to the other byte order */
			if (endian) {
				filter->status = 0;		/* big-endian */
			} else {
				filter->status = 0x100;		/* little-endian */
			}
			CK((*filter->output_function)(0xfeff, filter->data));
		} else {
			filter->status &= ~0xff;
			CK((*filter->output_function)(n, filter->data));
		}
		break;
	}

	return c;
}
/*
 * UCS-4BE => wchar
 *
 * Collects four bytes, most significant first, in filter->cache
 * (filter->status counts 0..3) and emits the assembled value on the fourth.
 * Returns c, or -1 if the output function returns a negative value.
 */
int mbfl_filt_conv_ucs4be_wchar(int c, mbfl_convert_filter *filter)
{
	int n;

	if (filter->status == 0) {
		filter->status = 1;
		/*
		 * Shift as unsigned: a byte >= 0x80 shifted left by 24 does not fit
		 * in a signed int, which is undefined behaviour.
		 */
		n = (int)((c & 0xffu) << 24);
		filter->cache = n;
	} else if (filter->status == 1) {
		filter->status = 2;
		n = (c & 0xff) << 16;
		filter->cache |= n;
	} else if (filter->status == 2) {
		filter->status = 3;
		n = (c & 0xff) << 8;
		filter->cache |= n;
	} else {
		filter->status = 0;
		n = (c & 0xff) | filter->cache;
		CK((*filter->output_function)(n, filter->data));
	}

	return c;
}
/*
 * wchar => UCS-4BE
 *
 * Emits c as four bytes, most significant first, when
 * 0 <= c < MBFL_WCSGROUP_UCS4MAX. Any other value is handed to
 * mbfl_filt_conv_illegal_output() unless illegal_mode is
 * MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE.
 * Returns c, or -1 if an output call returns a negative value.
 */
int mbfl_filt_conv_wchar_ucs4be(int c, mbfl_convert_filter *filter)
{
	if (c < 0 || c >= MBFL_WCSGROUP_UCS4MAX) {
		if (filter->illegal_mode != MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE) {
			CK(mbfl_filt_conv_illegal_output(c, filter));
		}
		return c;
	}

	CK((*filter->output_function)((c >> 24) & 0xff, filter->data));
	CK((*filter->output_function)((c >> 16) & 0xff, filter->data));
	CK((*filter->output_function)((c >> 8) & 0xff, filter->data));
	CK((*filter->output_function)(c & 0xff, filter->data));

	return c;
}
/*
 * UCS-4LE => wchar
 *
 * Collects four bytes, least significant first, in filter->cache
 * (filter->status counts 0..3) and emits the assembled value on the fourth.
 * Returns c, or -1 if the output function returns a negative value.
 */
int mbfl_filt_conv_ucs4le_wchar(int c, mbfl_convert_filter *filter)
{
	int n;

	if (filter->status == 0) {
		filter->status = 1;
		n = (c & 0xff);
		filter->cache = n;
	} else if (filter->status == 1) {
		filter->status = 2;
		n = (c & 0xff) << 8;
		filter->cache |= n;
	} else if (filter->status == 2) {
		filter->status = 3;
		n = (c & 0xff) << 16;
		filter->cache |= n;
	} else {
		filter->status = 0;
		/*
		 * Shift as unsigned: a byte >= 0x80 shifted left by 24 does not fit
		 * in a signed int, which is undefined behaviour.
		 */
		n = (int)((c & 0xffu) << 24) | filter->cache;
		CK((*filter->output_function)(n, filter->data));
	}

	return c;
}
/*
 * wchar => UCS-4LE
 *
 * Emits c as four bytes, least significant first, when
 * 0 <= c < MBFL_WCSGROUP_UCS4MAX. Any other value is handed to
 * mbfl_filt_conv_illegal_output() unless illegal_mode is
 * MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE.
 * Returns c, or -1 if an output call returns a negative value.
 */
int mbfl_filt_conv_wchar_ucs4le(int c, mbfl_convert_filter *filter)
{
	if (c < 0 || c >= MBFL_WCSGROUP_UCS4MAX) {
		if (filter->illegal_mode != MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE) {
			CK(mbfl_filt_conv_illegal_output(c, filter));
		}
		return c;
	}

	CK((*filter->output_function)(c & 0xff, filter->data));
	CK((*filter->output_function)((c >> 8) & 0xff, filter->data));
	CK((*filter->output_function)((c >> 16) & 0xff, filter->data));
	CK((*filter->output_function)((c >> 24) & 0xff, filter->data));

	return c;
}

View file

@ -0,0 +1,49 @@
/*
* "streamable kanji code filter and converter"
* Copyright (c) 1998-2002 HappySize, Inc. All rights reserved.
*
* LICENSE NOTICES
*
* This file is part of "streamable kanji code filter and converter",
* which is distributed under the terms of GNU Lesser General Public
* License (version 2) as published by the Free Software Foundation.
*
* This software is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with "streamable kanji code filter and converter";
* if not, write to the Free Software Foundation, Inc., 59 Temple Place,
* Suite 330, Boston, MA 02111-1307 USA
*
* The author of this file:
*
*/
/*
* The source code included in this file was separated from mbfilter.c
* by moriyoshi koizumi <moriyoshi@php.net> on 4 dec 2002.
*
*/
#ifndef MBFL_MBFILTER_UCS4_H
#define MBFL_MBFILTER_UCS4_H
extern const mbfl_encoding mbfl_encoding_ucs4;
extern const mbfl_encoding mbfl_encoding_ucs4le;
extern const mbfl_encoding mbfl_encoding_ucs4be;
extern const struct mbfl_convert_vtbl vtbl_ucs4_wchar;
extern const struct mbfl_convert_vtbl vtbl_wchar_ucs4;
extern const struct mbfl_convert_vtbl vtbl_ucs4be_wchar;
extern const struct mbfl_convert_vtbl vtbl_wchar_ucs4be;
extern const struct mbfl_convert_vtbl vtbl_ucs4le_wchar;
extern const struct mbfl_convert_vtbl vtbl_wchar_ucs4le;
int mbfl_filt_conv_ucs4_wchar(int c, mbfl_convert_filter *filter);
int mbfl_filt_conv_ucs4be_wchar(int c, mbfl_convert_filter *filter);
int mbfl_filt_conv_wchar_ucs4be(int c, mbfl_convert_filter *filter);
int mbfl_filt_conv_ucs4le_wchar(int c, mbfl_convert_filter *filter);
int mbfl_filt_conv_wchar_ucs4le(int c, mbfl_convert_filter *filter);
#endif /* MBFL_MBFILTER_UCS4_H */

View file

@ -0,0 +1,266 @@
/*
* "streamable kanji code filter and converter"
* Copyright (c) 1998-2002 HappySize, Inc. All rights reserved.
*
* LICENSE NOTICES
*
* This file is part of "streamable kanji code filter and converter",
* which is distributed under the terms of GNU Lesser General Public
* License (version 2) as published by the Free Software Foundation.
*
* This software is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with "streamable kanji code filter and converter";
* if not, write to the Free Software Foundation, Inc., 59 Temple Place,
* Suite 330, Boston, MA 02111-1307 USA
*
* The author of this file:
*
*/
/*
* The source code included in this file was separated from mbfilter_kr.c
* by moriyoshi koizumi <moriyoshi@php.net> on 4 dec 2002.
*
*/
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
#include "mbfilter.h"
#include "mbfilter_uhc.h"
#include "unicode_table_uhc.h"
/* Forward declaration; the definition is the last function in this file. */
static int mbfl_filt_ident_uhc(int c, mbfl_identify_filter *filter);
/*
 * 256-entry table, one entry per byte value 0x00-0xFF (16 rows of 16).
 * Entries for 0x81-0xFE are 2, every other entry (0x00-0x80 and 0xFF) is 1.
 * It is referenced from mbfl_encoding_uhc; presumably it is indexed with the
 * first byte of a character to obtain that character's length in bytes
 * (the consumer is not visible in this file -- TODO confirm).
 */
static const unsigned char mblen_table_uhc[] = { /* 0x81-0xFE */
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1
};
/* NULL-terminated list of alias names for the UHC encoding. */
static const char *mbfl_encoding_uhc_aliases[] = {"CP949", NULL};
/*
 * Encoding descriptor for UHC. The initializers are positional; the field
 * names are declared in mbfilter.h (not visible here). In order the values
 * are: encoding id, two name strings, the alias list, the byte-length table
 * and the encoding-type flag.
 */
const mbfl_encoding mbfl_encoding_uhc = {
mbfl_no_encoding_uhc,
"UHC",
"UHC",
(const char *(*)[])&mbfl_encoding_uhc_aliases,
mblen_table_uhc,
MBFL_ENCTYPE_MBCS
};
/*
 * Identification filter table for UHC: encoding id, the shared
 * constructor/destructor, and the per-byte check function of this file.
 */
const struct mbfl_identify_vtbl vtbl_identify_uhc = {
mbfl_no_encoding_uhc,
mbfl_filt_ident_common_ctor,
mbfl_filt_ident_common_dtor,
mbfl_filt_ident_uhc
};
/*
 * Conversion filter table UHC => wchar: source id, destination id, shared
 * constructor/destructor, per-byte conversion function, shared flush.
 */
const struct mbfl_convert_vtbl vtbl_uhc_wchar = {
mbfl_no_encoding_uhc,
mbfl_no_encoding_wchar,
mbfl_filt_conv_common_ctor,
mbfl_filt_conv_common_dtor,
mbfl_filt_conv_uhc_wchar,
mbfl_filt_conv_common_flush
};
/* Conversion filter table wchar => UHC (same layout as above). */
const struct mbfl_convert_vtbl vtbl_wchar_uhc = {
mbfl_no_encoding_wchar,
mbfl_no_encoding_uhc,
mbfl_filt_conv_common_ctor,
mbfl_filt_conv_common_dtor,
mbfl_filt_conv_wchar_uhc,
mbfl_filt_conv_common_flush
};
/*
 * Evaluate `statement`; if its value is negative, return -1 from the
 * enclosing function. Used around every call that emits output.
 */
#define CK(statement) do { if ((statement) < 0) return (-1); } while (0)
/*
 * UHC => wchar
 *
 * Consumes one input byte per call and emits the decoded wide character
 * through filter->output_function.
 *
 *   c       next input byte
 *   filter  conversion state; status 0 means a first byte is expected,
 *           status 1 means the second byte of a two-byte character is
 *           expected (the lead byte is held in filter->cache)
 *
 * Returns c, or -1 as soon as the output function returns a negative value.
 */
int
mbfl_filt_conv_uhc_wchar(int c, mbfl_convert_filter *filter)
{
	int c1, w = 0, flag = 0;

	switch (filter->status) {
	case 0:
		if (c >= 0 && c < 0x80) {	/* latin */
			CK((*filter->output_function)(c, filter->data));
		} else if (c > 0x80 && c < 0xff && c != 0xc9) {	/* dbcs lead byte */
			filter->status = 1;
			filter->cache = c;
		} else {
			/* not a valid first byte: pass it on tagged as "through" */
			w = c & MBFL_WCSGROUP_MASK;
			w |= MBFL_WCSGROUP_THROUGH;
			CK((*filter->output_function)(w, filter->data));
		}
		break;

	case 1:		/* dbcs second byte */
		filter->status = 0;
		c1 = filter->cache;

		/*
		 * The tables are laid out in rows of 190 cells (trail bytes
		 * 0x41-0xfe) or 94 cells (trail bytes 0xa1-0xfe). The trail byte is
		 * range-checked before the index is computed; otherwise an invalid
		 * trail byte would land in a neighbouring row and produce a wrong
		 * character. An invalid trail byte leaves flag at 0 and is handled
		 * by the CTL / "through" path below.
		 */
		if (c1 >= 0x81 && c1 <= 0xa0) {
			if (c >= 0x41 && c <= 0xfe) {
				w = (c1 - 0x81)*190 + (c - 0x41);
				if (w >= 0 && w < uhc1_ucs_table_size) {
					flag = 1;
					w = uhc1_ucs_table[w];
				} else {
					w = 0;
				}
			}
		} else if (c1 >= 0xa1 && c1 <= 0xc6) {
			if (c >= 0x41 && c <= 0xfe) {
				w = (c1 - 0xa1)*190 + (c - 0x41);
				if (w >= 0 && w < uhc2_ucs_table_size) {
					flag = 2;
					w = uhc2_ucs_table[w];
				} else {
					w = 0;
				}
			}
		} else if (c1 >= 0xc7 && c1 <= 0xfe) {
			if (c >= 0xa1 && c <= 0xfe) {
				w = (c1 - 0xc7)*94 + (c - 0xa1);
				if (w >= 0 && w < uhc3_ucs_table_size) {
					flag = 3;
					w = uhc3_ucs_table[w];
				} else {
					w = 0;
				}
			}
		}

		if (flag > 0) {
			if (w <= 0) {
				/* no Unicode mapping: keep the code in a private plane */
				w = (c1 << 8) | c;
				w &= MBFL_WCSPLANE_MASK;
				w |= MBFL_WCSPLANE_UHC;
			}
			CK((*filter->output_function)(w, filter->data));
		} else {
			if ((c >= 0 && c < 0x21) || c == 0x7f) {		/* CTLs */
				CK((*filter->output_function)(c, filter->data));
			} else {
				/* invalid pair: emit both bytes tagged as "through" */
				w = (c1 << 8) | c;
				w &= MBFL_WCSGROUP_MASK;
				w |= MBFL_WCSGROUP_THROUGH;
				CK((*filter->output_function)(w, filter->data));
			}
		}
		break;

	default:
		filter->status = 0;
		break;
	}

	return c;
}
/*
 * wchar => UHC
 *
 * Converts one wide character to UHC and writes the resulting byte(s)
 * through filter->output_function.
 *
 *   c       wide character to convert
 *   filter  conversion filter providing the output function and illegal_mode
 *
 * Returns c, or -1 as soon as an output call returns a negative value.
 */
int
mbfl_filt_conv_wchar_uhc(int c, mbfl_convert_filter *filter)
{
	int plane;
	int code = 0;

	/* Look the code point up in whichever Unicode => UHC table covers it. */
	if (c >= ucs_a1_uhc_table_min && c < ucs_a1_uhc_table_max) {
		code = ucs_a1_uhc_table[c - ucs_a1_uhc_table_min];
	} else if (c >= ucs_a2_uhc_table_min && c < ucs_a2_uhc_table_max) {
		code = ucs_a2_uhc_table[c - ucs_a2_uhc_table_min];
	} else if (c >= ucs_a3_uhc_table_min && c < ucs_a3_uhc_table_max) {
		code = ucs_a3_uhc_table[c - ucs_a3_uhc_table_min];
	} else if (c >= ucs_i_uhc_table_min && c < ucs_i_uhc_table_max) {
		code = ucs_i_uhc_table[c - ucs_i_uhc_table_min];
	} else if (c >= ucs_s_uhc_table_min && c < ucs_s_uhc_table_max) {
		code = ucs_s_uhc_table[c - ucs_s_uhc_table_min];
	} else if (c >= ucs_r1_uhc_table_min && c < ucs_r1_uhc_table_max) {
		code = ucs_r1_uhc_table[c - ucs_r1_uhc_table_min];
	} else if (c >= ucs_r2_uhc_table_min && c < ucs_r2_uhc_table_max) {
		code = ucs_r2_uhc_table[c - ucs_r2_uhc_table_min];
	}

	if (code <= 0) {
		/* private-plane value that carries a raw UHC code */
		plane = c & ~MBFL_WCSPLANE_MASK;
		if (plane == MBFL_WCSPLANE_UHC) {
			code = c & MBFL_WCSPLANE_MASK;
		}
		if (c == 0) {
			code = 0;
		} else if (code <= 0) {
			code = -1;
		}
	}

	if (code < 0) {
		/* unconvertible character */
		if (filter->illegal_mode != MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE) {
			CK(mbfl_filt_conv_illegal_output(c, filter));
		}
		return c;
	}

	if (code < 0x80) {	/* latin */
		CK((*filter->output_function)(code, filter->data));
		return c;
	}

	/* two-byte character: high byte first */
	CK((*filter->output_function)((code >> 8) & 0xff, filter->data));
	CK((*filter->output_function)(code & 0xff, filter->data));

	return c;
}
/*
 * Identification filter for UHC: inspects one byte per call and sets
 * filter->flag to 1 when the byte cannot occur at this position.
 *
 * filter->status is 0 while a first byte is expected; a lead byte sets it to
 * 1, 2 or 3 according to its range, which selects the set of trail bytes
 * accepted on the next call. Always returns c.
 */
static int mbfl_filt_ident_uhc(int c, mbfl_identify_filter *filter)
{
	switch (filter->status) {
	case 0:		/* latin */
		if (c >= 0 && c < 0x80) {	/* ok */
			;
		} else if (c >= 0x81 && c <= 0xa0) {	/* dbcs first char */
			filter->status = 1;
		} else if (c >= 0xa1 && c <= 0xc6) {	/* dbcs first char */
			filter->status = 2;
		} else if (c >= 0xc7 && c <= 0xfe) {	/* dbcs first char */
			filter->status = 3;
		} else {	/* bad */
			filter->flag = 1;
		}
		/*
		 * This break was missing: every first byte fell through into the
		 * trail-byte check below, which flagged ASCII bytes under 0x41 as
		 * bad and reset the lead-byte state immediately.
		 */
		break;

	case 1:
	case 2:		/* trail byte after a lead byte in 0x81-0xc6 */
		if (c < 0x41 || (c > 0x5a && c < 0x61)
			|| (c > 0x7a && c < 0x81) || c > 0xfe) {	/* bad */
			filter->flag = 1;
		}
		filter->status = 0;
		break;

	case 3:		/* trail byte after a lead byte in 0xc7-0xfe */
		if (c < 0xa1 || c > 0xfe) {	/* bad */
			filter->flag = 1;
		}
		filter->status = 0;
		break;

	default:
		filter->status = 0;
		break;
	}

	return c;
}

View file

@ -0,0 +1,43 @@
/*
* "streamable kanji code filter and converter"
* Copyright (c) 1998-2002 HappySize, Inc. All rights reserved.
*
* LICENSE NOTICES
*
* This file is part of "streamable kanji code filter and converter",
* which is distributed under the terms of GNU Lesser General Public
* License (version 2) as published by the Free Software Foundation.
*
* This software is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with "streamable kanji code filter and converter";
* if not, write to the Free Software Foundation, Inc., 59 Temple Place,
* Suite 330, Boston, MA 02111-1307 USA
*
* The author of this file:
*
*/
/*
* The source code included in this file was separated from mbfilter_kr.h
* by moriyoshi koizumi <moriyoshi@php.net> on 4 dec 2002.
*
*/
/* Public declarations for the UHC encoding filters. */
#ifndef MBFL_MBFILTER_UHC_H
#define MBFL_MBFILTER_UHC_H
#include "mbfilter.h"
/* UHC encoding descriptor. */
extern const mbfl_encoding mbfl_encoding_uhc;
/* Identification filter table for UHC. */
extern const struct mbfl_identify_vtbl vtbl_identify_uhc;
/* Conversion filter tables: UHC => wchar and wchar => UHC. */
extern const struct mbfl_convert_vtbl vtbl_uhc_wchar;
extern const struct mbfl_convert_vtbl vtbl_wchar_uhc;
/*
 * Per-character conversion functions referenced by the tables above.
 * `c` is the next input unit and `filter` carries the conversion state.
 */
int mbfl_filt_conv_uhc_wchar(int c, mbfl_convert_filter *filter);
int mbfl_filt_conv_wchar_uhc(int c, mbfl_convert_filter *filter);
#endif /* MBFL_MBFILTER_UHC_H */

View file

@ -0,0 +1,309 @@
/*
* "streamable kanji code filter and converter"
* Copyright (c) 1998-2002 HappySize, Inc. All rights reserved.
*
* LICENSE NOTICES
*
* This file is part of "streamable kanji code filter and converter",
* which is distributed under the terms of GNU Lesser General Public
* License (version 2) as published by the Free Software Foundation.
*
* This software is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with "streamable kanji code filter and converter";
* if not, write to the Free Software Foundation, Inc., 59 Temple Place,
* Suite 330, Boston, MA 02111-1307 USA
*
* The author of this file:
*
*/
/*
* The source code included in this file was separated from mbfilter.c
* by moriyoshi koizumi <moriyoshi@php.net> on 4 dec 2002.
*
*/
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
#include "mbfilter.h"
#include "mbfilter_utf16.h"
static const char *mbfl_encoding_utf16_aliases[] = {"utf16", NULL};
const mbfl_encoding mbfl_encoding_utf16 = {
mbfl_no_encoding_utf16,
"UTF-16",
"UTF-16",
(const char *(*)[])&mbfl_encoding_utf16_aliases,
NULL,
MBFL_ENCTYPE_MWC2BE
};
/*
 * Encoding descriptor for "UTF-16BE".
 * No alias list is registered (first NULL); flagged MBFL_ENCTYPE_MWC2BE.
 * NOTE(review): field meanings are positional -- confirm against the
 * mbfl_encoding definition.
 */
const mbfl_encoding mbfl_encoding_utf16be = {
mbfl_no_encoding_utf16be,
"UTF-16BE",
"UTF-16BE",
NULL,
NULL,
MBFL_ENCTYPE_MWC2BE
};
/*
 * Encoding descriptor for "UTF-16LE".
 * No alias list is registered (first NULL); flagged MBFL_ENCTYPE_MWC2LE.
 * NOTE(review): field meanings are positional -- confirm against the
 * mbfl_encoding definition.
 */
const mbfl_encoding mbfl_encoding_utf16le = {
mbfl_no_encoding_utf16le,
"UTF-16LE",
"UTF-16LE",
NULL,
NULL,
MBFL_ENCTYPE_MWC2LE
};
/*
 * Conversion table UTF-16 -> wchar.
 * Uses the common ctor/dtor/flush helpers and the byte-order-tracking
 * decoder mbfl_filt_conv_utf16_wchar as the filter function.
 */
const struct mbfl_convert_vtbl vtbl_utf16_wchar = {
mbfl_no_encoding_utf16,
mbfl_no_encoding_wchar,
mbfl_filt_conv_common_ctor,
mbfl_filt_conv_common_dtor,
mbfl_filt_conv_utf16_wchar,
mbfl_filt_conv_common_flush
};
/*
 * Conversion table wchar -> UTF-16.
 * The filter function is the big-endian encoder
 * (mbfl_filt_conv_wchar_utf16be), so generic UTF-16 output is written
 * high byte first; that encoder does not emit a byte order mark itself.
 */
const struct mbfl_convert_vtbl vtbl_wchar_utf16 = {
mbfl_no_encoding_wchar,
mbfl_no_encoding_utf16,
mbfl_filt_conv_common_ctor,
mbfl_filt_conv_common_dtor,
mbfl_filt_conv_wchar_utf16be,
mbfl_filt_conv_common_flush
};
/*
 * Conversion table UTF-16BE -> wchar.
 * Uses the common ctor/dtor/flush helpers with
 * mbfl_filt_conv_utf16be_wchar as the filter function.
 */
const struct mbfl_convert_vtbl vtbl_utf16be_wchar = {
mbfl_no_encoding_utf16be,
mbfl_no_encoding_wchar,
mbfl_filt_conv_common_ctor,
mbfl_filt_conv_common_dtor,
mbfl_filt_conv_utf16be_wchar,
mbfl_filt_conv_common_flush
};
/*
 * Conversion table wchar -> UTF-16BE.
 * Uses the common ctor/dtor/flush helpers with
 * mbfl_filt_conv_wchar_utf16be as the filter function.
 */
const struct mbfl_convert_vtbl vtbl_wchar_utf16be = {
mbfl_no_encoding_wchar,
mbfl_no_encoding_utf16be,
mbfl_filt_conv_common_ctor,
mbfl_filt_conv_common_dtor,
mbfl_filt_conv_wchar_utf16be,
mbfl_filt_conv_common_flush
};
/*
 * Conversion table UTF-16LE -> wchar.
 * Uses the common ctor/dtor/flush helpers with
 * mbfl_filt_conv_utf16le_wchar as the filter function.
 */
const struct mbfl_convert_vtbl vtbl_utf16le_wchar = {
mbfl_no_encoding_utf16le,
mbfl_no_encoding_wchar,
mbfl_filt_conv_common_ctor,
mbfl_filt_conv_common_dtor,
mbfl_filt_conv_utf16le_wchar,
mbfl_filt_conv_common_flush
};
/*
 * Conversion table wchar -> UTF-16LE.
 * Uses the common ctor/dtor/flush helpers with
 * mbfl_filt_conv_wchar_utf16le as the filter function.
 */
const struct mbfl_convert_vtbl vtbl_wchar_utf16le = {
mbfl_no_encoding_wchar,
mbfl_no_encoding_utf16le,
mbfl_filt_conv_common_ctor,
mbfl_filt_conv_common_dtor,
mbfl_filt_conv_wchar_utf16le,
mbfl_filt_conv_common_flush
};
/*
 * Evaluate `statement`; if its value is negative, make the enclosing
 * function return -1 immediately. Only usable inside functions that
 * return int.
 */
#define CK(statement) do { if ((statement) < 0) return (-1); } while (0)
/*
 * UTF-16 => wchar
 *
 * Decodes a UTF-16 byte stream one byte at a time, switching byte order
 * when a byte-swapped byte order mark is seen.
 *
 * State kept in the filter:
 *   filter->status & 0x00ff : 0 while waiting for the first byte of a
 *                             16-bit unit, non-zero while waiting for the
 *                             second byte.
 *   filter->status & 0xff00 : zero for big-endian input (the initial
 *                             state), non-zero for little-endian input.
 *   filter->cache  & 0xffff : the first byte of the unit being assembled,
 *                             already shifted into its final position.
 *   filter->cache  & 0xfff0000 : payload of a pending high surrogate.
 *
 * c      : next input byte (only the low 8 bits are used).
 * filter : conversion filter holding the state and the output callback.
 *
 * Returns c, or -1 as soon as the output callback reports a negative
 * value.
 */
int mbfl_filt_conv_utf16_wchar(int c, mbfl_convert_filter *filter)
{
	int n, endian;

	endian = filter->status & 0xff00;
	switch (filter->status & 0xff) {
	case 0:
		/* first byte of a unit: park it in the cache */
		if (endian) {
			n = c & 0xff;
		} else {
			n = (c & 0xff) << 8;
		}
		filter->cache |= n;
		filter->status++;
		break;
	default:
		/* second byte of a unit: combine with the cached first byte */
		if (endian) {
			n = (c & 0xff) << 8;
		} else {
			n = c & 0xff;
		}
		n |= filter->cache & 0xffff;
		filter->status &= ~0xff;
		if (n == 0xfffe) {
			/* byte-swapped BOM: the stream uses the other byte order */
			if (endian) {
				filter->status = 0;		/* big-endian */
			} else {
				filter->status = 0x100;		/* little-endian */
			}
			/*
			 * Drop the cached first byte of the BOM. Without this the
			 * stale byte is ORed into the next unit and corrupts the
			 * first character that follows the BOM.
			 */
			filter->cache = 0;
			CK((*filter->output_function)(0xfeff, filter->data));
		} else if (n >= 0xd800 && n < 0xdc00) {
			/* high surrogate: keep its payload (plus the 0x10000 offset,
			 * pre-shifted) until the low surrogate arrives */
			filter->cache = ((n & 0x3ff) << 16) + 0x400000;
		} else if (n >= 0xdc00 && n < 0xe000) {
			/* low surrogate: merge with the pending high surrogate */
			n &= 0x3ff;
			n |= (filter->cache & 0xfff0000) >> 6;
			filter->cache = 0;
			if (n >= MBFL_WCSPLANE_SUPMIN && n < MBFL_WCSPLANE_SUPMAX) {
				CK((*filter->output_function)(n, filter->data));
			} else {		/* illegal character */
				n &= MBFL_WCSGROUP_MASK;
				n |= MBFL_WCSGROUP_THROUGH;
				CK((*filter->output_function)(n, filter->data));
			}
		} else {
			/* ordinary BMP unit */
			filter->cache = 0;
			CK((*filter->output_function)(n, filter->data));
		}
		break;
	}
	return c;
}
/*
 * UTF-16BE => wchar
 *
 * Consumes one input byte per call. filter->status is 0 while the high
 * byte of a 16-bit unit is expected and 1 while the low byte is expected.
 * filter->cache holds the pending high byte (bits 8-15) and, across
 * units, the payload of a pending high surrogate (bits 16-27).
 *
 * Returns c, or -1 when the output callback reports a negative value.
 */
int mbfl_filt_conv_utf16be_wchar(int c, mbfl_convert_filter *filter)
{
	int unit;

	if (filter->status == 0) {
		/* high byte: remember it and wait for the low byte */
		filter->cache |= (c & 0xff) << 8;
		filter->status = 1;
		return c;
	}

	/* low byte: the 16-bit unit is now complete */
	unit = (filter->cache & 0xff00) | (c & 0xff);
	filter->status = 0;

	if (unit >= 0xd800 && unit < 0xdc00) {
		/* high surrogate: stash its payload until the low one arrives */
		filter->cache = ((unit & 0x3ff) << 16) + 0x400000;
		return c;
	}

	if (!(unit >= 0xdc00 && unit < 0xe000)) {
		/* plain BMP unit */
		filter->cache = 0;
		CK((*filter->output_function)(unit, filter->data));
		return c;
	}

	/* low surrogate: merge with whatever high surrogate is pending */
	unit &= 0x3ff;
	unit |= (filter->cache & 0xfff0000) >> 6;
	filter->cache = 0;
	if (!(unit >= MBFL_WCSPLANE_SUPMIN && unit < MBFL_WCSPLANE_SUPMAX)) {
		/* illegal character: tag it as a pass-through value */
		unit &= MBFL_WCSGROUP_MASK;
		unit |= MBFL_WCSGROUP_THROUGH;
	}
	CK((*filter->output_function)(unit, filter->data));
	return c;
}
/*
 * wchar => UTF-16BE
 *
 * Writes the code point c to the output callback as big-endian bytes:
 * two bytes for values below MBFL_WCSPLANE_UCS2MAX, a four-byte surrogate
 * pair for values in [MBFL_WCSPLANE_SUPMIN, MBFL_WCSPLANE_SUPMAX).
 * Anything else goes to mbfl_filt_conv_illegal_output unless the filter's
 * illegal_mode is MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE.
 *
 * Returns c, or -1 when a callback reports a negative value.
 */
int mbfl_filt_conv_wchar_utf16be(int c, mbfl_convert_filter *filter)
{
	int lead, trail;

	if (c >= 0 && c < MBFL_WCSPLANE_UCS2MAX) {
		/* single 16-bit unit, high byte first */
		CK((*filter->output_function)((c >> 8) & 0xff, filter->data));
		CK((*filter->output_function)(c & 0xff, filter->data));
		return c;
	}

	if (c >= MBFL_WCSPLANE_SUPMIN && c < MBFL_WCSPLANE_SUPMAX) {
		/* (c >> 10) - 0x40 is the top ten bits of (c - 0x10000) */
		lead = ((c >> 10) - 0x40) | 0xd800;
		trail = (c & 0x3ff) | 0xdc00;
		CK((*filter->output_function)((lead >> 8) & 0xff, filter->data));
		CK((*filter->output_function)(lead & 0xff, filter->data));
		CK((*filter->output_function)((trail >> 8) & 0xff, filter->data));
		CK((*filter->output_function)(trail & 0xff, filter->data));
		return c;
	}

	/* not representable in UTF-16 */
	if (filter->illegal_mode != MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE) {
		CK(mbfl_filt_conv_illegal_output(c, filter));
	}
	return c;
}
/*
 * UTF-16LE => wchar
 *
 * Consumes one input byte per call. filter->status is 0 while the low
 * byte of a 16-bit unit is expected and 1 while the high byte is
 * expected. filter->cache holds the pending low byte (bits 0-7) and,
 * across units, the payload of a pending high surrogate (bits 16-27).
 *
 * Returns c, or -1 when the output callback reports a negative value.
 */
int mbfl_filt_conv_utf16le_wchar(int c, mbfl_convert_filter *filter)
{
	int unit;

	if (filter->status == 0) {
		/* low byte: remember it and wait for the high byte */
		filter->cache |= c & 0xff;
		filter->status = 1;
		return c;
	}

	/* high byte: the 16-bit unit is now complete */
	unit = (filter->cache & 0xff) | ((c & 0xff) << 8);
	filter->status = 0;

	if (unit >= 0xd800 && unit < 0xdc00) {
		/* high surrogate: stash its payload until the low one arrives */
		filter->cache = ((unit & 0x3ff) << 16) + 0x400000;
		return c;
	}

	if (!(unit >= 0xdc00 && unit < 0xe000)) {
		/* plain BMP unit */
		filter->cache = 0;
		CK((*filter->output_function)(unit, filter->data));
		return c;
	}

	/* low surrogate: merge with whatever high surrogate is pending */
	unit &= 0x3ff;
	unit |= (filter->cache & 0xfff0000) >> 6;
	filter->cache = 0;
	if (!(unit >= MBFL_WCSPLANE_SUPMIN && unit < MBFL_WCSPLANE_SUPMAX)) {
		/* illegal character: tag it as a pass-through value */
		unit &= MBFL_WCSGROUP_MASK;
		unit |= MBFL_WCSGROUP_THROUGH;
	}
	CK((*filter->output_function)(unit, filter->data));
	return c;
}
/*
 * wchar => UTF-16LE
 *
 * Writes the code point c to the output callback as little-endian bytes:
 * two bytes for values below MBFL_WCSPLANE_UCS2MAX, a four-byte surrogate
 * pair for values in [MBFL_WCSPLANE_SUPMIN, MBFL_WCSPLANE_SUPMAX).
 * Anything else goes to mbfl_filt_conv_illegal_output unless the filter's
 * illegal_mode is MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE.
 *
 * Returns c, or -1 when a callback reports a negative value.
 */
int mbfl_filt_conv_wchar_utf16le(int c, mbfl_convert_filter *filter)
{
	int lead, trail;

	if (c >= 0 && c < MBFL_WCSPLANE_UCS2MAX) {
		/* single 16-bit unit, low byte first */
		CK((*filter->output_function)(c & 0xff, filter->data));
		CK((*filter->output_function)((c >> 8) & 0xff, filter->data));
		return c;
	}

	if (c >= MBFL_WCSPLANE_SUPMIN && c < MBFL_WCSPLANE_SUPMAX) {
		/* (c >> 10) - 0x40 is the top ten bits of (c - 0x10000) */
		lead = ((c >> 10) - 0x40) | 0xd800;
		trail = (c & 0x3ff) | 0xdc00;
		CK((*filter->output_function)(lead & 0xff, filter->data));
		CK((*filter->output_function)((lead >> 8) & 0xff, filter->data));
		CK((*filter->output_function)(trail & 0xff, filter->data));
		CK((*filter->output_function)((trail >> 8) & 0xff, filter->data));
		return c;
	}

	/* not representable in UTF-16 */
	if (filter->illegal_mode != MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE) {
		CK(mbfl_filt_conv_illegal_output(c, filter));
	}
	return c;
}

View file

@ -0,0 +1,49 @@
/*
* "streamable kanji code filter and converter"
* Copyright (c) 1998-2002 HappySize, Inc. All rights reserved.
*
* LICENSE NOTICES
*
* This file is part of "streamable kanji code filter and converter",
* which is distributed under the terms of GNU Lesser General Public
* License (version 2) as published by the Free Software Foundation.
*
* This software is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with "streamable kanji code filter and converter";
* if not, write to the Free Software Foundation, Inc., 59 Temple Place,
* Suite 330, Boston, MA 02111-1307 USA
*
* The author of this file:
*
*/
/*
* The source code included in this file was separated from mbfilter.c
* by moriyoshi koizumi <moriyoshi@php.net> on 4 dec 2002.
*
*/
#ifndef MBFL_MBFILTER_UTF16_H
#define MBFL_MBFILTER_UTF16_H
extern const mbfl_encoding mbfl_encoding_utf16;
extern const mbfl_encoding mbfl_encoding_utf16be;
extern const mbfl_encoding mbfl_encoding_utf16le;
extern const struct mbfl_convert_vtbl vtbl_utf16_wchar;
extern const struct mbfl_convert_vtbl vtbl_wchar_utf16;
extern const struct mbfl_convert_vtbl vtbl_utf16be_wchar;
extern const struct mbfl_convert_vtbl vtbl_wchar_utf16be;
extern const struct mbfl_convert_vtbl vtbl_utf16le_wchar;
extern const struct mbfl_convert_vtbl vtbl_wchar_utf16le;
int mbfl_filt_conv_utf16_wchar(int c, mbfl_convert_filter *filter);
int mbfl_filt_conv_utf16be_wchar(int c, mbfl_convert_filter *filter);
int mbfl_filt_conv_wchar_utf16be(int c, mbfl_convert_filter *filter);
int mbfl_filt_conv_utf16le_wchar(int c, mbfl_convert_filter *filter);
int mbfl_filt_conv_wchar_utf16le(int c, mbfl_convert_filter *filter);
#endif /* MBFL_MBFILTER_UTF16_H */

Some files were not shown because too many files have changed in this diff Show more