- Upgrade bundled oniguruma regex library to 2.2.2 (Rui, Moriyoshi)

. Supports various encodings such as BIG5, GB2312 and ISO-8859-*
  . Fixes bug #26677 (mbstring compile errors with IRIX)
  . Many thanks to K.Kosako.
- Remove redundant files that are not relevant to the build.
This commit is contained in:
Moriyoshi Koizumi 2004-02-28 20:45:27 +00:00
parent dc03dbc699
commit 11b0f469f0
39 changed files with 12031 additions and 6161 deletions

View file

@ -80,7 +80,32 @@ int main() { return foo(10, "", 3.14); }
oniguruma/regexec.c
oniguruma/reggnu.c
oniguruma/regparse.c
oniguruma/regenc.c
oniguruma/regposerr.c
oniguruma/enc/ascii.c
oniguruma/enc/utf8.c
oniguruma/enc/euc_jp.c
oniguruma/enc/euc_tw.c
oniguruma/enc/euc_kr.c
oniguruma/enc/sjis.c
oniguruma/enc/iso8859_1.c
oniguruma/enc/iso8859_2.c
oniguruma/enc/iso8859_3.c
oniguruma/enc/iso8859_4.c
oniguruma/enc/iso8859_5.c
oniguruma/enc/iso8859_6.c
oniguruma/enc/iso8859_7.c
oniguruma/enc/iso8859_8.c
oniguruma/enc/iso8859_9.c
oniguruma/enc/iso8859_10.c
oniguruma/enc/iso8859_11.c
oniguruma/enc/iso8859_13.c
oniguruma/enc/iso8859_14.c
oniguruma/enc/iso8859_15.c
oniguruma/enc/iso8859_16.c
oniguruma/enc/koi8.c
oniguruma/enc/koi8_r.c
oniguruma/enc/big5.c
])
fi
])

View file

@ -610,10 +610,10 @@ static PHP_INI_MH(OnUpdate_mbstring_internal_encoding)
MBSTRG(current_internal_encoding) = no_encoding;
#if HAVE_MBREGEX
{
php_mb_reg_char_encoding mbctype;
OnigEncoding mbctype;
mbctype = php_mb_regex_name2mbctype(new_value);
if (mbctype == REGCODE_UNDEF) {
mbctype = REGCODE_EUCJP;
if (mbctype == ONIG_ENCODING_UNDEF) {
mbctype = ONIG_ENCODING_EUC_JP;
}
MBSTRG(current_mbctype) = MBSTRG(default_mbctype) = mbctype;
}
@ -995,7 +995,7 @@ PHP_MINFO_FUNCTION(mbstring)
php_info_print_table_end();
php_info_print_table_start();
php_info_print_table_colspan_header(2, "mbstring extension makes use of \"streamable kanji code filter and converter\", which is distributed under the GNU Lesser General Public License version 2.1.");
php_info_print_table_header(1, "mbstring extension makes use of \"streamable kanji code filter and converter\", which is distributed under the GNU Lesser General Public License version 2.1.");
php_info_print_table_end();
DISPLAY_INI_ENTRIES();

View file

@ -1,11 +1,361 @@
History
2004/02/27: Version 2.2.2
2004/02/27: [impl] fix the position of onig_stat_print().
2004/02/27: [impl] define ONIG_RUBY_DEFINE_GLOBAL_FUNCTION() in regint.h
so that it is ignored by RDoc.
2004/02/26: Version 2.2.1
2004/02/26: [bug] invalid definition at onig_error_code_to_str()
in the case of NOT HAVE_STDARG_PROTOTYPES.
2004/02/25: Version 2.2.0
2004/02/25: [test] success in ruby 1.8.0 (2003-08-08) [i386-mswin32].
2004/02/24: [test] success in ruby 1.9.0 (2004-02-24) [i686-linux].
2004/02/24: [bug] undefined IS_BLANK() and IS_GRAPH() was used in
onigenc_is_code_ctype() in the case of Ruby M17N.
2004/02/24: [new] support ISO-8859-16. (ONIG_ENCODING_ISO_8859_16)
2004/02/24: [bug] should not fold match for 0xdf in iso8859_6.c.
2004/02/24: [new] support ISO-8859-14. (ONIG_ENCODING_ISO_8859_14)
2004/02/23: [new] support ISO-8859-13. (ONIG_ENCODING_ISO_8859_13)
2004/02/23: [new] support ISO-8859-10. (ONIG_ENCODING_ISO_8859_10)
2004/02/20: [bug] fix iso_8859_4_mbc_is_case_ambig().
2004/02/20: [new] support ISO-8859-9. (ONIG_ENCODING_ISO_8859_9)
2004/02/19: [bug] correct ctype tables for ISO-8859-3, ISO-8859-4,
ISO-8859-6, ISO-8859-7, ISO-8859-8, KOI8_R.
2004/02/18: [bug] wrong replaced name OnigSyntaxGnuOnigex.
2004/02/17: [spec] check capture status for empty infinite loop.
[ruby-dev:20224] etc...
ex. /(?:\1a|())*/.match("a"),
/(?:()|()|()|(x)|()|())*\2b\5/.match("b")
add USE_INFINITE_REPEAT_MONOMANIAC_MEM_STATUS_CHECK.
add OP_NULL_CHECK_END_MEMST, OP_NULL_CHECK_END_MEMST_PUSH.
add stack type STK_NULL_CHECK_END.
2004/02/13: [impl] add OnigEncodingEUC_CN to enc/euc_kr.c.
2004/02/13: [bug] (thanks Simon Strandgaard)
parsing of nested repeat was invalid.
ex. /ab{2,3}*/ was /(?:a(?:b{2,3}))*/,
should be /a(?:b{2,3}*)/
2004/02/12: [bug] (thanks Simon Strandgaard)
OP_REPEAT_INC_NG process in match_at() is wrong.
ex. bad match /a.{0,2}?a/ =~ "0aXXXa0"
2004/02/12: [bug] (thanks Simon Strandgaard)
wrong fetch after (?x) option. ex. "(?x)\ta .\n+b"
2004/02/12: [bug] (thanks Simon Strandgaard)
[\^] is not an empty char class.
2004/02/09: [new] add onig_set_syntax_op(), onig_set_syntax_op2(),
onig_set_syntax_behavior(), onig_set_syntax_options().
2004/02/06: [dist] add a new target 'site' to Makefile.in.
2004/02/06: [dist] add index.html.
2004/02/03: [bug] oniggnu.h was not installed by 'make install'.
2004/02/02: Version 2.1.0
2004/02/02: [test] success in ruby 1.9.0 (2004-02-02) [i686-linux].
2004/02/02: [test] success in ruby 1.8.0 (2003-08-08) [i386-cygwin].
2004/02/02: [test] success in ruby 1.8.0 (2003-08-08) [i386-mswin32].
2004/02/02: [new] support ISO-8859-11. (ONIG_ENCODING_ISO_8859_11)
2004/02/02: [new] support ISO-8859-5. (ONIG_ENCODING_ISO_8859_5)
2004/02/02: [impl] should check single byte encoding or not in and_cclass()
and or_cclass().
2004/01/30: [dist] add oniggnu.h.
2004/01/30: [bug] ISO-8859-7 0xb7 (middle dot) is Punct type.
2004/01/30: [new] support ISO-8859-8. (ONIG_ENCODING_ISO_8859_8)
2004/01/29: [new] support ISO-8859-7. (ONIG_ENCODING_ISO_8859_7)
2004/01/29: [new] support ISO-8859-6. (ONIG_ENCODING_ISO_8859_6)
2004/01/28: [new] support KOI8-R. (ONIG_ENCODING_KOI8_R)
2004/01/28: [new] support KOI8. (ONIG_ENCODING_KOI8)
2004/01/27: [dist] rename enc/isotable.c to enc/mktable.c.
2004/01/27: [new] support ISO-8859-4. (ONIG_ENCODING_ISO_8859_4)
2004/01/26: [new] support ISO-8859-3. (ONIG_ENCODING_ISO_8859_3)
2004/01/26: [bug] EncISO_8859_{1,15}_CtypeTable[256] was wrong.
(0x80 - 0xff is not ASCII)
2004/01/23: [new] support ISO-8859-2. (ONIG_ENCODING_ISO_8859_2)
2004/01/23: [dist] add enc/isotable.c.
2004/01/22: [new] support EUC-TW. (ONIG_ENCODING_EUC_TW)
2004/01/22: [bug] definition of GET_ALIGNMENT_PAD_SIZE() and
ALIGNMENT_RIGHT() was wrong.
type casting should be unsigned int, not int.
2004/01/22: [impl] add defined(__x86_64) || defined(__x86_64__)
to unaligned word access condition. (AMD64 ?)
2004/01/21: [dist] rename enc/eucjp.c to enc/euc_jp.c.
2004/01/21: [new] support EUC-KR. (ONIG_ENCODING_EUC_KR)
2004/01/20: [test] success in ruby 1.8.0 (2003-08-08) [i386-cygwin].
2004/01/20: [dist] change Makefile.in.
2004/01/20: [spec] add \p{...}, \P{...} in char class.
2004/01/20: [new] character property operators \p{...}, \P{...}.
supported in ONIG_SYNTAX_JAVA and ONIG_SYNTAX_PERL.
2004/01/19: [spec] allow /a{,n}/ as /a{0,n}/. (but don't allow /a{,}/)
2004/01/19: [dist] rename onigcomp200.h to onigcmpt200.h.
2004/01/19: [dist] update re.c.168.patch. svn add re.c.181.patch.
2004/01/16: [dist] update sample/*.c for new API.
2004/01/16: [dist] add onigcomp200.h. (for old API compatibility)
2004/01/16: [dist] update documents API, RE and RE.ja.
2004/01/16: [spec] change prefix REG_ -> ONIG_, regex_ -> onig_,
ENC_ -> ONIGENC, enc_ -> onigenc_.
2004/01/15: [impl] rename ENC_IS_MBC_E_WORD() to ENC_IS_MBC_WORD().
rename ENC_CTYPE_SUPPORT_LEVEL_SB_ONLY to
ENC_CTYPE_SUPPORT_LEVEL_SB.
2004/01/14: [impl] rename UNALIGNED_WORD_ACCESS to
PLATFORM_UNALIGNED_WORD_ACCESS.
2004/01/14: [impl] change MATCH_STACK_LIMIT_SIZE value from 200000 to 500000.
2004/01/13: [impl] remove ENC_CODE_TO_MBC_FIRST(enc,code) in regenc.h.
remove code_to_mbc_first member in RegCharEncodingType.
2004/01/13: [impl] remove head byte bitset information in cclass->mbuf.
2003/12/26: [impl] change macro name ismb_xxxx() in enc/*.c for
escape conflict.
2003/12/24: Version 2.0.0
2003/12/24: [spec] ignore case option is effective to numbered char.
ex. /\x61/i =~ "A"
2003/12/24: [test] success in ruby 1.8.1 (2003-12-24) [i686-linux].
2003/12/24: [test] success in ruby 1.8.0 (2003-08-08) [i386-cygwin].
2003/12/24: [test] success in ruby 1.8.0 (2003-08-08) [i386-mswin32].
2003/12/24: [test] success in regex.c compile test on ruby-m17n.
(but can't make miniruby because re.c patch fail.)
2003/12/24: [bug] (thanks H.Miyamoto) /[\W]/ was wrong in 1.9.5.
2003/12/22: [spec] implement fold match on UTF-8 encoding.
2003/12/19: [impl] add ctype_support_level and ctype_add_codes() member to
RegCharEncoding type.
2003/12/19: [impl] add add_ctype_to_cc() in regparse.c.
2003/12/19: [impl] add enc_is_code_ctype() in REG_RUBY_M17N case.
2003/12/19: [impl] change ENC_CODE_TO_MBC() interface.
2003/12/18: [new] implement fold match. (variable number of char
match in ignore case mode.)
ex. German alphabet ess-tsett(U+00DF) match "SS" and "ss".
2003/12/17: [impl] refactoring of encoding system.
2003/12/17: [impl] add enc_init() in regenc.c.
2003/12/17: [new] support Big5. (REG_ENCODING_BIG5)
2003/12/16: [impl] change CodePoint from unsigned int to unsigned long.
2003/12/16: [new] support ISO 8859-15. (REG_ENCODING_ISO_8859_15)
2003/12/16: [impl] change P_() macro definition condition for Win32.
2003/12/16: [dist] add sample/encode.c
2003/12/16: [new] support ISO 8859-1. (REG_ENCODING_ISO_8859_1)
2003/12/15: [impl] rename IS_ENC_XXXX to ENC_IS_XXXX.
2003/12/15: [impl] rename RegDefaultCharEncoding to EncDefaultCharEncoding.
2003/12/15: [impl] divide encoding files. (enc/ascii.c, enc/utf8.c etc...)
2003/12/15: [bug] unexpected infinite loop in regex_snprintf_with_pattern().
change local var. type char* to UChar*.
2003/12/15: [impl] remove REG_MBLEN_TABLE[].
2003/12/15: [spec] rename function prefix regex_get_prev_char_head(),
regex_get_left_adjust_char_head() and
regex_get_right_adjust_char_head() to enc_xxxxxx().
2003/12/15: [impl] rename function prefixes in regenc.h from regex_ to enc_.
2003/12/12: [impl] remove USE_SBMB_CLASS.
2003/12/12: [impl] rename mb -> mbc, mblen() to enc_len().
2003/12/12: [impl] rename WCINT to CodePoint.
2003/12/11: [impl] delete IS_XXXX() ctype macros from regint.h.
2003/12/11: [impl] add enc->wc_is_ctype() and RegAsciiCtypeTable[256].
2003/12/11: [impl] remove RegAsciiCaseAmbigTable.
2003/12/10: [impl] use ENC_TO_LOWER() for ignore case comparison.
2003/12/08: [impl] *** re-defined RegCharEncoding in oniguruma.h. ***
2003/12/08: [impl] add USE_POSIX_REGION_OPTION to regint.h.
2003/12/08: [impl] add IS_ENC_WORD() to regenc.h.
2003/12/05: [impl] rename IS_CODE_XXXX() to IS_ENC_XXXX().
2003/12/05: [impl] delete IS_CODE_WORD() from regenc.h.
2003/12/04: [spec] rename REG_SYN_OP_BACK_REF to REG_SYN_OP_DECIMAL_BACKREF.
2003/12/04: [spec] add (REG_SYN_OP_ESC_W_WORD | REG_SYN_OP_ESC_B_WORD_BOUND |
REG_SYN_OP_ESC_LTGT_WORD_BEGIN_END | REG_SYN_OP_BACK_REF)
to RegSyntaxGrep.
2003/12/04: [spec] remove REG_ENCODING_DEFAULT and REGCODE_DEFAULT.
2003/12/04: [spec] move declarations of regex_get_default_encoding() and
regex_set_default_encoding() from oniguruma.h to regenc.h.
2003/12/03: [new] add regex_get_default_encoding() and
regex_set_default_encoding().
2003/12/03: [spec] REG_ENCODING_DEFAULT meaning is changed.
(current default value, not initial default value.)
2003/12/03: [spec] REGCODE_XXX is obsoleted. use REG_ENCODING_XXX.
2003/12/02: [memo] alias svnst='svn status | grep -v "^\?"'
2003/12/02: [spec] move regex_set_default_trans_table() declaration
from oniguruma.h to regenc.h. (obsoleted API)
2003/12/02: [impl] move variables RegDefaultCharEncoding, DefaultTransTable and
AmbiguityTable to regenc.c.
2003/12/01: [impl] add regex_continuous_sbmb() to regenc.c.
2003/12/01: [dist] add regenc.h and regenc.c.
2003/11/18: [dist] change testconv.rb.
2003/11/18: [bug] (thanks Masaru Tsuda)
memory leak in parse_subexp().
2003/11/18: [bug] (thanks Masaru Tsuda)
memory leak in names_clear() and parse_char_class().
2003/11/17: [bug] memory leak in parse_char_class().
2003/11/17: [bug] (thanks Masaru Tsuda)
OptExactInfo length should not over OPT_EXACT_MAXLEN.
(concat_opt_exact_info_str())
2003/11/12: Version 1.9.5
2003/11/12: [test] success in ruby 1.8.0 (2003-08-08) [i386-cygwin].
2003/11/12: [test] success in ruby 1.8.1 (2003-11-11) [i686-linux].
2003/11/12: [spec] add definition of REG_INEFFECTIVE_META_CHAR.
2003/11/11: [dist] add a sample program sample/sql.c.
2003/11/11: [new] add variable meta character.
regex_set_meta_char()
2003/11/11: [spec] add syntax op. REG_SYN_OP_VARIABLE_META_CHARS.
2003/11/11: [spec] rename REG_SYN_OP_ESC_CAPITAL_Q_QUOTE to
REG_SYN_OP2_ESC_CAPITAL_Q_QUOTE,
REG_SYN_OP_QMARK_GROUP_EFFECT to
REG_SYN_OP2_QMARK_GROUP_EFFECT.
2003/11/06: [impl] define THREAD_PASS as rb_thread_schedule() in Ruby mode.
2003/11/05: [spec] add syntax behavior REG_SYN_WARN_REDUNDANT_NESTED_REPEAT.
2003/11/05: [spec] rename REG_SYN_WARN_FOR_CC_OP_NOT_ESCAPED to
REG_SYN_WARN_CC_OP_NOT_ESCAPED.
2003/11/04: [new] add regex_set_warn_func() and regex_set_verb_warn_func().
2003/10/30: [new] add regex_name_to_backref_number().
(for multiplex definition name, see sample/names.c)
2003/10/30: [spec] add name_end and reg argument to callback function of
regex_foreach_name(). (see sample/names.c)
2003/10/29: [spec] add syntax behavior REG_SYN_ALLOW_MULTIPLEX_DEFINITION_NAME.
add error code REGERR_MULTIPLEX_DEFINED_NAME.
2003/10/14: [dist] modify sample/simple.c.
2003/10/03: [bug] (thanks nobu) [ruby-dev:21472]
sub-anchor of optimization map info was wrong
in concat_left_node_opt_info().
ex. /^(x?y)/ = "xy" fail.
2003/09/17: Version 1.9.4
2003/09/17: [spec] change specification of char-class range in ignore case mode
follows with Ruby 1.8(2003-09-17).
ex. /[H-c]/i ==> (H-Z, 0x5b-0x60, a-c)/i
==> H-Z, h-z, 0x5b-0x60, a-c, A-C
2003/09/16: [bug] (thanks Guy Decoux)
remove env->option == option check in parse_effect().
change env->option for dynamic option in parse_exp().
(ex. bad match /(?i)(?-i)a/ =~ "A")
2003/09/12: [spec] rename REG_SYN_ALLOW_RANGE_OP_IN_CC to
REG_SYN_ALLOW_DOUBLE_RANGE_OP_IN_CC,
REG_SYN_ESCAPE_IN_CC to REG_SYN_BACKSLASH_ESCAPE_IN_CC.
2003/09/11: [bug] change to IS_SYNTAX_OP2 at REG_SYN_OP2_ESC_GNU_BUF_ANCHOR.
2003/09/09: [spec] rename REG_SYN_OP2_ESC_M_BAR_META to
REG_SYN_OP2_ESC_CAPITAL_M_BAR_META,
REG_SYN_OP_ESC_Q_QUOTE to REG_SYN_OP_ESC_CAPITAL_Q_QUOTE,
REG_SYN_OP_ESC_SUBEXP to REG_SYN_OP_ESC_LPAREN_SUBEXP,
REG_SYN_OP_ESC_BUF_ANCHOR to REG_SYN_OP_ESC_AZ_BUF_ANCHOR,
REG_SYN_OP_ESC_GNU_BUF_ANCHOR to
REG_SYN_OP2_ESC_GNU_BUF_ANCHOR,
REG_SYN_OP_ESC_CONTROL_CHAR to REG_SYN_OP_ESC_CONTROL_CHARS,
REG_SYN_OP_ESC_WORD to REG_SYN_OP_ESC_W_WORD,
REG_SYN_OP_ESC_WORD_BEGIN_END to
REG_SYN_OP_ESC_LTGT_WORD_BEGIN_END,
REG_SYN_OP_ESC_WORD_BOUND to REG_SYN_OP_ESC_B_WORD_BOUND,
REG_SYN_OP_ESC_WHITE_SPACE to REG_SYN_OP_ESC_S_WHITE_SPACE,
REG_SYN_OP_ESC_DIGIT to REG_SYN_OP_ESC_D_DIGIT,
REG_SYN_OP_CC to REG_SYN_OP_BRACKET_CC,
REG_SYN_OP2_CCLASS_SET to REG_SYN_OP2_CCLASS_SET_OP,
REG_SYN_CONTEXT_INDEP_OPS to
REG_SYN_CONTEXT_INDEP_REPEAT_OPS,
REG_SYN_CONTEXT_INVALID_REPEAT_OPS to
REG_SYN_CONTEXT_INVALID_REPEAT_OPS.
add REG_SYN_OP_ESC_CAPITAL_G_BEGIN_ANCHOR.
2003/09/08: [spec] rename REG_SYN_OP_ANYCHAR to REG_SYN_OP_DOT_ANYCHAR,
REG_SYN_OP_0INF to REG_SYN_OP_ASTERISK_ZERO_INF,
REG_SYN_OP_ESC_0INF to REG_SYN_OP_ESC_ASTERISK_ZERO_INF,
REG_SYN_OP_1INF to REG_SYN_OP_PLUS_ONE_INF,
REG_SYN_OP_ESC_1INF to REG_SYN_OP_ESC_PLUS_ONE_INF,
REG_SYN_OP_0INF to REG_SYN_OP_QMARK_ZERO_ONE,
REG_SYN_OP_ESC_0INF to REG_SYN_OP_ESC_QMARK_ZERO_ONE,
REG_SYN_OP_INTERVAL to REG_SYN_OP_BRACE_INTERVAL,
REG_SYN_OP_ESC_INTERVAL to REG_SYN_OP_ESC_BRACE_INTERVAL,
REG_SYN_OP_SUBEXP to REG_SYN_OP_LPAREN_SUBEXP,
REG_SYN_OP_ALT to REG_SYN_OP_VBAR_ALT,
REG_SYN_OP_ESC_ALT to REG_SYN_OP_ESC_VBAR_ALT,
REG_SYN_OP_NON_GREEDY to REG_SYN_OP_QMARK_NON_GREEDY,
REG_SYN_OP_SUBEXP_EFFECT to REG_SYN_OP_QMARK_GROUP_EFFECT,
REG_SYN_OP2_POSSESSIVE_{REPEAT,INTERVAL} to
REG_SYN_OP2_PLUS_POSSESSIVE_{REPEAT,INTERVAL},
REG_SYN_OP2_SUBEXP_CALL to REG_SYN_OP2_ESC_G_SUBEXP_CALL,
REG_SYN_OP2_NAMED_GROUP to REG_SYN_OP2_QMARK_LT_NAMED_GROUP
and REG_SYN_OP2_ESC_K_NAMED_BACKREF.
2003/09/02: [tune] call reduce_nested_qualifier() after disabling capture for
no-name group in noname_disable_map().
ex. /(a+)*(?<name>...)/
2003/09/02: [impl] remove a leftover #include <stdio.h> from regcomp.c.
2003/09/01: [dist] update doc/RE and doc/RE.ja.
2003/08/26: [bug] (thanks Guy Decoux)
should not double free node at the case TK_CC_CC_OPEN
in parse_char_class().
2003/08/19: Version 1.9.3
2003/08/19: [inst] change re.c.180.patch.
2003/08/19: [impl] rename 'list of captures' to 'capture history'.
2003/08/19: [dist] add doc/RE.ja. (Japanese)
2003/08/19: [new] add regex_copy_syntax().
2003/08/19: [spec] rename REG_SYN_OP2_ATMARK_LIST_OF_CAPTURES to
REG_SYN_OP2_ATMARK_CAPTURE_HISTORY.
2003/08/18: [spec] (thanks nobu)
don't use IMPORT in oniguruma.h and onigposix.h.
2003/08/18: [impl] (thanks nobu) change error output to stdout in testconv.rb.
2003/08/18: [inst] (thanks nobu) lacked $(srcdir) in Makefile.in.
2003/08/18: [bug] REG_MBLEN_TABLE[SJIS][0xFD-0xFF] should be 1.
2003/08/18: [bug] (thanks nobu) mbctab_sjis[0x80] should be 0.
2003/08/18: [bug] (thanks nobu)
single/multi-byte decision was wrong in parse_char_class().
add regex_wc2mblen().
should not set fetched to 1 in TK_RAW_BYTE case.
2003/08/18: [bug] should update BitSet in the case inc_n >= 0
in add_wc_range_to_buf().
2003/08/13: [bug] change re.c.180.patch for fix rb_reg_to_s() in re.c.
2003/08/11: [bug] should clear region->list in regex_region_resize().
2003/08/08: Version 1.9.2
2003/08/08: [test] success in ruby 1.8.0 (2003-08-08) on Windows 2000
VC++ 6.0 and Cygwin.
2003/08/08: [impl] don't define macro vsnprintf for WIN32 platform,
because definition is added in win32\win32.h.
2003/08/08: [test] success in ruby 1.8.0 and ruby 1.6.8(2003-08-03) on Linux.
2003/08/08: [dist] change re.c.180.patch and re.c.168.patch.
2003/08/08: [new] (thanks akr)
implemented list of captures. (?@...), (?@<name>...)
2003/08/07: [dist] add sample/listcap.c.
2003/08/06: [bug] OP_MEMORY_END_PUSH_REC case in match_at().
renewal of mem_start_stk[] should be after
STACK_PUSH_MEM_END() call.
2003/07/29: [new] add regex_get_encoding(), regex_get_options() and
regex_get_syntax().
2003/07/25: [spec] (thanks akr)
change group(...) to shy-group(?:...) if named group is
used in the pattern.
add REG_SYN_CAPTURE_ONLY_NAMED_GROUP.
2003/07/24: [spec] rename REG_OPTION_CAPTURE_ONLY_NAMED_GROUP to
REG_OPTION_DONT_CAPTURE_GROUP.
add REG_OPTION_CAPTURE_GROUP.
2003/07/17: [spec] rename REG_SYN_OP2_NAMED_SUBEXP to REG_SYN_OP2_NAMED_GROUP.
2003/07/17: [spec] add REGERR_EMPTY_GROUP_NAME.
2003/07/17: [spec] rename REGERR_INVALID_SUBEXP_NAME
to REGERR_INVALID_CHAR_IN_GROUP_NAME.
2003/07/17: [spec] restrict usable chars of group name to alphabet, digit,
'_' or multibyte-char in fetch_name(). [ruby-dev:20706]
2003/07/16: [impl] minor change of sample/names.c.
2003/07/14: [impl] rename USE_NAMED_SUBEXP to USE_NAMED_GROUP.
2003/07/14: [bug] add fetch_name() for USE_NAMED_SUBEXP off case.
2003/07/14: [API] add regex_number_of_names().
2003/07/08: [impl] change error message for undefined group number call.
'undefined group reference: /(a)\g<2>/'
--> 'undefined group <2> reference: /(a)\g<2>/'
2003/07/08: [dist] modify doc/RE.
2003/07/07: [impl] OP_SET_OPTION is not needed in compiled code.
add IS_DYNAMIC_OPTION() to regint.h.
2003/07/07: [spec] called group should not ignore outside option (?i:...).
ex. /(?i:(?<n>(a)\2)){0}\g<n>/.match("aA")
add opcode OP_BACKREFN_IC and OP_BACKREF_MULTI_IC.
set option status to effect memory in optimize_node_left().
2003/07/07: [impl] add opcode OP_ANYCHAR_ML, OP_ANYCHAR_ML_STAR and
OP_ANYCHAR_ML_START_PEEK_NEXT.
2003/07/07: [bug] (thanks nobu) REG_MBLEN_TABLE[SJIS][0x80] should be 1.
2003/07/07: [spec] rename REG_SYN_OP_QUOTE to REG_SYN_OP_ESC_Q_QUOTE.
2003/07/04: Version 1.9.1
2003/07/04: [new] add REG_OPTION_CAPTURE_ONLY_NAMED_GROUP. (thanks .NET)
2003/07/04: [spec] check mbuf member in the case of
REG_SYN_NOT_NEWLINE_IN_NEGATIVE_CC in parse_char_class().
2003/07/04: [impl] typo REG_SYN_WARN_FOR_CC_OP_NOT_ESCAPEED.
2003/07/04: [spec] typo REG_SYN_WARN_FOR_CC_OP_NOT_ESCAPEED.
should be REG_SYN_WARN_FOR_CC_OP_NOT_ESCAPED.
2003/07/04: [bug] conflict values on REG_SYN_WARN_FOR_CC_OP_NOT_ESCAPEED and
REG_SYN_NOT_NEWLINE_IN_NEGATIVE_CC. (thanks nobu)
@ -39,7 +389,7 @@ History
2003/06/20: Version 1.9.0
2003/06/20: [spec] \Q...\E is not effective on REG_SYNTAX_RUBY.
2003/06/20: [spec] \Q...\E is not effective on REG_SYNTAX_RUBY. (thanks akr)
2003/06/19: [inst] rename regex.h to oniguruma.h.
2003/06/18: [impl] change REG_EXTERN setting condition. (__CYGWIN__)
2003/06/18: [bug] return wrong result UTF-8 case in regex_mb2wc().
@ -55,7 +405,7 @@ History
2003/06/13: [bug] should use -DIMPORT for link with DLL in win32/Makefile.
2003/06/13: [dist] add sample/names.c
2003/06/12: [bug] range should be from - 1 in not_wc_range_buf().
2003/06/12: [spec] should warn for '-' befor '&&' operator in char-class.
2003/06/12: [spec] should warn for '-' before '&&' operator in char-class.
2003/06/12: [new] add REG_SYNTAX_PERL.
2003/06/12: [spec] add syntax behavior REG_SYN_WARN_FOR_CC_OP_NOT_ESCAPEED.
2003/06/12: [spec] invalid POSIX bracket should be error. ex. [[:upper :]]
@ -507,6 +857,7 @@ History
--
[bug: bug fix]
[API: API change/new/delete]
[new: new feature]
[spec: specification change]
[impl: implementation change]
@ -515,3 +866,6 @@ History
[dist: distribution change]
[test: test]
[memo: memo]
--
<create tag>
svn copy http://localhost/repos/trunk/oniguruma http://localhost/repos/tags/oniguruma/X.X.X -m "onigdXXXXXXXX"

View file

@ -1,48 +0,0 @@
INSTALL-RUBY 2003/06/12
The way of installing into Ruby is shown.
First, prepare for the source of Ruby.
(http://www.ruby-lang.org/)
A. Unix or Cygwin platform
B. Win32 platform (VC++)
A. Unix or Cygwin platform
(in oniguruma directory)
1. ./configure --with-rubydir=<ruby-source-dir>
2. make 16 # for Ruby 1.6.8
or
make 18 # for Ruby 1.8.0
Or you can specify ruby source directory.
(ex. make 16 RUBYDIR=../ruby)
(in ruby source directory)
3. ./configure (** if it has not been run yet **)
4. make clean
5. make
* test (ASCII and EUC-JP)
(in oniguruma directory)
6. make rtest
Or you can specify ruby program directory.
(ex. make rtest RUBYDIR=/usr/local/bin)
B. Win32 platform (VC++)
* Requirement: Visual C++, patch.exe
(in oniguruma directory)
1. copy win32\Makefile Makefile
2. nmake 16 RUBYDIR=<ruby-source-dir> # for Ruby 1.6.8
or
nmake 18 RUBYDIR=<ruby-source-dir> # for Ruby 1.8.0
3. Follow <ruby-source-dir>\win32\README.win32 description...
// END

View file

@ -1,188 +0,0 @@
# Oni Guruma Makefile
product_name = oniguruma
dist_tag = `date '+%Y%m%d'`
SHELL = /bin/sh
AUTOCONF = autoconf
CPPFLAGS =
CFLAGS = @CFLAGS@ @STATISTICS@
LDFLAGS =
LOADLIBES =
AR = ar
ARFLAGS = rc
RANLIB = @RANLIB@
INSTALL = install -c
CP = cp -p
CC = @CC@
DEFS = @DEFS@ -DNOT_RUBY
RUBYDIR = @RUBYDIR@
WIN32 = win32
DOC = doc
srcdir = @srcdir@
VPATH = @srcdir@
prefix = @prefix@
exec_prefix = @exec_prefix@
libdir = $(exec_prefix)/lib
includedir = $(prefix)/include
subdirs =
libname = libonig.a
onigintheaders = regint.h regparse.h
onigheaders = oniguruma.h $(onigintheaders)
posixheaders = onigposix.h
headers = $(posixheaders) $(onigheaders)
onigobjs = regerror.o regparse.o regcomp.o regexec.o reggnu.o
posixobjs = regposix.o regposerr.o
libobjs = $(onigobjs) $(posixobjs)
onigsources = regerror.c regparse.c regcomp.c regexec.c reggnu.c
posixsources = regposix.c regposerr.c
libsources = $(posixsources) $(onigsources)
rubysources = regex.c $(onigsources)
patchfiles = re.c.168.patch re.c.180.patch
distfiles = README COPYING INSTALL-RUBY HISTORY \
.cvsignore Makefile.in configure.in config.h.in configure \
$(headers) $(libsources) regex.c $(patchfiles) \
test.rb testconv.rb $(testc).c
win32distfiles = $(WIN32)/Makefile $(WIN32)/config.h $(WIN32)/testc.c
docfiles = $(DOC)/API $(DOC)/RE
samplefiles = sample/*.c
testc = testc
testp = testp
makeargs = $(MFLAGS) CPPFLAGS='$(CPPFLAGS)' CFLAGS='$(CFLAGS)' CC='$(CC)'
.SUFFIXES:
.SUFFIXES: .o .c .h .ps .dvi .info .texinfo
.c.o:
$(CC) $(CFLAGS) $(CPPFLAGS) $(DEFS) -I. -I$(srcdir) -c $<
# targets
default: all
all: $(libname)
$(libname): $(libobjs)
rm -f $(libname)
$(AR) $(ARFLAGS) $(libname) $(libobjs)
$(RANLIB) $(libname)
regparse.o: regparse.c $(onigheaders) config.h
regcomp.o: regcomp.c $(onigheaders) config.h
regexec.o: regexec.c regint.h oniguruma.h config.h
reggnu.o: reggnu.c regint.h oniguruma.h config.h
regerror.o: regerror.c regint.h oniguruma.h config.h
regposix.o: regposix.c $(posixheaders) oniguruma.h config.h
regposerr.o: regposerr.c $(posixheaders) config.h
install: all
test -d $(libdir) || mkdir $(libdir)
test -d $(includedir) || mkdir $(includedir)
$(INSTALL) $(libname) $(libdir)/$(libname)
$(RANLIB) $(libdir)/$(libname)
$(INSTALL) $(srcdir)/oniguruma.h $(includedir)/oniguruma.h
$(INSTALL) $(srcdir)/onigposix.h $(includedir)/onigposix.h
uninstall:
-rm -f $(libdir)/$(libname)
-rm -f $(includedir)/oniguruma.h
# Ruby test
rtest:
$(RUBYDIR)/ruby -w -Ke test.rb
# C library test
ctest: $(testc)
./$(testc)
# POSIX C library test
ptest: $(testp)
./$(testp)
$(testc): $(testc).c $(libname)
$(CC) $(CFLAGS) -o $@ $(testc).c $(libname)
$(testp): $(testc).c $(libname)
$(CC) -DPOSIX_TEST $(CFLAGS) -o $@ $(testc).c $(libname)
$(testc).c: test.rb testconv.rb
ruby -Ke testconv.rb < test.rb > $@
$(WIN32)/$(testc).c: test.rb testconv.rb
ruby -Ke testconv.rb -win < test.rb | nkf -cs > $@
clean:
rm -f *.o $(libname) $(testc) $(testp) $(testc) *~ win32/*~
distclean: clean
rm -f Makefile config.status
16: cpruby
patch -d $(RUBYDIR) -p0 < re.c.168.patch
18: cpruby
patch -d $(RUBYDIR) -p0 < re.c.180.patch
# backup file suffix
SORIG = ruby_orig
cpruby:
$(CP) $(RUBYDIR)/regex.c $(RUBYDIR)/regex.c.$(SORIG)
$(CP) $(RUBYDIR)/regex.h $(RUBYDIR)/regex.h.$(SORIG)
$(CP) $(RUBYDIR)/re.c $(RUBYDIR)/re.c.$(SORIG)
$(CP) $(rubysources) $(onigintheaders) $(RUBYDIR)
$(CP) oniguruma.h $(RUBYDIR)/regex.h
rback:
$(CP) $(RUBYDIR)/regex.c.$(SORIG) $(RUBYDIR)/regex.c
$(CP) $(RUBYDIR)/regex.h.$(SORIG) $(RUBYDIR)/regex.h
$(CP) $(RUBYDIR)/re.c.$(SORIG) $(RUBYDIR)/re.c
samples:
$(CC) $(CFLAGS) -I. -o sample/simple sample/simple.c $(libname)
$(CC) $(CFLAGS) -I. -o sample/posix sample/posix.c $(libname)
$(CC) $(CFLAGS) -I. -o sample/names sample/names.c $(libname)
configure: configure.in
$(AUTOCONF)
config.status: configure
$(SHELL) ./config.status --recheck
Makefile: Makefile.in config.status
$(SHELL) ./config.status
# Prevent GNU make 3 from overflowing arg limit on system V.
.NOEXPORT:
manifest:
for file in $(distfiles); do echo $$file; done
distdir = $(product_name)
dist_auto: $(testc).c $(WIN32)/$(testc).c
dist: configure dist_auto
rm -rf $(distdir)
mkdir $(distdir)
mkdir $(distdir)/$(DOC)
mkdir $(distdir)/$(WIN32)
mkdir $(distdir)/sample
ln $(distfiles) $(distdir)
ln $(docfiles) $(distdir)/$(DOC)
ln $(win32distfiles) $(distdir)/$(WIN32)
ln $(samplefiles) $(distdir)/sample
tar chf - $(distdir) | gzip > onigd$(dist_tag).tar.gz
rm -rf $(distdir)

View file

@ -1,30 +1,43 @@
README 2003/07/04
README 2004/02/25
Oniguruma ---- (C) K.Kosako <kosako@sofnec.co.jp>
http://www.ruby-lang.org/cgi-bin/cvsweb.cgi/oniguruma/
http://www.freebsd.org/cgi/cvsweb.cgi/ports/devel/oniguruma/
Oniguruma is a regular expression library.
The characteristics of this library is that different character encodings
Oniguruma is a regular expressions library.
The characteristic of this library is that a different character encoding
can be specified for every regular expression object.
(Supported character encodings: ASCII, UTF-8, EUC-JP, Shift_JIS)
Supported character encodings:
ASCII, UTF-8,
EUC-JP, EUC-TW, EUC-KR, EUC-CN,
Shift_JIS, Big5, KOI8, KOI8-R,
ISO-8859-1, ISO-8859-2, ISO-8859-3, ISO-8859-4, ISO-8859-5,
ISO-8859-6, ISO-8859-7, ISO-8859-8, ISO-8859-9, ISO-8859-10,
ISO-8859-11, ISO-8859-13, ISO-8859-14, ISO-8859-15, ISO-8859-16
There are two ways of using it in this program.
* Built-in regular expression engine of Ruby
* C library (supported APIs: GNU regex, POSIX, Oniguruma native)
------------------------------------------------------------
Install
A. Install into Ruby
(A) Install into Ruby
See INSTALL-RUBY.
(character encodings: ASCII, UTF-8, EUC-JP, Shift_JIS)
B. C library
B1. Unix, Cygwin
(B) Install C library
(B-1) Unix and Cygwin platform
1. ./configure
2. make
@ -32,20 +45,20 @@ B. C library
(* uninstall: make uninstall)
* test (EUC-JP)
* test (ASCII/EUC-JP)
4. make ctest
B2. Win32 platform (VC++)
(B-2) Win32 platform (VC++)
1. copy win32\config.h config.h
2. copy win32\Makefile Makefile
1. copy win32\Makefile Makefile
2. copy win32\config.h config.h
3. nmake
onig_s.lib: static link library
onig.dll: dynamic link library
onig_s.lib: static link library
onig.dll: dynamic link library
* test (Shift_JIS)
* test (ASCII/Shift_JIS)
4. copy win32\testc.c testc.c
5. nmake ctest
@ -58,30 +71,72 @@ License
It follows the BSD license in the case of the one except for it.
Source Files
oniguruma.h Oniguruma and GNU regex API header file
regint.h internal definitions
regparse.h internal definitions for regparse.c and regcomp.c
regparse.c parsing functions.
regcomp.c compiling and optimization functions
regerror.c error message function
regex.c source files wrapper for Ruby
regexec.c search and match functions
reggnu.c GNU regex API functions
Regular Expressions
onigposix.h POSIX API header file
regposerr.c POSIX API error message function (regerror)
regposix.c POSIX API functions
See doc/RE (or doc/RE.ja for Japanese).
Sample Programs
sample/simple.c example of the minimum (native API)
sample/posix.c POSIX API sample.
sample/names.c example of the named group callback.
sample/encode.c example of some encodings.
sample/listcap.c example of the capture history.
sample/posix.c POSIX API sample.
sample/sql.c example of the variable meta characters.
(SQL-like pattern matching)
Regular expression
Source Files
oniguruma.h Oniguruma API header file. (public)
oniggnu.h GNU regex API header file. (public)
onigcmpt200.h Oniguruma API backward compatibility header file. (public)
(for version 2.0.0 or older)
regenc.h character encodings framework header file.
regint.h internal definitions
regparse.h internal definitions for regparse.c and regcomp.c
regcomp.c compiling and optimization functions
regenc.c character encodings framework.
regerror.c error message function
regex.c source files wrapper for Ruby
regexec.c search and match functions
regparse.c parsing functions.
reggnu.c GNU regex API functions
onigposix.h POSIX API header file. (public)
regposerr.c POSIX error message function.
regposix.c POSIX functions.
enc/mktable.c character type table generator.
enc/ascii.c ASCII encoding.
enc/iso8859_1.c ISO-8859-1 encoding. (Latin-1)
enc/iso8859_2.c ISO-8859-2 encoding. (Latin-2)
enc/iso8859_3.c ISO-8859-3 encoding. (Latin-3)
enc/iso8859_4.c ISO-8859-4 encoding. (Latin-4)
enc/iso8859_5.c ISO-8859-5 encoding. (Cyrillic)
enc/iso8859_6.c ISO-8859-6 encoding. (Arabic)
enc/iso8859_7.c ISO-8859-7 encoding. (Greek)
enc/iso8859_8.c ISO-8859-8 encoding. (Hebrew)
enc/iso8859_9.c ISO-8859-9 encoding. (Latin-5 or Turkish)
enc/iso8859_10.c ISO-8859-10 encoding. (Latin-6 or Nordic)
enc/iso8859_11.c ISO-8859-11 encoding. (Thai)
enc/iso8859_13.c ISO-8859-13 encoding. (Latin-7 or Baltic Rim)
enc/iso8859_14.c ISO-8859-14 encoding. (Latin-8 or Celtic)
enc/iso8859_15.c ISO-8859-15 encoding. (Latin-9 or West European with Euro)
enc/iso8859_16.c ISO-8859-16 encoding.
(Latin-10 or South-Eastern European with Euro)
enc/utf8.c UTF-8 encoding.
enc/euc_jp.c EUC-JP encoding.
enc/euc_tw.c EUC-TW encoding.
enc/euc_kr.c EUC-KR, EUC-CN encoding.
enc/sjis.c Shift_JIS encoding.
enc/koi8.c KOI8 encoding.
enc/koi8_r.c KOI8-R encoding.
enc/big5.c Big5 encoding.
See doc/RE.
API differences with Japanized GNU regex(version 0.12) of Ruby
@ -93,22 +148,18 @@ API differences with Japanized GNU regex(version 0.12) of Ruby
ToDo
1 support 16-bit and 31-bit encodings. (UCS-2, UCS-4, UTF-16)
(each encoding has meta-character code table?)
1 support 16-bit encodings. (UTF-16)
2 different encoding pattern with target.
(ex. ASCII/UTF-16, UTF-16 BE and UTF-16 LE)
3 add enc/name.c (onigenc_get_enc_by_name(name))
2 if-then-else. (?(condition)then), (?(condition)then|else)
? variable meta characters.
? implement syntax behavior REG_SYN_CONTEXT_INDEP_ANCHORS.
? pattern encoding different with target.
(ex. UCS-2 Big Endian and UCS-2 Little Endian)
? better access to hash table.
? transmission stopper. (return ONIG_STOP from match_at())
? implement syntax behavior ONIG_SYN_CONTEXT_INDEP_ANCHORS.
? better access to hash table (st.c).
non null-terminated key version st_lookup().
(but it needs to modify st.[ch])
? character set specific POSIX bracket extensions. ([:hiragana:])
? grep-like tool 'onigrep'. (variable syntax option etc..)
? check invalid wide char value in WC2MB, WC2MB_FIRST on Ruby M17N.
? define THREAD_PASS in regint.h as rb_thread_pass().
? grep-like tool 'onigrep'.
? return parse tree of regexp pattern to application.
?? /a{n}?/ should be interpreted as /(?:a{n})?/.
?? \h hexadecimal digit char ([0-9a-fA-F]), \H not \h.
and I'm thankful to Akinori MUSHA.

5414
ext/mbstring/oniguruma/configure vendored Executable file

File diff suppressed because it is too large Load diff

View file

@ -1,70 +0,0 @@
dnl Process this file with autoconf to produce a configure script.
dnl
dnl Overview: sets up two substituted variables (RUBYDIR, STATISTICS) driven
dnl by --with-* options, runs the usual compiler/header/type probes, and then
dnl does two hand-written compile tests that define HAVE_PROTOTYPES and
dnl HAVE_STDARG_PROTOTYPES. Results go to config.h and Makefile.
AC_INIT(regex.c)
AC_CONFIG_HEADER(config.h)
dnl default value for RUBYDIR
dnl (overridden by --with-rubydir=DIR; exported to the Makefile via AC_SUBST)
RUBYDIR=".."
AC_ARG_WITH(rubydir,
[ --with-rubydir=RUBYDIR specify value for RUBYDIR (default ..)],
[ RUBYDIR=$withval ])
AC_SUBST(RUBYDIR)
dnl default value for STATISTICS
dnl (empty unless --with-statistics is given, in which case it becomes the
dnl compiler flag -DREG_DEBUG_STATISTICS)
STATISTICS=""
AC_ARG_WITH(statistics,
[ --with-statistics take matching time statistical data],
[ STATISTICS=-DREG_DEBUG_STATISTICS ])
AC_SUBST(STATISTICS)
dnl Checks for programs.
AC_PROG_CC
AC_PROG_RANLIB
dnl AC_PROG_INSTALL
dnl Checks for libraries.
dnl Checks for header files.
AC_HEADER_STDC
AC_CHECK_HEADERS(stdlib.h string.h strings.h sys/time.h unistd.h sys/times.h)
dnl Checks for typedefs, structures, and compiler characteristics.
AC_CHECK_SIZEOF(int, 4)
AC_CHECK_SIZEOF(short, 2)
AC_CHECK_SIZEOF(long, 4)
AC_C_CONST
AC_HEADER_TIME
dnl Checks for library functions.
AC_FUNC_ALLOCA
AC_FUNC_MEMCMP
dnl Probe: does the compiler accept an ANSI-style prototyped function
dnl definition? The result is cached in cv_have_prototypes and turned into
dnl HAVE_PROTOTYPES below.
AC_CACHE_CHECK(for prototypes, cv_have_prototypes,
[AC_TRY_COMPILE([int foo(int x) { return 0; }], [return foo(10);],
cv_have_prototypes=yes,
cv_have_prototypes=no)])
if test "$cv_have_prototypes" = yes; then
AC_DEFINE(HAVE_PROTOTYPES)
fi
dnl Probe: do <stdarg.h> and a variadic prototype (int foo(int x, ...))
dnl compile, including va_arg with int, char * and double? The result is
dnl cached in cv_stdarg and turned into HAVE_STDARG_PROTOTYPES below.
AC_CACHE_CHECK(for variable length prototypes and stdarg.h, cv_stdarg,
[AC_TRY_COMPILE([
#include <stdarg.h>
int foo(int x, ...) {
va_list va;
va_start(va, x);
va_arg(va, int);
va_arg(va, char *);
va_arg(va, double);
return 0;
}
], [return foo(10, "", 3.14);],
cv_stdarg=yes,
cv_stdarg=no)])
if test "$cv_stdarg" = yes; then
AC_DEFINE(HAVE_STDARG_PROTOTYPES)
fi
dnl NOTE(review): AC_SUBST() is called without a variable name; this looks
dnl like a leftover with no effect -- confirm before relying on it.
AC_SUBST()
AC_OUTPUT(Makefile)

View file

@ -1,279 +0,0 @@
Oniguruma API 2003/07/04
declared in regex.h.
# int regex_init(void)
Initialize library.
You don't have to call it explicitly, because it is called in regex_new().
# int regex_error_code_to_str(UChar* err_buf, int err_code, ...)
Return error message string length.
arguments
1 err_buf: error message buffer.
(required size: REG_MAX_ERROR_MESSAGE_LEN)
2 err_code: error code returned from other API functions.
3 err_info (optional): error info returned from regex_new()
and regex_recompile().
# int regex_new(regex_t** reg, UChar* pattern, UChar* pattern_end,
RegOptionType option, RegCharEncoding code, RegSyntaxType* syntax,
RegErrorInfo* err_info)
Create new regex object(regex_t).
normal return: REG_NORMAL
arguments
1 reg: return regex object's address.
2 pattern: regex pattern string.
3 pattern_end: terminate address of pattern. (pattern + pattern length)
4 option: compile time options.
REG_OPTION_NONE no option
REG_OPTION_SINGLELINE '^' -> '\A', '$' -> '\z', '\Z' -> '\z'
REG_OPTION_MULTILINE '.' match with newline
REG_OPTION_IGNORECASE ignore case (case-insensitive)
REG_OPTION_EXTEND extended pattern form
REG_OPTION_FIND_LONGEST find longest match
REG_OPTION_FIND_NOT_EMPTY ignore empty match
REG_OPTION_NEGATE_SINGLELINE
clear REG_OPTION_SINGLELINE which is default on
in REG_SYNTAX_POSIX_XXX, REG_SYNTAX_PERL and REG_SYNTAX_JAVA.
REG_OPTION_CAPTURE_ONLY_NAMED_GROUP named group only captured.
5 code: character encoding.
REGCODE_ASCII ASCII
REGCODE_UTF8 UTF-8
REGCODE_EUCJP EUC-JP
REGCODE_SJIS Shift_JIS
REGCODE_DEFAULT ASCII
6 syntax: pointer to pattern syntax definition.
REG_SYNTAX_POSIX_BASIC POSIX Basic RE
REG_SYNTAX_POSIX_EXTENDED POSIX Extended RE
REG_SYNTAX_EMACS Emacs
REG_SYNTAX_GREP grep
REG_SYNTAX_GNU_REGEX GNU regex
REG_SYNTAX_JAVA Java (Sun java.util.regex)
REG_SYNTAX_PERL Perl
REG_SYNTAX_RUBY Ruby
REG_SYNTAX_DEFAULT default (== Ruby)
regex_set_default_syntax()
or any RegSyntaxType data pointer defined by user.
7 err_info: address for return optional error info.
use this value as 3rd argument of regex_error_code_to_str().
# void regex_free(regex_t* reg)
Free memory used by regex object.
arguments
1 reg: regex object.
# int regex_recompile(regex_t* reg, UChar* pattern, UChar* pattern_end,
RegOptionType option, RegCharEncoding code, RegSyntaxType* syntax,
RegErrorInfo* err_info)
Recompile regex object.
normal return: REG_NORMAL
arguments
1 reg: regex object.
The other arguments are the same as for regex_new().
# int regex_search(regex_t* reg, UChar* str, UChar* end, UChar* start,
UChar* range, RegRegion* region, RegOptionType option)
Search string and return search result and matching region.
normal return: match position offset (i.e. p - str >= 0)
not found: REG_MISMATCH (< 0)
arguments
1 reg: regex object
2 str: target string
3 end: terminate address of target string
4 start: search start address of target string
5 range: search terminate address of target string
6 region: address for return group match range info (NULL is allowed)
7 option: search time option
REG_OPTION_NOTBOL string head(str) isn't considered as begin of line
REG_OPTION_NOTEOL string end (end) isn't considered as end of line
REG_OPTION_POSIX_REGION region argument is regmatch_t[] of POSIX API.
# int regex_match(regex_t* reg, UChar* str, UChar* end, UChar* at,
RegRegion* region, RegOptionType option)
Match string and return result and matching region.
normal return: match length (i.e. p - at >= 0)
not match: REG_MISMATCH (< 0)
arguments
1 reg: regex object
2 str: target string
3 end: terminate address of target string
4 at: match address of target string
5 region: address for return group match range info (NULL is allowed)
6 option: search time option
REG_OPTION_NOTBOL string head(str) isn't considered as begin of line
REG_OPTION_NOTEOL string end (end) isn't considered as end of line
REG_OPTION_POSIX_REGION region argument is regmatch_t[] of POSIX API.
# RegRegion* regex_region_new(void)
Create a region.
# void regex_region_free(RegRegion* region, int free_self)
Free memory used by region.
arguments
1 region: target region
2 free_self: [1: free all, 0: free memory used in region but not self]
# void regex_region_copy(RegRegion* to, RegRegion* from)
Copy contents of region.
arguments
1 to: target region
2 from: source region
# void regex_region_clear(RegRegion* region)
Clear contents of region.
arguments
1 region: target region
# int regex_region_resize(RegRegion* region, int n)
Resize group range area of region.
normal return: REG_NORMAL
arguments
1 region: target region
2 n: new size
# int regex_name_to_group_numbers(regex_t* reg, UChar* name, UChar* name_end,
int** num_list)
Return group number list of name.
Named subexp is defined by (?<name>....).
normal return: number of groups for the name.
(ex. /(?<x>..)...(?<x>..)/ ==> 2)
name not found: -1
arguments
1 reg: regex object.
2 name: subexp-name.
3 name_end: terminate address of subexp-name.
4 num_list: return list of group number.
# int regex_foreach_names(regex_t* reg, int (*func)(UChar*,int,int*,void*),
void* arg)
Iterate function call for all names.
normal return: 0
error: func's return value.
arguments
1 reg: regex object.
2 func: called function.
func(name, <number of groups>, <group number's list>, arg);
if func return non 0 value, iteration is stopped.
3 arg: argument for func.
# UChar* regex_get_prev_char_head(RegCharEncoding code, UChar* start, UChar* s)
Return previous character head address.
arguments
1 code: character encoding
2 start: string address
3 s: target address of string
# UChar* regex_get_left_adjust_char_head(RegCharEncoding code,
UChar* start, UChar* s)
Return left-adjusted head address of a character.
arguments
1 code: character encoding
2 start: string address
3 s: target address of string
# UChar* regex_get_right_adjust_char_head(RegCharEncoding code,
UChar* start, UChar* s)
Return right-adjusted head address of a character.
arguments
1 code: character encoding
2 start: string address
3 s: target address of string
# int regex_set_default_syntax(RegSyntaxType* syntax)
Set default syntax.
arguments
1 syntax: pointer to pattern syntax definition.
# void regex_set_default_trans_table(UChar* table)
Set default case transformation table.
arguments
1 table: case transformation table
(* this function will be obsoleted in future version)
# int regex_end(void)
The use of this library is finished.
normal return: REG_NORMAL
# const char* regex_version(void)
Return version string. (ex. "1.8.6")
// END

View file

@ -1,224 +0,0 @@
Oniguruma Regular Expressions 2003/07/04
syntax: REG_SYNTAX_RUBY (default)
1. Syntax elements
\ escape
| alternation
(...) group
[...] character class
2. Characters
\t horizontal tab (0x09)
\v vertical tab (0x0B)
\n newline (0x0A)
\r return (0x0D)
\b back space (0x08) (* in character class only)
\f form feed (0x0C)
\a bell (0x07)
\e escape (0x1B)
\nnn octal char
\xHH hexadecimal char
\x{7HHHHHHH} wide hexadecimal char
\cx control char
\C-x control char
\M-x meta (x|0x80)
\M-\C-x meta control char
3. Character types
. any character (except newline)
\w word character (alphanumeric, "_" and multibyte char)
\W non-word char
\s whitespace char (\t, \n, \v, \f, \r, \x20)
\S non-whitespace char
\d digit char
\D non-digit char
4. Quantifier
greedy
? 1 or 0 times
* 0 or more times
+ 1 or more times
{n,m} at least n but not more than m times
{n,} at least n times
{n} n times
reluctant
?? 1 or 0 times
*? 0 or more times
+? 1 or more times
{n,m}? at least n but not more than m times
{n,}? at least n times
possessive (greedy and does not backtrack after repeated)
?+ 1 or 0 times
*+ 0 or more times
++ 1 or more times
5. Anchors
^ beginning of the line
$ end of the line
\b word boundary
\B not word boundary
\A beginning of string
\Z end of string, or before newline at the end
\z end of string
\G previous end-of-match position
6. POSIX character class ([:xxxxx:], negate [:^xxxxx:])
alnum alphabet or digit char
alpha alphabet
ascii code value: [0 - 127]
blank \t, \x20
cntrl
digit 0-9
graph
lower
print
punct
space \t, \n, \v, \f, \r, \x20
upper
xdigit 0-9, a-f, A-F
7. Operators in character class
[...] group (character class in character class)
&& intersection
(lowest precedence operator in character class)
ex. [a-w&&[^c-g]z] ==> ([a-w] and ([^c-g] or z)) ==> [abh-w]
8. Extended expressions
(?#...) comment
(?imx-imx) option on/off
i: ignore case
m: multi-line (dot(.) match newline)
x: extended form
(?imx-imx:subexp) option on/off for subexp
(?:subexp) not captured
(?=subexp) look-ahead
(?!subexp) negative look-ahead
(?<=subexp) look-behind
(?<!subexp) negative look-behind
Subexp of look-behind must be fixed character length.
But different character length is allowed in top level
alternatives only.
ex. (?<=a|bc) is OK. (?<=aaa(?:b|cd)) is not allowed.
(?>subexp) don't backtrack
(?<name>subexp) define named group
(name can not include '>', ')', '\' and NUL character)
9. Back reference
\n back reference by group number (n >= 1)
\k<name> back reference by group name
10. Subexp call ("Tanaka Akira special")
\g<name> call by group name
\g<n> call by group number (only if 'n' is not defined as name)
-----------------------------
11. Original extensions
+ named group (?<name>...)
+ named backref \k<name>
+ subexp call \g<name>, \g<group-num>
12. Features lacking compared with Perl 5.8.0
+ [:word:]
+ \N{name}
+ \l,\u,\L,\U, \P, \X, \C
+ (?{code})
+ (??{code})
+ (?(condition)yes-pat|no-pat)
+ \Q...\E (* This is effective on REG_SYNTAX_PERL and REG_SYNTAX_JAVA)
13. Syntax depend options
+ REG_SYNTAX_RUBY (default)
(?m): dot(.) match newline
+ REG_SYNTAX_PERL, REG_SYNTAX_JAVA
(?s): dot(.) match newline
(?m): ^ match after newline, $ match before newline
14. Differences with Japanized GNU regex(version 0.12) of Ruby
+ add look behind
(?<=fixed-char-length-pattern), (?<!fixed-char-length-pattern)
(in negative-look-behind, capture group isn't allowed,
shy group(?:) is allowed.)
+ add possessive quantifier. ?+, *+, ++
+ add operations in character class. [], &&
+ add named group and subexp call.
+ octal or hexadecimal number sequence can be treated as
a multibyte code char in char-class, if multibyte encoding is specified.
(ex. [\xa1\xa2], [\xa1\xa7-\xa4\xa1])
+ effect range of isolated option is to next ')'.
ex. (?:(?i)a|b) is interpreted as (?:(?i:a|b)), not (?:(?i:a)|b).
+ isolated option is not transparent to previous pattern.
ex. a(?i)* is a syntax error pattern.
+ an incomplete left brace is allowed as a usual char.
ex. /{/, /({)/, /a{2,3/ etc...
+ negative POSIX bracket [:^xxxx:] is supported.
+ POSIX bracket [:ascii:] is added.
+ repeat of look-ahead is not allowed.
ex. /(?=a)*/, /(?!b){5}/
15. Problems
+ Invalid first byte in UTF-8 is allowed.
(which is the same as GNU regex of Ruby)
/./u =~ "\xa3"
Of course it would be possible to validate the byte,
but matching would become slower than it is now.
+ Zero-length match in infinite repeat stops the repeat,
and captured group status isn't checked as stop condition.
/()*\1/ =~ "" #=> match
/(?:()|())*\1\2/ =~ "" #=> fail
/(?:\1a|())*/ =~ "a" #=> match with ""
+ The ignore-case option has no effect on an octal or hexadecimal
numbered char, but it does take effect if the char appears in a char class.
This is inconsistent, but it is the same behavior as
GNU regex of Ruby.
/\x61/i.match("A") # => nil
/[\x61]/i.match("A") # => match
// END

View file

@ -0,0 +1,304 @@
/**********************************************************************
onigcmpt200.h - Oniguruma (regular expression library)
Copyright (C) 2004 K.Kosako (kosako@sofnec.co.jp)
**********************************************************************/
/*
 * Source-compatibility aliases: each old-style REG_xxx / Regxxx / regex_xxx
 * name defined in this header expands to the corresponding
 * ONIG_xxx / Onigxxx / onig_xxx name.
 * NOTE(review): nothing is #included here, so the ONIG_ names presumably have
 * to be provided by including oniguruma.h first -- confirm.
 */
#ifndef ONIGCMPT200_H
#define ONIGCMPT200_H
/* constants */
#define REG_MAX_ERROR_MESSAGE_LEN ONIG_MAX_ERROR_MESSAGE_LEN
/* character encoding type and encoding identifiers */
#define RegCharEncoding OnigEncoding
#define REG_ENCODING_ASCII ONIG_ENCODING_ASCII
#define REG_ENCODING_ISO_8859_1 ONIG_ENCODING_ISO_8859_1
#define REG_ENCODING_ISO_8859_15 ONIG_ENCODING_ISO_8859_15
#define REG_ENCODING_UTF8 ONIG_ENCODING_UTF8
#define REG_ENCODING_EUC_JP ONIG_ENCODING_EUC_JP
#define REG_ENCODING_SJIS ONIG_ENCODING_SJIS
#define REG_ENCODING_BIG5 ONIG_ENCODING_BIG5
#define REG_ENCODING_UNDEF ONIG_ENCODING_UNDEF
/* Don't use REGCODE_XXXX. (obsoleted) */
/* These resolve in two steps: REGCODE_xxx -> REG_ENCODING_xxx -> ONIG_ENCODING_xxx. */
#define REGCODE_UNDEF REG_ENCODING_UNDEF
#define REGCODE_ASCII REG_ENCODING_ASCII
#define REGCODE_UTF8 REG_ENCODING_UTF8
#define REGCODE_EUCJP REG_ENCODING_EUC_JP
#define REGCODE_SJIS REG_ENCODING_SJIS
/* The only real type declared by this header; everything else is a macro alias. */
typedef unsigned char* RegTransTableType;
/* option and distance type aliases, default option value */
#define RegOptionType OnigOptionType
#define RegDistance OnigDistance
#define REG_OPTION_DEFAULT ONIG_OPTION_DEFAULT
/* options */
#define REG_OPTION_NONE ONIG_OPTION_NONE
#define REG_OPTION_SINGLELINE ONIG_OPTION_SINGLELINE
#define REG_OPTION_MULTILINE ONIG_OPTION_MULTILINE
#define REG_OPTION_IGNORECASE ONIG_OPTION_IGNORECASE
#define REG_OPTION_EXTEND ONIG_OPTION_EXTEND
#define REG_OPTION_FIND_LONGEST ONIG_OPTION_FIND_LONGEST
#define REG_OPTION_FIND_NOT_EMPTY ONIG_OPTION_FIND_NOT_EMPTY
#define REG_OPTION_NEGATE_SINGLELINE ONIG_OPTION_NEGATE_SINGLELINE
#define REG_OPTION_DONT_CAPTURE_GROUP ONIG_OPTION_DONT_CAPTURE_GROUP
#define REG_OPTION_CAPTURE_GROUP ONIG_OPTION_CAPTURE_GROUP
#define REG_OPTION_NOTBOL ONIG_OPTION_NOTBOL
#define REG_OPTION_NOTEOL ONIG_OPTION_NOTEOL
#define REG_OPTION_POSIX_REGION ONIG_OPTION_POSIX_REGION
/* option helper macros (renamed only; semantics are those of the ONIG_ macros) */
#define REG_OPTION_ON ONIG_OPTION_ON
#define REG_OPTION_OFF ONIG_OPTION_OFF
#define IS_REG_OPTION_ON ONIG_IS_OPTION_ON
/* syntax */
/* syntax type and the predefined syntax objects */
#define RegSyntaxType OnigSyntaxType
#define RegSyntaxPosixBasic OnigSyntaxPosixBasic
#define RegSyntaxPosixExtended OnigSyntaxPosixExtended
#define RegSyntaxEmacs OnigSyntaxEmacs
#define RegSyntaxGrep OnigSyntaxGrep
#define RegSyntaxGnuRegex OnigSyntaxGnuRegex
#define RegSyntaxJava OnigSyntaxJava
#define RegSyntaxPerl OnigSyntaxPerl
#define RegSyntaxRuby OnigSyntaxRuby
/* REG_SYNTAX_xxx names for the predefined syntaxes and the default syntax */
#define REG_SYNTAX_POSIX_BASIC ONIG_SYNTAX_POSIX_BASIC
#define REG_SYNTAX_POSIX_EXTENDED ONIG_SYNTAX_POSIX_EXTENDED
#define REG_SYNTAX_EMACS ONIG_SYNTAX_EMACS
#define REG_SYNTAX_GREP ONIG_SYNTAX_GREP
#define REG_SYNTAX_GNU_REGEX ONIG_SYNTAX_GNU_REGEX
#define REG_SYNTAX_JAVA ONIG_SYNTAX_JAVA
#define REG_SYNTAX_PERL ONIG_SYNTAX_PERL
#define REG_SYNTAX_RUBY ONIG_SYNTAX_RUBY
#define REG_SYNTAX_DEFAULT ONIG_SYNTAX_DEFAULT
#define RegDefaultSyntax OnigDefaultSyntax
/* syntax (operators) */
/* REG_SYN_OP_xxx -> ONIG_SYN_OP_xxx */
#define REG_SYN_OP_VARIABLE_META_CHARACTERS \
ONIG_SYN_OP_VARIABLE_META_CHARACTERS
#define REG_SYN_OP_DOT_ANYCHAR \
ONIG_SYN_OP_DOT_ANYCHAR
#define REG_SYN_OP_ASTERISK_ZERO_INF \
ONIG_SYN_OP_ASTERISK_ZERO_INF
#define REG_SYN_OP_ESC_ASTERISK_ZERO_INF \
ONIG_SYN_OP_ESC_ASTERISK_ZERO_INF
#define REG_SYN_OP_PLUS_ONE_INF \
ONIG_SYN_OP_PLUS_ONE_INF
#define REG_SYN_OP_ESC_PLUS_ONE_INF \
ONIG_SYN_OP_ESC_PLUS_ONE_INF
#define REG_SYN_OP_QMARK_ZERO_ONE \
ONIG_SYN_OP_QMARK_ZERO_ONE
#define REG_SYN_OP_ESC_QMARK_ZERO_ONE \
ONIG_SYN_OP_ESC_QMARK_ZERO_ONE
#define REG_SYN_OP_BRACE_INTERVAL \
ONIG_SYN_OP_BRACE_INTERVAL
#define REG_SYN_OP_ESC_BRACE_INTERVAL \
ONIG_SYN_OP_ESC_BRACE_INTERVAL
#define REG_SYN_OP_VBAR_ALT \
ONIG_SYN_OP_VBAR_ALT
#define REG_SYN_OP_ESC_VBAR_ALT \
ONIG_SYN_OP_ESC_VBAR_ALT
#define REG_SYN_OP_LPAREN_SUBEXP \
ONIG_SYN_OP_LPAREN_SUBEXP
#define REG_SYN_OP_ESC_LPAREN_SUBEXP \
ONIG_SYN_OP_ESC_LPAREN_SUBEXP
#define REG_SYN_OP_ESC_AZ_BUF_ANCHOR \
ONIG_SYN_OP_ESC_AZ_BUF_ANCHOR
#define REG_SYN_OP_ESC_CAPITAL_G_BEGIN_ANCHOR \
ONIG_SYN_OP_ESC_CAPITAL_G_BEGIN_ANCHOR
#define REG_SYN_OP_DECIMAL_BACKREF \
ONIG_SYN_OP_DECIMAL_BACKREF
#define REG_SYN_OP_BRACKET_CC \
ONIG_SYN_OP_BRACKET_CC
#define REG_SYN_OP_ESC_W_WORD \
ONIG_SYN_OP_ESC_W_WORD
#define REG_SYN_OP_ESC_LTGT_WORD_BEGIN_END \
ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END
#define REG_SYN_OP_ESC_B_WORD_BOUND \
ONIG_SYN_OP_ESC_B_WORD_BOUND
#define REG_SYN_OP_ESC_S_WHITE_SPACE \
ONIG_SYN_OP_ESC_S_WHITE_SPACE
#define REG_SYN_OP_ESC_D_DIGIT \
ONIG_SYN_OP_ESC_D_DIGIT
#define REG_SYN_OP_LINE_ANCHOR \
ONIG_SYN_OP_LINE_ANCHOR
#define REG_SYN_OP_POSIX_BRACKET \
ONIG_SYN_OP_POSIX_BRACKET
#define REG_SYN_OP_QMARK_NON_GREEDY \
ONIG_SYN_OP_QMARK_NON_GREEDY
#define REG_SYN_OP_ESC_CONTROL_CHARS \
ONIG_SYN_OP_ESC_CONTROL_CHARS
#define REG_SYN_OP_ESC_C_CONTROL \
ONIG_SYN_OP_ESC_C_CONTROL
#define REG_SYN_OP_ESC_OCTAL3 \
ONIG_SYN_OP_ESC_OCTAL3
#define REG_SYN_OP_ESC_X_HEX2 \
ONIG_SYN_OP_ESC_X_HEX2
#define REG_SYN_OP_ESC_X_BRACE_HEX8 \
ONIG_SYN_OP_ESC_X_BRACE_HEX8
/* REG_SYN_OP2_xxx -> ONIG_SYN_OP2_xxx (second operator flag set) */
#define REG_SYN_OP2_ESC_CAPITAL_Q_QUOTE \
ONIG_SYN_OP2_ESC_CAPITAL_Q_QUOTE
#define REG_SYN_OP2_QMARK_GROUP_EFFECT \
ONIG_SYN_OP2_QMARK_GROUP_EFFECT
#define REG_SYN_OP2_OPTION_PERL \
ONIG_SYN_OP2_OPTION_PERL
#define REG_SYN_OP2_OPTION_RUBY \
ONIG_SYN_OP2_OPTION_RUBY
#define REG_SYN_OP2_PLUS_POSSESSIVE_REPEAT \
ONIG_SYN_OP2_PLUS_POSSESSIVE_REPEAT
#define REG_SYN_OP2_PLUS_POSSESSIVE_INTERVAL \
ONIG_SYN_OP2_PLUS_POSSESSIVE_INTERVAL
#define REG_SYN_OP2_CCLASS_SET_OP \
ONIG_SYN_OP2_CCLASS_SET_OP
#define REG_SYN_OP2_QMARK_LT_NAMED_GROUP \
ONIG_SYN_OP2_QMARK_LT_NAMED_GROUP
#define REG_SYN_OP2_ESC_K_NAMED_BACKREF \
ONIG_SYN_OP2_ESC_K_NAMED_BACKREF
#define REG_SYN_OP2_ESC_G_SUBEXP_CALL \
ONIG_SYN_OP2_ESC_G_SUBEXP_CALL
#define REG_SYN_OP2_ATMARK_CAPTURE_HISTORY \
ONIG_SYN_OP2_ATMARK_CAPTURE_HISTORY
#define REG_SYN_OP2_ESC_CAPITAL_C_BAR_CONTROL \
ONIG_SYN_OP2_ESC_CAPITAL_C_BAR_CONTROL
#define REG_SYN_OP2_ESC_CAPITAL_M_BAR_META \
ONIG_SYN_OP2_ESC_CAPITAL_M_BAR_META
#define REG_SYN_OP2_ESC_V_VTAB \
ONIG_SYN_OP2_ESC_V_VTAB
#define REG_SYN_OP2_ESC_U_HEX4 \
ONIG_SYN_OP2_ESC_U_HEX4
#define REG_SYN_OP2_ESC_GNU_BUF_ANCHOR \
ONIG_SYN_OP2_ESC_GNU_BUF_ANCHOR
/*
 * Remaining REG_SYN_xxx -> ONIG_SYN_xxx flags (no _OP_/_OP2_ infix).
 * NOTE(review): presumably these are the syntax "behavior" flags, as opposed
 * to the operator flags above -- confirm against oniguruma.h.
 */
#define REG_SYN_CONTEXT_INDEP_ANCHORS \
ONIG_SYN_CONTEXT_INDEP_ANCHORS
#define REG_SYN_CONTEXT_INDEP_REPEAT_OPS \
ONIG_SYN_CONTEXT_INDEP_REPEAT_OPS
#define REG_SYN_CONTEXT_INVALID_REPEAT_OPS \
ONIG_SYN_CONTEXT_INVALID_REPEAT_OPS
#define REG_SYN_ALLOW_UNMATCHED_CLOSE_SUBEXP \
ONIG_SYN_ALLOW_UNMATCHED_CLOSE_SUBEXP
#define REG_SYN_ALLOW_INVALID_INTERVAL \
ONIG_SYN_ALLOW_INVALID_INTERVAL
#define REG_SYN_STRICT_CHECK_BACKREF \
ONIG_SYN_STRICT_CHECK_BACKREF
#define REG_SYN_DIFFERENT_LEN_ALT_LOOK_BEHIND \
ONIG_SYN_DIFFERENT_LEN_ALT_LOOK_BEHIND
#define REG_SYN_CAPTURE_ONLY_NAMED_GROUP \
ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP
#define REG_SYN_ALLOW_MULTIPLEX_DEFINITION_NAME \
ONIG_SYN_ALLOW_MULTIPLEX_DEFINITION_NAME
#define REG_SYN_NOT_NEWLINE_IN_NEGATIVE_CC \
ONIG_SYN_NOT_NEWLINE_IN_NEGATIVE_CC
#define REG_SYN_BACKSLASH_ESCAPE_IN_CC \
ONIG_SYN_BACKSLASH_ESCAPE_IN_CC
#define REG_SYN_ALLOW_EMPTY_RANGE_IN_CC \
ONIG_SYN_ALLOW_EMPTY_RANGE_IN_CC
#define REG_SYN_ALLOW_DOUBLE_RANGE_OP_IN_CC \
ONIG_SYN_ALLOW_DOUBLE_RANGE_OP_IN_CC
#define REG_SYN_WARN_CC_OP_NOT_ESCAPED \
ONIG_SYN_WARN_CC_OP_NOT_ESCAPED
#define REG_SYN_WARN_REDUNDANT_NESTED_REPEAT \
ONIG_SYN_WARN_REDUNDANT_NESTED_REPEAT
/* meta character specifiers (regex_set_meta_char()) */
#define REG_META_CHAR_ESCAPE ONIG_META_CHAR_ESCAPE
#define REG_META_CHAR_ANYCHAR ONIG_META_CHAR_ANYCHAR
#define REG_META_CHAR_ANYTIME ONIG_META_CHAR_ANYTIME
#define REG_META_CHAR_ZERO_OR_ONE_TIME ONIG_META_CHAR_ZERO_OR_ONE_TIME
#define REG_META_CHAR_ONE_OR_MORE_TIME ONIG_META_CHAR_ONE_OR_MORE_TIME
#define REG_META_CHAR_ANYCHAR_ANYTIME ONIG_META_CHAR_ANYCHAR_ANYTIME
#define REG_INEFFECTIVE_META_CHAR ONIG_INEFFECTIVE_META_CHAR
/* error codes */
#define REG_IS_PATTERN_ERROR ONIG_IS_PATTERN_ERROR
/* normal return */
#define REG_NORMAL ONIG_NORMAL
#define REG_MISMATCH ONIG_MISMATCH
#define REG_NO_SUPPORT_CONFIG ONIG_NO_SUPPORT_CONFIG
/* internal error */
#define REGERR_MEMORY ONIGERR_MEMORY
#define REGERR_MATCH_STACK_LIMIT_OVER ONIGERR_MATCH_STACK_LIMIT_OVER
#define REGERR_TYPE_BUG ONIGERR_TYPE_BUG
#define REGERR_PARSER_BUG ONIGERR_PARSER_BUG
#define REGERR_STACK_BUG ONIGERR_STACK_BUG
#define REGERR_UNDEFINED_BYTECODE ONIGERR_UNDEFINED_BYTECODE
#define REGERR_UNEXPECTED_BYTECODE ONIGERR_UNEXPECTED_BYTECODE
#define REGERR_DEFAULT_ENCODING_IS_NOT_SETTED \
ONIGERR_DEFAULT_ENCODING_IS_NOT_SETTED
#define REGERR_SPECIFIED_ENCODING_CANT_CONVERT_TO_WIDE_CHAR \
ONIGERR_SPECIFIED_ENCODING_CANT_CONVERT_TO_WIDE_CHAR
/* general error */
#define REGERR_INVALID_ARGUMENT ONIGERR_INVALID_ARGUMENT
/* errors related to thread */
#define REGERR_OVER_THREAD_PASS_LIMIT_COUNT \
ONIGERR_OVER_THREAD_PASS_LIMIT_COUNT
/* must be smaller than BIT_STATUS_BITS_NUM (unsigned int * 8) */
#define REG_MAX_CAPTURE_HISTORY_GROUP ONIG_MAX_CAPTURE_HISTORY_GROUP
#define REG_IS_CAPTURE_HISTORY_GROUP ONIG_IS_CAPTURE_HISTORY_GROUP
/* region, error-info, repeat-range and warning-callback aliases */
#define REG_REGION_NOTPOS ONIG_REGION_NOTPOS
#define RegRegion OnigRegion
#define RegErrorInfo OnigErrorInfo
#define RegRepeatRange OnigRepeatRange
#define RegWarnFunc OnigWarnFunc
#define regex_null_warn onig_null_warn
#define REG_NULL_WARN ONIG_NULL_WARN
/* regex_t state */
#define REG_STATE_NORMAL ONIG_STATE_NORMAL
#define REG_STATE_SEARCHING ONIG_STATE_SEARCHING
#define REG_STATE_COMPILING ONIG_STATE_COMPILING
#define REG_STATE_MODIFY ONIG_STATE_MODIFY
#define REG_STATE ONIG_STATE
/* Oniguruma Native API */
/* old regex_xxx() entry points -> onig_xxx() */
#define regex_init onig_init
#define regex_error_code_to_str onig_error_code_to_str
#define regex_set_warn_func onig_set_warn_func
#define regex_set_verb_warn_func onig_set_verb_warn_func
#define regex_new onig_new
#define regex_free onig_free
#define regex_recompile onig_recompile
#define regex_search onig_search
#define regex_match onig_match
#define regex_region_new onig_region_new
#define regex_region_free onig_region_free
#define regex_region_copy onig_region_copy
#define regex_region_clear onig_region_clear
#define regex_region_resize onig_region_resize
#define regex_name_to_group_numbers onig_name_to_group_numbers
#define regex_name_to_backref_number onig_name_to_backref_number
#define regex_foreach_name onig_foreach_name
#define regex_number_of_names onig_number_of_names
#define regex_get_encoding onig_get_encoding
#define regex_get_options onig_get_options
#define regex_get_syntax onig_get_syntax
#define regex_set_default_syntax onig_set_default_syntax
#define regex_copy_syntax onig_copy_syntax
#define regex_set_meta_char onig_set_meta_char
#define regex_end onig_end
#define regex_version onig_version
/* encoding API */
#define enc_get_prev_char_head onigenc_get_prev_char_head
#define enc_get_left_adjust_char_head onigenc_get_left_adjust_char_head
#define enc_get_right_adjust_char_head onigenc_get_right_adjust_char_head
/* obsoleted API */
/* same three onigenc_ targets as the enc_ names above, under their regex_ spellings */
#define regex_get_prev_char_head onigenc_get_prev_char_head
#define regex_get_left_adjust_char_head onigenc_get_left_adjust_char_head
#define regex_get_right_adjust_char_head onigenc_get_right_adjust_char_head
#endif /* ONIGCMPT200_H */

View file

@ -0,0 +1,77 @@
/**********************************************************************
oniggnu.h - Oniguruma (regular expression library)
Copyright (C) 2004 K.Kosako (kosako@sofnec.co.jp)
**********************************************************************/
/*
 * GNU-regex-style front end: declares the re_xxx() functions and maps the
 * RE_OPTION_xxx names onto ONIG_OPTION_xxx. Pulls in oniguruma.h itself.
 */
#ifndef ONIGGNU_H
#define ONIGGNU_H
#include "oniguruma.h"
/* integer encoding identifiers (used by re_mbcinit(int) below when ONIG_RUBY_M17N is not defined) */
#define MBCTYPE_ASCII 0
#define MBCTYPE_EUC 1
#define MBCTYPE_SJIS 2
#define MBCTYPE_UTF8 3
/* GNU regex options */
#ifndef RE_NREGS
#define RE_NREGS ONIG_NREGION
#endif
#define RE_OPTION_IGNORECASE ONIG_OPTION_IGNORECASE
#define RE_OPTION_EXTENDED ONIG_OPTION_EXTEND
#define RE_OPTION_MULTILINE ONIG_OPTION_MULTILINE
#define RE_OPTION_SINGLELINE ONIG_OPTION_SINGLELINE
#define RE_OPTION_LONGEST ONIG_OPTION_FIND_LONGEST
#define RE_OPTION_POSIXLINE (RE_OPTION_MULTILINE|RE_OPTION_SINGLELINE)
/*
 * When RUBY_PLATFORM is defined, every re_xxx name is renamed to ruby_re_xxx
 * (so the declarations below declare the ruby_-prefixed symbols), and the
 * error-string / register-copy helpers are forwarded on to the onig_ functions.
 * Otherwise only those two helpers are aliased, directly to onig_.
 */
#ifdef RUBY_PLATFORM
#define re_mbcinit ruby_re_mbcinit
#define re_compile_pattern ruby_re_compile_pattern
#define re_recompile_pattern ruby_re_recompile_pattern
#define re_free_pattern ruby_re_free_pattern
#define re_adjust_startpos ruby_re_adjust_startpos
#define re_search ruby_re_search
#define re_match ruby_re_match
#define re_set_casetable ruby_re_set_casetable
#define re_copy_registers ruby_re_copy_registers
#define re_free_registers ruby_re_free_registers
#define register_info_type ruby_register_info_type
#define re_error_code_to_str ruby_error_code_to_str
#define ruby_error_code_to_str onig_error_code_to_str
#define ruby_re_copy_registers onig_region_copy
#else
#define re_error_code_to_str onig_error_code_to_str
#define re_copy_registers onig_region_copy
#endif
/* re_mbcinit takes an OnigEncoding under ONIG_RUBY_M17N, a plain int otherwise */
#ifdef ONIG_RUBY_M17N
ONIG_EXTERN
void re_mbcinit P_((OnigEncoding));
#else
ONIG_EXTERN
void re_mbcinit P_((int));
#endif
/*
 * GNU-style entry points.
 * NOTE(review): struct re_pattern_buffer, struct re_registers, P_ and
 * ONIG_EXTERN are not defined in this header; presumably they come from
 * oniguruma.h -- confirm.
 */
ONIG_EXTERN
int re_compile_pattern P_((const char*, int, struct re_pattern_buffer*, char* err_buf));
ONIG_EXTERN
int re_recompile_pattern P_((const char*, int, struct re_pattern_buffer*, char* err_buf));
ONIG_EXTERN
void re_free_pattern P_((struct re_pattern_buffer*));
ONIG_EXTERN
int re_adjust_startpos P_((struct re_pattern_buffer*, const char*, int, int, int));
ONIG_EXTERN
int re_search P_((struct re_pattern_buffer*, const char*, int, int, int, struct re_registers*));
ONIG_EXTERN
int re_match P_((struct re_pattern_buffer*, const char *, int, int, struct re_registers*));
ONIG_EXTERN
void re_set_casetable P_((const char*));
ONIG_EXTERN
void re_free_registers P_((struct re_registers*));
ONIG_EXTERN
int re_alloc_pattern P_((struct re_pattern_buffer**)); /* added */
#endif /* ONIGGNU_H */

View file

@ -2,7 +2,7 @@
onigposix.h - Oniguruma (regular expression library)
Copyright (C) 2003 K.Kosako (kosako@sofnec.co.jp)
Copyright (C) 2003-2004 K.Kosako (kosako@sofnec.co.jp)
**********************************************************************/
#ifndef ONIGPOSIX_H
@ -13,7 +13,7 @@
#define REG_NEWLINE (1<<1)
#define REG_NOTBOL (1<<2)
#define REG_NOTEOL (1<<3)
#define REG_EXTENDED (1<<4) /* if not setted, Basic Regular Expression */
#define REG_EXTENDED (1<<4) /* if not set, Basic Regular Expression */
#define REG_NOSUB (1<<5)
/* POSIX error codes */
@ -38,11 +38,10 @@
#define REG_EONIG_THREAD 17
/* character encodings (for reg_set_encoding()) */
/* These value must be same with MBCTYPE_XXXX in oniguruma.h.*/
#define REG_ENCODING_ASCII 0
#define REG_ENCODING_EUC_JP 1
#define REG_ENCODING_SJIS 2
#define REG_ENCODING_UTF8 3
#define REG_POSIX_ENCODING_ASCII 0
#define REG_POSIX_ENCODING_EUC_JP 1
#define REG_POSIX_ENCODING_SJIS 2
#define REG_POSIX_ENCODING_UTF8 3
#include <stdlib.h>
@ -63,73 +62,75 @@ typedef struct {
#ifndef P_
#ifdef __STDC__
#if defined(__STDC__) || defined(_WIN32)
# define P_(args) args
#else
# define P_(args) ()
#endif
#endif
#ifndef REG_EXTERN
#ifndef ONIG_EXTERN
#if defined(_WIN32) && !defined(__CYGWIN__)
#if defined(EXPORT) || defined(RUBY_EXPORT)
#define REG_EXTERN extern __declspec(dllexport)
#elif defined(IMPORT)
#define REG_EXTERN extern __declspec(dllimport)
#define ONIG_EXTERN extern __declspec(dllexport)
#else
#define ONIG_EXTERN extern __declspec(dllimport)
#endif
#endif
#endif
#ifndef REG_EXTERN
#define REG_EXTERN extern
#ifndef ONIG_EXTERN
#define ONIG_EXTERN extern
#endif
#ifndef ONIGURUMA_H
typedef unsigned int RegOptionType;
typedef unsigned int OnigOptionType;
/* syntax */
typedef struct {
unsigned int op;
unsigned int op2;
unsigned int behavior;
RegOptionType options; /* default option */
} RegSyntaxType;
OnigOptionType options; /* default option */
} OnigSyntaxType;
REG_EXTERN RegSyntaxType RegSyntaxPosixBasic;
REG_EXTERN RegSyntaxType RegSyntaxPosixExtended;
REG_EXTERN RegSyntaxType RegSyntaxEmacs;
REG_EXTERN RegSyntaxType RegSyntaxGrep;
REG_EXTERN RegSyntaxType RegSyntaxGnuRegex;
REG_EXTERN RegSyntaxType RegSyntaxJava;
REG_EXTERN RegSyntaxType RegSyntaxPerl;
REG_EXTERN RegSyntaxType RegSyntaxRuby;
ONIG_EXTERN OnigSyntaxType OnigSyntaxPosixBasic;
ONIG_EXTERN OnigSyntaxType OnigSyntaxPosixExtended;
ONIG_EXTERN OnigSyntaxType OnigSyntaxEmacs;
ONIG_EXTERN OnigSyntaxType OnigSyntaxGrep;
ONIG_EXTERN OnigSyntaxType OnigSyntaxGnuRegex;
ONIG_EXTERN OnigSyntaxType OnigSyntaxJava;
ONIG_EXTERN OnigSyntaxType OnigSyntaxPerl;
ONIG_EXTERN OnigSyntaxType OnigSyntaxRuby;
/* predefined syntaxes (see regcomp.c) */
#define REG_SYNTAX_POSIX_BASIC (&RegSyntaxPosixBasic)
#define REG_SYNTAX_POSIX_EXTENDED (&RegSyntaxPosixExtended)
#define REG_SYNTAX_EMACS (&RegSyntaxEmacs)
#define REG_SYNTAX_GREP (&RegSyntaxGrep)
#define REG_SYNTAX_GNU_REGEX (&RegSyntaxGnuRegex)
#define REG_SYNTAX_JAVA (&RegSyntaxJava)
#define REG_SYNTAX_PERL (&RegSyntaxPerl)
#define REG_SYNTAX_RUBY (&RegSyntaxRuby)
/* predefined syntaxes (see regparse.c) */
#define ONIG_SYNTAX_POSIX_BASIC (&OnigSyntaxPosixBasic)
#define ONIG_SYNTAX_POSIX_EXTENDED (&OnigSyntaxPosixExtended)
#define ONIG_SYNTAX_EMACS (&OnigSyntaxEmacs)
#define ONIG_SYNTAX_GREP (&OnigSyntaxGrep)
#define ONIG_SYNTAX_GNU_REGEX (&OnigSyntaxGnuRegex)
#define ONIG_SYNTAX_JAVA (&OnigSyntaxJava)
#define ONIG_SYNTAX_PERL (&OnigSyntaxPerl)
#define ONIG_SYNTAX_RUBY (&OnigSyntaxRuby)
/* default syntax */
#define REG_SYNTAX_DEFAULT RegDefaultSyntax
#define ONIG_SYNTAX_DEFAULT OnigDefaultSyntax
REG_EXTERN RegSyntaxType* RegDefaultSyntax;
ONIG_EXTERN OnigSyntaxType* OnigDefaultSyntax;
REG_EXTERN int regex_set_default_syntax P_((RegSyntaxType* syntax));
ONIG_EXTERN int onig_set_default_syntax P_((OnigSyntaxType* syntax));
ONIG_EXTERN void onig_copy_syntax P_((OnigSyntaxType* to, OnigSyntaxType* from));
#endif /* ONIGURUMA_H */
REG_EXTERN int regcomp P_((regex_t* reg, const char* pat, int options));
REG_EXTERN int regexec P_((regex_t* reg, const char* str, size_t nmatch, regmatch_t* matches, int options));
REG_EXTERN void regfree P_((regex_t* reg));
REG_EXTERN size_t regerror P_((int code, const regex_t* reg, char* buf, size_t size));
ONIG_EXTERN int regcomp P_((regex_t* reg, const char* pat, int options));
ONIG_EXTERN int regexec P_((regex_t* reg, const char* str, size_t nmatch, regmatch_t* matches, int options));
ONIG_EXTERN void regfree P_((regex_t* reg));
ONIG_EXTERN size_t regerror P_((int code, const regex_t* reg, char* buf, size_t size));
/* extended API */
REG_EXTERN void reg_set_encoding P_((int enc));
REG_EXTERN int reg_name_to_group_numbers P_((regex_t* reg, unsigned char* name, unsigned char* name_end, int** nums));
REG_EXTERN int reg_foreach_name P_((regex_t* reg, int (*func)(unsigned char*,int,int*,void*), void* arg));
ONIG_EXTERN void reg_set_encoding P_((int enc));
ONIG_EXTERN int reg_name_to_group_numbers P_((regex_t* reg, unsigned char* name, unsigned char* name_end, int** nums));
ONIG_EXTERN int reg_foreach_name P_((regex_t* reg, int (*func)(unsigned char*,unsigned char*,int,int*,regex_t*,void*), void* arg));
ONIG_EXTERN int reg_number_of_names P_((regex_t* reg));
#endif /* ONIGPOSIX_H */

File diff suppressed because it is too large Load diff

View file

@ -1,46 +1,7 @@
#ifndef _PHP_MBREGEX_COMPAT_H
#define _PHP_MBREGEX_COMPAT_H
#define RegCharEncoding php_mb_reg_char_encoding
#define RegRegion php_mb_reg_region
#define RegDefaultCharEncoding php_mb_reg_default_char_encoding
#define REG_MBLEN_TABLE PHP_MBSTR_REG_MBLEN_TABLE
#define RegSyntaxType php_mb_reg_syntax_type
#define RegOptionType php_mb_reg_option_type
#define re_registers php_mb_re_registers
#define RegErrorInfo php_mb_reg_error_info
#define re_pattern_buffer php_mb_re_pattern_buffer
#define regex_t php_mb_regex_t
#define regex_init php_mb_regex_init
#define regex_new php_mb_regex_new
#define regex_free php_mb_regex_free
#define regex_recompile php_mb_regex_recompile
#define regex_search php_mb_regex_search
#define regex_match php_mb_regex_match
#define regex_region_new php_mb_regex_region_new
#define regex_region_free php_mb_regex_region_free
#define regex_region_copy php_mb_regex_region_copy
#define regex_region_clear php_mb_regex_region_clear
#define regex_region_resize php_mb_regex_region_resize
#define regex_name_to_group_numbers php_mb_regex_name_to_group_numbers
#define regex_foreach_names php_mb_regex_foreach_names
#define regex_get_prev_char_head php_mb_regex_get_prev_char_head
#define regex_get_left_adjust_char_head php_mb_get_left_adjust_char_head
#define regex_get_right_adjust_char_head php_mb_get_right_adjust_char_head
#define regex_set_default_trans_table php_mb_get_default_trans_table
#define regex_set_default_syntax php_mb_regex_set_default_syntax
#define regex_end php_mb_regex_end
#define re_mbcinit php_mb_re_mbcinit
#define re_compile_pattern php_mb_re_compile_pattern
#define re_recompile_pattern php_mb_re_recompile_pattern
#define re_free_pattern php_mb_re_free_pattern
#define re_adjust_startpos php_mb_re_adjust_startpos
#define re_search php_mb_re_search
#define re_match php_mb_re_match
#define re_set_casetable php_mb_re_set_casetable
#define php_mbregex_region_copy php_mb_re_copy_registers
#define re_free_registers php_mb_re_free_registers
#define register_info_type php_mb_register_info_type
#define regex_error_code_to_str php_mb_regex_error_code_to_str
#endif /* _PHP_MBREGEX_COMPAT_H */

View file

@ -1,56 +0,0 @@
--- re.c.ruby_orig Tue Feb 4 15:52:29 2003
+++ re.c Tue Mar 18 19:37:49 2003
@@ -380,7 +380,8 @@ make_regexp(s, len, flag)
int len, flag;
{
Regexp *rp;
- char *err;
+ char err[REG_MAX_ERROR_MESSAGE_LEN];
+ int r;
/* Handle escaped characters first. */
@@ -389,16 +390,17 @@ make_regexp(s, len, flag)
from that.
*/
- rp = ALLOC(Regexp);
- MEMZERO((char *)rp, Regexp, 1);
- rp->buffer = ALLOC_N(char, 16);
- rp->allocated = 16;
- rp->fastmap = ALLOC_N(char, 256);
+ r = re_alloc_pattern(&rp);
+ if (r) {
+ re_error_code_to_str(err, r);
+ rb_reg_raise(s, len, err, 0);
+ }
+
if (flag) {
rp->options = flag;
}
- err = re_compile_pattern(s, len, rp);
- if (err != NULL) {
+ r = re_compile_pattern(s, len, rp, err);
+ if (r != 0) {
rb_reg_raise(s, len, err, 0);
}
@@ -532,14 +534,14 @@ rb_reg_prepare_re(re)
}
if (need_recompile) {
- char *err;
+ char err[REG_MAX_ERROR_MESSAGE_LEN];
+ int r;
if (FL_TEST(re, KCODE_FIXED))
kcode_set_option(re);
rb_reg_check(re);
- RREGEXP(re)->ptr->fastmap_accurate = 0;
- err = re_compile_pattern(RREGEXP(re)->str, RREGEXP(re)->len, RREGEXP(re)->ptr);
- if (err != NULL) {
+ r = re_recompile_pattern(RREGEXP(re)->str, RREGEXP(re)->len, RREGEXP(re)->ptr, err);
+ if (r != 0) {
rb_reg_raise(RREGEXP(re)->str, RREGEXP(re)->len, err, re);
}
}

View file

@ -1,66 +0,0 @@
--- re.c.ruby_orig Fri Feb 7 15:35:26 2003
+++ re.c Tue Mar 18 18:51:21 2003
@@ -444,7 +444,7 @@ rb_reg_to_s(re)
kcode_set_option(re);
rp = ALLOC(Regexp);
MEMZERO((char *)rp, Regexp, 1);
- err = re_compile_pattern(++ptr, len -= 2, rp) != 0;
+ err = (re_compile_pattern(++ptr, len -= 2, rp, NULL) != 0);
kcode_reset_option();
re_free_pattern(rp);
}
@@ -538,7 +538,8 @@ make_regexp(s, len, flags)
int flags;
{
Regexp *rp;
- char *err;
+ char err[REG_MAX_ERROR_MESSAGE_LEN];
+ int r;
/* Handle escaped characters first. */
@@ -547,17 +548,18 @@ make_regexp(s, len, flags)
from that.
*/
- rp = ALLOC(Regexp);
- MEMZERO((char *)rp, Regexp, 1);
- rp->buffer = ALLOC_N(char, 16);
- rp->allocated = 16;
- rp->fastmap = ALLOC_N(char, 256);
+ r = re_alloc_pattern(&rp);
+ if (r) {
+ re_error_code_to_str((UChar* )err, r);
+ rb_reg_raise(s, len, err, 0);
+ }
+
if (flags) {
rp->options = flags;
}
- err = re_compile_pattern(s, len, rp);
+ r = re_compile_pattern(s, len, rp, err);
- if (err != NULL) {
+ if (r != 0) {
rb_reg_raise(s, len, err, 0);
}
return rp;
@@ -692,14 +694,14 @@ rb_reg_prepare_re(re)
}
if (need_recompile) {
- char *err;
+ char err[REG_MAX_ERROR_MESSAGE_LEN];
+ int r;
if (FL_TEST(re, KCODE_FIXED))
kcode_set_option(re);
rb_reg_check(re);
- RREGEXP(re)->ptr->fastmap_accurate = 0;
- err = re_compile_pattern(RREGEXP(re)->str, RREGEXP(re)->len, RREGEXP(re)->ptr);
- if (err != NULL) {
+ r = re_recompile_pattern(RREGEXP(re)->str, RREGEXP(re)->len, RREGEXP(re)->ptr, err);
+ if (r != 0) {
rb_reg_raise(RREGEXP(re)->str, RREGEXP(re)->len, err, re);
}
}

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,586 @@
/**********************************************************************
regenc.c - Oniguruma (regular expression library)
Copyright (C) 2003-2004 K.Kosako (kosako@sofnec.co.jp)
**********************************************************************/
#include "regenc.h"
/* Default encoding: returned by onigenc_get_default_encoding() and
   replaced by onigenc_set_default_encoding(). */
OnigEncoding OnigEncDefaultCharEncoding = ONIG_ENCODING_INIT_DEFAULT;
/*
 * Initialise the encoding layer.
 * There is currently nothing to set up, so the call always returns 0.
 */
extern int
onigenc_init()
{
  int status = 0;

  return status;
}
/* Return the encoding currently stored in OnigEncDefaultCharEncoding. */
extern OnigEncoding
onigenc_get_default_encoding()
{
  OnigEncoding current = OnigEncDefaultCharEncoding;

  return current;
}
/*
 * Replace the default encoding with `enc`.
 * No validation is performed; the function always returns 0.
 */
extern int
onigenc_set_default_encoding(OnigEncoding enc)
{
  int status = 0;

  OnigEncDefaultCharEncoding = enc;
  return status;
}
/*
 * Compute the left-adjusted character head for `s` and, when that head
 * lies before `s`, move forward by the length of the character starting
 * there.  Otherwise the left-adjusted head is returned unchanged.
 */
extern UChar*
onigenc_get_right_adjust_char_head(OnigEncoding enc, UChar* start, UChar* s)
{
  UChar* head = ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, start, s);

  if (head >= s)
    return head;

  return head + enc_len(enc, *head);
}
/*
 * Same as onigenc_get_right_adjust_char_head(), but also reports through
 * `prev` (when it is non-null) the head that was skipped over.
 * If no skip was needed, *prev is set to NULL.
 */
extern UChar*
onigenc_get_right_adjust_char_head_with_prev(OnigEncoding enc,
                                             UChar* start, UChar* s, UChar** prev)
{
  UChar* head = ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, start, s);

  if (head >= s) {
    /* Nothing was skipped, so no previous head is reported. */
    if (prev) *prev = (UChar* )NULL;
    return head;
  }

  if (prev) *prev = head;
  return head + enc_len(enc, *head);
}
/*
 * Return the left-adjusted character head for the byte just before `s`,
 * or NULL when `s` is at or before `start`.
 */
extern UChar*
onigenc_get_prev_char_head(OnigEncoding enc, UChar* start, UChar* s)
{
  if (s > start)
    return ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, start, s - 1);

  return (UChar* )NULL;
}
/*
 * Move `s` back by `n` characters, never going before `start`.
 * Returns the resulting position, or NULL if `start` is reached while
 * steps are still outstanding.  A null `s` is returned unchanged.
 */
extern UChar*
onigenc_step_back(OnigEncoding enc, UChar* start, UChar* s, int n)
{
  UChar* cur = s;

  for (; ONIG_IS_NOT_NULL(cur) && n > 0; n--) {
    if (cur <= start)
      return (UChar* )NULL;

    cur = ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, start, cur - 1);
  }
  return cur;
}
#ifndef ONIG_RUBY_M17N
/* Unless NOT_RUBY is defined, no built-in lower-case table is compiled in
   and the application has to supply one. */
#ifndef NOT_RUBY
#define USE_APPLICATION_TO_LOWER_CASE_TABLE
#endif
/* Table consulted by ONIGENC_ASCII_CODE_TO_LOWER_CASE(); it starts out null
   and is installed through onigenc_set_default_caseconv_table(). */
UChar* OnigEncAsciiToLowerCaseTable = (UChar* )0;
#ifndef USE_APPLICATION_TO_LOWER_CASE_TABLE
/* Built-in byte-to-lower-case map, indexed by byte value (octal entries):
   'A'-'Z' (0101-0132) map to 'a'-'z' (0141-0172); every other byte value
   maps to itself. */
static UChar BuiltInAsciiToLowerCaseTable[] = {
'\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007',
'\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017',
'\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027',
'\030', '\031', '\032', '\033', '\034', '\035', '\036', '\037',
'\040', '\041', '\042', '\043', '\044', '\045', '\046', '\047',
'\050', '\051', '\052', '\053', '\054', '\055', '\056', '\057',
'\060', '\061', '\062', '\063', '\064', '\065', '\066', '\067',
'\070', '\071', '\072', '\073', '\074', '\075', '\076', '\077',
'\100', '\141', '\142', '\143', '\144', '\145', '\146', '\147',
'\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157',
'\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167',
'\170', '\171', '\172', '\133', '\134', '\135', '\136', '\137',
'\140', '\141', '\142', '\143', '\144', '\145', '\146', '\147',
'\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157',
'\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167',
'\170', '\171', '\172', '\173', '\174', '\175', '\176', '\177',
'\200', '\201', '\202', '\203', '\204', '\205', '\206', '\207',
'\210', '\211', '\212', '\213', '\214', '\215', '\216', '\217',
'\220', '\221', '\222', '\223', '\224', '\225', '\226', '\227',
'\230', '\231', '\232', '\233', '\234', '\235', '\236', '\237',
'\240', '\241', '\242', '\243', '\244', '\245', '\246', '\247',
'\250', '\251', '\252', '\253', '\254', '\255', '\256', '\257',
'\260', '\261', '\262', '\263', '\264', '\265', '\266', '\267',
'\270', '\271', '\272', '\273', '\274', '\275', '\276', '\277',
'\300', '\301', '\302', '\303', '\304', '\305', '\306', '\307',
'\310', '\311', '\312', '\313', '\314', '\315', '\316', '\317',
'\320', '\321', '\322', '\323', '\324', '\325', '\326', '\327',
'\330', '\331', '\332', '\333', '\334', '\335', '\336', '\337',
'\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347',
'\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357',
'\360', '\361', '\362', '\363', '\364', '\365', '\366', '\367',
'\370', '\371', '\372', '\373', '\374', '\375', '\376', '\377',
};
#endif /* not USE_APPLICATION_TO_LOWER_CASE_TABLE */
/* Per-byte character-type bit masks, indexed by byte value and tested with
   ONIGENC_IS_ASCII_CODE_CTYPE().  Entries 0x80-0xff are all zero, so no
   ctype bit is ever reported for bytes outside the ASCII range.
   NOTE(review): the bit layout presumably follows the ONIGENC_CTYPE_*
   constants declared in oniguruma.h -- confirm there before editing. */
unsigned short OnigEncAsciiCtypeTable[256] = {
0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004,
0x1004, 0x1106, 0x1104, 0x1104, 0x1104, 0x1104, 0x1004, 0x1004,
0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004,
0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004,
0x1142, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x10d0,
0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x10d0,
0x1c58, 0x1c58, 0x1c58, 0x1c58, 0x1c58, 0x1c58, 0x1c58, 0x1c58,
0x1c58, 0x1c58, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x10d0,
0x10d0, 0x1e51, 0x1e51, 0x1e51, 0x1e51, 0x1e51, 0x1e51, 0x1a51,
0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51,
0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51,
0x1a51, 0x1a51, 0x1a51, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x18d0,
0x10d0, 0x1c71, 0x1c71, 0x1c71, 0x1c71, 0x1c71, 0x1c71, 0x1871,
0x1871, 0x1871, 0x1871, 0x1871, 0x1871, 0x1871, 0x1871, 0x1871,
0x1871, 0x1871, 0x1871, 0x1871, 0x1871, 0x1871, 0x1871, 0x1871,
0x1871, 0x1871, 0x1871, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x1004,
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000
};
/*
 * Install `table` as the table used for ASCII lower-casing.
 * A null `table` selects the built-in table when one is compiled in;
 * when the application is expected to supply the table, a null argument
 * leaves the current setting untouched.
 */
extern void
onigenc_set_default_caseconv_table(UChar* table)
{
#ifndef USE_APPLICATION_TO_LOWER_CASE_TABLE
  if (table == (UChar* )0)
    table = BuiltInAsciiToLowerCaseTable;
#else
  if (table == (UChar* )0)
    return ;
#endif

  OnigEncAsciiToLowerCaseTable = table;
}
/* Function form of the ONIGENC_LEFT_ADJUST_CHAR_HEAD() macro. */
extern UChar*
onigenc_get_left_adjust_char_head(OnigEncoding enc, UChar* start, UChar* s)
{
  UChar* head = ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, start, s);

  return head;
}
/*
 * "No fold matches" implementation: leaves `codes` untouched and
 * returns 0.
 */
extern int
onigenc_nothing_get_all_fold_match_code(OnigCodePoint** codes)
{
  int count = 0;

  return count;
}
/*
 * "No fold matches" implementation: never fills in `info` and always
 * returns -1.
 */
extern int
onigenc_nothing_get_fold_match_info(UChar* p, UChar* end,
                                    OnigEncFoldMatchInfo** info)
{
  int not_found = -1;

  return not_found;
}
/*
 * "No ctype ranges" implementation: none of the output arguments is
 * written and the result is always -1.
 */
extern int
onigenc_nothing_get_ctype_code_range(int ctype, int* nsb, int* nmb,
                                     OnigCodePointRange* sbr[], OnigCodePointRange* mbr[])
{
  int unsupported = -1;

  return unsupported;
}
/* for single byte encodings */
/*
 * Lower-case the single byte at `p` through the ASCII lower-case table
 * and store it in *lower.  Returns the byte length of the converted
 * character, which is always 1.
 */
extern int
onigenc_ascii_mbc_to_lower(UChar* p, UChar* lower)
{
  UChar folded = ONIGENC_ASCII_CODE_TO_LOWER_CASE(*p);

  *lower = folded;
  return 1;
}
/*
 * Report whether the byte at `p` is flagged as upper- or lower-case in
 * the ASCII ctype table.
 */
extern int
onigenc_ascii_mbc_is_case_ambig(UChar* p)
{
  int ambiguous = ONIGENC_IS_ASCII_CODE_CASE_AMBIG(*p);

  return ambiguous;
}
/* Single-byte encodings: the code point is the byte at `p`; `end` is unused. */
extern OnigCodePoint
onigenc_single_byte_mbc_to_code(UChar* p, UChar* end)
{
  OnigCodePoint code = (OnigCodePoint )(*p);

  return code;
}
/* Single-byte encodings: every code is reported as one byte long. */
extern int
onigenc_single_byte_code_to_mbclen(OnigCodePoint code)
{
  int length = 1;

  return length;
}
/* Single-byte encodings: the first byte is the low 8 bits of `code`. */
extern int
onigenc_single_byte_code_to_mbc_first(OnigCodePoint code)
{
  OnigCodePoint low_byte = code & 0xff;

  return low_byte;
}
/*
 * Single-byte encodings: store the low 8 bits of `code` in buf[0] and
 * return the number of bytes written (always 1).
 */
extern int
onigenc_single_byte_code_to_mbc(OnigCodePoint code, UChar *buf)
{
  buf[0] = (code & 0xff);
  return 1;
}
/*
 * Single-byte encodings: `s` is returned as-is; `start` is unused.
 */
extern UChar*
onigenc_single_byte_left_adjust_char_head(UChar* start, UChar* s)
{
  UChar* head = s;

  return head;
}
/* Single-byte encodings: always answers TRUE; both arguments are unused. */
extern int
onigenc_single_byte_is_allowed_reverse_match(UChar* s, UChar* end)
{
  int allowed = TRUE;

  return allowed;
}
/*
 * Pack the multibyte character at `p` into a code point, most significant
 * byte first.  The character length comes from enc_len() on the first
 * byte; trailing bytes are read only while `p` stays below `end`, so a
 * truncated character yields a shorter code.
 */
extern OnigCodePoint
onigenc_mbn_mbc_to_code(OnigEncoding enc, UChar* p, UChar* end)
{
  int first, trailing;
  OnigCodePoint code;

  first    = *p++;
  trailing = enc_len(enc, first) - 1;
  code     = first;

  while (trailing > 0 && p < end) {
    int next = *p++;

    code <<= 8;
    code += next;
    trailing--;
  }
  return code;
}
/*
 * Lower-case the character at `p` into `lower`.
 * An ASCII character is converted through the ASCII lower-case table;
 * a multibyte character is copied unchanged (and not copied at all when
 * `lower` and `p` are the same buffer).
 * Returns the byte length of the character written to `lower`.
 */
extern int
onigenc_mbn_mbc_to_lower(OnigEncoding enc, UChar* p, UChar* lower)
{
  int width, k;

  if (ONIGENC_IS_MBC_ASCII(p)) {
    *lower = ONIGENC_ASCII_CODE_TO_LOWER_CASE(*p);
    return 1;
  }

  width = enc_len(enc, *p);
  if (lower != p) {
    /* byte-wise forward copy, equivalent to memcpy(lower, p, width) */
    for (k = 0; k < width; k++)
      lower[k] = p[k];
  }
  return width;
}
/*
 * Only ASCII characters can be case-ambiguous here: for them the ASCII
 * ctype table decides, for anything else the answer is FALSE.
 */
extern int
onigenc_mbn_mbc_is_case_ambig(UChar* p)
{
  if (! ONIGENC_IS_MBC_ASCII(p))
    return FALSE;

  return ONIGENC_IS_ASCII_CODE_CASE_AMBIG(*p);
}
/* Byte length of `code` in a 1-or-2 byte encoding: 2 if bits 8-15 are set, else 1. */
extern int
onigenc_mb2_code_to_mbclen(OnigCodePoint code)
{
  return ((code & 0xff00) != 0) ? 2 : 1;
}
/*
 * Byte length of `code` in a 1-to-4 byte encoding, decided by the most
 * significant non-zero byte of the code.
 */
extern int
onigenc_mb4_code_to_mbclen(OnigCodePoint code)
{
  if ((code & 0xff000000) != 0) return 4;
  if ((code & 0xff0000) != 0)   return 3;
  if ((code & 0xff00) != 0)     return 2;
  return 1;
}
/*
 * First byte of `code` in a 1-or-2 byte encoding: bits 8-15 when they are
 * non-zero, otherwise the code itself.
 */
extern int
onigenc_mb2_code_to_mbc_first(OnigCodePoint code)
{
  if ((code & 0xff00) == 0)
    return (int )code;

  return (code >> 8) & 0xff;
}
/*
 * First byte of `code` in a 1-to-4 byte encoding: the most significant
 * non-zero byte among bits 8-31, or the code itself when all of those
 * bits are zero.
 */
extern int
onigenc_mb4_code_to_mbc_first(OnigCodePoint code)
{
  if ((code & 0xff000000) != 0) return (code >> 24) & 0xff;
  if ((code & 0xff0000) != 0)   return (code >> 16) & 0xff;
  if ((code & 0xff00) != 0)     return (code >>  8) & 0xff;

  return (int )code;
}
/*
 * Write `code` into `buf` as one or two bytes (high byte first, emitted
 * only when non-zero) and return the number of bytes written.
 * If that count disagrees with enc_len() for the first byte written,
 * ONIGERR_INVALID_WIDE_CHAR_VALUE is returned instead.
 */
extern int
onigenc_mb2_code_to_mbc(OnigEncoding enc, OnigCodePoint code, UChar *buf)
{
  UChar *out = buf;

  if ((code & 0xff00) != 0)
    *out++ = ((code >> 8) & 0xff);
  *out++ = (code & 0xff);

  /* the emitted length must agree with what the lead byte announces */
  if (enc_len(enc, buf[0]) != (out - buf))
    return ONIGERR_INVALID_WIDE_CHAR_VALUE;

  return out - buf;
}
/*
 * Write `code` into `buf` as a 1-to-4 byte sequence, most significant
 * byte first, and return the number of bytes written.
 *
 * Leading zero bytes are dropped.  Once a more significant byte has been
 * emitted, every following byte is emitted as well, even when it is zero;
 * previously such interior zero bytes were skipped, which shortened the
 * sequence (e.g. 0x81003930 came out as 81 39 30) and made valid codes
 * fail the length check below.
 *
 * Returns ONIGERR_INVALID_WIDE_CHAR_VALUE when the number of bytes
 * written disagrees with enc_len() for the first byte written.
 */
extern int
onigenc_mb4_code_to_mbc(OnigEncoding enc, OnigCodePoint code, UChar *buf)
{
  UChar *p = buf;

  if ((code & 0xff000000) != 0) {
    *p++ = ((code >> 24) & 0xff);
  }
  /* "p != buf" means a higher byte is already out: keep interior zeros. */
  if ((code & 0xff0000) != 0 || p != buf) {
    *p++ = ((code >> 16) & 0xff);
  }
  if ((code & 0xff00) != 0 || p != buf) {
    *p++ = ((code >> 8) & 0xff);
  }
  *p++ = (code & 0xff);

  /* the emitted length must agree with what the lead byte announces */
  if (enc_len(enc, buf[0]) != (p - buf))
    return ONIGERR_INVALID_WIDE_CHAR_VALUE;

  return p - buf;
}
/*
 * Test whether `code` belongs to any of the character classes in `ctype`
 * for a 1-or-2 byte encoding.
 *
 * Codes below 128 are answered by the ASCII ctype table.  For larger
 * codes only ONIGENC_CTYPE_WORD can match, and it does when the code's
 * first byte starts a multibyte character (enc_len() > 1).
 *
 * The original body carried statements after an if/else whose branches
 * both return; they could never run and have been removed.  Results are
 * unchanged.
 */
extern int
onigenc_mb2_code_is_ctype(OnigEncoding enc, OnigCodePoint code,
                          unsigned int ctype)
{
  if (code < 128)
    return ONIGENC_IS_ASCII_CODE_CTYPE(code, ctype);

  if ((ctype & ONIGENC_CTYPE_WORD) != 0) {
    int first = onigenc_mb2_code_to_mbc_first(code);
    return (enc_len(enc, first) > 1 ? TRUE : FALSE);
  }

  return FALSE;
}
/*
 * Test whether `code` belongs to any of the character classes in `ctype`
 * for a 1-to-4 byte encoding.
 *
 * Codes below 128 are answered by the ASCII ctype table.  For larger
 * codes only ONIGENC_CTYPE_WORD can match, and it does when the code's
 * first byte starts a multibyte character (enc_len() > 1).
 *
 * The original body carried statements after an if/else whose branches
 * both return; they could never run and have been removed.  Results are
 * unchanged.
 */
extern int
onigenc_mb4_code_is_ctype(OnigEncoding enc, OnigCodePoint code,
                          unsigned int ctype)
{
  if (code < 128)
    return ONIGENC_IS_ASCII_CODE_CTYPE(code, ctype);

  if ((ctype & ONIGENC_CTYPE_WORD) != 0) {
    int first = onigenc_mb4_code_to_mbc_first(code);
    return (enc_len(enc, first) > 1 ? TRUE : FALSE);
  }

  return FALSE;
}
/*
 * Point *codes at a static one-element list containing 0xdf and return
 * the number of entries in it (1).
 */
extern int
onigenc_get_all_fold_match_code_ss_0xdf(OnigCodePoint** codes)
{
  static OnigCodePoint fold_codes[] = { 0xdf };

  *codes = fold_codes;
  return 1;
}
/*
 * Fold-match lookup for the German sharp s (byte 0xdf).
 *
 * If the text at `p` is the byte 0xdf, or the two bytes "SS" or "ss",
 * *info is pointed at a static record listing the three equivalent
 * spellings and the number of bytes matched (1 or 2) is returned.
 * Otherwise, or when `p` is not below `end`, the result is -1 and *info
 * is left untouched.
 */
extern int
onigenc_get_fold_match_info_ss_0xdf(UChar* p, UChar* end,
                                    OnigEncFoldMatchInfo** info)
{
  /* German alphabet ess-tsett(U+00DF) */
  static OnigEncFoldMatchInfo sharp_s = {
    3,
    { 1, 2, 2 },
    { "\337", "ss", "SS" } /* 0337: 0xdf */
  };

  if (p >= end) return -1;

  if (*p == 0xdf) {
    *info = &sharp_s;
    return 1;
  }

  /* a two-byte spelling needs one more byte in range */
  if (p + 1 >= end) return -1;

  if ((p[0] == 'S' && p[1] == 'S') ||
      (p[0] == 's' && p[1] == 's')) {
    *info = &sharp_s;
    return 2;
  }

  return -1; /* is not a fold string. */
}
#else /* ONIG_RUBY_M17N */
/*
 * ONIG_RUBY_M17N build: dispatch a single ctype query to the matching
 * m17n_* / ONIGENC_IS_CODE_* predicate.  `ctype` must be exactly one of
 * the ONIGENC_CTYPE_* values handled below; anything else yields 0.
 */
extern int
onigenc_is_code_ctype(OnigEncoding enc, OnigCodePoint code, int ctype)
{
  switch (ctype) {
  case ONIGENC_CTYPE_ALPHA:  return m17n_isalpha(enc, code);
  case ONIGENC_CTYPE_BLANK:  return ONIGENC_IS_CODE_BLANK(enc, (int )(code));
  case ONIGENC_CTYPE_CNTRL:  return m17n_iscntrl(enc, code);
  case ONIGENC_CTYPE_DIGIT:  return m17n_isdigit(enc, code);
  case ONIGENC_CTYPE_GRAPH:  return ONIGENC_IS_CODE_GRAPH(enc, (int )(code));
  case ONIGENC_CTYPE_LOWER:  return m17n_islower(enc, code);
  case ONIGENC_CTYPE_PRINT:  return m17n_isprint(enc, code);
  case ONIGENC_CTYPE_PUNCT:  return m17n_ispunct(enc, code);
  case ONIGENC_CTYPE_SPACE:  return m17n_isspace(enc, code);
  case ONIGENC_CTYPE_UPPER:  return m17n_isupper(enc, code);
  case ONIGENC_CTYPE_XDIGIT: return m17n_isxdigit(enc, code);
  case ONIGENC_CTYPE_WORD:   return m17n_iswchar(enc, code);
  case ONIGENC_CTYPE_ASCII:  return (code < 128 ? TRUE : FALSE);
  case ONIGENC_CTYPE_ALNUM:  return m17n_isalnum(enc, code);
  default:
    break;
  }

  return 0;
}
/*
 * ONIG_RUBY_M17N build: store `code` into `buf` with m17n_mbcput() and
 * return enc_len() of the code's first byte.
 */
extern int
onigenc_code_to_mbc(OnigEncoding enc, OnigCodePoint code, UChar *buf)
{
  int lead;

  m17n_mbcput(enc, code, buf);
  lead = m17n_firstbyte(enc, code);

  return enc_len(enc, lead);
}
/*
 * ONIG_RUBY_M17N build: decode the character at `p`, lower-case it with
 * m17n_tolower(), store the result in `buf`, and return the value of
 * m17n_codelen() for the lower-cased code.
 */
extern int
onigenc_mbc_to_lower(OnigEncoding enc, UChar* p, UChar* buf)
{
  unsigned int code, lowered;

  code    = m17n_codepoint(enc, p, p + enc_len(enc, *p));
  lowered = m17n_tolower(enc, code);
  m17n_mbcput(enc, lowered, buf);

  return m17n_codelen(enc, lowered);
}
/*
 * ONIG_RUBY_M17N build: TRUE when the character at `p` is reported as
 * upper-case or lower-case by the m17n predicates, FALSE otherwise.
 */
extern int
onigenc_mbc_is_case_ambig(OnigEncoding enc, UChar* p)
{
  unsigned int code = m17n_codepoint(enc, p, p + enc_len(enc, *p));

  return (m17n_isupper(enc, code) || m17n_islower(enc, code)) ? TRUE : FALSE;
}
/*
 * ONIG_RUBY_M17N build: find the head of the character containing `s`.
 *
 * Walks back from `s` to the nearest byte that m17n_islead() accepts
 * (stopping at `start`), then steps forward character by character until
 * the next step would reach or pass `s`.  If a character ends exactly at
 * `s`, `s` itself is a head and is returned; otherwise the head found is
 * returned.  A position at or before `start` is returned unchanged.
 */
extern UChar*
onigenc_get_left_adjust_char_head(OnigEncoding enc, UChar* start, UChar* s)
{
  UChar *head;
  int width;

  if (s <= start) return s;

  head = s;
  while (!m17n_islead(enc, *head) && head > start)
    head--;

  for (;;) {
    width = enc_len(enc, *head);
    if (head + width >= s) break;
    head += width;
  }

  return (head + width == s) ? s : head;
}
/*
 * ONIG_RUBY_M17N build: the answer is ONIGENC_IS_SINGLEBYTE(enc);
 * `s` and `end` are not consulted.
 */
extern int
onigenc_is_allowed_reverse_match(OnigEncoding enc, UChar* s, UChar* end)
{
  int allowed = ONIGENC_IS_SINGLEBYTE(enc);

  return allowed;
}
/* ONIG_RUBY_M17N build: intentionally a no-op; `table` is ignored. */
extern void
onigenc_set_default_caseconv_table(UChar* table)
{
}
#endif /* ONIG_RUBY_M17N */

View file

@ -0,0 +1,97 @@
/**********************************************************************
regenc.h - Oniguruma (regular expression library)
Copyright (C) 2003-2004 K.Kosako (kosako@sofnec.co.jp)
**********************************************************************/
#ifndef REGENC_H
#define REGENC_H
#ifndef ONIG_SOURCE_IS_WRAPPED
#include "config.h"
#endif
#include "oniguruma.h"
#ifndef NULL
#define NULL ((void* )0)
#endif
#ifndef TRUE
#define TRUE 1
#endif
#ifndef FALSE
#define FALSE 0
#endif
/* error codes */
/* internal error */
#define ONIGERR_MEMORY -5
#define ONIGERR_TYPE_BUG -6
/* syntax error [-400, -999] */
#define ONIGERR_INVALID_WIDE_CHAR_VALUE -400
#define ONIGERR_TOO_BIG_WIDE_CHAR_VALUE -401
#define ONIG_NEWLINE '\n'
#define ONIG_IS_NEWLINE(c) ((c) == ONIG_NEWLINE)
#define ONIG_IS_NULL(p) (((void*)(p)) == (void*)0)
#define ONIG_IS_NOT_NULL(p) (((void*)(p)) != (void*)0)
#define ONIG_CHECK_NULL_RETURN(p) if (ONIG_IS_NULL(p)) return NULL
#define ONIG_CHECK_NULL_RETURN_VAL(p,val) if (ONIG_IS_NULL(p)) return (val)
#ifdef ONIG_RUBY_M17N
#define ONIG_ENCODING_INIT_DEFAULT ONIG_ENCODING_UNDEF
#else /* ONIG_RUBY_M17N */
#define ONIG_ENCODING_INIT_DEFAULT ONIG_ENCODING_ASCII
/* for encoding system implementation (internal) */
ONIG_EXTERN int onigenc_nothing_get_all_fold_match_code P_((OnigCodePoint** codes));
ONIG_EXTERN int onigenc_nothing_get_fold_match_info P_((UChar* p, UChar* end, OnigEncFoldMatchInfo** info));
ONIG_EXTERN int onigenc_nothing_get_ctype_code_range P_((int ctype, int* nsb, int* nmb, OnigCodePointRange* sbr[], OnigCodePointRange* mbr[]));
/* methods for single byte encoding */
ONIG_EXTERN int onigenc_ascii_mbc_to_lower P_((UChar* p, UChar* lower));
ONIG_EXTERN int onigenc_ascii_mbc_is_case_ambig P_((UChar* p));
ONIG_EXTERN OnigCodePoint onigenc_single_byte_mbc_to_code P_((UChar* p, UChar* end));
ONIG_EXTERN int onigenc_single_byte_code_to_mbclen P_((OnigCodePoint code));
ONIG_EXTERN int onigenc_single_byte_code_to_mbc_first P_((OnigCodePoint code));
ONIG_EXTERN int onigenc_single_byte_code_to_mbc P_((OnigCodePoint code, UChar *buf));
ONIG_EXTERN UChar* onigenc_single_byte_left_adjust_char_head P_((UChar* start, UChar* s));
ONIG_EXTERN int onigenc_single_byte_is_allowed_reverse_match P_((UChar* s, UChar* end));
/* methods for multi byte encoding */
ONIG_EXTERN OnigCodePoint onigenc_mbn_mbc_to_code P_((OnigEncoding enc, UChar* p, UChar* end));
ONIG_EXTERN int onigenc_mbn_mbc_to_lower P_((OnigEncoding enc, UChar* p, UChar* lower));
ONIG_EXTERN int onigenc_mbn_mbc_is_case_ambig P_((UChar* p));
ONIG_EXTERN int onigenc_mb2_code_to_mbclen P_((OnigCodePoint code));
ONIG_EXTERN int onigenc_mb2_code_to_mbc_first P_((OnigCodePoint code));
ONIG_EXTERN int onigenc_mb2_code_to_mbc P_((OnigEncoding enc, OnigCodePoint code, UChar *buf));
ONIG_EXTERN int onigenc_mb2_code_is_ctype P_((OnigEncoding enc, OnigCodePoint code, unsigned int ctype));
ONIG_EXTERN int onigenc_mb4_code_to_mbclen P_((OnigCodePoint code));
ONIG_EXTERN int onigenc_mb4_code_to_mbc_first P_((OnigCodePoint code));
ONIG_EXTERN int onigenc_mb4_code_to_mbc P_((OnigEncoding enc, OnigCodePoint code, UChar *buf));
ONIG_EXTERN int onigenc_mb4_code_is_ctype P_((OnigEncoding enc, OnigCodePoint code, unsigned int ctype));
ONIG_EXTERN int onigenc_get_all_fold_match_code_ss_0xdf P_((OnigCodePoint** codes));
ONIG_EXTERN int onigenc_get_fold_match_info_ss_0xdf P_((UChar* p, UChar* end, OnigEncFoldMatchInfo** info));
#endif /* is not ONIG_RUBY_M17N */
ONIG_EXTERN OnigEncoding OnigEncDefaultCharEncoding;
ONIG_EXTERN UChar* OnigEncAsciiToLowerCaseTable;
ONIG_EXTERN unsigned short OnigEncAsciiCtypeTable[];
/* Lower-case byte c through the currently installed lower-case table. */
#define ONIGENC_ASCII_CODE_TO_LOWER_CASE(c) OnigEncAsciiToLowerCaseTable[c]
/* Non-zero when any bit of ctype is set for code in the ASCII ctype table.
   ctype is parenthesized so that an argument such as (A | B) written
   without its own parentheses is still ANDed as a whole; otherwise
   "table & A | B" would be non-zero for every code. */
#define ONIGENC_IS_ASCII_CODE_CTYPE(code,ctype) \
((OnigEncAsciiCtypeTable[code] & (ctype)) != 0)
/* Non-zero when code is flagged as upper-case or lower-case. */
#define ONIGENC_IS_ASCII_CODE_CASE_AMBIG(code) \
ONIGENC_IS_ASCII_CODE_CTYPE(code, (ONIGENC_CTYPE_UPPER | ONIGENC_CTYPE_LOWER))
#endif /* REGENC_H */

View file

@ -2,7 +2,7 @@
regerror.c - Oniguruma (regular expression library)
Copyright (C) 2002-2003 K.Kosako (kosako@sofnec.co.jp)
Copyright (C) 2002-2004 K.Kosako (kosako@sofnec.co.jp)
**********************************************************************/
#include "regint.h"
@ -17,118 +17,136 @@
#endif
extern char*
regex_error_code_to_format(int code)
onig_error_code_to_format(int code)
{
char *p;
if (code >= 0) return (char* )0;
switch (code) {
case REG_MISMATCH:
case ONIG_MISMATCH:
p = "mismatch"; break;
case REG_NO_SUPPORT_CONFIG:
case ONIG_NO_SUPPORT_CONFIG:
p = "no support in this configuration"; break;
case REGERR_MEMORY:
case ONIGERR_MEMORY:
p = "fail to memory allocation"; break;
case REGERR_MATCH_STACK_LIMIT_OVER:
case ONIGERR_MATCH_STACK_LIMIT_OVER:
p = "match-stack limit over"; break;
case REGERR_TYPE_BUG:
case ONIGERR_TYPE_BUG:
p = "undefined type (bug)"; break;
case REGERR_PARSER_BUG:
case ONIGERR_PARSER_BUG:
p = "internal parser error (bug)"; break;
case REGERR_STACK_BUG:
case ONIGERR_STACK_BUG:
p = "stack error (bug)"; break;
case REGERR_UNDEFINED_BYTECODE:
case ONIGERR_UNDEFINED_BYTECODE:
p = "undefined bytecode (bug)"; break;
case REGERR_UNEXPECTED_BYTECODE:
case ONIGERR_UNEXPECTED_BYTECODE:
p = "unexpected bytecode (bug)"; break;
case REGERR_DEFAULT_ENCODING_IS_NOT_SETTED:
case ONIGERR_DEFAULT_ENCODING_IS_NOT_SETTED:
p = "default multibyte-encoding is not setted"; break;
case REGERR_SPECIFIED_ENCODING_CANT_CONVERT_TO_WIDE_CHAR:
case ONIGERR_SPECIFIED_ENCODING_CANT_CONVERT_TO_WIDE_CHAR:
p = "can't convert to wide-char on specified multibyte-encoding"; break;
case REGERR_END_PATTERN_AT_LEFT_BRACE:
case ONIGERR_INVALID_ARGUMENT:
p = "invalid argument"; break;
case ONIGERR_END_PATTERN_AT_LEFT_BRACE:
p = "end pattern at left brace"; break;
case REGERR_END_PATTERN_AT_LEFT_BRACKET:
case ONIGERR_END_PATTERN_AT_LEFT_BRACKET:
p = "end pattern at left bracket"; break;
case REGERR_EMPTY_CHAR_CLASS:
case ONIGERR_EMPTY_CHAR_CLASS:
p = "empty char-class"; break;
case REGERR_PREMATURE_END_OF_CHAR_CLASS:
case ONIGERR_PREMATURE_END_OF_CHAR_CLASS:
p = "premature end of char-class"; break;
case REGERR_END_PATTERN_AT_BACKSLASH:
case ONIGERR_END_PATTERN_AT_BACKSLASH:
p = "end pattern at backslash"; break;
case REGERR_END_PATTERN_AT_META:
case ONIGERR_END_PATTERN_AT_META:
p = "end pattern at meta"; break;
case REGERR_END_PATTERN_AT_CONTROL:
case ONIGERR_END_PATTERN_AT_CONTROL:
p = "end pattern at control"; break;
case REGERR_META_CODE_SYNTAX:
case ONIGERR_META_CODE_SYNTAX:
p = "illegal meta-code syntax"; break;
case REGERR_CONTROL_CODE_SYNTAX:
case ONIGERR_CONTROL_CODE_SYNTAX:
p = "illegal control-code syntax"; break;
case REGERR_CHAR_CLASS_VALUE_AT_END_OF_RANGE:
case ONIGERR_CHAR_CLASS_VALUE_AT_END_OF_RANGE:
p = "char-class value at end of range"; break;
case REGERR_CHAR_CLASS_VALUE_AT_START_OF_RANGE:
case ONIGERR_CHAR_CLASS_VALUE_AT_START_OF_RANGE:
p = "char-class value at start of range"; break;
case REGERR_UNMATCHED_RANGE_SPECIFIER_IN_CHAR_CLASS:
case ONIGERR_UNMATCHED_RANGE_SPECIFIER_IN_CHAR_CLASS:
p = "unmatched range specifier in char-class"; break;
case REGERR_TARGET_OF_REPEAT_OPERATOR_NOT_SPECIFIED:
case ONIGERR_TARGET_OF_REPEAT_OPERATOR_NOT_SPECIFIED:
p = "target of repeat operator is not specified"; break;
case REGERR_TARGET_OF_REPEAT_OPERATOR_INVALID:
case ONIGERR_TARGET_OF_REPEAT_OPERATOR_INVALID:
p = "target of repeat operator is invalid"; break;
case REGERR_NESTED_REPEAT_OPERATOR:
case ONIGERR_NESTED_REPEAT_OPERATOR:
p = "nested repeat operator"; break;
case REGERR_UNMATCHED_CLOSE_PARENTHESIS:
case ONIGERR_UNMATCHED_CLOSE_PARENTHESIS:
p = "unmatched close parenthesis"; break;
case REGERR_END_PATTERN_WITH_UNMATCHED_PARENTHESIS:
case ONIGERR_END_PATTERN_WITH_UNMATCHED_PARENTHESIS:
p = "end pattern with unmatched parenthesis"; break;
case REGERR_END_PATTERN_IN_GROUP:
case ONIGERR_END_PATTERN_IN_GROUP:
p = "end pattern in group"; break;
case REGERR_UNDEFINED_GROUP_OPTION:
case ONIGERR_UNDEFINED_GROUP_OPTION:
p = "undefined group option"; break;
case REGERR_INVALID_POSIX_BRACKET_TYPE:
case ONIGERR_INVALID_POSIX_BRACKET_TYPE:
p = "invalid POSIX bracket type"; break;
case REGERR_INVALID_LOOK_BEHIND_PATTERN:
case ONIGERR_INVALID_LOOK_BEHIND_PATTERN:
p = "invalid pattern in look-behind"; break;
case REGERR_INVALID_REPEAT_RANGE_PATTERN:
case ONIGERR_INVALID_REPEAT_RANGE_PATTERN:
p = "invalid repeat range {lower,upper}"; break;
case REGERR_TOO_BIG_NUMBER:
case ONIGERR_TOO_BIG_NUMBER:
p = "too big number"; break;
case REGERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE:
case ONIGERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE:
p = "too big number for repeat range"; break;
case REGERR_UPPER_SMALLER_THAN_LOWER_IN_REPEAT_RANGE:
case ONIGERR_UPPER_SMALLER_THAN_LOWER_IN_REPEAT_RANGE:
p = "upper is smaller than lower in repeat range"; break;
case REGERR_EMPTY_RANGE_IN_CHAR_CLASS:
case ONIGERR_EMPTY_RANGE_IN_CHAR_CLASS:
p = "empty range in char class"; break;
case REGERR_MISMATCH_CODE_LENGTH_IN_CLASS_RANGE:
case ONIGERR_MISMATCH_CODE_LENGTH_IN_CLASS_RANGE:
p = "mismatch multibyte code length in char-class range"; break;
case REGERR_TOO_MANY_MULTI_BYTE_RANGES:
case ONIGERR_TOO_MANY_MULTI_BYTE_RANGES:
p = "too many multibyte code ranges are specified"; break;
case REGERR_TOO_SHORT_MULTI_BYTE_STRING:
case ONIGERR_TOO_SHORT_MULTI_BYTE_STRING:
p = "too short multibyte code string"; break;
case REGERR_TOO_BIG_BACKREF_NUMBER:
case ONIGERR_TOO_BIG_BACKREF_NUMBER:
p = "too big backref number"; break;
case REGERR_INVALID_BACKREF:
#ifdef USE_NAMED_SUBEXP
case ONIGERR_INVALID_BACKREF:
#ifdef USE_NAMED_GROUP
p = "invalid backref number/name"; break;
#else
p = "invalid backref number"; break;
#endif
case REGERR_TOO_BIG_WIDE_CHAR_VALUE:
case ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED:
p = "numbered backref/call is not allowed. (use name)"; break;
case ONIGERR_TOO_BIG_WIDE_CHAR_VALUE:
p = "too big wide-char value"; break;
case REGERR_TOO_LONG_WIDE_CHAR_VALUE:
case ONIGERR_TOO_LONG_WIDE_CHAR_VALUE:
p = "too long wide-char value"; break;
case REGERR_INVALID_WIDE_CHAR_VALUE:
case ONIGERR_INVALID_WIDE_CHAR_VALUE:
p = "invalid wide-char value"; break;
case REGERR_INVALID_SUBEXP_NAME:
p = "invalid subexp name"; break;
case REGERR_UNDEFINED_NAME_REFERENCE:
case ONIGERR_EMPTY_GROUP_NAME:
p = "group name is empty"; break;
case ONIGERR_INVALID_GROUP_NAME:
p = "invalid group name <%n>"; break;
case ONIGERR_INVALID_CHAR_IN_GROUP_NAME:
#ifdef USE_NAMED_GROUP
p = "invalid char in group name <%n>"; break;
#else
p = "invalid char in group number <%n>"; break;
#endif
case ONIGERR_UNDEFINED_NAME_REFERENCE:
p = "undefined name <%n> reference"; break;
case REGERR_UNDEFINED_GROUP_REFERENCE:
p = "undefined group reference"; break;
case REGERR_MULTIPLEX_DEFINITION_NAME_CALL:
case ONIGERR_UNDEFINED_GROUP_REFERENCE:
p = "undefined group <%n> reference"; break;
case ONIGERR_MULTIPLEX_DEFINED_NAME:
p = "multiplex defined name <%n>"; break;
case ONIGERR_MULTIPLEX_DEFINITION_NAME_CALL:
p = "multiplex definition name <%n> call"; break;
case REGERR_NEVER_ENDING_RECURSION:
case ONIGERR_NEVER_ENDING_RECURSION:
p = "never ending recursion"; break;
case REGERR_OVER_THREAD_PASS_LIMIT_COUNT:
case ONIGERR_GROUP_NUMBER_OVER_FOR_CAPTURE_HISTORY:
p = "group number is too big for capture history"; break;
case ONIGERR_INVALID_CHAR_PROPERTY_NAME:
p = "invalid character property name"; break;
case ONIGERR_OVER_THREAD_PASS_LIMIT_COUNT:
p = "over thread pass limit count"; break;
default:
@ -139,31 +157,36 @@ regex_error_code_to_format(int code)
}
/* for REG_MAX_ERROR_MESSAGE_LEN */
/* for ONIG_MAX_ERROR_MESSAGE_LEN */
#define MAX_ERROR_PAR_LEN 30
extern int
#ifdef HAVE_STDARG_PROTOTYPES
regex_error_code_to_str(UChar* s, int code, ...)
onig_error_code_to_str(UChar* s, int code, ...)
#else
regex_error_code_to_str(UChar* s, code, va_alist)
onig_error_code_to_str(s, code, va_alist)
UChar* s;
int code;
va_dcl
#endif
{
UChar *p, *q;
RegErrorInfo* einfo;
OnigErrorInfo* einfo;
int len;
va_list vargs;
va_init_list(vargs, code);
switch (code) {
case REGERR_UNDEFINED_NAME_REFERENCE:
case REGERR_MULTIPLEX_DEFINITION_NAME_CALL:
einfo = va_arg(vargs, RegErrorInfo*);
case ONIGERR_UNDEFINED_NAME_REFERENCE:
case ONIGERR_UNDEFINED_GROUP_REFERENCE:
case ONIGERR_MULTIPLEX_DEFINED_NAME:
case ONIGERR_MULTIPLEX_DEFINITION_NAME_CALL:
case ONIGERR_INVALID_GROUP_NAME:
case ONIGERR_INVALID_CHAR_IN_GROUP_NAME:
einfo = va_arg(vargs, OnigErrorInfo*);
len = einfo->par_end - einfo->par;
q = regex_error_code_to_format(code);
q = onig_error_code_to_format(code);
p = s;
while (*q != '\0') {
if (*q == '%') {
@ -194,7 +217,7 @@ regex_error_code_to_str(UChar* s, code, va_alist)
break;
default:
q = regex_error_code_to_format(code);
q = onig_error_code_to_format(code);
len = strlen(q);
xmemcpy(s, q, len);
s[len] = '\0';
@ -208,13 +231,13 @@ regex_error_code_to_str(UChar* s, code, va_alist)
void
#ifdef HAVE_STDARG_PROTOTYPES
regex_snprintf_with_pattern(char buf[], int bufsize, RegCharEncoding enc,
onig_snprintf_with_pattern(char buf[], int bufsize, OnigEncoding enc,
char* pat, char* pat_end, char *fmt, ...)
#else
regex_snprintf_with_pattern(buf, bufsize, enc, pat, pat_end, fmt, va_alist)
onig_snprintf_with_pattern(buf, bufsize, enc, pat, pat_end, fmt, va_alist)
char buf[];
int bufsize;
RegCharEncoding enc;
OnigEncoding enc;
char* pat;
char* pat_end;
const char *fmt;
@ -222,7 +245,7 @@ regex_snprintf_with_pattern(buf, bufsize, enc, pat, pat_end, fmt, va_alist)
#endif
{
int n, need, len;
char *p, *s;
UChar *p, *s;
va_list args;
va_init_list(args, fmt);
@ -236,21 +259,22 @@ regex_snprintf_with_pattern(buf, bufsize, enc, pat, pat_end, fmt, va_alist)
s = buf + strlen(buf);
p = pat;
while (p < pat_end) {
if (*p == '\\') {
while (p < (UChar* )pat_end) {
if (*p == MC_ESC) {
*s++ = *p++;
len = mblen(enc, *p);
len = enc_len(enc, *p);
while (len-- > 0) *s++ = *p++;
}
else if (*p == '/') {
*s++ = '\\';
*s++ = MC_ESC;
*s++ = *p++;
}
else if (ismb(enc, *p)) {
len = mblen(enc, *p);
else if (ONIGENC_IS_MBC_HEAD(enc, *p)) {
len = enc_len(enc, *p);
while (len-- > 0) *s++ = *p++;
}
else if (!IS_PRINT(*p) && !IS_SPACE(*p)) {
else if (!ONIGENC_IS_CODE_PRINT(enc, *p) &&
!ONIGENC_IS_CODE_SPACE(enc, *p)) {
char b[5];
sprintf(b, "\\%03o", *p & 0377);
len = strlen(b);

View file

@ -2,15 +2,27 @@
regex.c - Oniguruma (regular expression library)
Copyright (C) 2002-2003 K.Kosako (kosako@sofnec.co.jp)
Copyright (C) 2002-2004 K.Kosako (kosako@sofnec.co.jp)
**********************************************************************/
/*
* Source wrapper for Ruby.
*/
#define ONIG_SOURCE_IS_WRAPPED
#include "regint.h"
#include "regex.h"
#include "regparse.c"
#include "regcomp.c"
#include "regexec.c"
#include "regenc.c"
#include "reggnu.c"
#include "regerror.c"
#ifndef ONIG_RUBY_M17N
#include "enc/ascii.c"
#include "enc/utf8.c"
#include "enc/euc_jp.c"
#include "enc/sjis.c"
#endif

File diff suppressed because it is too large Load diff

View file

@ -2,13 +2,17 @@
reggnu.c - Oniguruma (regular expression library)
Copyright (C) 2002-2003 K.Kosako (kosako@sofnec.co.jp)
Copyright (C) 2002-2004 K.Kosako (kosako@sofnec.co.jp)
**********************************************************************/
#include "regint.h"
#ifndef ONIGGNU_H /* name changes from oniggnu.h to regex.h in ruby. */
#include "oniggnu.h"
#endif
#if defined(RUBY_PLATFORM) || defined(RUBY)
#ifndef REG_RUBY_M17N
#ifndef ONIG_RUBY_M17N
#define USE_COMPATIBILITY_FOR_RUBY_EXTENSION_LIBRARY
#endif
#endif
@ -18,25 +22,25 @@
#endif
/*
 * re_free_registers: hands the region `r` to the library's region-free
 * routine with a second argument of 0, which the inline note below describes
 * as "don't free self".
 *
 * NOTE(review): this span is rendered from a diff, so the
 * RegRegion/regex_region_free spelling and the OnigRegion/onig_region_free
 * spelling both appear; presumably the Onig* lines are the post-upgrade
 * ones -- confirm against the actual reggnu.c.
 */
extern void
re_free_registers(RegRegion* r)
re_free_registers(OnigRegion* r)
{
/* 0: don't free self */
regex_region_free(r, 0);
onig_region_free(r, 0);
}
extern int
re_adjust_startpos(regex_t* reg, const char* string, int size,
int startpos, int range)
{
if (startpos > 0 && mbmaxlen(reg->enc) != 1 && startpos < size) {
if (startpos > 0 && ONIGENC_MBC_MAXLEN(reg->enc) != 1 && startpos < size) {
UChar *p;
UChar *s = (UChar* )string + startpos;
if (range > 0) {
p = regex_get_right_adjust_char_head(reg->enc, (UChar* )string, s);
p = onigenc_get_right_adjust_char_head(reg->enc, (UChar* )string, s);
}
else {
p = regex_get_left_adjust_char_head(reg->enc, (UChar* )string, s);
p = ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg->enc, (UChar* )string, s);
}
return p - (UChar* )string;
}
@ -48,29 +52,30 @@ extern int
re_match(regex_t* reg, const char* str, int size, int pos,
struct re_registers* regs)
{
return regex_match(reg, (UChar* )str, (UChar* )(str + size),
(UChar* )(str + pos), regs, REG_OPTION_NONE);
return onig_match(reg, (UChar* )str, (UChar* )(str + size),
(UChar* )(str + pos), regs, ONIG_OPTION_NONE);
}
/*
 * re_search: thin wrapper that forwards to the library search call and
 * returns its result unchanged.
 *
 * Arguments as forwarded:
 *   bufp                      - passed through as the compiled regex.
 *   string, string + size     - passed as the first two UChar* pointers
 *                               (start and end of the subject text).
 *   string + startpos         - passed as the next pointer argument.
 *   string + startpos + range - passed as the pointer argument after that.
 *                               (presumably search start and search range
 *                               bound -- confirm against the search routine's
 *                               prototype)
 *   regs                      - passed through untouched.
 * The option argument is always the *_OPTION_NONE constant.
 *
 * NOTE(review): diff-rendered span; the regex_search/REG_OPTION_NONE call and
 * the onig_search/ONIG_OPTION_NONE call are the old and new spelling of the
 * same statement, not two consecutive returns.
 */
extern int
re_search(regex_t* bufp, const char* string, int size, int startpos, int range,
struct re_registers* regs)
{
return regex_search(bufp, (UChar* )string, (UChar* )(string + size),
(UChar* )(string + startpos),
(UChar* )(string + startpos + range), regs, REG_OPTION_NONE);
return onig_search(bufp, (UChar* )string, (UChar* )(string + size),
(UChar* )(string + startpos),
(UChar* )(string + startpos + range),
regs, ONIG_OPTION_NONE);
}
extern int
re_compile_pattern(const char* pattern, int size, regex_t* reg, char* ebuf)
{
int r;
RegErrorInfo einfo;
OnigErrorInfo einfo;
r = regex_compile(reg, (UChar* )pattern, (UChar* )(pattern + size), &einfo);
r = onig_compile(reg, (UChar* )pattern, (UChar* )(pattern + size), &einfo);
if (r != 0) {
if (IS_NOT_NULL(ebuf))
(void )regex_error_code_to_str((UChar* )ebuf, r, &einfo);
(void )onig_error_code_to_str((UChar* )ebuf, r, &einfo);
}
return r;
@ -80,19 +85,19 @@ extern int
re_recompile_pattern(const char* pattern, int size, regex_t* reg, char* ebuf)
{
int r;
RegErrorInfo einfo;
RegCharEncoding enc;
OnigErrorInfo einfo;
OnigEncoding enc;
/* I think encoding and options should be arguments of this function.
But this is adapted to present re.c. (2002/11/29)
*/
enc = RegDefaultCharEncoding;
enc = OnigEncDefaultCharEncoding;
r = regex_recompile(reg, (UChar* )pattern, (UChar* )(pattern + size),
reg->options, enc, RegDefaultSyntax, &einfo);
r = onig_recompile(reg, (UChar* )pattern, (UChar* )(pattern + size),
reg->options, enc, OnigDefaultSyntax, &einfo);
if (r != 0) {
if (IS_NOT_NULL(ebuf))
(void )regex_error_code_to_str((UChar* )ebuf, r, &einfo);
(void )onig_error_code_to_str((UChar* )ebuf, r, &einfo);
}
return r;
}
@ -100,23 +105,20 @@ re_recompile_pattern(const char* pattern, int size, regex_t* reg, char* ebuf)
/*
 * re_free_pattern: forwards `reg` to the library's free routine
 * (regex_free / onig_free) and returns nothing.
 *
 * NOTE(review): diff-rendered span; the two calls are the old and new
 * spelling of a single statement.
 */
extern void
re_free_pattern(regex_t* reg)
{
regex_free(reg);
onig_free(reg);
}
/*
 * re_alloc_pattern: calls the library's alloc-init routine on `reg` using the
 * default option constant, the default character encoding and the default
 * syntax, and returns that routine's result.
 *
 * The Reg*-spelled lines additionally return
 * REGERR_DEFAULT_ENCODING_IS_NOT_SETTED when RegDefaultCharEncoding equals
 * REGCODE_UNDEF; no Onig*-spelled counterpart of that guard is visible in
 * this span.
 *
 * NOTE(review): diff-rendered span; presumably the guard and the
 * regex_alloc_init call were removed and the onig_alloc_init call added --
 * confirm against the actual reggnu.c.
 */
extern int
re_alloc_pattern(regex_t** reg)
{
if (RegDefaultCharEncoding == REGCODE_UNDEF)
return REGERR_DEFAULT_ENCODING_IS_NOT_SETTED;
return regex_alloc_init(reg, REG_OPTION_DEFAULT, RegDefaultCharEncoding,
RegDefaultSyntax);
return onig_alloc_init(reg, ONIG_OPTION_DEFAULT, OnigEncDefaultCharEncoding,
OnigDefaultSyntax);
}
/*
 * re_set_casetable: casts `table` to UChar* and passes it to the library's
 * default-table setter (regex_set_default_trans_table /
 * onigenc_set_default_caseconv_table). Nothing is returned.
 *
 * NOTE(review): diff-rendered span; the two calls are the old and new
 * spelling of a single statement. The cast drops the const qualifier of
 * `table` -- whether the callee modifies or retains the pointer is not
 * visible here.
 */
extern void
re_set_casetable(const char* table)
{
regex_set_default_trans_table((UChar* )table);
onigenc_set_default_caseconv_table((UChar* )table);
}
#ifdef USE_COMPATIBILITY_FOR_RUBY_EXTENSION_LIBRARY
@ -167,7 +169,7 @@ static const unsigned char mbctab_sjis[] = {
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
@ -200,16 +202,39 @@ const unsigned char *re_mbctab = mbctab_ascii;
#endif
extern void
#ifdef REG_RUBY_M17N
re_mbcinit(RegCharEncoding enc)
#ifdef ONIG_RUBY_M17N
re_mbcinit(OnigEncoding enc)
#else
re_mbcinit(int mb_code)
#endif
{
#ifdef REG_RUBY_M17N
RegDefaultCharEncoding = enc;
#ifdef ONIG_RUBY_M17N
onigenc_set_default_encoding(enc);
#else
RegDefaultCharEncoding = REG_MBLEN_TABLE[mb_code];
OnigEncoding enc;
switch (mb_code) {
case MBCTYPE_ASCII:
enc = ONIG_ENCODING_ASCII;
break;
case MBCTYPE_EUC:
enc = ONIG_ENCODING_EUC_JP;
break;
case MBCTYPE_SJIS:
enc = ONIG_ENCODING_SJIS;
break;
case MBCTYPE_UTF8:
enc = ONIG_ENCODING_UTF8;
break;
default:
return ;
break;
}
onigenc_set_default_encoding(enc);
#endif
#ifdef USE_COMPATIBILITY_FOR_RUBY_EXTENSION_LIBRARY

View file

@ -2,56 +2,62 @@
regint.h - Oniguruma (regular expression library)
Copyright (C) 2002-2003 K.Kosako (kosako@sofnec.co.jp)
Copyright (C) 2002-2004 K.Kosako (kosako@sofnec.co.jp)
**********************************************************************/
#ifndef REGINT_H
#define REGINT_H
/* for debug */
/* #define REG_DEBUG_PARSE_TREE */
/* #define REG_DEBUG_COMPILE */
/* #define REG_DEBUG_SEARCH */
/* #define REG_DEBUG_MATCH */
/* #define REG_DONT_OPTIMIZE */
/* #define ONIG_DEBUG_PARSE_TREE */
/* #define ONIG_DEBUG_COMPILE */
/* #define ONIG_DEBUG_SEARCH */
/* #define ONIG_DEBUG_MATCH */
/* #define ONIG_DONT_OPTIMIZE */
/* for byte-code statistical data. */
/* #define REG_DEBUG_STATISTICS */
/* #define ONIG_DEBUG_STATISTICS */
#if defined(REG_DEBUG_PARSE_TREE) || defined(REG_DEBUG_MATCH) || \
defined(REG_DEBUG_COMPILE) || defined(REG_DEBUG_STATISTICS)
#ifndef REG_DEBUG
#define REG_DEBUG
#if defined(ONIG_DEBUG_PARSE_TREE) || defined(ONIG_DEBUG_MATCH) || \
defined(ONIG_DEBUG_COMPILE) || defined(ONIG_DEBUG_STATISTICS)
#ifndef ONIG_DEBUG
#define ONIG_DEBUG
#endif
#endif
#if defined(__i386) || defined(__i386__) || defined(_M_IX86) || \
(defined(__ppc__) && defined(__APPLE__)) || \
defined(__x86_64) || defined(__x86_64__) || \
defined(__mc68020__)
#define UNALIGNED_WORD_ACCESS
#define PLATFORM_UNALIGNED_WORD_ACCESS
#endif
/* config */
#define USE_NAMED_SUBEXP
/* spec. config */
#define USE_NAMED_GROUP
#define USE_SUBEXP_CALL
#define USE_FOLD_MATCH /* ess-tsett etc... */
#define USE_INFINITE_REPEAT_MONOMANIAC_MEM_STATUS_CHECK /* /(?:()|())*\2/ */
#define USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE /* /\n$/ =~ "\n" */
#define USE_WARNING_REDUNDANT_NESTED_REPEAT_OPERATOR
/* internal config */
#define USE_RECYCLE_NODE
#define USE_OP_PUSH_OR_JUMP_EXACT
#define USE_QUALIFIER_PEEK_NEXT
#define USE_RECYCLE_NODE
#define USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE /* /\n$/ =~ "\n" */
/* #define USE_SBMB_CLASS */
#define INIT_MATCH_STACK_SIZE 160
#define MATCH_STACK_LIMIT_SIZE 200000
#define MATCH_STACK_LIMIT_SIZE 500000
/* interface to external system */
#ifdef NOT_RUBY /* gived from Makefile */
#include "config.h"
#define USE_VARIABLE_META_CHARS
#define USE_VARIABLE_SYNTAX
#define USE_WORD_BEGIN_END /* "\<": word-begin, "\>": word-end */
#define DEFAULT_TRANSTABLE_EXIST 1
#define THREAD_ATOMIC_START /* depend on thread system */
#define THREAD_ATOMIC_END /* depend on thread system */
#define THREAD_PASS /* depend on thread system */
#define USE_POSIX_REGION_OPTION /* needed for POSIX API support */
#define THREAD_ATOMIC_START /* depend on thread system */
#define THREAD_ATOMIC_END /* depend on thread system */
#define THREAD_PASS /* depend on thread system */
#define xmalloc malloc
#define xrealloc realloc
#define xfree free
@ -59,12 +65,11 @@
#include "ruby.h"
#include "version.h"
#include "rubysig.h" /* for DEFER_INTS, ENABLE_INTS */
#define USE_WARNING_REDUNDANT_NESTED_REPEAT_OPERATOR
#define THREAD_ATOMIC_START DEFER_INTS
#define THREAD_ATOMIC_END ENABLE_INTS
#define THREAD_PASS /* I want to use rb_thread_pass() */
#define WARNING rb_warn
#define VERB_WARNING rb_warning
#define THREAD_ATOMIC_START DEFER_INTS
#define THREAD_ATOMIC_END ENABLE_INTS
#define THREAD_PASS rb_thread_schedule()
#define DEFAULT_WARN_FUNCTION rb_warn
#define DEFAULT_VERB_WARN_FUNCTION rb_warning
#if defined(RUBY_VERSION_MAJOR)
#if RUBY_VERSION_MAJOR > 1 || \
@ -74,6 +79,8 @@
#endif
#endif
#define ONIG_RUBY_DEFINE_GLOBAL_FUNCTION(s,f,n) \
rb_define_global_function(s, f, n)
#endif /* else NOT_RUBY */
#define THREAD_PASS_LIMIT_COUNT 10
@ -82,7 +89,9 @@
#define xmemmove memmove
#if defined(_WIN32) && !defined(__CYGWIN__)
#define xalloca _alloca
#ifdef NOT_RUBY
#define vsnprintf _vsnprintf
#endif
#else
#define xalloca alloca
#endif
@ -104,15 +113,12 @@
#include <ctype.h>
#include <sys/types.h>
#ifdef REG_DEBUG
#ifdef ONIG_DEBUG
# include <stdio.h>
#endif
#ifdef NOT_RUBY
# include "oniguruma.h"
#else
# include "regex.h"
#endif
#include "regenc.h"
#include "oniguruma.h"
#ifdef MIN
#undef MIN
@ -123,17 +129,24 @@
#define MIN(a,b) (((a)>(b))?(b):(a))
#define MAX(a,b) (((a)<(b))?(b):(a))
#ifndef UNALIGNED_WORD_ACCESS
#define IS_NULL(p) (((void*)(p)) == (void*)0)
#define IS_NOT_NULL(p) (((void*)(p)) != (void*)0)
#define CHECK_NULL_RETURN(p) if (IS_NULL(p)) return NULL
#define CHECK_NULL_RETURN_VAL(p,val) if (IS_NULL(p)) return (val)
#define NULL_UCHARP ((UChar* )0)
#ifndef PLATFORM_UNALIGNED_WORD_ACCESS
#define WORD_ALIGNMENT_SIZE SIZEOF_INT
#define GET_ALIGNMENT_PAD_SIZE(addr,pad_size) do {\
(pad_size) = WORD_ALIGNMENT_SIZE - ((int )(addr) % WORD_ALIGNMENT_SIZE);\
(pad_size) = WORD_ALIGNMENT_SIZE \
- ((unsigned int )(addr) % WORD_ALIGNMENT_SIZE);\
if ((pad_size) == WORD_ALIGNMENT_SIZE) (pad_size) = 0;\
} while (0)
#define ALIGNMENT_RIGHT(addr) do {\
(addr) += (WORD_ALIGNMENT_SIZE - 1);\
(addr) -= ((int )(addr) % WORD_ALIGNMENT_SIZE);\
(addr) -= ((unsigned int )(addr) % WORD_ALIGNMENT_SIZE);\
} while (0)
@ -216,7 +229,7 @@
#define SERIALIZE_UINT(i,p) SERIALIZE_8BYTE_INT(i,p)
#endif
#endif /* UNALIGNED_WORD_ACCESS */
#endif /* PLATFORM_UNALIGNED_WORD_ACCESS */
/* stack pop level */
#define STACK_POP_LEVEL_FREE 0
@ -224,12 +237,12 @@
#define STACK_POP_LEVEL_ALL 2
/* optimize flags */
#define REG_OPTIMIZE_NONE 0
#define REG_OPTIMIZE_EXACT 1 /* Slow Search */
#define REG_OPTIMIZE_EXACT_BM 2 /* Boyer Moore Search */
#define REG_OPTIMIZE_EXACT_BM_NOT_REV 3 /* BM (but not simple match) */
#define REG_OPTIMIZE_EXACT_IC 4 /* Slow Search (ignore case) */
#define REG_OPTIMIZE_MAP 5 /* char map */
#define ONIG_OPTIMIZE_NONE 0
#define ONIG_OPTIMIZE_EXACT 1 /* Slow Search */
#define ONIG_OPTIMIZE_EXACT_BM 2 /* Boyer Moore Search */
#define ONIG_OPTIMIZE_EXACT_BM_NOT_REV 3 /* BM (but not simple match) */
#define ONIG_OPTIMIZE_EXACT_IC 4 /* Slow Search (ignore case) */
#define ONIG_OPTIMIZE_MAP 5 /* char map */
/* bit status */
typedef unsigned int BitStatusType;
@ -255,71 +268,32 @@ typedef unsigned int BitStatusType;
#define INT_MAX_LIMIT ((1UL << (SIZEOF_INT * 8 - 1)) - 1)
typedef unsigned int WCINT;
#define DIGITVAL(code) ((code) - '0')
#define ODIGITVAL(code) DIGITVAL(code)
#define XDIGITVAL(enc,code) \
(ONIGENC_IS_CODE_DIGIT(enc,code) ? DIGITVAL(code) \
: (ONIGENC_IS_CODE_UPPER(enc,code) ? (code) - 'A' + 10 : (code) - 'a' + 10))
#define SIZE_WCINT sizeof(WCINT)
#define GET_WCINT(wc,p) (wc) = *((WCINT* )(p))
#define INFINITE_DISTANCE ~((RegDistance )0)
#if defined STDC_HEADERS || (!defined isascii && !defined HAVE_ISASCII)
# define IS_ASCII(c) 1
#else
# define IS_ASCII(c) isascii(c)
#endif
#ifdef isblank
# define IS_BLANK(c) (IS_ASCII(c) && isblank(c))
#else
# define IS_BLANK(c) ((c) == ' ' || (c) == '\t')
#endif
#ifdef isgraph
# define IS_GRAPH(c) (IS_ASCII(c) && isgraph(c))
#else
# define IS_GRAPH(c) (IS_ASCII(c) && isprint(c) && !isspace(c))
#endif
#define IS_PRINT(c) (isprint(c) && IS_ASCII(c))
#define IS_ALNUM(c) (isalnum(c) && IS_ASCII(c))
#define IS_ALPHA(c) (isalpha(c) && IS_ASCII(c))
#define IS_LOWER(c) (islower(c) && IS_ASCII(c))
#define IS_UPPER(c) (isupper(c) && IS_ASCII(c))
#define IS_CNTRL(c) (iscntrl(c) && IS_ASCII(c))
#define IS_PUNCT(c) (ispunct(c) && IS_ASCII(c))
#define IS_SPACE(c) (isspace(c) && IS_ASCII(c))
#define IS_DIGIT(c) (isdigit(c) && IS_ASCII(c))
#define IS_XDIGIT(c) (isxdigit(c) && IS_ASCII(c))
#define IS_ODIGIT(c) (IS_DIGIT(c) && (c) < '8')
#define DIGITVAL(c) ((c) - '0')
#define ODIGITVAL(c) DIGITVAL(c)
#define XDIGITVAL(c) \
(IS_DIGIT(c) ? DIGITVAL(c) : (IS_UPPER(c) ? (c) - 'A' + 10 : (c) - 'a' + 10))
#define IS_SINGLELINE(option) ((option) & REG_OPTION_SINGLELINE)
#define IS_MULTILINE(option) ((option) & REG_OPTION_MULTILINE)
#define IS_IGNORECASE(option) ((option) & REG_OPTION_IGNORECASE)
#define IS_EXTEND(option) ((option) & REG_OPTION_EXTEND)
#define IS_FIND_LONGEST(option) ((option) & REG_OPTION_FIND_LONGEST)
#define IS_FIND_NOT_EMPTY(option) ((option) & REG_OPTION_FIND_NOT_EMPTY)
#define IS_SINGLELINE(option) ((option) & ONIG_OPTION_SINGLELINE)
#define IS_MULTILINE(option) ((option) & ONIG_OPTION_MULTILINE)
#define IS_IGNORECASE(option) ((option) & ONIG_OPTION_IGNORECASE)
#define IS_EXTEND(option) ((option) & ONIG_OPTION_EXTEND)
#define IS_FIND_LONGEST(option) ((option) & ONIG_OPTION_FIND_LONGEST)
#define IS_FIND_NOT_EMPTY(option) ((option) & ONIG_OPTION_FIND_NOT_EMPTY)
#define IS_POSIXLINE(option) (IS_SINGLELINE(option) && IS_MULTILINE(option))
#define IS_FIND_CONDITION(option) ((option) & \
(REG_OPTION_FIND_LONGEST | REG_OPTION_FIND_NOT_EMPTY))
#define IS_NOTBOL(option) ((option) & REG_OPTION_NOTBOL)
#define IS_NOTEOL(option) ((option) & REG_OPTION_NOTEOL)
#define IS_POSIX_REGION(option) ((option) & REG_OPTION_POSIX_REGION)
(ONIG_OPTION_FIND_LONGEST | ONIG_OPTION_FIND_NOT_EMPTY))
#define IS_NOTBOL(option) ((option) & ONIG_OPTION_NOTBOL)
#define IS_NOTEOL(option) ((option) & ONIG_OPTION_NOTEOL)
#define IS_POSIX_REGION(option) ((option) & ONIG_OPTION_POSIX_REGION)
#ifdef NEWLINE
#undef NEWLINE
#endif
#define NEWLINE '\n'
#define IS_NULL(p) (((void*)(p)) == (void*)0)
#define IS_NOT_NULL(p) (((void*)(p)) != (void*)0)
#define IS_NEWLINE(c) ((c) == NEWLINE)
#define CHECK_NULL_RETURN(p) if (IS_NULL(p)) return NULL
#define CHECK_NULL_RETURN_VAL(p,val) if (IS_NULL(p)) return (val)
/* OP_SET_OPTION is required for these options.
#define IS_DYNAMIC_OPTION(option) \
(((option) & (ONIG_OPTION_MULTILINE | ONIG_OPTION_IGNORECASE)) != 0)
*/
/* ignore-case and multibyte status are included in compiled code. */
#define IS_DYNAMIC_OPTION(option) 0
#define NULL_UCHARP ((UChar* )0)
/* bitset */
#define BITS_PER_BYTE 8
@ -327,7 +301,7 @@ typedef unsigned int WCINT;
#define BITS_IN_ROOM (sizeof(Bits) * BITS_PER_BYTE)
#define BITSET_SIZE (SINGLE_BYTE_SIZE / BITS_IN_ROOM)
#ifdef UNALIGNED_WORD_ACCESS
#ifdef PLATFORM_UNALIGNED_WORD_ACCESS
typedef unsigned int Bits;
#else
typedef unsigned char Bits;
@ -357,18 +331,18 @@ typedef struct _BBuf {
unsigned int alloc;
} BBuf;
#define BBUF_INIT(buf,size) regex_bbuf_init((BBuf* )(buf), (size))
#define BBUF_INIT(buf,size) onig_bbuf_init((BBuf* )(buf), (size))
#define BBUF_SIZE_INC(buf,inc) do{\
(buf)->alloc += (inc);\
(buf)->p = (UChar* )xrealloc((buf)->p, (buf)->alloc);\
if (IS_NULL((buf)->p)) return(REGERR_MEMORY);\
if (IS_NULL((buf)->p)) return(ONIGERR_MEMORY);\
} while (0)
#define BBUF_EXPAND(buf,low) do{\
do { (buf)->alloc *= 2; } while ((buf)->alloc < low);\
(buf)->p = (UChar* )xrealloc((buf)->p, (buf)->alloc);\
if (IS_NULL((buf)->p)) return(REGERR_MEMORY);\
if (IS_NULL((buf)->p)) return(ONIGERR_MEMORY);\
} while (0)
#define BBUF_ENSURE_SIZE(buf,size) do{\
@ -376,7 +350,7 @@ typedef struct _BBuf {
while (new_alloc < (size)) { new_alloc *= 2; }\
if ((buf)->alloc != new_alloc) {\
(buf)->p = (UChar* )xrealloc((buf)->p, new_alloc);\
if (IS_NULL((buf)->p)) return(REGERR_MEMORY);\
if (IS_NULL((buf)->p)) return(ONIGERR_MEMORY);\
(buf)->alloc = new_alloc;\
}\
} while (0)
@ -430,112 +404,6 @@ typedef struct _BBuf {
#define BBUF_GET_BYTE(buf, pos) (buf)->p[(pos)]
extern UChar* DefaultTransTable;
#define TOLOWER(enc,c) (DefaultTransTable[c])
/* methods for support multi-byte code, */
#define ismb(code,c) (mblen((code),(c)) != 1)
#define MB2WC(p,end,code) mb2wc((p),(end),(code))
#define MBBACK(code,start,s,n) step_backward_char((code),(start),(s),(n))
#ifdef REG_RUBY_M17N
#define MB2WC_AVAILABLE(enc) 1
#define WC2MB_FIRST(enc, wc) m17n_firstbyte((enc),(wc))
#define mbmaxlen(enc) m17n_mbmaxlen(enc)
#define mblen(enc,c) m17n_mbclen(enc,c)
#define mbmaxlen_dist(enc) \
(mbmaxlen(enc) > 0 ? mbmaxlen(enc) : INFINITE_DISTANCE)
#define IS_SINGLEBYTE_CODE(enc) (m17n_mbmaxlen(enc) == 1)
/* #define IS_INDEPENDENT_TRAIL(enc) m17n_independent_trail(enc) */
#define IS_INDEPENDENT_TRAIL(enc) IS_SINGLEBYTE_CODE(enc)
#define IS_CODE_ASCII(enc,c) IS_ASCII(c)
#define IS_CODE_GRAPH(enc,c) IS_GRAPH(c)
#define IS_CODE_PRINT(enc,c) m17n_isprint(enc,c)
#define IS_CODE_ALNUM(enc,c) m17n_isalnum(enc,c)
#define IS_CODE_ALPHA(enc,c) m17n_isalpha(enc,c)
#define IS_CODE_LOWER(enc,c) m17n_islower(enc,c)
#define IS_CODE_UPPER(enc,c) m17n_isupper(enc,c)
#define IS_CODE_CNTRL(enc,c) m17n_iscntrl(enc,c)
#define IS_CODE_PUNCT(enc,c) m17n_ispunct(enc,c)
#define IS_CODE_SPACE(enc,c) m17n_isspace(enc,c)
#define IS_CODE_BLANK(enc,c) IS_BLANK(c)
#define IS_CODE_DIGIT(enc,c) m17n_isdigit(enc,c)
#define IS_CODE_XDIGIT(enc,c) m17n_isxdigit(enc,c)
#define IS_CODE_WORD(enc,c) m17n_iswchar(enc,c)
#define ISNOT_CODE_WORD(enc,c) (!m17n_iswchar(enc,c))
#define IS_WORD_STR(code,s,end) \
(ismb((code),*(s)) ? (s + mblen((code),*(s)) <= (end)) : \
m17n_iswchar(code,*(s)))
#define IS_WORD_STR_INC(code,s,end) \
(ismb((code),*(s)) ? ((s) += mblen((code),*(s)), (s) <= (end)) : \
(s++, m17n_iswchar(code,s[-1])))
#define IS_WORD_HEAD(enc,c) (ismb(enc,c) ? 1 : IS_CODE_WORD(enc,c))
#define IS_SB_WORD(code,c) (mblen(code,c) == 1 && IS_CODE_WORD(code,c))
#define IS_MB_WORD(code,c) ismb(code,c)
#define mb2wc(p,e,enc) m17n_codepoint((enc),(p),(e))
#else /* REG_RUBY_M17N */
#define mb2wc(p,e,code) regex_mb2wc((p),(e),(code))
#define MB2WC_AVAILABLE(code) 1
#define WC2MB_FIRST(code, wc) regex_wc2mb_first(code, wc)
#define mbmaxlen_dist(code) mbmaxlen(code)
#define mbmaxlen(code) regex_mb_max_length(code)
#define mblen(code,c) (code)[(int )(c)]
#define IS_SINGLEBYTE_CODE(code) ((code) == REGCODE_ASCII)
#define IS_INDEPENDENT_TRAIL(code) \
((code) == REGCODE_ASCII || (code) == REGCODE_UTF8)
#define IS_CODE_ASCII(code,c) IS_ASCII(c)
#define IS_CODE_GRAPH(code,c) IS_GRAPH(c)
#define IS_CODE_PRINT(code,c) IS_PRINT(c)
#define IS_CODE_ALNUM(code,c) IS_ALNUM(c)
#define IS_CODE_ALPHA(code,c) IS_ALPHA(c)
#define IS_CODE_LOWER(code,c) IS_LOWER(c)
#define IS_CODE_UPPER(code,c) IS_UPPER(c)
#define IS_CODE_CNTRL(code,c) IS_CNTRL(c)
#define IS_CODE_PUNCT(code,c) IS_PUNCT(c)
#define IS_CODE_SPACE(code,c) IS_SPACE(c)
#define IS_CODE_BLANK(code,c) IS_BLANK(c)
#define IS_CODE_DIGIT(code,c) IS_DIGIT(c)
#define IS_CODE_ODIGIT(code,c) IS_ODIGIT(c)
#define IS_CODE_XDIGIT(code,c) IS_XDIGIT(c)
#define IS_SB_WORD(code,c) (IS_CODE_ALNUM(code,c) || (c) == '_')
#define IS_MB_WORD(code,c) ismb(code,c)
#define IS_CODE_WORD(code,c) \
(IS_SB_WORD(code,c) && ((c) < 0x80 || (code) == REGCODE_ASCII))
#define ISNOT_CODE_WORD(code,c) \
((!IS_SB_WORD(code,c)) && !ismb(code,c))
#define IS_WORD_STR(code,s,end) \
(ismb((code),*(s)) ? (s + mblen((code),*(s)) <= (end)) : \
IS_SB_WORD(code,*(s)))
#define IS_WORD_STR_INC(code,s,end) \
(ismb((code),*(s)) ? ((s) += mblen((code),*(s)), (s) <= (end)) : \
(s++, IS_SB_WORD(code,s[-1])))
#define IS_WORD_HEAD(code,c) (ismb(code,c) ? 1 : IS_SB_WORD(code,c))
extern int regex_mb_max_length P_((RegCharEncoding code));
extern WCINT regex_mb2wc P_((UChar* p, UChar* end, RegCharEncoding code));
extern int regex_wc2mb_first P_((RegCharEncoding code, WCINT wc));
#endif /* not REG_RUBY_M17N */
#define ANCHOR_BEGIN_BUF (1<<0)
#define ANCHOR_BEGIN_LINE (1<<1)
@ -571,7 +439,7 @@ enum OpCode {
OP_EXACTMB2N2, /* mb-length = 2 N = 2 */
OP_EXACTMB2N3, /* mb-length = 2 N = 3 */
OP_EXACTMB2N, /* mb-length = 2 */
OP_EXACTMB3N, /* mb length = 3 */
OP_EXACTMB3N, /* mb-length = 3 */
OP_EXACTMBN, /* other length */
OP_EXACT1_IC, /* single byte, N = 1, ignore case */
@ -584,9 +452,12 @@ enum OpCode {
OP_CCLASS_MB_NOT,
OP_CCLASS_MIX_NOT,
OP_ANYCHAR, /* "." */
OP_ANYCHAR_STAR, /* ".*" */
OP_ANYCHAR, /* "." */
OP_ANYCHAR_ML, /* "." multi-line */
OP_ANYCHAR_STAR, /* ".*" */
OP_ANYCHAR_ML_STAR, /* ".*" multi-line */
OP_ANYCHAR_STAR_PEEK_NEXT,
OP_ANYCHAR_ML_STAR_PEEK_NEXT,
OP_WORD,
OP_NOT_WORD,
@ -608,7 +479,9 @@ enum OpCode {
OP_BACKREF2,
OP_BACKREF3,
OP_BACKREFN,
OP_BACKREFN_IC,
OP_BACKREF_MULTI,
OP_BACKREF_MULTI_IC,
OP_MEMORY_START,
OP_MEMORY_START_PUSH, /* push back-tracker to stack */
@ -632,6 +505,8 @@ enum OpCode {
OP_REPEAT_INC_NG, /* non greedy */
OP_NULL_CHECK_START, /* null loop checker start */
OP_NULL_CHECK_END, /* null loop checker end */
OP_NULL_CHECK_END_MEMST, /* null loop checker end (with capture status) */
OP_NULL_CHECK_END_MEMST_PUSH, /* with capture status and push check-end */
OP_PUSH_POS, /* (?=...) start */
OP_POP_POS, /* (?=...) end */
@ -668,9 +543,10 @@ typedef int RepeatNumType;
#define SIZE_LENGTH sizeof(LengthType)
#define SIZE_MEMNUM sizeof(MemNumType)
#define SIZE_REPEATNUM sizeof(RepeatNumType)
#define SIZE_OPTION sizeof(RegOptionType)
#define SIZE_OPTION sizeof(OnigOptionType)
#define SIZE_CODE_POINT sizeof(OnigCodePoint)
#ifdef UNALIGNED_WORD_ACCESS
#ifdef PLATFORM_UNALIGNED_WORD_ACCESS
#define GET_RELADDR_INC(addr,p) do{\
addr = *((RelAddrType* )(p));\
(p) += SIZE_RELADDR;\
@ -697,7 +573,7 @@ typedef int RepeatNumType;
} while(0)
#define GET_OPTION_INC(option,p) do{\
option = *((RegOptionType* )(p));\
option = *((OnigOptionType* )(p));\
(p) += SIZE_OPTION;\
} while(0)
#else
@ -718,8 +594,10 @@ typedef int RepeatNumType;
#define SERIALIZE_BUFSIZE SIZEOF_INT
#endif /* UNALIGNED_WORD_ACCESS */
#endif /* PLATFORM_UNALIGNED_WORD_ACCESS */
/* code point's address must be aligned address. */
#define GET_CODE_POINT(code,p) code = *((OnigCodePoint* )(p))
#define GET_BYTE_INC(byte,p) do{\
byte = *(p);\
(p)++;\
@ -760,31 +638,50 @@ typedef int RepeatNumType;
#define SIZE_OP_RETURN SIZE_OPCODE
#ifdef REG_DEBUG
typedef struct {
OnigCodePoint esc;
OnigCodePoint anychar;
OnigCodePoint anytime;
OnigCodePoint zero_or_one_time;
OnigCodePoint one_or_more_time;
OnigCodePoint anychar_anytime;
} OnigMetaCharTableType;
extern OnigMetaCharTableType OnigMetaCharTable;
#define MC_ESC OnigMetaCharTable.esc
#define MC_ANYCHAR OnigMetaCharTable.anychar
#define MC_ANYTIME OnigMetaCharTable.anytime
#define MC_ZERO_OR_ONE_TIME OnigMetaCharTable.zero_or_one_time
#define MC_ONE_OR_MORE_TIME OnigMetaCharTable.one_or_more_time
#define MC_ANYCHAR_ANYTIME OnigMetaCharTable.anychar_anytime
#ifdef ONIG_DEBUG
typedef struct {
short int opcode;
char* name;
short int arg_type;
} RegOpInfoType;
} OnigOpInfoType;
extern RegOpInfoType RegOpInfo[];
extern OnigOpInfoType OnigOpInfo[];
extern void regex_print_compiled_byte_code P_((FILE* f, UChar* bp, UChar** nextp));
extern void onig_print_compiled_byte_code P_((FILE* f, UChar* bp, UChar** nextp));
#ifdef REG_DEBUG_STATISTICS
extern void regex_statistics_init P_((void));
extern void regex_print_statistics P_((FILE* f));
#ifdef ONIG_DEBUG_STATISTICS
extern void onig_statistics_init P_((void));
extern void onig_print_statistics P_((FILE* f));
#endif
#endif
extern char* regex_error_code_to_format P_((int code));
extern void regex_snprintf_with_pattern PV_((char buf[], int bufsize, RegCharEncoding enc, char* pat, char* pat_end, char *fmt, ...));
extern UChar* regex_strdup P_((UChar* s, UChar* end));
extern int regex_bbuf_init P_((BBuf* buf, int size));
extern int regex_alloc_init P_((regex_t** reg, RegOptionType option, RegCharEncoding code, RegSyntaxType* syntax));
extern int regex_compile P_((regex_t* reg, UChar* pattern, UChar* pattern_end, RegErrorInfo* einfo));
extern void regex_chain_reduce P_((regex_t* reg));
extern int regex_is_in_wc_range P_((UChar* p, WCINT wc));
extern char* onig_error_code_to_format P_((int code));
extern void onig_snprintf_with_pattern PV_((char buf[], int bufsize, OnigEncoding enc, char* pat, char* pat_end, char *fmt, ...));
extern UChar* onig_strdup P_((UChar* s, UChar* end));
extern int onig_bbuf_init P_((BBuf* buf, int size));
extern int onig_alloc_init P_((regex_t** reg, OnigOptionType option, OnigEncoding enc, OnigSyntaxType* syntax));
extern int onig_compile P_((regex_t* reg, UChar* pattern, UChar* pattern_end, OnigErrorInfo* einfo));
extern void onig_chain_reduce P_((regex_t* reg));
extern int onig_is_in_code_range P_((UChar* p, OnigCodePoint code));
#endif /* REGINT_H */

File diff suppressed because it is too large Load diff

View file

@ -2,7 +2,7 @@
regparse.h - Oniguruma (regular expression library)
Copyright (C) 2003 K.Kosako (kosako@sofnec.co.jp)
Copyright (C) 2003-2004 K.Kosako (kosako@sofnec.co.jp)
**********************************************************************/
#ifndef REGPARSE_H
@ -64,6 +64,7 @@
#define NSTRING_LEN(node) ((node)->u.str.end - (node)->u.str.s)
#define NSTRING_SET_RAW(node) (node)->u.str.flag |= NSTR_RAW
#define NSTRING_CLEAR_RAW(node) (node)->u.str.flag &= ~NSTR_RAW
#define NSTRING_SET_CASE_AMBIG(node) (node)->u.str.flag |= NSTR_CASE_AMBIG
#define NSTRING_IS_RAW(node) (((node)->u.str.flag & NSTR_RAW) != 0)
#define NSTRING_IS_CASE_AMBIG(node) \
@ -72,6 +73,14 @@
#define BACKREFS_P(br) \
(IS_NOT_NULL((br)->back_dynamic) ? (br)->back_dynamic : (br)->back_static);
#define CCLASS_SET_NOT(cc) (cc)->not = 1
#define NQ_TARGET_ISNOT_EMPTY 0
#define NQ_TARGET_IS_EMPTY 1
#define NQ_TARGET_IS_EMPTY_MEM 2
#define NQ_TARGET_IS_EMPTY_REC 3
typedef struct {
UChar* s;
UChar* end;
@ -92,23 +101,26 @@ typedef struct {
int upper;
int greedy;
int by_number; /* {n,m} */
int target_may_empty; /* target can match with empty data */
int target_empty_info;
struct _Node* head_exact;
struct _Node* next_head_exact;
int is_refered; /* include called node. don't eliminate even if {0} */
} QualifierNode;
/* status bits */
#define NST_RECURSION (1<<0)
#define NST_CALLED (1<<1)
#define NST_ADDR_FIXED (1<<2)
#define NST_MIN_FIXED (1<<3)
#define NST_MAX_FIXED (1<<4)
#define NST_CLEN_FIXED (1<<5)
#define NST_MARK1 (1<<6)
#define NST_MARK2 (1<<7)
#define NST_MEM_BACKREFED (1<<8)
#define NST_SIMPLE_REPEAT (1<<9) /* for stop backtrack optimization */
#define NST_MIN_FIXED (1<<0)
#define NST_MAX_FIXED (1<<1)
#define NST_CLEN_FIXED (1<<2)
#define NST_MARK1 (1<<3)
#define NST_MARK2 (1<<4)
#define NST_MEM_BACKREFED (1<<5)
#define NST_SIMPLE_REPEAT (1<<6) /* for stop backtrack optimization */
#define NST_RECURSION (1<<7)
#define NST_CALLED (1<<8)
#define NST_ADDR_FIXED (1<<9)
#define NST_NAMED_GROUP (1<<10)
#define NST_NAME_REF (1<<11)
#define SET_EFFECT_STATUS(node,f) (node)->u.effect.state |= (f)
#define CLEAR_EFFECT_STATUS(node,f) (node)->u.effect.state &= ~(f)
@ -122,20 +134,23 @@ typedef struct {
#define IS_EFFECT_MAX_FIXED(en) (((en)->state & NST_MAX_FIXED) != 0)
#define IS_EFFECT_CLEN_FIXED(en) (((en)->state & NST_CLEN_FIXED) != 0)
#define IS_EFFECT_SIMPLE_REPEAT(en) (((en)->state & NST_SIMPLE_REPEAT) != 0)
#define IS_EFFECT_NAMED_GROUP(en) (((en)->state & NST_NAMED_GROUP) != 0)
#define SET_CALL_RECURSION(node) (node)->u.call.state |= NST_RECURSION
#define IS_CALL_RECURSION(cn) (((cn)->state & NST_RECURSION) != 0)
#define IS_CALL_NAME_REF(cn) (((cn)->state & NST_NAME_REF) != 0)
#define IS_BACKREF_NAME_REF(bn) (((bn)->state & NST_NAME_REF) != 0)
typedef struct {
int state;
int type;
int regnum;
RegOptionType option;
OnigOptionType option;
struct _Node* target;
AbsAddrType call_addr;
/* for multiple call reference */
RegDistance min_len; /* min length (byte) */
RegDistance max_len; /* max length (byte) */
OnigDistance min_len; /* min length (byte) */
OnigDistance max_len; /* max length (byte) */
int char_len; /* character length */
int opt_count; /* referenced count in optimize_node_left() */
} EffectNode;
@ -209,10 +224,12 @@ typedef struct _Node {
(senv)->mem_nodes_dynamic : (senv)->mem_nodes_static)
typedef struct {
RegOptionType option;
RegCharEncoding enc;
RegSyntaxType* syntax;
BitStatusType backtrack_mem;
OnigOptionType option;
OnigEncoding enc;
OnigSyntaxType* syntax;
BitStatusType capture_history;
BitStatusType bt_mem_start;
BitStatusType bt_mem_end;
BitStatusType backrefed_mem;
UChar* pattern;
UChar* pattern_end;
@ -224,6 +241,9 @@ typedef struct {
UnsetAddrList* unset_addr_list;
#endif
int num_mem;
#ifdef USE_NAMED_GROUP
int num_named;
#endif
int mem_alloc;
Node* mem_nodes_static[SCANENV_MEMNODES_SIZE];
Node** mem_nodes_dynamic;
@ -234,21 +254,23 @@ typedef struct {
#define IS_SYNTAX_OP2(syn, opm) (((syn)->op2 & (opm)) != 0)
#define IS_SYNTAX_BV(syn, bvm) (((syn)->behavior & (bvm)) != 0)
extern int onig_is_code_in_cc P_((OnigEncoding enc, OnigCodePoint code, CClassNode* cc));
extern int onig_strncmp P_((UChar* s1, UChar* s2, int n));
extern void onig_scan_env_set_error_string P_((ScanEnv* env, int ecode, UChar* arg, UChar* arg_end));
extern int onig_scan_unsigned_number P_((UChar** src, UChar* end, OnigEncoding enc));
extern void onig_reduce_nested_qualifier P_((Node* pnode, Node* cnode));
extern void onig_node_conv_to_str_node P_((Node* node, int raw));
extern int onig_node_str_cat P_((Node* node, UChar* s, UChar* end));
extern void onig_node_free P_((Node* node));
extern Node* onig_node_new_effect P_((int type));
extern Node* onig_node_new_anchor P_((int type));
extern int onig_free_node_list();
extern int onig_names_free P_((regex_t* reg));
extern int onig_parse_make_tree P_((Node** root, UChar* pattern, UChar* end, regex_t* reg, ScanEnv* env));
extern void regex_scan_env_set_error_string P_((ScanEnv* env, int ecode, UChar* arg, UChar* arg_end));
extern int regex_scan_unsigned_number P_((UChar** src, UChar* end, RegCharEncoding enc));
extern void regex_node_conv_to_str_node P_((Node* node, int raw));
extern int regex_node_str_cat P_((Node* node, UChar* s, UChar* end));
extern void regex_node_free P_((Node* node));
extern Node* regex_node_new_effect P_((int type));
extern Node* regex_node_new_anchor P_((int type));
extern int regex_free_node_list();
extern int regex_names_free P_((regex_t* reg));
extern int regex_parse_make_tree P_((Node** root, UChar* pattern, UChar* end, regex_t* reg, ScanEnv* env));
#ifdef REG_DEBUG
#ifdef USE_NAMED_SUBEXP
extern int regex_print_names(FILE*, regex_t*);
#ifdef ONIG_DEBUG
#ifdef USE_NAMED_GROUP
extern int onig_print_names(FILE*, regex_t*);
#endif
#endif

View file

@ -2,7 +2,7 @@
regposerr.c - Oniguruma (regular expression library)
Copyright (C) 2003 K.Kosako (kosako@sofnec.co.jp)
Copyright (C) 2003-2004 K.Kosako (kosako@sofnec.co.jp)
**********************************************************************/
#include "config.h"

View file

@ -2,7 +2,7 @@
regposix.c - Oniguruma (regular expression library)
Copyright (C) 2003 K.Kosako (kosako@sofnec.co.jp)
Copyright (C) 2003-2004 K.Kosako (kosako@sofnec.co.jp)
**********************************************************************/
@ -17,7 +17,7 @@
#if 1
#define ENC_STRING_LEN(enc,s,len) do { \
UChar* tmps = (UChar* )(s); \
/* while (*tmps != 0) tmps += mblen(enc,*tmps); */ \
/* while (*tmps != 0) tmps += enc_len(enc,*tmps); */ \
while (*tmps != 0) tmps++; /* OK for UTF-8, EUC-JP, Shift_JIS */ \
len = tmps - (UChar* )(s); \
} while(0)
@ -34,57 +34,65 @@ static int
onig2posix_error_code(int code)
{
static O2PERR o2p[] = {
{ REG_MISMATCH, REG_NOMATCH },
{ REG_NO_SUPPORT_CONFIG, REG_EONIG_INTERNAL },
{ REGERR_MEMORY, REG_ESPACE },
{ REGERR_MATCH_STACK_LIMIT_OVER, REG_EONIG_INTERNAL },
{ REGERR_TYPE_BUG, REG_EONIG_INTERNAL },
{ REGERR_PARSER_BUG, REG_EONIG_INTERNAL },
{ REGERR_STACK_BUG, REG_EONIG_INTERNAL },
{ REGERR_UNDEFINED_BYTECODE, REG_EONIG_INTERNAL },
{ REGERR_UNEXPECTED_BYTECODE, REG_EONIG_INTERNAL },
{ REGERR_DEFAULT_ENCODING_IS_NOT_SETTED, REG_EONIG_BADARG },
{ REGERR_SPECIFIED_ENCODING_CANT_CONVERT_TO_WIDE_CHAR, REG_EONIG_BADARG },
{ REGERR_END_PATTERN_AT_LEFT_BRACE, REG_EBRACE },
{ REGERR_END_PATTERN_AT_LEFT_BRACKET, REG_EBRACK },
{ REGERR_EMPTY_CHAR_CLASS, REG_ECTYPE },
{ REGERR_PREMATURE_END_OF_CHAR_CLASS, REG_ECTYPE },
{ REGERR_END_PATTERN_AT_BACKSLASH, REG_EESCAPE },
{ REGERR_END_PATTERN_AT_META, REG_EESCAPE },
{ REGERR_END_PATTERN_AT_CONTROL, REG_EESCAPE },
{ REGERR_META_CODE_SYNTAX, REG_BADPAT },
{ REGERR_CONTROL_CODE_SYNTAX, REG_BADPAT },
{ REGERR_CHAR_CLASS_VALUE_AT_END_OF_RANGE, REG_ECTYPE },
{ REGERR_CHAR_CLASS_VALUE_AT_START_OF_RANGE, REG_ECTYPE },
{ REGERR_UNMATCHED_RANGE_SPECIFIER_IN_CHAR_CLASS, REG_ECTYPE },
{ REGERR_TARGET_OF_REPEAT_OPERATOR_NOT_SPECIFIED, REG_BADRPT },
{ REGERR_TARGET_OF_REPEAT_OPERATOR_INVALID, REG_BADRPT },
{ REGERR_NESTED_REPEAT_OPERATOR, REG_BADRPT },
{ REGERR_UNMATCHED_CLOSE_PARENTHESIS, REG_EPAREN },
{ REGERR_END_PATTERN_WITH_UNMATCHED_PARENTHESIS, REG_EPAREN },
{ REGERR_END_PATTERN_IN_GROUP, REG_BADPAT },
{ REGERR_UNDEFINED_GROUP_OPTION, REG_BADPAT },
{ REGERR_INVALID_POSIX_BRACKET_TYPE, REG_BADPAT },
{ REGERR_INVALID_LOOK_BEHIND_PATTERN, REG_BADPAT },
{ REGERR_INVALID_REPEAT_RANGE_PATTERN, REG_BADPAT },
{ REGERR_TOO_BIG_NUMBER, REG_BADPAT },
{ REGERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE, REG_BADBR },
{ REGERR_UPPER_SMALLER_THAN_LOWER_IN_REPEAT_RANGE, REG_BADBR },
{ REGERR_EMPTY_RANGE_IN_CHAR_CLASS, REG_ECTYPE },
{ REGERR_MISMATCH_CODE_LENGTH_IN_CLASS_RANGE, REG_ECTYPE },
{ REGERR_TOO_MANY_MULTI_BYTE_RANGES, REG_ECTYPE },
{ REGERR_TOO_SHORT_MULTI_BYTE_STRING, REG_BADPAT },
{ REGERR_TOO_BIG_BACKREF_NUMBER, REG_ESUBREG },
{ REGERR_INVALID_BACKREF, REG_ESUBREG },
{ REGERR_TOO_BIG_WIDE_CHAR_VALUE, REG_EONIG_BADWC },
{ REGERR_TOO_LONG_WIDE_CHAR_VALUE, REG_EONIG_BADWC },
{ REGERR_INVALID_WIDE_CHAR_VALUE, REG_EONIG_BADWC },
{ REGERR_INVALID_SUBEXP_NAME, REG_BADPAT },
{ REGERR_UNDEFINED_NAME_REFERENCE, REG_BADPAT },
{ REGERR_UNDEFINED_GROUP_REFERENCE, REG_BADPAT },
{ REGERR_MULTIPLEX_DEFINITION_NAME_CALL, REG_BADPAT },
{ REGERR_NEVER_ENDING_RECURSION, REG_BADPAT },
{ REGERR_OVER_THREAD_PASS_LIMIT_COUNT, REG_EONIG_THREAD }
{ ONIG_MISMATCH, REG_NOMATCH },
{ ONIG_NO_SUPPORT_CONFIG, REG_EONIG_INTERNAL },
{ ONIGERR_MEMORY, REG_ESPACE },
{ ONIGERR_MATCH_STACK_LIMIT_OVER, REG_EONIG_INTERNAL },
{ ONIGERR_TYPE_BUG, REG_EONIG_INTERNAL },
{ ONIGERR_PARSER_BUG, REG_EONIG_INTERNAL },
{ ONIGERR_STACK_BUG, REG_EONIG_INTERNAL },
{ ONIGERR_UNDEFINED_BYTECODE, REG_EONIG_INTERNAL },
{ ONIGERR_UNEXPECTED_BYTECODE, REG_EONIG_INTERNAL },
{ ONIGERR_DEFAULT_ENCODING_IS_NOT_SETTED, REG_EONIG_BADARG },
{ ONIGERR_SPECIFIED_ENCODING_CANT_CONVERT_TO_WIDE_CHAR, REG_EONIG_BADARG },
{ ONIGERR_INVALID_ARGUMENT, REG_EONIG_BADARG },
{ ONIGERR_END_PATTERN_AT_LEFT_BRACE, REG_EBRACE },
{ ONIGERR_END_PATTERN_AT_LEFT_BRACKET, REG_EBRACK },
{ ONIGERR_EMPTY_CHAR_CLASS, REG_ECTYPE },
{ ONIGERR_PREMATURE_END_OF_CHAR_CLASS, REG_ECTYPE },
{ ONIGERR_END_PATTERN_AT_BACKSLASH, REG_EESCAPE },
{ ONIGERR_END_PATTERN_AT_META, REG_EESCAPE },
{ ONIGERR_END_PATTERN_AT_CONTROL, REG_EESCAPE },
{ ONIGERR_META_CODE_SYNTAX, REG_BADPAT },
{ ONIGERR_CONTROL_CODE_SYNTAX, REG_BADPAT },
{ ONIGERR_CHAR_CLASS_VALUE_AT_END_OF_RANGE, REG_ECTYPE },
{ ONIGERR_CHAR_CLASS_VALUE_AT_START_OF_RANGE, REG_ECTYPE },
{ ONIGERR_UNMATCHED_RANGE_SPECIFIER_IN_CHAR_CLASS, REG_ECTYPE },
{ ONIGERR_TARGET_OF_REPEAT_OPERATOR_NOT_SPECIFIED, REG_BADRPT },
{ ONIGERR_TARGET_OF_REPEAT_OPERATOR_INVALID, REG_BADRPT },
{ ONIGERR_NESTED_REPEAT_OPERATOR, REG_BADRPT },
{ ONIGERR_UNMATCHED_CLOSE_PARENTHESIS, REG_EPAREN },
{ ONIGERR_END_PATTERN_WITH_UNMATCHED_PARENTHESIS, REG_EPAREN },
{ ONIGERR_END_PATTERN_IN_GROUP, REG_BADPAT },
{ ONIGERR_UNDEFINED_GROUP_OPTION, REG_BADPAT },
{ ONIGERR_INVALID_POSIX_BRACKET_TYPE, REG_BADPAT },
{ ONIGERR_INVALID_LOOK_BEHIND_PATTERN, REG_BADPAT },
{ ONIGERR_INVALID_REPEAT_RANGE_PATTERN, REG_BADPAT },
{ ONIGERR_TOO_BIG_NUMBER, REG_BADPAT },
{ ONIGERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE, REG_BADBR },
{ ONIGERR_UPPER_SMALLER_THAN_LOWER_IN_REPEAT_RANGE, REG_BADBR },
{ ONIGERR_EMPTY_RANGE_IN_CHAR_CLASS, REG_ECTYPE },
{ ONIGERR_MISMATCH_CODE_LENGTH_IN_CLASS_RANGE, REG_ECTYPE },
{ ONIGERR_TOO_MANY_MULTI_BYTE_RANGES, REG_ECTYPE },
{ ONIGERR_TOO_SHORT_MULTI_BYTE_STRING, REG_BADPAT },
{ ONIGERR_TOO_BIG_BACKREF_NUMBER, REG_ESUBREG },
{ ONIGERR_INVALID_BACKREF, REG_ESUBREG },
{ ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED, REG_BADPAT },
{ ONIGERR_TOO_BIG_WIDE_CHAR_VALUE, REG_EONIG_BADWC },
{ ONIGERR_TOO_LONG_WIDE_CHAR_VALUE, REG_EONIG_BADWC },
{ ONIGERR_INVALID_WIDE_CHAR_VALUE, REG_EONIG_BADWC },
{ ONIGERR_EMPTY_GROUP_NAME, REG_BADPAT },
{ ONIGERR_INVALID_GROUP_NAME, REG_BADPAT },
{ ONIGERR_INVALID_CHAR_IN_GROUP_NAME, REG_BADPAT },
{ ONIGERR_UNDEFINED_NAME_REFERENCE, REG_BADPAT },
{ ONIGERR_UNDEFINED_GROUP_REFERENCE, REG_BADPAT },
{ ONIGERR_MULTIPLEX_DEFINED_NAME, REG_BADPAT },
{ ONIGERR_MULTIPLEX_DEFINITION_NAME_CALL, REG_BADPAT },
{ ONIGERR_NEVER_ENDING_RECURSION, REG_BADPAT },
{ ONIGERR_GROUP_NUMBER_OVER_FOR_CAPTURE_HISTORY, REG_BADPAT },
{ ONIGERR_INVALID_CHAR_PROPERTY_NAME, REG_BADPAT },
{ ONIGERR_OVER_THREAD_PASS_LIMIT_COUNT, REG_EONIG_THREAD }
};
int i;
@ -103,26 +111,27 @@ extern int
regcomp(regex_t* reg, const char* pattern, int posix_options)
{
int r, len;
RegSyntaxType* syntax = RegDefaultSyntax;
RegOptionType options;
OnigSyntaxType* syntax = OnigDefaultSyntax;
OnigOptionType options;
if ((posix_options & REG_EXTENDED) == 0)
syntax = REG_SYNTAX_POSIX_BASIC;
syntax = ONIG_SYNTAX_POSIX_BASIC;
options = syntax->options;
if ((posix_options & REG_ICASE) != 0)
REG_OPTION_ON(options, REG_OPTION_IGNORECASE);
ONIG_OPTION_ON(options, ONIG_OPTION_IGNORECASE);
if ((posix_options & REG_NEWLINE) != 0) {
REG_OPTION_ON( options, REG_OPTION_NEGATE_SINGLELINE);
REG_OPTION_OFF(options, REG_OPTION_SINGLELINE);
ONIG_OPTION_ON( options, ONIG_OPTION_NEGATE_SINGLELINE);
ONIG_OPTION_OFF(options, ONIG_OPTION_SINGLELINE);
}
reg->comp_options = posix_options;
ENC_STRING_LEN(RegDefaultCharEncoding, pattern, len);
r = regex_new(PONIG_C(reg), (UChar* )pattern, (UChar* )(pattern + len),
options, RegDefaultCharEncoding, syntax, (RegErrorInfo* )NULL);
if (r != REG_NORMAL) {
ENC_STRING_LEN(OnigEncDefaultCharEncoding, pattern, len);
r = onig_new(PONIG_C(reg), (UChar* )pattern, (UChar* )(pattern + len),
options, OnigEncDefaultCharEncoding, syntax,
(OnigErrorInfo* )NULL);
if (r != ONIG_NORMAL) {
return onig2posix_error_code(r);
}
@ -136,11 +145,11 @@ regexec(regex_t* reg, const char* str, size_t nmatch,
{
int r, i, len;
UChar* end;
RegOptionType options;
OnigOptionType options;
options = REG_OPTION_POSIX_REGION;
if ((posix_options & REG_NOTBOL) != 0) options |= REG_OPTION_NOTBOL;
if ((posix_options & REG_NOTEOL) != 0) options |= REG_OPTION_NOTEOL;
options = ONIG_OPTION_POSIX_REGION;
if ((posix_options & REG_NOTBOL) != 0) options |= ONIG_OPTION_NOTBOL;
if ((posix_options & REG_NOTEOL) != 0) options |= ONIG_OPTION_NOTEOL;
if ((reg->comp_options & REG_NOSUB) != 0) {
pmatch = (regmatch_t* )NULL;
@ -149,16 +158,16 @@ regexec(regex_t* reg, const char* str, size_t nmatch,
ENC_STRING_LEN(ONIG_C(reg)->code,str,len);
end = (UChar* )(str + len);
r = regex_search(ONIG_C(reg), (UChar* )str, end, (UChar* )str, end,
(RegRegion* )pmatch, options);
r = onig_search(ONIG_C(reg), (UChar* )str, end, (UChar* )str, end,
(OnigRegion* )pmatch, options);
if (r >= 0) {
r = 0; /* Match */
}
else if (r == REG_MISMATCH) {
else if (r == ONIG_MISMATCH) {
r = REG_NOMATCH;
for (i = 0; i < nmatch; i++)
pmatch[i].rm_so = pmatch[i].rm_eo = REG_REGION_NOTPOS;
pmatch[i].rm_so = pmatch[i].rm_eo = ONIG_REGION_NOTPOS;
}
else {
r = onig2posix_error_code(r);
@ -170,26 +179,74 @@ regexec(regex_t* reg, const char* str, size_t nmatch,
extern void
regfree(regex_t* reg)
{
regex_free(ONIG_C(reg));
onig_free(ONIG_C(reg));
}
extern void
reg_set_encoding(int mb_code)
{
RegDefaultCharEncoding = REG_MBLEN_TABLE[mb_code];
OnigEncoding enc;
switch (mb_code) {
case REG_POSIX_ENCODING_ASCII:
enc = ONIG_ENCODING_ASCII;
break;
case REG_POSIX_ENCODING_EUC_JP:
enc = ONIG_ENCODING_EUC_JP;
break;
case REG_POSIX_ENCODING_SJIS:
enc = ONIG_ENCODING_SJIS;
break;
case REG_POSIX_ENCODING_UTF8:
enc = ONIG_ENCODING_UTF8;
break;
default:
return ;
break;
}
onigenc_set_default_encoding(enc);
}
extern int
reg_name_to_group_numbers(regex_t* reg,
unsigned char* name, unsigned char* name_end, int** nums)
{
return regex_name_to_group_numbers(ONIG_C(reg), name, name_end, nums);
return onig_name_to_group_numbers(ONIG_C(reg), name, name_end, nums);
}
/*
 * Context record that reg_foreach_name() fills in and hands to
 * onig_foreach_name() as the opaque argument; i_wrapper() unpacks it.
 */
typedef struct {
/* caller-supplied callback taking the POSIX-layer regex_t */
int (*func)(unsigned char*,unsigned char*,int,int*,regex_t*,void*);
/* POSIX-layer regex object forwarded to func */
regex_t* reg;
/* caller's own opaque argument, forwarded to func unchanged */
void* arg;
} i_wrap;
/*
 * Trampoline passed to onig_foreach_name().
 *
 * arg must point at an i_wrap.  The name range and group list are
 * forwarded untouched to the callback stored in that record, but the
 * onig_regex_t handed in here is replaced by the regex_t kept in the
 * record, and the record's own arg is passed as the user argument.
 * Returns whatever the stored callback returns.
 */
static int i_wrapper(unsigned char* name, unsigned char* name_end,
                     int ng, int* gs,
                     onig_regex_t* reg, void* arg)
{
  i_wrap* ctx = (i_wrap* )arg;
  regex_t* posix_reg = ctx->reg;
  void* user_arg = ctx->arg;

  return ctx->func(name, name_end, ng, gs, posix_reg, user_arg);
}
extern int
reg_foreach_name(regex_t* reg, int (*func)(unsigned char*,int,int*,void*),
void* arg)
reg_foreach_name(regex_t* reg,
int (*func)(unsigned char*,unsigned char*,int,int*,regex_t*,void*),
void* arg)
{
return regex_foreach_name(ONIG_C(reg), func, arg);
i_wrap warg;
warg.func = func;
warg.reg = reg;
warg.arg = arg;
return onig_foreach_name(ONIG_C(reg), i_wrapper, &warg);
}
/*
 * POSIX-layer wrapper: returns the result of onig_number_of_names()
 * for the Oniguruma object obtained from reg via ONIG_C().
 */
extern int
reg_number_of_names(regex_t* reg)
{
  int count = onig_number_of_names(ONIG_C(reg));

  return count;
}

View file

@ -1,64 +0,0 @@
/*
* names.c -- example of group name callback.
*/
#include<stdio.h>
#include "oniguruma.h"
/*
 * Callback handed to regex_foreach_name() by main().
 *
 * name        group name being reported (printed with %s)
 * ngroup_num  number of entries in group_nums
 * group_nums  group numbers bound to this name
 * arg         the RegRegion of the last search, passed as void*
 *
 * For every group number, writes "<name> (<num>): (<beg>-<end>)" to
 * stderr using the region's beg/end arrays.  Always returns 0.
 */
static int
name_callback(UChar* name, int ngroup_num, int* group_nums, void* arg)
{
  RegRegion *match = (RegRegion* )arg;
  int *next = group_nums;
  int left;

  for (left = ngroup_num; left > 0; left--) {
    int num = *next++;

    fprintf(stderr, "%s (%d): ", name, num);
    fprintf(stderr, "(%d-%d)\n", match->beg[num], match->end[num]);
  }

  return 0;  /* 0: continue */
}
/*
 * Sample driver for named groups.
 *
 * Compiles a pattern in which the name "foo" is defined twice and "bar"
 * once, searches a fixed subject string, and on a match reports every
 * named group through name_callback().
 *
 * Returns 0 after a match or a mismatch.  Exits with status -1 if either
 * compilation or the search reports an error; in both cases the error
 * text is written to stderr first.
 */
extern int main(int argc, char* argv[])
{
  int r;
  unsigned char *start, *range, *end;
  regex_t* reg;
  RegErrorInfo einfo;
  RegRegion *region;

  static unsigned char* pattern = "(?<foo>a*)(?<bar>b*)(?<foo>c*)";
  static unsigned char* str = "aaabbbbcc";

  r = regex_new(&reg, pattern, pattern + strlen(pattern),
                REG_OPTION_DEFAULT, REGCODE_ASCII, REG_SYNTAX_DEFAULT, &einfo);
  if (r != REG_NORMAL) {
    char s[REG_MAX_ERROR_MESSAGE_LEN];
    regex_error_code_to_str(s, r, &einfo);
    fprintf(stderr, "ERROR: %s\n", s);
    exit(-1);
  }

  region = regex_region_new();

  /* search the whole subject, starting at its first byte */
  end = str + strlen(str);
  start = str;
  range = end;
  r = regex_search(reg, str, end, start, range, region, REG_OPTION_NONE);
  if (r >= 0) {
    fprintf(stderr, "match at %d\n\n", r);
    r = regex_foreach_name(reg, name_callback, (void* )region);
  }
  else if (r == REG_MISMATCH) {
    fprintf(stderr, "search fail\n");
  }
  else { /* error */
    char s[REG_MAX_ERROR_MESSAGE_LEN];
    regex_error_code_to_str(s, r);
    /* the message used to be built and then dropped; report it */
    fprintf(stderr, "ERROR: %s\n", s);
    exit(-1);
  }

  regex_region_free(region, 1 /* 1:free self, 0:free contents only */);
  regex_free(reg);
  regex_end();
  return 0;
}

View file

@ -1,92 +0,0 @@
/*
* posix.c
*/
#include<stdio.h>
#include "onigposix.h"
/*
 * Runs a compiled POSIX regex against str and reports the outcome on
 * stderr.
 *
 * reg      compiled expression (from regcomp)
 * pattern  source text of the expression, used only in the report
 * str      subject string
 *
 * Prints "FAIL: ..." on REG_NOMATCH, otherwise "OK: ..." followed by one
 * "index: start-end" line per reported group.  Any other regexec() result
 * is turned into a message with regerror() and the process exits with
 * status -1.  Always returns 0 when it returns.
 *
 * At most as many groups as fit in the local pmatch array are requested
 * and printed, so a pattern with more groups than that cannot overrun it.
 */
static int x(regex_t* reg, unsigned char* pattern, unsigned char* str)
{
  int r, i;
  char buf[200];
  regmatch_t pmatch[20];
  size_t nmatch;

  /* whole match plus sub-groups, clamped to the capacity of pmatch */
  nmatch = reg->re_nsub + 1;
  if (nmatch > sizeof(pmatch) / sizeof(pmatch[0]))
    nmatch = sizeof(pmatch) / sizeof(pmatch[0]);

  r = regexec(reg, str, nmatch, pmatch, 0);
  if (r != 0 && r != REG_NOMATCH) {
    regerror(r, reg, buf, sizeof(buf));
    fprintf(stderr, "ERROR: %s\n", buf);
    exit(-1);
  }

  if (r == REG_NOMATCH) {
    fprintf(stderr, "FAIL: /%s/ '%s'\n", pattern, str);
  }
  else {
    fprintf(stderr, "OK: /%s/ '%s'\n", pattern, str);
    for (i = 0; i < (int )nmatch; i++) {
      fprintf(stderr, "%d: %d-%d\n", i,
              (int )pmatch[i].rm_so, (int )pmatch[i].rm_eo);
    }
  }
  return 0;
}
/*
 * Sample driver for the POSIX-style API.
 *
 * Compiles five patterns in turn with regcomp(), runs each against one
 * subject string through x(), and releases each compiled pattern with
 * regfree() before the next regcomp() reuses the same regex_t.
 *
 * Exits with status -1 (after printing the regerror() text) if any
 * regcomp() fails; otherwise returns 0.
 */
extern int main(int argc, char* argv[])
{
  int r;
  char buf[200];
  regex_t reg;
  unsigned char* pattern;

  /* default syntax (REG_SYNTAX_RUBY) */
  pattern = "^a+b{2,7}[c-f]?$|uuu";
  r = regcomp(&reg, pattern, REG_EXTENDED);
  if (r) {
    regerror(r, &reg, buf, sizeof(buf));
    fprintf(stderr, "ERROR: %s\n", buf);
    exit(-1);
  }
  x(&reg, pattern, "aaabbbbd");
  regfree(&reg);  /* release before reg is compiled again */

  /* POSIX Basic RE (REG_EXTENDED is not specified.) */
  pattern = "^a+b{2,7}[c-f]?|uuu";
  r = regcomp(&reg, pattern, 0);
  if (r) {
    regerror(r, &reg, buf, sizeof(buf));
    fprintf(stderr, "ERROR: %s\n", buf);
    exit(-1);
  }
  x(&reg, pattern, "a+b{2,7}d?|uuu");
  regfree(&reg);

  /* POSIX Basic RE (REG_EXTENDED is not specified.) */
  pattern = "^a*b\\{2,7\\}\\([c-f]\\)$";
  r = regcomp(&reg, pattern, 0);
  if (r) {
    regerror(r, &reg, buf, sizeof(buf));
    fprintf(stderr, "ERROR: %s\n", buf);
    exit(-1);
  }
  x(&reg, pattern, "aaaabbbbbbd");
  regfree(&reg);

  /* POSIX Extended RE */
  regex_set_default_syntax(REG_SYNTAX_POSIX_EXTENDED);
  pattern = "^a+b{2,7}[c-f]?)$|uuu";
  r = regcomp(&reg, pattern, REG_EXTENDED);
  if (r) {
    regerror(r, &reg, buf, sizeof(buf));
    fprintf(stderr, "ERROR: %s\n", buf);
    exit(-1);
  }
  x(&reg, pattern, "aaabbbbd)");
  regfree(&reg);

  pattern = "^b.";
  r = regcomp(&reg, pattern, REG_EXTENDED | REG_NEWLINE);
  if (r) {
    regerror(r, &reg, buf, sizeof(buf));
    fprintf(stderr, "ERROR: %s\n", buf);
    exit(-1);
  }
  x(&reg, pattern, "a\nb\n");

  regfree(&reg);
  regex_end();
  return 0;
}

View file

@ -1,54 +0,0 @@
/*
* simple.c
*/
#include<stdio.h>
#include "oniguruma.h"
/*
 * Minimal sample: compiles one pattern, searches one fixed subject
 * string and prints the offsets of every region entry on a match.
 *
 * Returns 0 after a match or a mismatch.  Exits with status -1 if either
 * compilation or the search reports an error; in both cases the error
 * text is written to stderr first.
 */
extern int main(int argc, char* argv[])
{
  int r;
  unsigned char *start, *range, *end;
  regex_t* reg;
  RegErrorInfo einfo;
  RegRegion *region;

  static unsigned char* pattern = "a(.*)b|[e-f]+";
  static unsigned char* str = "zzzzaffffffffb";

  r = regex_new(&reg, pattern, pattern + strlen(pattern),
                REG_OPTION_DEFAULT, REGCODE_ASCII, REG_SYNTAX_DEFAULT, &einfo);
  if (r != REG_NORMAL) {
    char s[REG_MAX_ERROR_MESSAGE_LEN];
    regex_error_code_to_str(s, r, &einfo);
    fprintf(stderr, "ERROR: %s\n", s);
    exit(-1);
  }

  region = regex_region_new();

  /* search the whole subject, starting at its first byte */
  end = str + strlen(str);
  start = str;
  range = end;
  r = regex_search(reg, str, end, start, range, region, REG_OPTION_NONE);
  if (r >= 0) {
    int i;

    fprintf(stderr, "match at %d\n", r);
    for (i = 0; i < region->num_regs; i++) {
      fprintf(stderr, "%d: (%d-%d)\n", i, region->beg[i], region->end[i]);
    }
  }
  else if (r == REG_MISMATCH) {
    fprintf(stderr, "search fail\n");
  }
  else { /* error */
    char s[REG_MAX_ERROR_MESSAGE_LEN];
    regex_error_code_to_str(s, r);
    /* the message used to be built and then dropped; report it */
    fprintf(stderr, "ERROR: %s\n", s);
    exit(-1);
  }

  regex_region_free(region, 1 /* 1:free self, 0:free contents only */);
  regex_free(reg);
  regex_end();
  return 0;
}

View file

@ -1,971 +0,0 @@
# test.rb
# Copyright (C) 2003 K.Kosako (kosako@sofnec.co.jp)
# Prints one result line to stdout.
#
# result - label text printed first
# reg    - Regexp under test; its source is shown between slashes
# str    - subject string, shown in single quotes
# n      - when it is a Fixnum: group index, appended as ":n" unless 0;
#          otherwise it is printed as-is with %s
# range  - optional extra values; with a Fixnum n and at least one value,
#          prints "(a-b : c-d)", or "(a-b : X-X)" when the 4th is nil
def pr(result, reg, str, n = 0, *range)
  printf("%s /%s/:'%s'", result, reg.source, str)
  unless n.class == Fixnum
    printf(" %s", n)
  else
    printf(":%d", n) unless n == 0
    unless range.empty?
      if range[3].nil?
        printf(" (%d-%d : X-X)", range[0], range[1])
      else
        printf(" (%d-%d : %d-%d)", range[0], range[1], range[2], range[3])
      end
    end
  end
  printf("\n")
end
# Records a passing check.
#
# Builds the label "OK" + result_opt, left-justified to 7 columns, prints
# the line through pr and increments the global $rok counter.
# ljust is used so that a label longer than 7 characters is printed
# unpadded instead of raising on a negative repeat count.
# Returns the new value of $rok.
def rok(result_opt, reg, str, n = 0, *range)
  result = ("OK" + result_opt).ljust(7)
  pr(result, reg, str, n, *range)
  $rok += 1
end
# Records a failing check.
#
# Builds the label "FAIL" + result_opt, left-justified to 7 columns,
# prints the line through pr and increments the global $rfail counter.
# ljust is used so that a label longer than 7 characters (for example
# "FAIL(10)") is printed unpadded instead of raising on a negative
# repeat count.
# Returns the new value of $rfail.
def rfail(result_opt, reg, str, n = 0, *range)
  result = ("FAIL" + result_opt).ljust(7)
  pr(result, reg, str, n, *range)
  $rfail += 1
end
# Expects reg to match str with group n spanning offsets s...e.
#
# Reports through rfail when there is no match, when the match has no
# group n (label "(<last group index>)"), or when the span differs (the
# expected and actual offsets are passed along).  Reports through rok
# otherwise.  Returns whatever rok/rfail returns.
def x(reg, str, s, e, n = 0)
  md = reg.match(str)
  return rfail("", reg, str, n) unless md
  return rfail("(%d)" % (md.size - 1), reg, str, n) if md.size <= n

  from = md.begin(n)
  to = md.end(n)
  if from == s && to == e
    rok("", reg, str, n)
  else
    rfail("", reg, str, n, s, e, from, to)
  end
end
# Expects reg NOT to match str.
#
# A match is reported through rfail, a non-match through rok, both with
# the label "(N)" and group index 0.  Returns whatever rok/rfail returns.
def n(reg, str)
  reg.match(str) ? rfail("(N)", reg, str, 0) : rok("(N)", reg, str, 0)
end
# Expects String#rindex with reg to return index.
#
# pos, when given, is passed to rindex as its second argument.
# Reports through rok on the expected index.  Reports through rfail when
# nothing is found, or when a different index is found (the array
# [found, '-', expected] is passed along in that case).  All labels are
# "(r)".  Returns whatever rok/rfail returns.
def r(reg, str, index, pos = nil)
  found = pos ? str.rindex(reg, pos) : str.rindex(reg)
  return rfail("(r)", reg, str) unless found
  return rok("(r)", reg, str) if found == index

  rfail("(r)", reg, str, [found, '-', index])
end
# Placeholder with the same argument shape as x: the check is skipped.
# Nothing is matched, printed or counted; returns nil.
def i(reg, str, s = 0, e = 0, n = 0)
  nil # ignore
end
### main ###
# Reset the global pass / fail tallies that rok and rfail increment.
$rfail = 0
$rok = 0
# Runs the fixed table of checks below after setting the global $KCODE to enc.
# (presumably "sb" stands for single-byte; the subjects here are ASCII or
# \x / octal escaped bytes -- confirm against the caller)
#
# Helpers used:
#   x(reg, str, s, e, n = 0)  expects group n of the match to span s...e
#   n(reg, str)               expects no match
#   r(reg, str, index, pos)   expects str.rindex(reg[, pos]) == index
#   i(...)                    skipped check
# Results are tallied by those helpers; nothing is returned explicitly.
def test_sb(enc)
$KCODE = enc
# Empty pattern, anchors, control/meta escapes and plain literals.
x(//, '', 0, 0)
x(/^/, '', 0, 0)
x(/$/, '', 0, 0)
x(/\G/, '', 0, 0)
x(/\A/, '', 0, 0)
x(/\Z/, '', 0, 0)
x(/\z/, '', 0, 0)
x(/^$/, '', 0, 0)
x(/\ca/, "\001", 0, 1)
x(/\C-b/, "\002", 0, 1)
x(/\M-Z/, "\xDA", 0, 1)
x(//, 'a', 0, 0)
x(/a/, 'a', 0, 1)
x(/aa/, 'aa', 0, 2)
x(/aaa/, 'aaa', 0, 3)
x(/aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa/, 'aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa', 0, 35)
x(/ab/, 'ab', 0, 2)
x(/b/, 'ab', 1, 2)
x(/bc/, 'abc', 1, 3)
x(/\17/, "\017", 0, 1)
x(/\x1f/, "\x1f", 0, 1)
x(/\xFE/, "\xfe", 0, 1)
x(/a(?#....\\JJJJ)b/, 'ab', 0, 2)
# Any-char and the \w \W \s \S \d \D \b \B escapes.
x(/./, 'a', 0, 1)
n(/./, '')
x(/../, 'ab', 0, 2)
x(/\w/, 'e', 0, 1)
n(/\W/, 'e')
x(/\s/, ' ', 0, 1)
x(/\S/, 'b', 0, 1)
x(/\d/, '4', 0, 1)
n(/\D/, '4')
x(/\b/, 'z ', 0, 0)
x(/\b/, ' z', 1, 1)
x(/\B/, 'zz ', 1, 1)
x(/\B/, 'z ', 2, 2)
x(/\B/, ' z', 0, 0)
# Bracket expressions, POSIX bracket names, nested sets and && intersections.
x(/[ab]/, 'b', 0, 1)
n(/[ab]/, 'c')
x(/[a-z]/, 't', 0, 1)
n(/[^a]/, 'a')
x(/[^a]/, "\n", 0, 1)
x(/[]]/, ']', 0, 1)
n(/[^]]/, ']')
x(/[b-]/, 'b', 0, 1)
x(/[b-]/, '-', 0, 1)
x(/[\w]/, 'z', 0, 1)
n(/[\w]/, ' ')
x(/[\d]/, '5', 0, 1)
n(/[\d]/, 'e')
x(/[\D]/, 't', 0, 1)
n(/[\D]/, '3')
x(/[\s]/, ' ', 0, 1)
n(/[\s]/, 'a')
x(/[\S]/, 'b', 0, 1)
n(/[\S]/, ' ')
x(/[\w\d]/, '2', 0, 1)
n(/[\w\d]/, ' ')
x(/[[:upper:]]/, 'B', 0, 1)
x(/[*[:xdigit:]+]/, '+', 0, 1)
x(/[*[:xdigit:]+]/, 'GHIKK-9+*', 6, 7)
x(/[*[:xdigit:]+]/, '-@^+', 3, 4)
n(/[[:upper]]/, 'A')
x(/[[:upper]]/, ':', 0, 1)
x(/[\044-\047]/, "\046", 0, 1)
x(/[\x5a-\x5c]/, "\x5b", 0, 1)
x(/[\x6A-\x6D]/, "\x6c", 0, 1)
n(/[\x6A-\x6D]/, "\x6E")
n(/^[0-9A-F]+ 0+ UNDEF /, '75F 00000000 SECT14A notype () External | _rb_apply')
x(/[\[]/, '[', 0, 1)
x(/[\]]/, ']', 0, 1)
x(/[&]/, '&', 0, 1)
x(/[[ab]]/, 'b', 0, 1)
x(/[[ab]c]/, 'c', 0, 1)
n(/[[^a]]/, 'a')
n(/[^[a]]/, 'a')
x(/[[ab]&&bc]/, 'b', 0, 1)
n(/[[ab]&&bc]/, 'a')
n(/[[ab]&&bc]/, 'c')
x(/[a-z&&b-y&&c-x]/, 'w', 0, 1)
n(/[^a-z&&b-y&&c-x]/, 'w')
x(/[[^a&&a]&&a-z]/, 'b', 0, 1)
n(/[[^a&&a]&&a-z]/, 'a')
x(/[[^a-z&&bcdef]&&[^c-g]]/, 'h', 0, 1)
n(/[[^a-z&&bcdef]&&[^c-g]]/, 'c')
x(/[^[^abc]&&[^cde]]/, 'c', 0, 1)
x(/[^[^abc]&&[^cde]]/, 'e', 0, 1)
n(/[^[^abc]&&[^cde]]/, 'f')
x(/[a-&&-a]/, '-', 0, 1)
n(/[a-&&-a]/, '&')
n(/\wabc/, ' abc')
x(/a\Wbc/, 'a bc', 0, 4)
x(/a.b.c/, 'aabbc', 0, 5)
x(/.\wb\W..c/, 'abb bcc', 0, 7)
x(/\s\wzzz/, ' zzzz', 0, 5)
x(/aa.b/, 'aabb', 0, 4)
n(/.a/, 'ab')
x(/.a/, 'aa', 0, 2)
x(/^a/, 'a', 0, 1)
x(/^a$/, 'a', 0, 1)
x(/^\w$/, 'a', 0, 1)
n(/^\w$/, ' ')
x(/^\wab$/, 'zab', 0, 3)
x(/^\wabcdef$/, 'zabcdef', 0, 7)
x(/^\w...def$/, 'zabcdef', 0, 7)
x(/\w\w\s\Waaa\d/, 'aa aaa4', 0, 8)
x(/\A\Z/, '', 0, 0)
x(/\Axyz/, 'xyz', 0, 3)
x(/xyz\Z/, 'xyz', 0, 3)
x(/xyz\z/, 'xyz', 0, 3)
x(/\Gaz/, 'az', 0, 2)
n(/\Gz/, 'bza')
n(/az\G/, 'az')
n(/az\A/, 'az')
n(/a\Az/, 'az')
x(/\^\$/, '^$', 0, 2)
x(/\w/, '_', 0, 1)
n(/\W/, '_')
# Look-ahead and the inline option groups (?i:...) and (?m:...).
x(/(?=z)z/, 'z', 0, 1)
n(/(?=z)./, 'a')
x(/(?!z)a/, 'a', 0, 1)
n(/(?!z)a/, 'z')
x(/(?i:a)/, 'a', 0, 1)
x(/(?i:a)/, 'A', 0, 1)
x(/(?i:A)/, 'a', 0, 1)
n(/(?i:A)/, 'b')
x(/(?i:[A-Z])/, 'a', 0, 1)
x(/(?i:[f-m])/, 'H', 0, 1)
x(/(?i:[f-m])/, 'h', 0, 1)
n(/(?i:[f-m])/, 'e')
n(/(?i:[A-c])/, 'D') # changed spec. 2003/02/07
n(/(?i:[a-C])/, 'D') # changed spec. 2003/02/07
n(/(?i:[b-C])/, 'A')
x(/(?i:[a-C])/, 'B', 0, 1)
n(/(?i:[c-X])/, '[')
n(/(?i:[!-k])/, 'Z')
x(/(?i:[!-k])/, '7', 0, 1)
n(/(?i:[T-}])/, 'b')
x(/(?i:[T-}])/, '{', 0, 1)
x(/(?i:\?a)/, '?A', 0, 2)
x(/(?i:\*A)/, '*a', 0, 2)
n(/./, "\n")
x(/(?m:.)/, "\n", 0, 1)
x(/(?m:a.)/, "a\n", 0, 2)
x(/(?m:.b)/, "a\nb", 1, 3)
# Quantifiers ? * + on literals and on any-char.
x(/a?/, '', 0, 0)
x(/a?/, 'b', 0, 0)
x(/a?/, 'a', 0, 1)
x(/a*/, '', 0, 0)
x(/a*/, 'a', 0, 1)
x(/a*/, 'aaa', 0, 3)
x(/a*/, 'baaaa', 0, 0)
n(/a+/, '')
x(/a+/, 'a', 0, 1)
x(/a+/, 'aaaa', 0, 4)
x(/a+/, 'aabbb', 0, 2)
x(/a+/, 'baaaa', 1, 5)
x(/.?/, '', 0, 0)
x(/.?/, 'f', 0, 1)
x(/.?/, "\n", 0, 0)
x(/.*/, '', 0, 0)
x(/.*/, 'abcde', 0, 5)
x(/.+/, 'z', 0, 1)
x(/.+/, "zdswer\n", 0, 6)
# Alternation, alone and combined with anchors, groups and quantifiers.
x(/a|b/, 'a', 0, 1)
x(/a|b/, 'b', 0, 1)
x(/|a/, 'a', 0, 0)
x(/(|a)/, 'a', 0, 0)
x(/ab|bc/, 'ab', 0, 2)
x(/ab|bc/, 'bc', 0, 2)
x(/z(?:ab|bc)/, 'zbc', 0, 3)
x(/a(?:ab|bc)c/, 'aabc', 0, 4)
x(/ab|(?:ac|az)/, 'az', 0, 2)
x(/a|b|c/, 'dc', 1, 2)
x(/a|b|cd|efg|h|ijk|lmn|o|pq|rstuvwx|yz/, 'pqr', 0, 2)
n(/a|b|cd|efg|h|ijk|lmn|o|pq|rstuvwx|yz/, 'mn')
x(/a|^z/, 'ba', 1, 2)
x(/a|^z/, 'za', 0, 1)
x(/a|\Gz/, 'bza', 2, 3)
x(/a|\Gz/, 'za', 0, 1)
x(/a|\Az/, 'bza', 2, 3)
x(/a|\Az/, 'za', 0, 1)
x(/a|b\Z/, 'ba', 1, 2)
x(/a|b\Z/, 'b', 0, 1)
x(/a|b\z/, 'ba', 1, 2)
x(/a|b\z/, 'b', 0, 1)
x(/\w|\s/, ' ', 0, 1)
n(/\w|\w/, ' ')
x(/\w|%/, '%', 0, 1)
x(/\w|[&$]/, '&', 0, 1)
x(/[b-d]|[^e-z]/, 'a', 0, 1)
x(/(?:a|[c-f])|bz/, 'dz', 0, 1)
x(/(?:a|[c-f])|bz/, 'bz', 0, 2)
x(/abc|(?=zz)..f/, 'zzf', 0, 3)
x(/abc|(?!zz)..f/, 'abf', 0, 3)
x(/(?=za)..a|(?=zz)..a/, 'zza', 0, 3)
n(/(?>a|abd)c/, 'abdc')
x(/(?>abd|a)c/, 'abdc', 0, 4)
x(/a?|b/, 'a', 0, 1)
x(/a?|b/, 'b', 0, 0)
x(/a?|b/, '', 0, 0)
x(/a*|b/, 'aa', 0, 2)
x(/a*|b*/, 'ba', 0, 0)
x(/a*|b*/, 'ab', 0, 1)
x(/a+|b*/, '', 0, 0)
x(/a+|b*/, 'bbb', 0, 3)
x(/a+|b*/, 'abbb', 0, 1)
n(/a+|b+/, '')
x(/(a|b)?/, 'b', 0, 1)
x(/(a|b)*/, 'ba', 0, 2)
x(/(a|b)+/, 'bab', 0, 3)
x(/(ab|ca)+/, 'caabbc', 0, 4)
x(/(ab|ca)+/, 'aabca', 1, 5)
x(/(ab|ca)+/, 'abzca', 0, 2)
x(/(a|bab)+/, 'ababa', 0, 5)
x(/(a|bab)+/, 'ba', 1, 2)
x(/(a|bab)+/, 'baaaba', 1, 4)
x(/(?:a|b)(?:a|b)/, 'ab', 0, 2)
x(/(?:a*|b*)(?:a*|b*)/, 'aaabbb', 0, 3)
x(/(?:a*|b*)(?:a+|b+)/, 'aaabbb', 0, 6)
x(/(?:a+|b+){2}/, 'aaabbb', 0, 6)
x(/h{0,}/, 'hhhh', 0, 4)
x(/(?:a+|b+){1,2}/, 'aaabbb', 0, 6)
x(/(?:a+|\Ab*)cc/, 'cc', 0, 2)
n(/(?:a+|\Ab*)cc/, 'abcc')
x(/(?:^a+|b+)*c/, 'aabbbabc', 6, 8)
x(/(?:^a+|b+)*c/, 'aabbbbc', 0, 7)
x(/a|(?i)c/, 'C', 0, 1)
x(/(?i)c|a/, 'C', 0, 1)
i(/(?i)c|a/, 'A', 0, 1) # different spec.
x(/(?i:c)|a/, 'C', 0, 1)
n(/(?i:c)|a/, 'A')
x(/[abc]?/, 'abc', 0, 1)
x(/[abc]*/, 'abc', 0, 3)
x(/[^abc]*/, 'abc', 0, 0)
n(/[^abc]+/, 'abc')
# Reluctant quantifiers, stacked quantifiers and {m,n} ranges.
x(/a??/, 'aaa', 0, 0)
x(/ba??b/, 'bab', 0, 3)
x(/a*?/, 'aaa', 0, 0)
x(/ba*?/, 'baa', 0, 1)
x(/ba*?b/, 'baab', 0, 4)
x(/a+?/, 'aaa', 0, 1)
x(/ba+?/, 'baa', 0, 2)
x(/ba+?b/, 'baab', 0, 4)
x(/(?:a?)??/, 'a', 0, 0)
x(/(?:a??)?/, 'a', 0, 0)
x(/(?:a?)+?/, 'aaa', 0, 1)
x(/(?:a+)??/, 'aaa', 0, 0)
x(/(?:a+)??b/, 'aaab', 0, 4)
i(/(?:ab)?{2}/, '', 0, 0) # GNU regex bug
x(/(?:ab)?{2}/, 'ababa', 0, 4)
x(/(?:ab)*{0}/, 'ababa', 0, 0)
x(/(?:ab){3,}/, 'abababab', 0, 8)
n(/(?:ab){3,}/, 'abab')
x(/(?:ab){2,4}/, 'ababab', 0, 6)
x(/(?:ab){2,4}/, 'ababababab', 0, 8)
x(/(?:ab){2,4}?/, 'ababababab', 0, 4)
x(/(?:ab){,}/, 'ab{,}', 0, 5)
x(/(?:abc)+?{2}/, 'abcabcabc', 0, 6)
x(/(?:X*)(?i:xa)/, 'XXXa', 0, 4)
x(/(d+)([^abc]z)/, 'dddz', 0, 4)
x(/([^abc]*)([^abc]z)/, 'dddz', 0, 4)
x(/(\w+)(\wz)/, 'dddz', 0, 4)
# Capture groups: the trailing argument to x selects the group whose span is checked.
x(/(a)/, 'a', 0, 1, 1)
x(/(ab)/, 'ab', 0, 2, 1)
x(/((ab))/, 'ab', 0, 2)
x(/((ab))/, 'ab', 0, 2, 1)
x(/((ab))/, 'ab', 0, 2, 2)
x(/((((((((((((((((((((ab))))))))))))))))))))/, 'ab', 0, 2, 20)
x(/(ab)(cd)/, 'abcd', 0, 2, 1)
x(/(ab)(cd)/, 'abcd', 2, 4, 2)
x(/()(a)bc(def)ghijk/, 'abcdefghijk', 3, 6, 3)
x(/(()(a)bc(def)ghijk)/, 'abcdefghijk', 3, 6, 4)
x(/(^a)/, 'a', 0, 1)
x(/(a)|(a)/, 'ba', 1, 2, 1)
x(/(^a)|(a)/, 'ba', 1, 2, 2)
x(/(a?)/, 'aaa', 0, 1, 1)
x(/(a*)/, 'aaa', 0, 3, 1)
x(/(a*)/, '', 0, 0, 1)
x(/(a+)/, 'aaaaaaa', 0, 7, 1)
x(/(a+|b*)/, 'bbbaa', 0, 3, 1)
x(/(a+|b?)/, 'bbbaa', 0, 1, 1)
x(/(abc)?/, 'abc', 0, 3, 1)
x(/(abc)*/, 'abc', 0, 3, 1)
x(/(abc)+/, 'abc', 0, 3, 1)
x(/(xyz|abc)+/, 'abc', 0, 3, 1)
x(/([xyz][abc]|abc)+/, 'abc', 0, 3, 1)
x(/((?i:abc))/, 'AbC', 0, 3, 1)
x(/(abc)(?i:\1)/, 'abcABC', 0, 6)
x(/((?m:a.c))/, "a\nc", 0, 3, 1)
x(/((?=az)a)/, 'azb', 0, 1, 1)
x(/abc|(.abd)/, 'zabd', 0, 4, 1)
x(/(?:abc)|(ABC)/, 'abc', 0, 3)
x(/(?i:(abc))|(zzz)/, 'ABC', 0, 3, 1)
x(/a*(.)/, 'aaaaz', 4, 5, 1)
x(/a*?(.)/, 'aaaaz', 0, 1, 1)
x(/a*?(c)/, 'aaaac', 4, 5, 1)
x(/[bcd]a*(.)/, 'caaaaz', 5, 6, 1)
x(/(\Abb)cc/, 'bbcc', 0, 2, 1)
n(/(\Abb)cc/, 'zbbcc')
x(/(^bb)cc/, 'bbcc', 0, 2, 1)
n(/(^bb)cc/, 'zbbcc')
x(/cc(bb$)/, 'ccbb', 2, 4, 1)
n(/cc(bb$)/, 'ccbbb')
# Numbered back-references.
#n(/\1/, 'a') # compile error on Oniguruma
n(/(\1)/, '')
n(/\1(a)/, 'aa')
n(/(a(b)\1)\2+/, 'ababb')
n(/(?:(?:\1|z)(a))+$/, 'zaa')
x(/(?:(?:\1|z)(a))+$/, 'zaaa', 0, 4)
x(/(a)(?=\1)/, 'aa', 0, 1)
n(/(a)$|\1/, 'az')
x(/(a)\1/, 'aa', 0, 2)
n(/(a)\1/, 'ab')
x(/(a?)\1/, 'aa', 0, 2)
x(/(a??)\1/, 'aa', 0, 0)
x(/(a*)\1/, 'aaaaa', 0, 4)
x(/(a*)\1/, 'aaaaa', 0, 2, 1)
x(/a(b*)\1/, 'abbbb', 0, 5)
x(/a(b*)\1/, 'ab', 0, 1)
x(/(a*)(b*)\1\2/, 'aaabbaaabb', 0, 10)
x(/(a*)(b*)\2/, 'aaabbbb', 0, 7)
x(/(((((((a*)b))))))c\7/, 'aaabcaaa', 0, 8)
x(/(((((((a*)b))))))c\7/, 'aaabcaaa', 0, 3, 7)
x(/(a)(b)(c)\2\1\3/, 'abcbac', 0, 6)
x(/([a-d])\1/, 'cc', 0, 2)
x(/(\w\d\s)\1/, 'f5 f5 ', 0, 6)
n(/(\w\d\s)\1/, 'f5 f5')
x(/(who|[a-c]{3})\1/, 'whowho', 0, 6)
x(/...(who|[a-c]{3})\1/, 'abcwhowho', 0, 9)
x(/(who|[a-c]{3})\1/, 'cbccbc', 0, 6)
x(/(^a)\1/, 'aa', 0, 2)
n(/(^a)\1/, 'baa')
n(/(a$)\1/, 'aa')
n(/(ab\Z)\1/, 'ab')
x(/(a*\Z)\1/, 'a', 1, 1)
x(/.(a*\Z)\1/, 'ba', 1, 2)
x(/(.(abc)\2)/, 'zabcabc', 0, 7, 1)
x(/(.(..\d.)\2)/, 'z12341234', 0, 9, 1)
x(/((?i:az))\1/, 'AzAz', 0, 4)
n(/((?i:az))\1/, 'Azaz')
# Look-behind, positive and negative.
x(/(?<=a)b/, 'ab', 1, 2)
n(/(?<=a)b/, 'bb')
x(/(?<=a|b)b/, 'bb', 1, 2)
x(/(?<=a|bc)b/, 'bcb', 2, 3)
x(/(?<=a|bc)b/, 'ab', 1, 2)
x(/(?<=a|bc||defghij|klmnopq|r)z/, 'rz', 1, 2)
x(/(?<!a)b/, 'cb', 1, 2)
n(/(?<!a)b/, 'ab')
x(/(?<!a|bc)b/, 'bbb', 0, 1)
n(/(?<!a|bc)z/, 'bcz')
# Named groups, \k<name> back-references and \g<name> / \g<n> calls.
x(/(?<name1>a)/, 'a', 0, 1)
x(/(?<name-2>ab)\1/, 'abab', 0, 4)
x(/(?<name-3>.zv.)\k<name-3>/, 'azvbazvb', 0, 8)
x(/(?<=\g<ab>)|-\zEND (?<ab>XyZ)/, 'XyZ', 3, 3)
x(/(?<n>|a\g<n>)+/, '', 0, 0)
x(/(?<n>|\(\g<n>\))+$/, '()(())', 0, 6)
x(/\g<n>(?<n>.){0}/, 'X', 0, 1, 1)
x(/\g<n>(abc|df(?<n>.YZ){2,8}){0}/, 'XYZ', 0, 3)
x(/\A(?<n>(a\g<n>)|)\z/, 'aaaa', 0, 4)
x(/(?<n>|\g<m>\g<n>)\z|\zEND (?<m>a|(b)\g<m>)/, 'bbbbabba', 0, 8)
x(/(?<@:name[1240]>\w+\sx)a+\k<@:name[1240]>/, ' fg xaaaaaaaafg x', 2, 18)
x(/(z)()()(?<9>a)\4/, 'zaa', 1, 2, 4)
x(/(.)(((?<*>a)))\k<*>/, 'zaa', 0, 3)
x(/((?<name1>\d)|(?<name2>\w))(\k<name1>|\k<name2>)/, 'ff', 0, 2)
x(/(?:(?<x>)|(?<x>efg))\k<x>/, '', 0, 0)
x(/(?:(?<@x>abc)|(?<@x>efg))\k<@x>/, 'abcefgefg', 3, 9)
n(/(?:(?<@x>abc)|(?<@x>efg))\k<@x>/, 'abcefg')
x(/(?:(?<n1>.)|(?<n1>..)|(?<n1>...)|(?<n1>....)|(?<n1>.....)|(?<n1>......)|(?<n1>.......)|(?<n1>........)|(?<n1>.........)|(?<n1>..........)|(?<n1>...........)|(?<n1>............)|(?<n1>.............)|(?<n1>..............))\k<n1>$/, 'a-pyumpyum', 2, 10)
x(/(?:(?<n1>.)|(?<n1>..)|(?<n1>...)|(?<n1>....)|(?<n1>.....)|(?<n1>......)|(?<n1>.......)|(?<n1>........)|(?<n1>.........)|(?<n1>..........)|(?<n1>...........)|(?<n1>............)|(?<n1>.............)|(?<n1>..............))\k<n1>$/, 'xxxxabcdefghijklmnabcdefghijklmn', 4, 18, 14)
x(/(?<name1>)(?<name2>)(?<name3>)(?<name4>)(?<name5>)(?<name6>)(?<name7>)(?<name8>)(?<name9>)(?<name10>)(?<name11>)(?<name12>)(?<name13>)(?<name14>)(?<name15>)(?<name16>aaa)(?<name17>)$/, 'aaa', 0, 3, 16)
x(/(?<foo>a|\(\g<foo>\))/, 'a', 0, 1)
x(/(?<foo>a|\(\g<foo>\))/, '((((((a))))))', 0, 13)
x(/(?<foo>a|\(\g<foo>\))/, '((((((((a))))))))', 0, 17, 1)
x(/\g<bar>|\zEND(?<bar>.*abc$)/, 'abcxxxabc', 0, 9)
x(/\g<1>|\zEND(.a.)/, 'bac', 0, 3)
x(/\g<2>\g<1>|\zEND(.a.)(?<?>.b.)/, 'xbxyay', 3, 6, 1)
x(/\A(?:\g<pon>|\g<pan>|\zEND (?<pan>a|c\g<pon>c)(?<pon>b|d\g<pan>d))$/, 'cdcbcdc', 0, 7)
x(/\A(?<n>|a\g<m>)\z|\zEND (?<m>\g<n>)/, 'aaaa', 0, 4)
x(/(?<n>(a|b\g<n>c){3,5})/, 'baaaaca', 1, 5)
x(/(?<n>(a|b\g<n>c){3,5})/, 'baaaacaaaaa', 0, 10)
# Backward search through String#rindex (helper r).
r(//, '', 0)
r(/a/, 'a', 0)
r(/a/, 'a', 0, 1)
r(/b/, 'abc', 1)
r(/b/, 'abc', 1, 2)
r(/./, 'a', 0)
r(/.*/, 'abcde fgh', 9)
r(/a*/, 'aaabbc', 6)
r(/a+/, 'aaabbc', 2)
r(/a?/, 'bac', 3)
r(/a??/, 'bac', 3)
r(/abcde/, 'abcdeavcd', 0)
r(/\w\d\s/, ' a2 aa $3 ', 2)
r(/[c-f]aa[x-z]/, '3caaycaaa', 1)
r(/(?i:fG)g/, 'fGgFggFgG', 3)
r(/a|b/, 'b', 0)
r(/ab|bc|cd/, 'bcc', 0)
r(/(ffy)\1/, 'ffyffyffy', 3)
r(/|z/, 'z', 1)
r(/^az/, 'azaz', 0)
r(/az$/, 'azaz', 2)
r(/(((.a)))\3/, 'zazaaa', 0)
r(/(ac*?z)\1/, 'aacczacczacz', 1)
r(/aaz{3,4}/, 'bbaabbaazzzaazz', 6)
r(/\000a/, "b\000a", 1)
r(/ff\xfe/, "fff\xfe", 1)
r(/...abcdefghijklmnopqrstuvwxyz/, 'zzzzzabcdefghijklmnopqrstuvwxyz', 2)
end
# Regression cases for multibyte (Japanese) patterns and subject strings.
#
# Sets the global $KCODE to +enc+ and then runs a fixed, ordered list of checks
# through the x / n / r / i helpers. Those helpers are defined earlier in the
# file, outside this block.
# NOTE(review): presumably x(re, str, from, to[, group]) expects a match that
# spans from...to (optionally for the given capture group), n(re, str) expects
# no match, and r(...) checks a search from the end of the string -- confirm
# against the helper definitions.
#
# The expected offsets advance by two per kana/kanji character, so they appear
# to be byte offsets in a two-byte encoding -- TODO confirm.
def test_euc(enc)
$KCODE = enc
x(//, 'あ', 0, 0)
x(//, 'あ', 0, 2)
n(//, 'あ')
x(/うう/, 'うう', 0, 4)
x(/あいう/, 'あいう', 0, 6)
x(/こここここここここここここここここここここここここここここここここここ/, 'こここここここここここここここここここここここここここここここここここ', 0, 70)
x(//, 'いあ', 2, 4)
x(/いう/, 'あいう', 2, 6)
x(/\xca\xb8/, "\xca\xb8", 0, 2)
x(/./, 'あ', 0, 2)
x(/../, 'かき', 0, 4)
x(/\w/, 'お', 0, 2)
n(/\W/, 'あ')
x(/\S/, 'そ', 0, 2)
x(/\S/, '漢', 0, 2)
x(/\b/, '気 ', 0, 0)
x(/\b/, ' ほ', 1, 1)
x(/\B/, 'せそ ', 2, 2)
x(/\B/, 'う ', 3, 3)
x(/\B/, ' い', 0, 0)
x(/[たち]/, 'ち', 0, 2)
n(/[なに]/, 'ぬ')
x(/[う-お]/, 'え', 0, 2)
n(/[^け]/, 'け')
x(/[\w]/, 'ね', 0, 2)
n(/[\d]/, 'ふ')
x(/[\D]/, 'は', 0, 2)
n(/[\s]/, 'く')
x(/[\S]/, 'へ', 0, 2)
x(/[\w\d]/, 'よ', 0, 2)
x(/[\w\d]/, ' よ', 3, 5)
#x(/[\xa4\xcf-\xa4\xd3]/, "\xa4\xd0", 0, 2) # diff spec with GNU regex.
#n(/[\xb6\xe7-\xb6\xef]/, "\xb6\xe5") # diff spec with GNU regex.
n(/\w鬼車/, ' 鬼車')
x(/\W車/, '鬼 車', 0, 5)
x(/あ.い.う/, 'ああいいう', 0, 10)
x(/.\wう\W..ぞ/, 'えうう うぞぞ', 0, 13)
x(/\s\wこここ/, ' ここここ', 0, 9)
x(/ああ.け/, 'ああけけ', 0, 8)
n(/.い/, 'いえ')
x(/.お/, 'おお', 0, 4)
x(/^あ/, 'あ', 0, 2)
x(/^む$/, 'む', 0, 2)
x(/^\w$/, 'に', 0, 2)
x(/^\wかきくけこ$/, 'zかきくけこ', 0, 11)
x(/^\w...うえお$/, 'zあいううえお', 0, 13)
x(/\w\w\s\Wおおお\d/, 'aお おおお4', 0, 12)
x(/\Aたちつ/, 'たちつ', 0, 6)
x(/むめも\Z/, 'むめも', 0, 6)
x(/かきく\z/, 'かきく', 0, 6)
x(/かきく\Z/, "かきく\n", 0, 6)
x(/\Gぽぴ/, 'ぽぴ', 0, 4)
n(/\Gえ/, 'うえお')
n(/とて\G/, 'とて')
n(/まみ\A/, 'まみ')
n(/\Aみ/, 'まみ')
x(/(?=せ)せ/, 'せ', 0, 2)
n(/(?=う)./, 'い')
x(/(?!う)か/, 'か', 0, 2)
n(/(?!と)あ/, 'と')
x(/(?i:あ)/, 'あ', 0, 2)
x(/(?i:ぶべ)/, 'ぶべ', 0, 4)
n(/(?i:い)/, 'う')
x(/(?m:よ.)/, "\n", 0, 3)
x(/(?m:.め)/, "\n", 2, 5)
x(/あ?/, '', 0, 0)
x(/変?/, '化', 0, 0)
x(/変?/, '変', 0, 2)
x(/量*/, '', 0, 0)
x(/量*/, '量', 0, 2)
x(/子*/, '子子子', 0, 6)
x(/馬*/, '鹿馬馬馬馬', 0, 0)
n(/山+/, '')
x(/河+/, '河', 0, 2)
x(/時+/, '時時時時', 0, 8)
x(/え+/, 'ええううう', 0, 4)
x(/う+/, 'おうううう', 2, 10)
x(/.?/, 'た', 0, 2)
x(/.*/, 'ぱぴぷぺ', 0, 8)
x(/.+/, 'ろ', 0, 2)
x(/.+/, "いうえか\n", 0, 8)
x(/あ|い/, 'あ', 0, 2)
x(/あ|い/, 'い', 0, 2)
x(/あい|いう/, 'あい', 0, 4)
x(/あい|いう/, 'いう', 0, 4)
x(/を(?:かき|きく)/, 'をかき', 0, 6)
x(/を(?:かき|きく)け/, 'をきくけ', 0, 8)
x(/あい|(?:あう|あを)/, 'あを', 0, 4)
x(/あ|い|う/, 'えう', 2, 4)
x(/あ|い|うえ|おかき|く|けこさ|しすせ|そ|たち|つてとなに|ぬね/, 'しすせ', 0, 6)
n(/あ|い|うえ|おかき|く|けこさ|しすせ|そ|たち|つてとなに|ぬね/, 'すせ')
x(/あ|^わ/, 'ぶあ', 2, 4)
x(/あ|^を/, 'をあ', 0, 2)
x(/鬼|\G車/, 'け車鬼', 4, 6)
x(/鬼|\G車/, '車鬼', 0, 2)
x(/鬼|\A車/, 'b車鬼', 3, 5)
x(/鬼|\A車/, '車', 0, 2)
x(/鬼|車\Z/, '車鬼', 2, 4)
x(/鬼|車\Z/, '車', 0, 2)
x(/鬼|車\Z/, "\n", 0, 2)
x(/鬼|車\z/, '車鬼', 2, 4)
x(/鬼|車\z/, '車', 0, 2)
x(/\w|\s/, 'お', 0, 2)
x(/\w|%/, '%お', 0, 1)
x(/\w|[&$]/, 'う&', 0, 2)
x(/[い-け]/, 'う', 0, 2)
x(/[い-け]|[^か-こ]/, 'あ', 0, 2)
x(/[い-け]|[^か-こ]/, 'か', 0, 2)
x(/(?:あ|[う-き])|いを/, 'うを', 0, 2)
x(/(?:あ|[う-き])|いを/, 'いを', 0, 4)
x(/あいう|(?=けけ)..ほ/, 'けけほ', 0, 6)
x(/あいう|(?!けけ)..ほ/, 'あいほ', 0, 6)
x(/(?=をあ)..あ|(?=をを)..あ/, 'ををあ', 0, 6)
x(/(?<=あ|いう)い/, 'いうい', 4, 6)
n(/(?>あ|あいえ)う/, 'あいえう')
x(/(?>あいえ|あ)う/, 'あいえう', 0, 8)
x(/あ?|い/, 'あ', 0, 2)
x(/あ?|い/, 'い', 0, 0)
x(/あ?|い/, '', 0, 0)
x(/あ*|い/, 'ああ', 0, 4)
x(/あ*|い*/, 'いあ', 0, 0)
x(/あ*|い*/, 'あい', 0, 2)
x(/[aあ]*|い*/, 'aあいいい', 0, 3)
x(/あ+|い*/, '', 0, 0)
x(/あ+|い*/, 'いいい', 0, 6)
x(/あ+|い*/, 'あいいい', 0, 2)
x(/あ+|い*/, 'aあいいい', 0, 0)
n(/あ+|い+/, '')
x(/(あ|い)?/, 'い', 0, 2)
x(/(あ|い)*/, 'いあ', 0, 4)
x(/(あ|い)+/, 'いあい', 0, 6)
x(/(あい|うあ)+/, 'うああいうえ', 0, 8)
x(/(あい|うえ)+/, 'うああいうえ', 4, 12)
x(/(あい|うあ)+/, 'ああいうあ', 2, 10)
x(/(あい|うあ)+/, 'あいをうあ', 0, 4)
x(/(あい|うあ)+/, '$$zzzzあいをうあ', 6, 10)
x(/(あ|いあい)+/, 'あいあいあ', 0, 10)
x(/(あ|いあい)+/, 'いあ', 2, 4)
x(/(あ|いあい)+/, 'いあああいあ', 2, 8)
x(/(?:あ|い)(?:あ|い)/, 'あい', 0, 4)
x(/(?:あ*|い*)(?:あ*|い*)/, 'あああいいい', 0, 6)
x(/(?:あ*|い*)(?:あ+|い+)/, 'あああいいい', 0, 12)
x(/(?:あ+|い+){2}/, 'あああいいい', 0, 12)
x(/(?:あ+|い+){1,2}/, 'あああいいい', 0, 12)
x(/(?:あ+|\Aい*)うう/, 'うう', 0, 4)
n(/(?:あ+|\Aい*)うう/, 'あいうう')
x(/(?:^あ+|い+)*う/, 'ああいいいあいう', 12, 16)
x(/(?:^あ+|い+)*う/, 'ああいいいいう', 0, 14)
x(/う{0,}/, 'うううう', 0, 8)
x(/あ|(?i)c/, 'C', 0, 1)
x(/(?i)c|あ/, 'C', 0, 1)
x(/(?i:あ)|a/, 'a', 0, 1)
n(/(?i:あ)|a/, 'A')
x(/[あいう]?/, 'あいう', 0, 2)
x(/[あいう]*/, 'あいう', 0, 6)
x(/[^あいう]*/, 'あいう', 0, 0)
n(/[^あいう]+/, 'あいう')
# Non-greedy (lazy) quantifiers.
x(/あ??/, 'あああ', 0, 0)
x(/いあ??い/, 'いあい', 0, 6)
x(/あ*?/, 'あああ', 0, 0)
x(/いあ*?/, 'いああ', 0, 2)
x(/いあ*?い/, 'いああい', 0, 8)
x(/あ+?/, 'あああ', 0, 2)
x(/いあ+?/, 'いああ', 0, 4)
x(/いあ+?い/, 'いああい', 0, 8)
x(/(?:天?)??/, '天', 0, 0)
x(/(?:天??)?/, '天', 0, 0)
x(/(?:夢?)+?/, '夢夢夢', 0, 2)
x(/(?:風+)??/, '風風風', 0, 0)
x(/(?:雪+)??霜/, '雪雪雪霜', 0, 8)
i(/(?:あい)?{2}/, '', 0, 0) # GNU regex bug
x(/(?:鬼車)?{2}/, '鬼車鬼車鬼', 0, 8)
x(/(?:鬼車)*{0}/, '鬼車鬼車鬼', 0, 0)
x(/(?:鬼車){3,}/, '鬼車鬼車鬼車鬼車', 0, 16)
n(/(?:鬼車){3,}/, '鬼車鬼車')
x(/(?:鬼車){2,4}/, '鬼車鬼車鬼車', 0, 12)
x(/(?:鬼車){2,4}/, '鬼車鬼車鬼車鬼車鬼車', 0, 16)
x(/(?:鬼車){2,4}?/, '鬼車鬼車鬼車鬼車鬼車', 0, 8)
x(/(?:鬼車){,}/, '鬼車{,}', 0, 7)
x(/(?:かきく)+?{2}/, 'かきくかきくかきく', 0, 12)
# Capture groups; the optional fifth argument names the group being checked.
x(/(火)/, '火', 0, 2, 1)
x(/(火水)/, '火水', 0, 4, 1)
x(/((時間))/, '時間', 0, 4)
x(/((風水))/, '風水', 0, 4, 1)
x(/((昨日))/, '昨日', 0, 4, 2)
x(/((((((((((((((((((((量子))))))))))))))))))))/, '量子', 0, 4, 20)
x(/(あい)(うえ)/, 'あいうえ', 0, 4, 1)
x(/(あい)(うえ)/, 'あいうえ', 4, 8, 2)
x(/()(あ)いう(えおか)きくけこ/, 'あいうえおかきくけこ', 6, 12, 3)
x(/(()(あ)いう(えおか)きくけこ)/, 'あいうえおかきくけこ', 6, 12, 4)
x(/.*(フォ)ン・マ(ン()シュタ)イン/, 'フォン・マンシュタイン', 10, 18, 2)
x(/(^あ)/, 'あ', 0, 2)
x(/(あ)|(あ)/, 'いあ', 2, 4, 1)
x(/(^あ)|(あ)/, 'いあ', 2, 4, 2)
x(/(あ?)/, 'あああ', 0, 2, 1)
x(/(ま*)/, 'ままま', 0, 6, 1)
x(/(と*)/, '', 0, 0, 1)
x(/(る+)/, 'るるるるるるる', 0, 14, 1)
x(/(ふ+|へ*)/, 'ふふふへへ', 0, 6, 1)
x(/(あ+|い?)/, 'いいいああ', 0, 2, 1)
x(/(あいう)?/, 'あいう', 0, 6, 1)
x(/(あいう)*/, 'あいう', 0, 6, 1)
x(/(あいう)+/, 'あいう', 0, 6, 1)
x(/(さしす|あいう)+/, 'あいう', 0, 6, 1)
x(/([なにぬ][かきく]|かきく)+/, 'かきく', 0, 6, 1)
x(/((?i:あいう))/, 'あいう', 0, 6, 1)
x(/((?m:あ.う))/, "\n", 0, 5, 1)
x(/((?=あん)あ)/, 'あんい', 0, 2, 1)
x(/あいう|(.あいえ)/, 'んあいえ', 0, 8, 1)
x(/あ*(.)/, 'ああああん', 8, 10, 1)
x(/あ*?(.)/, 'ああああん', 0, 2, 1)
x(/あ*?(ん)/, 'ああああん', 8, 10, 1)
x(/[いうえ]あ*(.)/, 'えああああん', 10, 12, 1)
x(/(\Aいい)うう/, 'いいうう', 0, 4, 1)
n(/(\Aいい)うう/, 'んいいうう')
x(/(^いい)うう/, 'いいうう', 0, 4, 1)
n(/(^いい)うう/, 'んいいうう')
x(/ろろ(るる$)/, 'ろろるる', 4, 8, 1)
n(/ろろ(るる$)/, 'ろろるるる')
# Back-references (\1, \2, ...).
x(/(無)\1/, '無無', 0, 4)
n(/(無)\1/, '無武')
x(/(空?)\1/, '空空', 0, 4)
x(/(空??)\1/, '空空', 0, 0)
x(/(空*)\1/, '空空空空空', 0, 8)
x(/(空*)\1/, '空空空空空', 0, 4, 1)
x(/あ(い*)\1/, 'あいいいい', 0, 10)
x(/あ(い*)\1/, 'あい', 0, 2)
x(/(あ*)(い*)\1\2/, 'あああいいあああいい', 0, 20)
x(/(あ*)(い*)\2/, 'あああいいいい', 0, 14)
x(/(あ*)(い*)\2/, 'あああいいいい', 6, 10, 2)
x(/(((((((ぽ*)ぺ))))))ぴ\7/, 'ぽぽぽぺぴぽぽぽ', 0, 16)
x(/(((((((ぽ*)ぺ))))))ぴ\7/, 'ぽぽぽぺぴぽぽぽ', 0, 6, 7)
x(/(は)(ひ)(ふ)\2\1\3/, 'はひふひはふ', 0, 12)
x(/([き-け])\1/, 'くく', 0, 4)
x(/(\w\d\s)\1/, 'あ5 あ5 ', 0, 8)
n(/(\w\d\s)\1/, 'あ5 あ5')
x(/(誰?|[あ-う]{3})\1/, '誰?誰?', 0, 8)
x(/...(誰?|[あ-う]{3})\1/, 'あaあ誰', 0, 13)
x(/(誰?|[あ-う]{3})\1/, 'ういうういう', 0, 12)
x(/(^こ)\1/, 'ここ', 0, 4)
n(/(^む)\1/, 'めむむ')
n(/(あ$)\1/, 'ああ')
n(/(あい\Z)\1/, 'あい')
x(/(あ*\Z)\1/, 'あ', 2, 2)
x(/.(あ*\Z)\1/, 'いあ', 2, 4)
x(/(.(やいゆ)\2)/, 'zやいゆやいゆ', 0, 13, 1)
x(/(.(..\d.)\2)/, 'あ12341234', 0, 10, 1)
x(/((?i:あvず))\1/, 'あvずあvず', 0, 10)
# Named groups with multibyte names, referenced via \g<name>.
x(/(?<愚か>変|\(\g<愚か>\))/, '((((((変))))))', 0, 14)
x(/\A(?:\g<阿-1>|\g<云-2>|\z終了 (?<阿-1>観|自\g<云-2>自)(?<云-2>在|菩薩\g<阿-1>菩薩))$/, '菩薩自菩薩自在自菩薩自菩薩', 0, 26)
# Nested character classes and && intersection.
x(/[[ひふ]]/, 'ふ', 0, 2)
x(/[[いおう]か]/, 'か', 0, 2)
n(/[[^あ]]/, 'あ')
n(/[^[あ]]/, 'あ')
x(/[^[^あ]]/, 'あ', 0, 2)
x(/[[かきく]&&きく]/, 'く', 0, 2)
n(/[[かきく]&&きく]/, 'か')
n(/[[かきく]&&きく]/, 'け')
x(/[あ-ん&&い-を&&う-ゑ]/, 'ゑ', 0, 2)
n(/[^あ-ん&&い-を&&う-ゑ]/, 'ゑ')
x(/[[^あ&&あ]&&あ-ん]/, 'い', 0, 2)
n(/[[^あ&&あ]&&あ-ん]/, 'あ')
x(/[[^あ-ん&&いうえお]&&[^う-か]]/, 'き', 0, 2)
n(/[[^あ-ん&&いうえお]&&[^う-か]]/, 'い')
x(/[^[^あいう]&&[^うえお]]/, 'う', 0, 2)
x(/[^[^あいう]&&[^うえお]]/, 'え', 0, 2)
n(/[^[^あいう]&&[^うえお]]/, 'か')
x(/[あ-&&-あ]/, '-', 0, 1)
x(/[^[^a-zあいう]&&[^bcdefgうえお]q-w]/, 'え', 0, 2)
x(/[^[^a-zあいう]&&[^bcdefgうえお]g-w]/, 'f', 0, 1)
x(/[^[^a-zあいう]&&[^bcdefgうえお]g-w]/, 'g', 0, 1)
n(/[^[^a-zあいう]&&[^bcdefgうえお]g-w]/, '2')
# Cases run through the r helper (takes one or two expected offsets).
r(//, 'あ', 0)
r(//, 'あ', 0, 2)
r(//, 'あいう', 2)
r(//, 'あいう', 2, 4)
r(/./, 'あ', 0)
r(/.*/, 'あいうえお かきく', 17)
r(/.*えお/, 'あいうえお かきく', 6)
r(/あ*/, 'あああいいう', 12)
r(/あ+/, 'あああいいう', 4)
r(/あ?/, 'いあう', 6)
r(/全??/, '負全変', 6)
r(/a辺c漢e/, 'a辺c漢eavcd', 0)
r(/\w\d\s/, ' あ2 うう $3 ', 2)
r(/[う-お]ああ[と-ん]/, '3うああなうあああ', 1)
r(/あ|い/, 'い', 0)
r(/あい|いう|うえ/, 'いうう', 0)
r(/(ととち)\1/, 'ととちととちととち', 6)
r(/|え/, 'え', 2)
r(/^あず/, 'あずあず', 0)
r(/あず$/, 'あずあず', 4)
r(/(((.あ)))\3/, 'zあzあああ', 0)
r(/(あう*?ん)\1/, 'ああううんあううんあうん', 2)
r(/ああん{3,4}/, 'ててああいいああんんんああんああん', 12)
r(/\000あ/, "\000", 2)
r(/とと\xfe\xfe/, "ととと\xfe\xfe", 2)
r(/...あいうえおかきくけこさしすせそ/, 'zzzzzあいうえおかきくけこさしすせそ', 2)
end
# Script body: run test_sb once for each listed encoding name, then run the
# multibyte suite for EUC.
test_sb('ASCII')
test_sb('EUC')
test_sb('SJIS')
test_sb('UTF8')
test_euc('EUC')
# UTF-8 (by UENO Katsuhiro)
# Character-class range checks written as raw UTF-8 byte escapes; each regexp
# carries the /u flag and the subject string is rebuilt before every case.
$KCODE = 'UTF-8'
s = "\xe3\x81\x82\xe3\x81\x81\xf0\x90\x80\x85\xe3\x81\x8a\xe3\x81\x85"
x(/[\xc2\x80-\xed\x9f\xbf]+/u, s, 0, 6)
s = "\xf0\x90\x80\x85\xe3\x81\x82"
x(/[\xc2\x80-\xed\x9f\xbf]/u, s, 4, 7)
# NOTE(review): the next three cases are textually identical in this copy of
# the file; possibly the originals differed in characters that were lost --
# confirm against the upstream test script.
s = "\xed\x9f\xbf"
n(/[\xc2\x80-\xed\x9f\xbe]/u, s)
s = "\xed\x9f\xbf"
n(/[\xc2\x80-\xed\x9f\xbe]/u, s)
s = "\xed\x9f\xbf"
n(/[\xc2\x80-\xed\x9f\xbe]/u, s)
s = "\xed\x9f\xbf"
n(/[\xc3\xad\xed\x9f\xbe]/u, s)
s = "\xed\x9f\xbf"
n(/[\xc4\x80-\xed\x9f\xbe]/u, s)
s = "\xed\x9f\xbf\xf0\x90\x80\x85\xed\x9f\xbf"
x(/[^\xc2\x80-\xed\x9f\xbe]/u, s, 0, 3)
s = "\xed\x9f\xbf"
x(/[^\xc3\xad\xed\x9f\xbe]/u, s, 0, 3)
s = "\xed\x9f\xbf\xf0\x90\x80\x85\xed\x9f\xbf"
x(/[^\xc4\x80-\xed\x9f\xbe]/u, s, 0, 3)
s = "\xc3\xbe\xc3\xbf"
n(/[\xfe\xff\xc3\x80]/u, s)
# Japanese long text.
# Each heredoc below is assigned to s and then searched; the expected offsets
# are tied to the exact heredoc contents.
# NOTE(review): the heredoc bodies in this copy are far shorter than the
# expected offsets imply, so the text looks truncated -- restore it from the
# upstream script before relying on these cases.
$KCODE = 'EUC'
s = <<EOS
調
(1998)
EOS
x(/\((.+)\)/, s, 305, 309, 1)
x(/司馬遼太郎/, s, 229, 239)
x(/。$/, s, 202, 204)
x(/(^兵藤..八)/, s, 269, 279, 1)
x(/^$/, s, 268, 268)
s = <<EOS
bookはことばであるがhonは音をならべただけで
EOS
n(/\((.+)\)/, s)
x(/「(.*)」/, s, 254, 264, 1)
x(/。$/, s, 34, 36)
x(/(book)/, s, 120, 124, 1)
x(/^$/, s, 360, 360)
s = <<EOS
(1986)
EOS
x(/\((.+)\)/, s, 290, 296)
x(/「(.*)(.+)」/, s, 257, 275, 2)
x(/^ /, s, 179, 184)
x(/(釈迦)/, s, 0, 4, 1)
x(/\w、/, s, 30, 34)
s = <<EOS
(1961)
EOS
x(/\((\d+)\)/, s, 496, 502)
x(/(「.+雑誌.*」)/, s, 449, 479, 1)
x(/第(.)号/, s, 96, 98, 1)
x(/。$/, s, 120, 122)
x(/近代説話/, s, 209, 217)
s = <<EOS
(2000)
EOS
x(/\((\d+)\)/, s, 506, 512)
x(/(「.*」)/, s, 493, 505, 1)
x(/行徳塩/, s, 292, 298)
s = <<EOS
(1998)
EOS
x(/\((\d+)\)/, s, 185, 191)
x(/(「.*」)/, s, 108, 138, 1)
x(/^それは/, s, 90, 96)
x(/^.*$/, s, 0, 58)
s = <<EOS
(1960)
EOS
x(/(稗は米の三分の一くらいに)/, s, 357, 381, 1)
x(/あります。$/, s, 140, 150)
x(/ 人間(.*)。/, s, 157, 423, 1)
x(/ヘズリ飯[をはで]/, s, 165, 175)
s = <<EOS
(1859)
EOS
x(/\((.+)\)/, s, 68, 74)
x(/「(.*)」/, s, 59, 65, 1)
x(/^(吉田松蔭)/, s, 48, 56, 1)
# result
# Print the totals held in the $rok / $rfail globals.
printf("\n*** Result SUCCESS: %d, FAIL: %d ***\n", $rok, $rfail)
# END.

View file

@ -9,11 +9,14 @@
#include "oniguruma.h"
#endif
static int nsucc = 0;
static int nfail = 0;
static int nsucc = 0;
static int nfail = 0;
static int nerror = 0;
static FILE* err_file;
#ifndef POSIX_TEST
static RegRegion* region;
static OnigRegion* region;
#endif
static void xx(char* pattern, char* str, int from, int to, int mem, int not)
@ -28,15 +31,17 @@ static void xx(char* pattern, char* str, int from, int to, int mem, int not)
r = regcomp(&reg, pattern, REG_EXTENDED | REG_NEWLINE);
if (r) {
regerror(r, &reg, buf, sizeof(buf));
fprintf(stderr, "ERROR: %s\n", buf);
exit(-1);
fprintf(err_file, "ERROR: %s\n", buf);
nerror++;
return ;
}
r = regexec(&reg, str, reg.re_nsub + 1, pmatch, 0);
if (r != 0 && r != REG_NOMATCH) {
regerror(r, &reg, buf, sizeof(buf));
fprintf(stderr, "ERROR: %s\n", buf);
exit(-1);
fprintf(err_file, "ERROR: %s\n", buf);
nerror++;
return ;
}
if (r == REG_NOMATCH) {
@ -70,28 +75,30 @@ static void xx(char* pattern, char* str, int from, int to, int mem, int not)
#else
regex_t* reg;
RegErrorInfo einfo;
OnigErrorInfo einfo;
r = regex_new(&reg, (UChar* )pattern, (UChar* )(pattern + strlen(pattern)),
REG_OPTION_DEFAULT, REGCODE_SJIS, REG_SYNTAX_DEFAULT, &einfo);
r = onig_new(&reg, (UChar* )pattern, (UChar* )(pattern + strlen(pattern)),
ONIG_OPTION_DEFAULT, ONIG_ENCODING_EUC_JP, ONIG_SYNTAX_DEFAULT, &einfo);
if (r) {
char s[REG_MAX_ERROR_MESSAGE_LEN];
regex_error_code_to_str(s, r, &einfo);
fprintf(stderr, "ERROR: %s\n", s);
exit(-1);
char s[ONIG_MAX_ERROR_MESSAGE_LEN];
onig_error_code_to_str(s, r, &einfo);
fprintf(err_file, "ERROR: %s\n", s);
nerror++;
return ;
}
r = regex_search(reg, (UChar* )str, (UChar* )(str + strlen(str)),
(UChar* )str, (UChar* )(str + strlen(str)),
region, REG_OPTION_NONE);
if (r < REG_MISMATCH) {
char s[REG_MAX_ERROR_MESSAGE_LEN];
regex_error_code_to_str(s, r);
fprintf(stderr, "ERROR: %s\n", s);
exit(-1);
r = onig_search(reg, (UChar* )str, (UChar* )(str + strlen(str)),
(UChar* )str, (UChar* )(str + strlen(str)),
region, ONIG_OPTION_NONE);
if (r < ONIG_MISMATCH) {
char s[ONIG_MAX_ERROR_MESSAGE_LEN];
onig_error_code_to_str(s, r);
fprintf(err_file, "ERROR: %s\n", s);
nerror++;
return ;
}
if (r == REG_MISMATCH) {
if (r == ONIG_MISMATCH) {
if (not) {
fprintf(stdout, "OK(N): /%s/ '%s'\n", pattern, str);
nsucc++;
@ -118,7 +125,7 @@ static void xx(char* pattern, char* str, int from, int to, int mem, int not)
}
}
}
regex_free(reg);
onig_free(reg);
#endif
}
@ -139,10 +146,12 @@ static void n(char* pattern, char* str)
extern int main(int argc, char* argv[])
{
err_file = stdout;
#ifdef POSIX_TEST
reg_set_encoding(REG_ENCODING_SJIS);
reg_set_encoding(REG_POSIX_ENCODING_EUC_JP);
#else
region = regex_region_new();
region = onig_region_new();
#endif
x2("", "", 0, 0);
@ -168,6 +177,7 @@ extern int main(int argc, char* argv[])
x2("\\x1f", "\x1f", 0, 1);
x2("\\xFE", "\xfe", 0, 1);
x2("a(?#....\\\\JJJJ)b", "ab", 0, 2);
x2("(?x) G (o O(?-x)oO) g L", "GoOoOgLe", 0, 7);
x2(".", "a", 0, 1);
n(".", "");
x2("..", "ab", 0, 2);
@ -189,10 +199,12 @@ extern int main(int argc, char* argv[])
x2("[^a]", "\n", 0, 1);
x2("[]]", "]", 0, 1);
n("[^]]", "]");
x2("[\\^]+", "0^^1", 1, 3);
x2("[b-]", "b", 0, 1);
x2("[b-]", "-", 0, 1);
x2("[\\w]", "z", 0, 1);
n("[\\w]", " ");
x2("[\\W]", "b$", 1, 2);
x2("[\\d]", "5", 0, 1);
n("[\\d]", "e");
x2("[\\D]", "t", 0, 1);
@ -261,6 +273,8 @@ extern int main(int argc, char* argv[])
n("az\\A", "az");
n("a\\Az", "az");
x2("\\^\\$", "^$", 0, 2);
x2("^x?y", "xy", 0, 2);
x2("^(x?y)", "xy", 0, 2);
x2("\\w", "_", 0, 1);
n("\\W", "_");
x2("(?=z)z", "z", 0, 1);
@ -275,12 +289,10 @@ extern int main(int argc, char* argv[])
x2("(?i:[f-m])", "H", 0, 1);
x2("(?i:[f-m])", "h", 0, 1);
n("(?i:[f-m])", "e");
n("(?i:[b-C])", "A");
x2("(?i:[a-C])", "B", 0, 1);
n("(?i:[c-X])", "[");
n("(?i:[!-k])", "Z");
x2("(?i:[A-c])", "D", 0, 1);
x2("(?i:[!-k])", "Z", 0, 1);
x2("(?i:[!-k])", "7", 0, 1);
n("(?i:[T-}])", "b");
x2("(?i:[T-}])", "b", 0, 1);
x2("(?i:[T-}])", "{", 0, 1);
x2("(?i:\\?a)", "?A", 0, 2);
x2("(?i:\\*A)", "*a", 0, 2);
@ -288,6 +300,8 @@ extern int main(int argc, char* argv[])
x2("(?m:.)", "\n", 0, 1);
x2("(?m:a.)", "a\n", 0, 2);
x2("(?m:.b)", "a\nb", 1, 3);
n("(?i)(?-i)a", "A");
n("(?i)(?-i:a)", "A");
x2("a?", "", 0, 0);
x2("a?", "b", 0, 0);
x2("a?", "a", 0, 1);
@ -366,6 +380,10 @@ extern int main(int argc, char* argv[])
x2("(?:a+|b+){2}", "aaabbb", 0, 6);
x2("h{0,}", "hhhh", 0, 4);
x2("(?:a+|b+){1,2}", "aaabbb", 0, 6);
n("ax{2}*a", "0axxxa1");
n("a.{0,2}a", "0aXXXa0");
n("a.{0,2}?a", "0aXXXa0");
n("a.{0,2}?a", "0aXXXXa0");
x2("(?:a+|\\Ab*)cc", "cc", 0, 2);
n("(?:a+|\\Ab*)cc", "abcc");
x2("(?:^a+|b+)*c", "aabbbabc", 6, 8);
@ -379,19 +397,19 @@ extern int main(int argc, char* argv[])
x2("[abc]*", "abc", 0, 3);
x2("[^abc]*", "abc", 0, 0);
n("[^abc]+", "abc");
x2("a?\?", "aaa", 0, 0);
x2("ba?\?b", "bab", 0, 3);
x2("a??", "aaa", 0, 0);
x2("ba??b", "bab", 0, 3);
x2("a*?", "aaa", 0, 0);
x2("ba*?", "baa", 0, 1);
x2("ba*?b", "baab", 0, 4);
x2("a+?", "aaa", 0, 1);
x2("ba+?", "baa", 0, 2);
x2("ba+?b", "baab", 0, 4);
x2("(?:a?)?\?", "a", 0, 0);
x2("(?:a?\?)?", "a", 0, 0);
x2("(?:a?)??", "a", 0, 0);
x2("(?:a??)?", "a", 0, 0);
x2("(?:a?)+?", "aaa", 0, 1);
x2("(?:a+)?\?", "aaa", 0, 0);
x2("(?:a+)?\?b", "aaab", 0, 4);
x2("(?:a+)??", "aaa", 0, 0);
x2("(?:a+)??b", "aaab", 0, 4);
x2("(?:ab)?{2}", "", 0, 0);
x2("(?:ab)?{2}", "ababa", 0, 4);
x2("(?:ab)*{0}", "ababa", 0, 0);
@ -457,7 +475,7 @@ extern int main(int argc, char* argv[])
x2("(a)\\1", "aa", 0, 2);
n("(a)\\1", "ab");
x2("(a?)\\1", "aa", 0, 2);
x2("(a?\?)\\1", "aa", 0, 0);
x2("(a??)\\1", "aa", 0, 0);
x2("(a*)\\1", "aaaaa", 0, 4);
x3("(a*)\\1", "aaaaa", 0, 2, 1);
x2("a(b*)\\1", "abbbb", 0, 5);
@ -489,13 +507,14 @@ extern int main(int argc, char* argv[])
x2("(?<=a|bc)b", "bcb", 2, 3);
x2("(?<=a|bc)b", "ab", 1, 2);
x2("(?<=a|bc||defghij|klmnopq|r)z", "rz", 1, 2);
x2("(a)\\g<1>", "aa", 0, 2);
x2("(?<!a)b", "cb", 1, 2);
n("(?<!a)b", "ab");
x2("(?<!a|bc)b", "bbb", 0, 1);
n("(?<!a|bc)z", "bcz");
x2("(?<name1>a)", "a", 0, 1);
x2("(?<name-2>ab)\\1", "abab", 0, 4);
x2("(?<name-3>.zv.)\\k<name-3>", "azvbazvb", 0, 8);
x2("(?<name_2>ab)\\g<name_2>", "abab", 0, 4);
x2("(?<name_3>.zv.)\\k<name_3>", "azvbazvb", 0, 8);
x2("(?<=\\g<ab>)|-\\zEND (?<ab>XyZ)", "XyZ", 3, 3);
x2("(?<n>|a\\g<n>)+", "", 0, 0);
x2("(?<n>|\\(\\g<n>\\))+$", "()(())", 0, 6);
@ -503,13 +522,13 @@ extern int main(int argc, char* argv[])
x2("\\g<n>(abc|df(?<n>.YZ){2,8}){0}", "XYZ", 0, 3);
x2("\\A(?<n>(a\\g<n>)|)\\z", "aaaa", 0, 4);
x2("(?<n>|\\g<m>\\g<n>)\\z|\\zEND (?<m>a|(b)\\g<m>)", "bbbbabba", 0, 8);
x2("(?<@:name[1240]>\\w+\\sx)a+\\k<@:name[1240]>", " fg xaaaaaaaafg x", 2, 18);
x3("(z)()()(?<9>a)\\4", "zaa", 1, 2, 4);
x2("(.)(((?<*>a)))\\k<*>", "zaa", 0, 3);
x2("(?<name1240>\\w+\\sx)a+\\k<name1240>", " fg xaaaaaaaafg x", 2, 18);
x3("(z)()()(?<_9>a)\\g<_9>", "zaa", 2, 3, 1);
x2("(.)(((?<_>a)))\\k<_>", "zaa", 0, 3);
x2("((?<name1>\\d)|(?<name2>\\w))(\\k<name1>|\\k<name2>)", "ff", 0, 2);
x2("(?:(?<x>)|(?<x>efg))\\k<x>", "", 0, 0);
x2("(?:(?<@x>abc)|(?<@x>efg))\\k<@x>", "abcefgefg", 3, 9);
n("(?:(?<@x>abc)|(?<@x>efg))\\k<@x>", "abcefg");
x2("(?:(?<x>abc)|(?<x>efg))\\k<x>", "abcefgefg", 3, 9);
n("(?:(?<x>abc)|(?<x>efg))\\k<x>", "abcefg");
x2("(?:(?<n1>.)|(?<n1>..)|(?<n1>...)|(?<n1>....)|(?<n1>.....)|(?<n1>......)|(?<n1>.......)|(?<n1>........)|(?<n1>.........)|(?<n1>..........)|(?<n1>...........)|(?<n1>............)|(?<n1>.............)|(?<n1>..............))\\k<n1>$", "a-pyumpyum", 2, 10);
x3("(?:(?<n1>.)|(?<n1>..)|(?<n1>...)|(?<n1>....)|(?<n1>.....)|(?<n1>......)|(?<n1>.......)|(?<n1>........)|(?<n1>.........)|(?<n1>..........)|(?<n1>...........)|(?<n1>............)|(?<n1>.............)|(?<n1>..............))\\k<n1>$", "xxxxabcdefghijklmnabcdefghijklmn", 4, 18, 14);
x3("(?<name1>)(?<name2>)(?<name3>)(?<name4>)(?<name5>)(?<name6>)(?<name7>)(?<name8>)(?<name9>)(?<name10>)(?<name11>)(?<name12>)(?<name13>)(?<name14>)(?<name15>)(?<name16>aaa)(?<name17>)$", "aaa", 0, 3, 16);
@ -518,286 +537,296 @@ extern int main(int argc, char* argv[])
x3("(?<foo>a|\\(\\g<foo>\\))", "((((((((a))))))))", 0, 17, 1);
x2("\\g<bar>|\\zEND(?<bar>.*abc$)", "abcxxxabc", 0, 9);
x2("\\g<1>|\\zEND(.a.)", "bac", 0, 3);
x3("\\g<2>\\g<1>|\\zEND(.a.)(?<?>.b.)", "xbxyay", 3, 6, 1);
x3("\\g<_A>\\g<_A>|\\zEND(.a.)(?<_A>.b.)", "xbxyby", 3, 6, 1);
x2("\\A(?:\\g<pon>|\\g<pan>|\\zEND (?<pan>a|c\\g<pon>c)(?<pon>b|d\\g<pan>d))$", "cdcbcdc", 0, 7);
x2("\\A(?<n>|a\\g<m>)\\z|\\zEND (?<m>\\g<n>)", "aaaa", 0, 4);
x2("(?<n>(a|b\\g<n>c){3,5})", "baaaaca", 1, 5);
x2("(?<n>(a|b\\g<n>c){3,5})", "baaaacaaaaa", 0, 10);
x2("", "", 0, 0);
x2("", "", 0, 2);
n("", "");
x2("うう", "うう", 0, 4);
x2("あいう", "あいう", 0, 6);
x2("こここここここここここここここここここここここここここここここここここ", "こここここここここここここここここここここここここここここここここここ", 0, 70);
x2("", "いあ", 2, 4);
x2("いう", "あいう", 2, 6);
x2("()*\\1", "", 0, 0);
x2("(?:()|())*\\1\\2", "", 0, 0);
x3("(?:\\1a|())*", "a", 0, 0, 1);
x2("x((.)*)*x", "0x1x2x3", 1, 6);
x2("x((.)*)*x(?i:\\1)\\Z", "0x1x2x1X2", 1, 9);
x2("(?:()|()|()|()|()|())*\\2\\5", "", 0, 0);
x2("(?:()|()|()|(x)|()|())*\\2b\\5", "b", 0, 1);
x2("", "", 0, 0);
x2("", "", 0, 2);
n("", "");
x2("うう", "うう", 0, 4);
x2("あいう", "あいう", 0, 6);
x2("こここここここここここここここここここここここここここここここここここ", "こここここここここここここここここここここここここここここここここここ", 0, 70);
x2("", "いあ", 2, 4);
x2("いう", "あいう", 2, 6);
x2("\\xca\\xb8", "\xca\xb8", 0, 2);
x2(".", "", 0, 2);
x2("..", "かき", 0, 4);
x2("\\w", "", 0, 2);
n("\\W", "");
x2("\\S", "", 0, 2);
x2("\\S", "", 0, 2);
x2("\\b", "", 0, 0);
x2("\\b", "", 1, 1);
x2("\\B", "せそ ", 2, 2);
x2("\\B", "", 3, 3);
x2("\\B", "", 0, 0);
x2("[たち]", "", 0, 2);
n("[なに]", "");
x2("[う-お]", "", 0, 2);
n("[^け]", "");
x2("[\\w]", "", 0, 2);
n("[\\d]", "");
x2("[\\D]", "", 0, 2);
n("[\\s]", "");
x2("[\\S]", "", 0, 2);
x2("[\\w\\d]", "", 0, 2);
x2("[\\w\\d]", "", 3, 5);
n("\\w鬼車", " 鬼車");
x2("\\W車", "鬼 車", 0, 5);
x2("あ.い.う", "ああいいう", 0, 10);
x2(".\\wう\\W..ぞ", "えうう うぞぞ", 0, 13);
x2("\\s\\wこここ", " ここここ", 0, 9);
x2("ああ.け", "ああけけ", 0, 8);
n(".い", "いえ");
x2(".お", "おお", 0, 4);
x2("^あ", "", 0, 2);
x2("^む$", "", 0, 2);
x2("^\\w$", "", 0, 2);
x2("^\\wかきくけこ$", "zかきくけこ", 0, 11);
x2("^\\w...うえお$", "zあいううえお", 0, 13);
x2("\\w\\w\\s\\Wおおお\\d", "aお おおお4", 0, 12);
x2("\\Aたちつ", "たちつ", 0, 6);
x2("むめも\\Z", "むめも", 0, 6);
x2("かきく\\z", "かきく", 0, 6);
x2("かきく\\Z", "かきく\n", 0, 6);
x2("\\Gぽぴ", "ぽぴ", 0, 4);
n("\\Gえ", "うえお");
n("とて\\G", "とて");
n("まみ\\A", "まみ");
n("\\Aみ", "まみ");
x2("(?=せ)せ", "", 0, 2);
n("(?=う).", "");
x2("(?!う)か", "", 0, 2);
n("(?!と)あ", "");
x2("(?i:あ)", "", 0, 2);
x2("(?i:ぶべ)", "ぶべ", 0, 4);
n("(?i:い)", "");
x2("(?m:よ.)", "\n", 0, 3);
x2("(?m:.め)", "\n", 2, 5);
x2("あ?", "", 0, 0);
x2("変?", "", 0, 0);
x2("変?", "", 0, 2);
x2("量*", "", 0, 0);
x2("量*", "", 0, 2);
x2("子*", "子子子", 0, 6);
x2("馬*", "鹿馬馬馬馬", 0, 0);
n("山+", "");
x2("河+", "", 0, 2);
x2("時+", "時時時時", 0, 8);
x2("え+", "ええううう", 0, 4);
x2("う+", "おうううう", 2, 10);
x2(".?", "", 0, 2);
x2(".*", "ぱぴぷぺ", 0, 8);
x2(".+", "", 0, 2);
x2(".+", "いうえか\n", 0, 8);
x2("あ|い", "", 0, 2);
x2("あ|い", "", 0, 2);
x2("あい|いう", "あい", 0, 4);
x2("あい|いう", "いう", 0, 4);
x2("を(?:かき|きく)", "をかき", 0, 6);
x2("を(?:かき|きく)け", "をきくけ", 0, 8);
x2("あい|(?:あう|あを)", "あを", 0, 4);
x2("あ|い|う", "えう", 2, 4);
x2("あ|い|うえ|おかき|く|けこさ|しすせ|そ|たち|つてとなに|ぬね", "しすせ", 0, 6);
n("あ|い|うえ|おかき|く|けこさ|しすせ|そ|たち|つてとなに|ぬね", "すせ");
x2("あ|^わ", "ぶあ", 2, 4);
x2("あ|^を", "をあ", 0, 2);
x2("鬼|\\G車", "け車鬼", 4, 6);
x2("鬼|\\G車", "車鬼", 0, 2);
x2("鬼|\\A車", "b車鬼", 3, 5);
x2("鬼|\\A車", "", 0, 2);
x2("鬼|車\\Z", "車鬼", 2, 4);
x2("鬼|車\\Z", "", 0, 2);
x2("鬼|車\\Z", "\n", 0, 2);
x2("鬼|車\\z", "車鬼", 2, 4);
x2("鬼|車\\z", "", 0, 2);
x2("\\w|\\s", "", 0, 2);
x2("\\w|%", "%お", 0, 1);
x2("\\w|[&$]", "う&", 0, 2);
x2("[い-け]", "", 0, 2);
x2("[い-け]|[^か-こ]", "", 0, 2);
x2("[い-け]|[^か-こ]", "", 0, 2);
x2("(?:あ|[う-き])|いを", "うを", 0, 2);
x2("(?:あ|[う-き])|いを", "いを", 0, 4);
x2("あいう|(?=けけ)..ほ", "けけほ", 0, 6);
x2("あいう|(?!けけ)..ほ", "あいほ", 0, 6);
x2("(?=をあ)..あ|(?=をを)..あ", "ををあ", 0, 6);
x2("(?<=あ|いう)い", "いうい", 4, 6);
n("(?>あ|あいえ)う", "あいえう");
x2("(?>あいえ|あ)う", "あいえう", 0, 8);
x2("あ?|い", "", 0, 2);
x2("あ?|い", "", 0, 0);
x2("あ?|い", "", 0, 0);
x2("あ*|い", "ああ", 0, 4);
x2("あ*|い*", "いあ", 0, 0);
x2("あ*|い*", "あい", 0, 2);
x2("[aあ]*|い*", "aあいいい", 0, 3);
x2("あ+|い*", "", 0, 0);
x2("あ+|い*", "いいい", 0, 6);
x2("あ+|い*", "あいいい", 0, 2);
x2("あ+|い*", "aあいいい", 0, 0);
n("あ+|い+", "");
x2("(あ|い)?", "", 0, 2);
x2("(あ|い)*", "いあ", 0, 4);
x2("(あ|い)+", "いあい", 0, 6);
x2("(あい|うあ)+", "うああいうえ", 0, 8);
x2("(あい|うえ)+", "うああいうえ", 4, 12);
x2("(あい|うあ)+", "ああいうあ", 2, 10);
x2("(あい|うあ)+", "あいをうあ", 0, 4);
x2("(あい|うあ)+", "$$zzzzあいをうあ", 6, 10);
x2("(あ|いあい)+", "あいあいあ", 0, 10);
x2("(あ|いあい)+", "いあ", 2, 4);
x2("(あ|いあい)+", "いあああいあ", 2, 8);
x2("(?:あ|い)(?:あ|い)", "あい", 0, 4);
x2("(?:あ*|い*)(?:あ*|い*)", "あああいいい", 0, 6);
x2("(?:あ*|い*)(?:あ+|い+)", "あああいいい", 0, 12);
x2("(?:あ+|い+){2}", "あああいいい", 0, 12);
x2("(?:あ+|い+){1,2}", "あああいいい", 0, 12);
x2("(?:あ+|\\Aい*)うう", "うう", 0, 4);
n("(?:あ+|\\Aい*)うう", "あいうう");
x2("(?:^あ+|い+)*う", "ああいいいあいう", 12, 16);
x2("(?:^あ+|い+)*う", "ああいいいいう", 0, 14);
x2("う{0,}", "うううう", 0, 8);
x2("あ|(?i)c", "C", 0, 1);
x2("(?i)c|あ", "C", 0, 1);
x2("(?i:あ)|a", "a", 0, 1);
n("(?i:あ)|a", "A");
x2("[あいう]?", "あいう", 0, 2);
x2("[あいう]*", "あいう", 0, 6);
x2("[^あいう]*", "あいう", 0, 0);
n("[^あいう]+", "あいう");
x2("あ?\?", "あああ", 0, 0);
x2("いあ?\?い", "いあい", 0, 6);
x2("あ*?", "あああ", 0, 0);
x2("いあ*?", "いああ", 0, 2);
x2("いあ*?い", "いああい", 0, 8);
x2("あ+?", "あああ", 0, 2);
x2("いあ+?", "いああ", 0, 4);
x2("いあ+?い", "いああい", 0, 8);
x2("(?:天?)?\?", "", 0, 0);
x2("(?:天?\?)?", "", 0, 0);
x2("(?:夢?)+?", "夢夢夢", 0, 2);
x2("(?:風+)?\?", "風風風", 0, 0);
x2("(?:雪+)?\?霜", "雪雪雪霜", 0, 8);
x2("(?:あい)?{2}", "", 0, 0);
x2("(?:鬼車)?{2}", "鬼車鬼車鬼", 0, 8);
x2("(?:鬼車)*{0}", "鬼車鬼車鬼", 0, 0);
x2("(?:鬼車){3,}", "鬼車鬼車鬼車鬼車", 0, 16);
n("(?:鬼車){3,}", "鬼車鬼車");
x2("(?:鬼車){2,4}", "鬼車鬼車鬼車", 0, 12);
x2("(?:鬼車){2,4}", "鬼車鬼車鬼車鬼車鬼車", 0, 16);
x2("(?:鬼車){2,4}?", "鬼車鬼車鬼車鬼車鬼車", 0, 8);
x2("(?:鬼車){,}", "鬼車{,}", 0, 7);
x2("(?:かきく)+?{2}", "かきくかきくかきく", 0, 12);
x3("(火)", "", 0, 2, 1);
x3("(火水)", "火水", 0, 4, 1);
x2("((時間))", "時間", 0, 4);
x3("((風水))", "風水", 0, 4, 1);
x3("((昨日))", "昨日", 0, 4, 2);
x3("((((((((((((((((((((量子))))))))))))))))))))", "量子", 0, 4, 20);
x3("(あい)(うえ)", "あいうえ", 0, 4, 1);
x3("(あい)(うえ)", "あいうえ", 4, 8, 2);
x3("()(あ)いう(えおか)きくけこ", "あいうえおかきくけこ", 6, 12, 3);
x3("(()(あ)いう(えおか)きくけこ)", "あいうえおかきくけこ", 6, 12, 4);
x3(".*(フォ)ン・マ(ン()シュタ)イン", "フォン・マンシュタイン", 10, 18, 2);
x2("(^あ)", "", 0, 2);
x3("(あ)|(あ)", "いあ", 2, 4, 1);
x3("(^あ)|(あ)", "いあ", 2, 4, 2);
x3("(あ?)", "あああ", 0, 2, 1);
x3("(ま*)", "ままま", 0, 6, 1);
x3("(と*)", "", 0, 0, 1);
x3("(る+)", "るるるるるるる", 0, 14, 1);
x3("(ふ+|へ*)", "ふふふへへ", 0, 6, 1);
x3("(あ+|い?)", "いいいああ", 0, 2, 1);
x3("(あいう)?", "あいう", 0, 6, 1);
x3("(あいう)*", "あいう", 0, 6, 1);
x3("(あいう)+", "あいう", 0, 6, 1);
x3("(さしす|あいう)+", "あいう", 0, 6, 1);
x3("([なにぬ][かきく]|かきく)+", "かきく", 0, 6, 1);
x3("((?i:あいう))", "あいう", 0, 6, 1);
x3("((?m:あ.う))", "\n", 0, 5, 1);
x3("((?=あん)あ)", "あんい", 0, 2, 1);
x3("あいう|(.あいえ)", "んあいえ", 0, 8, 1);
x3("あ*(.)", "ああああん", 8, 10, 1);
x3("あ*?(.)", "ああああん", 0, 2, 1);
x3("あ*?(ん)", "ああああん", 8, 10, 1);
x3("[いうえ]あ*(.)", "えああああん", 10, 12, 1);
x3("(\\Aいい)うう", "いいうう", 0, 4, 1);
n("(\\Aいい)うう", "んいいうう");
x3("(^いい)うう", "いいうう", 0, 4, 1);
n("(^いい)うう", "んいいうう");
x3("ろろ(るる$)", "ろろるる", 4, 8, 1);
n("ろろ(るる$)", "ろろるるる");
x2("(無)\\1", "無無", 0, 4);
n("(無)\\1", "無武");
x2("(空?)\\1", "空空", 0, 4);
x2("(空?\?)\\1", "空空", 0, 0);
x2("(空*)\\1", "空空空空空", 0, 8);
x3("(空*)\\1", "空空空空空", 0, 4, 1);
x2("あ(い*)\\1", "あいいいい", 0, 10);
x2("あ(い*)\\1", "あい", 0, 2);
x2("(あ*)(い*)\\1\\2", "あああいいあああいい", 0, 20);
x2("(あ*)(い*)\\2", "あああいいいい", 0, 14);
x3("(あ*)(い*)\\2", "あああいいいい", 6, 10, 2);
x2("(((((((ぽ*)ぺ))))))ぴ\\7", "ぽぽぽぺぴぽぽぽ", 0, 16);
x3("(((((((ぽ*)ぺ))))))ぴ\\7", "ぽぽぽぺぴぽぽぽ", 0, 6, 7);
x2("(は)(ひ)(ふ)\\2\\1\\3", "はひふひはふ", 0, 12);
x2("([き-け])\\1", "くく", 0, 4);
x2("(\\w\\d\\s)\\1", "あ5 あ5 ", 0, 8);
n("(\\w\\d\\s)\\1", "あ5 あ5");
x2("(誰?|[あ-う]{3})\\1", "誰?誰?", 0, 8);
x2("...(誰?|[あ-う]{3})\\1", "あaあ誰", 0, 13);
x2("(誰?|[あ-う]{3})\\1", "ういうういう", 0, 12);
x2("(^こ)\\1", "ここ", 0, 4);
n("(^む)\\1", "めむむ");
n("(あ$)\\1", "ああ");
n("(あい\\Z)\\1", "あい");
x2("(あ*\\Z)\\1", "", 2, 2);
x2(".(あ*\\Z)\\1", "いあ", 2, 4);
x3("(.(やいゆ)\\2)", "zやいゆやいゆ", 0, 13, 1);
x3("(.(..\\d.)\\2)", "あ12341234", 0, 10, 1);
x2("((?i:あvず))\\1", "あvずあvず", 0, 10);
x2("(?<愚か>変|\\(\\g<愚か>\\))", "((((((変))))))", 0, 14);
x2("\\A(?:\\g<阿-1>|\\g<云-2>|\\z終了 (?<阿-1>観|自\\g<云-2>自)(?<云-2>在|菩薩\\g<阿-1>菩薩))$", "菩薩自菩薩自在自菩薩自菩薩", 0, 26);
x2("[[ひふ]]", "", 0, 2);
x2("[[いおう]か]", "", 0, 2);
n("[[^あ]]", "");
n("[^[あ]]", "");
x2("[^[^あ]]", "", 0, 2);
x2("[[かきく]&&きく]", "", 0, 2);
n("[[かきく]&&きく]", "");
n("[[かきく]&&きく]", "");
x2("[あ-ん&&い-を&&う-ゑ]", "", 0, 2);
n("[^あ-ん&&い-を&&う-ゑ]", "");
x2("[[^あ&&あ]&&あ-ん]", "", 0, 2);
n("[[^あ&&あ]&&あ-ん]", "");
x2("[[^あ-ん&&いうえお]&&[^う-か]]", "", 0, 2);
n("[[^あ-ん&&いうえお]&&[^う-か]]", "");
x2("[^[^あいう]&&[^うえお]]", "", 0, 2);
x2("[^[^あいう]&&[^うえお]]", "", 0, 2);
n("[^[^あいう]&&[^うえお]]", "");
x2("[あ-&&-あ]", "-", 0, 1);
x2("[^[^a-zあいう]&&[^bcdefgうえお]q-w]", "", 0, 2);
x2("[^[^a-zあいう]&&[^bcdefgうえお]g-w]", "f", 0, 1);
x2("[^[^a-zあいう]&&[^bcdefgうえお]g-w]", "g", 0, 1);
n("[^[^a-zあいう]&&[^bcdefgうえお]g-w]", "2");
fprintf(stdout, "\nRESULT SUCC: %d, FAIL: %d\n", nsucc, nfail);
x2(".", "", 0, 2);
x2("..", "かき", 0, 4);
x2("\\w", "", 0, 2);
n("\\W", "");
x2("[\\W]", "う$", 2, 3);
x2("\\S", "", 0, 2);
x2("\\S", "", 0, 2);
x2("\\b", "", 0, 0);
x2("\\b", "", 1, 1);
x2("\\B", "せそ ", 2, 2);
x2("\\B", "", 3, 3);
x2("\\B", "", 0, 0);
x2("[たち]", "", 0, 2);
n("[なに]", "");
x2("[う-お]", "", 0, 2);
n("[^け]", "");
x2("[\\w]", "", 0, 2);
n("[\\d]", "");
x2("[\\D]", "", 0, 2);
n("[\\s]", "");
x2("[\\S]", "", 0, 2);
x2("[\\w\\d]", "", 0, 2);
x2("[\\w\\d]", "", 3, 5);
n("\\w鬼車", " 鬼車");
x2("\\W車", "鬼 車", 0, 5);
x2("あ.い.う", "ああいいう", 0, 10);
x2(".\\wう\\W..ぞ", "えうう うぞぞ", 0, 13);
x2("\\s\\wこここ", " ここここ", 0, 9);
x2("ああ.け", "ああけけ", 0, 8);
n(".い", "いえ");
x2(".お", "おお", 0, 4);
x2("^あ", "", 0, 2);
x2("^む$", "", 0, 2);
x2("^\\w$", "", 0, 2);
x2("^\\wかきくけこ$", "zかきくけこ", 0, 11);
x2("^\\w...うえお$", "zあいううえお", 0, 13);
x2("\\w\\w\\s\\Wおおお\\d", "aお おおお4", 0, 12);
x2("\\Aたちつ", "たちつ", 0, 6);
x2("むめも\\Z", "むめも", 0, 6);
x2("かきく\\z", "かきく", 0, 6);
x2("かきく\\Z", "かきく\n", 0, 6);
x2("\\Gぽぴ", "ぽぴ", 0, 4);
n("\\Gえ", "うえお");
n("とて\\G", "とて");
n("まみ\\A", "まみ");
n("\\Aみ", "まみ");
x2("(?=せ)せ", "", 0, 2);
n("(?=う).", "");
x2("(?!う)か", "", 0, 2);
n("(?!と)あ", "");
x2("(?i:あ)", "", 0, 2);
x2("(?i:ぶべ)", "ぶべ", 0, 4);
n("(?i:い)", "");
x2("(?m:よ.)", "\n", 0, 3);
x2("(?m:.め)", "\n", 2, 5);
x2("あ?", "", 0, 0);
x2("変?", "", 0, 0);
x2("変?", "", 0, 2);
x2("量*", "", 0, 0);
x2("量*", "", 0, 2);
x2("子*", "子子子", 0, 6);
x2("馬*", "鹿馬馬馬馬", 0, 0);
n("山+", "");
x2("河+", "", 0, 2);
x2("時+", "時時時時", 0, 8);
x2("え+", "ええううう", 0, 4);
x2("う+", "おうううう", 2, 10);
x2(".?", "", 0, 2);
x2(".*", "ぱぴぷぺ", 0, 8);
x2(".+", "", 0, 2);
x2(".+", "いうえか\n", 0, 8);
x2("あ|い", "", 0, 2);
x2("あ|い", "", 0, 2);
x2("あい|いう", "あい", 0, 4);
x2("あい|いう", "いう", 0, 4);
x2("を(?:かき|きく)", "をかき", 0, 6);
x2("を(?:かき|きく)け", "をきくけ", 0, 8);
x2("あい|(?:あう|あを)", "あを", 0, 4);
x2("あ|い|う", "えう", 2, 4);
x2("あ|い|うえ|おかき|く|けこさ|しすせ|そ|たち|つてとなに|ぬね", "しすせ", 0, 6);
n("あ|い|うえ|おかき|く|けこさ|しすせ|そ|たち|つてとなに|ぬね", "すせ");
x2("あ|^わ", "ぶあ", 2, 4);
x2("あ|^を", "をあ", 0, 2);
x2("鬼|\\G車", "け車鬼", 4, 6);
x2("鬼|\\G車", "車鬼", 0, 2);
x2("鬼|\\A車", "b車鬼", 3, 5);
x2("鬼|\\A車", "", 0, 2);
x2("鬼|車\\Z", "車鬼", 2, 4);
x2("鬼|車\\Z", "", 0, 2);
x2("鬼|車\\Z", "\n", 0, 2);
x2("鬼|車\\z", "車鬼", 2, 4);
x2("鬼|車\\z", "", 0, 2);
x2("\\w|\\s", "", 0, 2);
x2("\\w|%", "%お", 0, 1);
x2("\\w|[&$]", "う&", 0, 2);
x2("[い-け]", "", 0, 2);
x2("[い-け]|[^か-こ]", "", 0, 2);
x2("[い-け]|[^か-こ]", "", 0, 2);
x2("[^あ]", "\n", 0, 1);
x2("(?:あ|[う-き])|いを", "うを", 0, 2);
x2("(?:あ|[う-き])|いを", "いを", 0, 4);
x2("あいう|(?=けけ)..ほ", "けけほ", 0, 6);
x2("あいう|(?!けけ)..ほ", "あいほ", 0, 6);
x2("(?=をあ)..あ|(?=をを)..あ", "ををあ", 0, 6);
x2("(?<=あ|いう)い", "いうい", 4, 6);
n("(?>あ|あいえ)う", "あいえう");
x2("(?>あいえ|あ)う", "あいえう", 0, 8);
x2("あ?|い", "", 0, 2);
x2("あ?|い", "", 0, 0);
x2("あ?|い", "", 0, 0);
x2("あ*|い", "ああ", 0, 4);
x2("あ*|い*", "いあ", 0, 0);
x2("あ*|い*", "あい", 0, 2);
x2("[aあ]*|い*", "aあいいい", 0, 3);
x2("あ+|い*", "", 0, 0);
x2("あ+|い*", "いいい", 0, 6);
x2("あ+|い*", "あいいい", 0, 2);
x2("あ+|い*", "aあいいい", 0, 0);
n("あ+|い+", "");
x2("(あ|い)?", "", 0, 2);
x2("(あ|い)*", "いあ", 0, 4);
x2("(あ|い)+", "いあい", 0, 6);
x2("(あい|うあ)+", "うああいうえ", 0, 8);
x2("(あい|うえ)+", "うああいうえ", 4, 12);
x2("(あい|うあ)+", "ああいうあ", 2, 10);
x2("(あい|うあ)+", "あいをうあ", 0, 4);
x2("(あい|うあ)+", "$$zzzzあいをうあ", 6, 10);
x2("(あ|いあい)+", "あいあいあ", 0, 10);
x2("(あ|いあい)+", "いあ", 2, 4);
x2("(あ|いあい)+", "いあああいあ", 2, 8);
x2("(?:あ|い)(?:あ|い)", "あい", 0, 4);
x2("(?:あ*|い*)(?:あ*|い*)", "あああいいい", 0, 6);
x2("(?:あ*|い*)(?:あ+|い+)", "あああいいい", 0, 12);
x2("(?:あ+|い+){2}", "あああいいい", 0, 12);
x2("(?:あ+|い+){1,2}", "あああいいい", 0, 12);
x2("(?:あ+|\\Aい*)うう", "うう", 0, 4);
n("(?:あ+|\\Aい*)うう", "あいうう");
x2("(?:^あ+|い+)*う", "ああいいいあいう", 12, 16);
x2("(?:^あ+|い+)*う", "ああいいいいう", 0, 14);
x2("う{0,}", "うううう", 0, 8);
x2("あ|(?i)c", "C", 0, 1);
x2("(?i)c|あ", "C", 0, 1);
x2("(?i:あ)|a", "a", 0, 1);
n("(?i:あ)|a", "A");
x2("[あいう]?", "あいう", 0, 2);
x2("[あいう]*", "あいう", 0, 6);
x2("[^あいう]*", "あいう", 0, 0);
n("[^あいう]+", "あいう");
x2("あ??", "あああ", 0, 0);
x2("いあ??い", "いあい", 0, 6);
x2("あ*?", "あああ", 0, 0);
x2("いあ*?", "いああ", 0, 2);
x2("いあ*?い", "いああい", 0, 8);
x2("あ+?", "あああ", 0, 2);
x2("いあ+?", "いああ", 0, 4);
x2("いあ+?い", "いああい", 0, 8);
x2("(?:天?)??", "", 0, 0);
x2("(?:天??)?", "", 0, 0);
x2("(?:夢?)+?", "夢夢夢", 0, 2);
x2("(?:風+)??", "風風風", 0, 0);
x2("(?:雪+)??霜", "雪雪雪霜", 0, 8);
x2("(?:あい)?{2}", "", 0, 0);
x2("(?:鬼車)?{2}", "鬼車鬼車鬼", 0, 8);
x2("(?:鬼車)*{0}", "鬼車鬼車鬼", 0, 0);
x2("(?:鬼車){3,}", "鬼車鬼車鬼車鬼車", 0, 16);
n("(?:鬼車){3,}", "鬼車鬼車");
x2("(?:鬼車){2,4}", "鬼車鬼車鬼車", 0, 12);
x2("(?:鬼車){2,4}", "鬼車鬼車鬼車鬼車鬼車", 0, 16);
x2("(?:鬼車){2,4}?", "鬼車鬼車鬼車鬼車鬼車", 0, 8);
x2("(?:鬼車){,}", "鬼車{,}", 0, 7);
x2("(?:かきく)+?{2}", "かきくかきくかきく", 0, 12);
x3("(火)", "", 0, 2, 1);
x3("(火水)", "火水", 0, 4, 1);
x2("((時間))", "時間", 0, 4);
x3("((風水))", "風水", 0, 4, 1);
x3("((昨日))", "昨日", 0, 4, 2);
x3("((((((((((((((((((((量子))))))))))))))))))))", "量子", 0, 4, 20);
x3("(あい)(うえ)", "あいうえ", 0, 4, 1);
x3("(あい)(うえ)", "あいうえ", 4, 8, 2);
x3("()(あ)いう(えおか)きくけこ", "あいうえおかきくけこ", 6, 12, 3);
x3("(()(あ)いう(えおか)きくけこ)", "あいうえおかきくけこ", 6, 12, 4);
x3(".*(フォ)ン・マ(ン()シュタ)イン", "フォン・マンシュタイン", 10, 18, 2);
x2("(^あ)", "", 0, 2);
x3("(あ)|(あ)", "いあ", 2, 4, 1);
x3("(^あ)|(あ)", "いあ", 2, 4, 2);
x3("(あ?)", "あああ", 0, 2, 1);
x3("(ま*)", "ままま", 0, 6, 1);
x3("(と*)", "", 0, 0, 1);
x3("(る+)", "るるるるるるる", 0, 14, 1);
x3("(ふ+|へ*)", "ふふふへへ", 0, 6, 1);
x3("(あ+|い?)", "いいいああ", 0, 2, 1);
x3("(あいう)?", "あいう", 0, 6, 1);
x3("(あいう)*", "あいう", 0, 6, 1);
x3("(あいう)+", "あいう", 0, 6, 1);
x3("(さしす|あいう)+", "あいう", 0, 6, 1);
x3("([なにぬ][かきく]|かきく)+", "かきく", 0, 6, 1);
x3("((?i:あいう))", "あいう", 0, 6, 1);
x3("((?m:あ.う))", "\n", 0, 5, 1);
x3("((?=あん)あ)", "あんい", 0, 2, 1);
x3("あいう|(.あいえ)", "んあいえ", 0, 8, 1);
x3("あ*(.)", "ああああん", 8, 10, 1);
x3("あ*?(.)", "ああああん", 0, 2, 1);
x3("あ*?(ん)", "ああああん", 8, 10, 1);
x3("[いうえ]あ*(.)", "えああああん", 10, 12, 1);
x3("(\\Aいい)うう", "いいうう", 0, 4, 1);
n("(\\Aいい)うう", "んいいうう");
x3("(^いい)うう", "いいうう", 0, 4, 1);
n("(^いい)うう", "んいいうう");
x3("ろろ(るる$)", "ろろるる", 4, 8, 1);
n("ろろ(るる$)", "ろろるるる");
x2("(無)\\1", "無無", 0, 4);
n("(無)\\1", "無武");
x2("(空?)\\1", "空空", 0, 4);
x2("(空??)\\1", "空空", 0, 0);
x2("(空*)\\1", "空空空空空", 0, 8);
x3("(空*)\\1", "空空空空空", 0, 4, 1);
x2("あ(い*)\\1", "あいいいい", 0, 10);
x2("あ(い*)\\1", "あい", 0, 2);
x2("(あ*)(い*)\\1\\2", "あああいいあああいい", 0, 20);
x2("(あ*)(い*)\\2", "あああいいいい", 0, 14);
x3("(あ*)(い*)\\2", "あああいいいい", 6, 10, 2);
x2("(((((((ぽ*)ぺ))))))ぴ\\7", "ぽぽぽぺぴぽぽぽ", 0, 16);
x3("(((((((ぽ*)ぺ))))))ぴ\\7", "ぽぽぽぺぴぽぽぽ", 0, 6, 7);
x2("(は)(ひ)(ふ)\\2\\1\\3", "はひふひはふ", 0, 12);
x2("([き-け])\\1", "くく", 0, 4);
x2("(\\w\\d\\s)\\1", "あ5 あ5 ", 0, 8);
n("(\\w\\d\\s)\\1", "あ5 あ5");
x2("(誰?|[あ-う]{3})\\1", "誰?誰?", 0, 8);
x2("...(誰?|[あ-う]{3})\\1", "あaあ誰", 0, 13);
x2("(誰?|[あ-う]{3})\\1", "ういうういう", 0, 12);
x2("(^こ)\\1", "ここ", 0, 4);
n("(^む)\\1", "めむむ");
n("(あ$)\\1", "ああ");
n("(あい\\Z)\\1", "あい");
x2("(あ*\\Z)\\1", "", 2, 2);
x2(".(あ*\\Z)\\1", "いあ", 2, 4);
x3("(.(やいゆ)\\2)", "zやいゆやいゆ", 0, 13, 1);
x3("(.(..\\d.)\\2)", "あ12341234", 0, 10, 1);
x2("((?i:あvず))\\1", "あvずあvず", 0, 10);
x2("(?<愚か>変|\\(\\g<愚か>\\))", "((((((変))))))", 0, 14);
x2("\\A(?:\\g<阿_1>|\\g<云_2>|\\z終了 (?<阿_1>観|自\\g<云_2>自)(?<云_2>在|菩薩\\g<阿_1>菩薩))$", "菩薩自菩薩自在自菩薩自菩薩", 0, 26);
x2("[[ひふ]]", "", 0, 2);
x2("[[いおう]か]", "", 0, 2);
n("[[^あ]]", "");
n("[^[あ]]", "");
x2("[^[^あ]]", "", 0, 2);
x2("[[かきく]&&きく]", "", 0, 2);
n("[[かきく]&&きく]", "");
n("[[かきく]&&きく]", "");
x2("[あ-ん&&い-を&&う-ゑ]", "", 0, 2);
n("[^あ-ん&&い-を&&う-ゑ]", "");
x2("[[^あ&&あ]&&あ-ん]", "", 0, 2);
n("[[^あ&&あ]&&あ-ん]", "");
x2("[[^あ-ん&&いうえお]&&[^う-か]]", "", 0, 2);
n("[[^あ-ん&&いうえお]&&[^う-か]]", "");
x2("[^[^あいう]&&[^うえお]]", "", 0, 2);
x2("[^[^あいう]&&[^うえお]]", "", 0, 2);
n("[^[^あいう]&&[^うえお]]", "");
x2("[あ-&&-あ]", "-", 0, 1);
x2("[^[^a-zあいう]&&[^bcdefgうえお]q-w]", "", 0, 2);
x2("[^[^a-zあいう]&&[^bcdefgうえお]g-w]", "f", 0, 1);
x2("[^[^a-zあいう]&&[^bcdefgうえお]g-w]", "g", 0, 1);
n("[^[^a-zあいう]&&[^bcdefgうえお]g-w]", "2");
fprintf(stdout, "\nRESULT SUCC: %d, FAIL: %d, ERROR: %d\n",
nsucc, nfail, nerror);
#ifndef POSIX_TEST
regex_region_free(region, 1);
regex_end();
onig_region_free(region, 1);
onig_end();
#endif
return 0;

View file

@ -1,223 +0,0 @@
#!/usr/local/bin/ruby -Ke
# testconv.rb
# Copyright (C) 2003 K.Kosako (kosako@sofnec.co.jp)
# A leading "-win..." command-line argument selects the Shift_JIS variant of
# the generated test; it is consumed here so that gets() only sees input files.
WINDOWS = (ARGV.size > 0 && /^-win/i =~ ARGV[0])
ARGV.shift if WINDOWS

# Encoding identifiers interpolated into the generated C source.
REGCODE, REGENC =
  if WINDOWS
    ['REGCODE_SJIS', 'REG_ENCODING_SJIS']
  else
    ['REGCODE_EUCJP', 'REG_ENCODING_EUC_JP']
  end
# Escape a regex source so it can be embedded in a C string literal:
# every backslash is doubled and, for the Windows build, "??" is broken up
# so the C compiler does not read it as the start of a trigraph.
def conv_reg(s)
  escaped = s.gsub(/\\/, '\\\\\\\\') #'
  escaped = escaped.gsub(/\?\?/, '?\\\\?') if (WINDOWS) # escape ANSI trigraph
  escaped
end
# Strip the surrounding quote characters from a test-script string literal.
# Single-quoted literals additionally get their backslashes doubled so they
# survive inside a C string; double-quoted literals are passed through as-is.
def conv_str(s)
  inner = s[1..-2]
  return inner unless (s[0] == ?')
  inner.gsub(/\\/, '\\\\\\\\') #'
end
# Emit the fixed head of the generated C test program: includes, counters,
# the xx() driver with its x2()/x3()/n() wrappers, and the opening of main().
# The heredoc is double-quoted, so #{REGCODE}/#{REGENC} are interpolated and
# "\\n" becomes "\n" in the C output.
# <stdlib.h> and <string.h> are included because the generated code calls
# exit() and strlen(); without them those calls are implicit declarations.
print(<<"EOS")
/*
* This program was generated by testconv.rb.
*/
#include<stdio.h>
#include<stdlib.h>
#include<string.h>
#ifdef POSIX_TEST
#include "onigposix.h"
#else
#include "oniguruma.h"
#endif
static int nsucc = 0;
static int nfail = 0;
#ifndef POSIX_TEST
static RegRegion* region;
#endif
static void xx(char* pattern, char* str, int from, int to, int mem, int not)
{
int r;
#ifdef POSIX_TEST
regex_t reg;
char buf[200];
regmatch_t pmatch[20];
r = regcomp(&reg, pattern, REG_EXTENDED | REG_NEWLINE);
if (r) {
regerror(r, &reg, buf, sizeof(buf));
fprintf(stderr, "ERROR: %s\\n", buf);
exit(-1);
}
r = regexec(&reg, str, reg.re_nsub + 1, pmatch, 0);
if (r != 0 && r != REG_NOMATCH) {
regerror(r, &reg, buf, sizeof(buf));
fprintf(stderr, "ERROR: %s\\n", buf);
exit(-1);
}
if (r == REG_NOMATCH) {
if (not) {
fprintf(stdout, "OK(N): /%s/ '%s'\\n", pattern, str);
nsucc++;
}
else {
fprintf(stdout, "FAIL: /%s/ '%s'\\n", pattern, str);
nfail++;
}
}
else {
if (not) {
fprintf(stdout, "FAIL(N): /%s/ '%s'\\n", pattern, str);
nfail++;
}
else {
if (pmatch[mem].rm_so == from && pmatch[mem].rm_eo == to) {
fprintf(stdout, "OK: /%s/ '%s'\\n", pattern, str);
nsucc++;
}
else {
fprintf(stdout, "FAIL: /%s/ '%s' %d-%d : %d-%d\\n", pattern, str,
from, to, pmatch[mem].rm_so, pmatch[mem].rm_eo);
nfail++;
}
}
}
regfree(&reg);
#else
regex_t* reg;
RegErrorInfo einfo;
r = regex_new(&reg, (UChar* )pattern, (UChar* )(pattern + strlen(pattern)),
REG_OPTION_DEFAULT, #{REGCODE}, REG_SYNTAX_DEFAULT, &einfo);
if (r) {
char s[REG_MAX_ERROR_MESSAGE_LEN];
regex_error_code_to_str(s, r, &einfo);
fprintf(stderr, "ERROR: %s\\n", s);
exit(-1);
}
r = regex_search(reg, (UChar* )str, (UChar* )(str + strlen(str)),
(UChar* )str, (UChar* )(str + strlen(str)),
region, REG_OPTION_NONE);
if (r < REG_MISMATCH) {
char s[REG_MAX_ERROR_MESSAGE_LEN];
regex_error_code_to_str(s, r);
fprintf(stderr, "ERROR: %s\\n", s);
exit(-1);
}
if (r == REG_MISMATCH) {
if (not) {
fprintf(stdout, "OK(N): /%s/ '%s'\\n", pattern, str);
nsucc++;
}
else {
fprintf(stdout, "FAIL: /%s/ '%s'\\n", pattern, str);
nfail++;
}
}
else {
if (not) {
fprintf(stdout, "FAIL(N): /%s/ '%s'\\n", pattern, str);
nfail++;
}
else {
if (region->beg[mem] == from && region->end[mem] == to) {
fprintf(stdout, "OK: /%s/ '%s'\\n", pattern, str);
nsucc++;
}
else {
fprintf(stdout, "FAIL: /%s/ '%s' %d-%d : %d-%d\\n", pattern, str,
from, to, region->beg[mem], region->end[mem]);
nfail++;
}
}
}
regex_free(reg);
#endif
}
static void x2(char* pattern, char* str, int from, int to)
{
xx(pattern, str, from, to, 0, 0);
}
static void x3(char* pattern, char* str, int from, int to, int mem)
{
xx(pattern, str, from, to, mem, 0);
}
static void n(char* pattern, char* str)
{
xx(pattern, str, 0, 0, 0, 1);
}
extern int main(int argc, char* argv[])
{
#ifdef POSIX_TEST
reg_set_encoding(#{REGENC});
#else
region = regex_region_new();
#endif
EOS
# Recognised test-script line shapes:
#   x(/re/, 'str', from, to)        -> x2(...)
#   i(/re/, 'str', from, to)        -> x2(...)
#   x(/re/, 'str', from, to, mem)   -> x3(...)
#   n(/re/, 'str')                  -> n(...)
# Anything else is silently ignored.
CM = '\s*,\s*'
RX2 = %r{^x\(/([^\/]*)/#{CM}('[^']*'|"[^"]*")#{CM}(\S+)#{CM}(\S+)\)$}
RI2 = %r{^i\(/([^\/]*)/#{CM}('[^']*'|"[^"]*")#{CM}(\S+)#{CM}(\S+)\)}
RX3 = %r{^x\(/([^\/]*)/#{CM}('[^']*'|"[^"]*")#{CM}(\S+)#{CM}(\S+)#{CM}(\S+)\)$}
RN = %r{^n\(/([^\/]*)/#{CM}('[^']*'|"[^"]*")\)$} #'

while line = gets()
  # The patterns are tried in the same order as before; x(...) and i(...)
  # with four arguments both produce an x2() call.
  if (m = RX2.match(line)) || (m = RI2.match(line))
    pattern = conv_reg(m[1])
    subject = conv_str(m[2])
    printf(" x2(\"%s\", \"%s\", %s, %s);\n", pattern, subject, m[3], m[4])
  elsif (m = RX3.match(line))
    pattern = conv_reg(m[1])
    subject = conv_str(m[2])
    printf(" x3(\"%s\", \"%s\", %s, %s, %s);\n", pattern, subject, m[3], m[4], m[5])
  elsif (m = RN.match(line))
    pattern = conv_reg(m[1])
    subject = conv_str(m[2])
    printf(" n(\"%s\", \"%s\");\n", pattern, subject)
  end
end
# Emit the tail of the generated main(): print the success/failure totals,
# release the search region in the non-POSIX build, and return.
# The heredoc is single-quoted, so "\n" reaches the C source unchanged.
print(<<'EOS')
fprintf(stdout, "\nRESULT SUCC: %d, FAIL: %d\n", nsucc, nfail);
#ifndef POSIX_TEST
regex_region_free(region, 1);
regex_end();
#endif
return 0;
}
EOS
# END OF SCRIPT
# END OF SCRIPT

View file

@ -1,131 +0,0 @@
# Oniguruma Makefile for Win32
product_name = oniguruma
CPPFLAGS =
CFLAGS = -O2 -nologo
LDFLAGS =
LOADLIBES =
ARLIB = lib
ARLIB_FLAGS = -nologo
ARDLL = cl
ARDLL_FLAGS = -nologo -LD $(LINKFLAGS) -dll
LINKFLAGS = -link -incremental:no -pdb:none
INSTALL = install -c
CP = copy
CC = cl
DEFS = -DHAVE_CONFIG_H -DNOT_RUBY -DEXPORT
RUBYDIR = ..
subdirs =
libbase = onig
libname = $(libbase)_s.lib
dllname = $(libbase).dll
dlllib = $(libbase).lib
onigheaders = oniguruma.h regint.h regparse.h
posixheaders = onigposix.h
headers = $(posixheaders) $(onigheaders)
onigobjs = reggnu.obj regerror.obj regparse.obj regcomp.obj regexec.obj
posixobjs = regposix.obj regposerr.obj
libobjs = $(onigobjs) $(posixobjs)
onigsources = regerror.c regparse.c regcomp.c regexec.c reggnu.c
posixsources = regposix.c regposerr.c
libsources = $(posixsources) $(onigsources)
rubysources = regex.c $(onigsources)
patchfiles = re.c.168.patch re.c.180.patch
distfiles = README COPYING INSTALL-RUBY HISTORY \
Makefile.in configure.in config.h.in configure \
$(headers) $(libsources) regex.c $(patchfiles) \
test.rb testconv.rb
testc = testc
testp = testp
makeargs = $(MFLAGS) CPPFLAGS='$(CPPFLAGS)' CFLAGS='$(CFLAGS)' CC='$(CC)'
.SUFFIXES:
.SUFFIXES: .obj .c .h .ps .dvi .info .texinfo
.c.obj:
$(CC) $(CFLAGS) $(CPPFLAGS) $(DEFS) /I. /c $<
# targets
default: all
all: $(libname) $(dllname)
$(libname): $(libobjs)
$(ARLIB) $(ARLIB_FLAGS) -out:$@ $(libobjs)
$(dllname): $(libobjs)
$(ARDLL) $(libobjs) -Fe$@ $(ARDLL_FLAGS)
regparse.obj: regparse.c $(onigheaders) config.h
regcomp.obj: regcomp.c $(onigheaders) config.h
regexec.obj: regexec.c regint.h oniguruma.h config.h
reggnu.obj: reggnu.c regint.h oniguruma.h config.h
regerror.obj: regerror.c regint.h oniguruma.h config.h
regposix.obj: regposix.c $(posixheaders) oniguruma.h config.h
regposerr.obj: regposerr.c $(posixheaders) config.h
# Ruby test
rtest:
$(RUBYDIR)\win32\ruby -w -Ke test.rb
# C library test
ctest: $(testc)
.\$(testc)
# POSIX C library test
ptest: $(testp)
.\$(testp)
$(testc): $(testc).c $(libname)
$(CC) -nologo -o $(testc) $(testc).c $(libname)
$(testp): $(testc).c $(dlllib)
$(CC) -nologo -DPOSIX_TEST -DIMPORT -o $(testp) $(testc).c $(dlllib)
clean:
del *.obj *.lib *.exp *.dll $(testp).exe $(testc).exe $(testc).obj
16: cpruby
patch -d $(RUBYDIR) -p0 < re.c.168.patch
18: cpruby
patch -d $(RUBYDIR) -p0 < re.c.180.patch
# backup file suffix
SORIG = ruby_orig
cpruby:
$(CP) $(RUBYDIR)\regex.c $(RUBYDIR)\regex.c.$(SORIG)
$(CP) $(RUBYDIR)\regex.h $(RUBYDIR)\regex.h.$(SORIG)
$(CP) $(RUBYDIR)\re.c $(RUBYDIR)\re.c.$(SORIG)
# $(rubysources)
$(CP) regex.c $(RUBYDIR)
$(CP) regerror.c $(RUBYDIR)
$(CP) regparse.c $(RUBYDIR)
$(CP) regcomp.c $(RUBYDIR)
$(CP) regexec.c $(RUBYDIR)
$(CP) reggnu.c $(RUBYDIR)
# $(onigheaders)
$(CP) oniguruma.h $(RUBYDIR)\regex.h
$(CP) regint.h $(RUBYDIR)
$(CP) regparse.h $(RUBYDIR)
rback:
$(CP) $(RUBYDIR)\regex.c.$(SORIG) $(RUBYDIR)\regex.c
$(CP) $(RUBYDIR)\regex.h.$(SORIG) $(RUBYDIR)\regex.h
$(CP) $(RUBYDIR)\re.c.$(SORIG) $(RUBYDIR)\re.c
samples:
$(CC) $(CFLAGS) -I. -DIMPORT -o simple sample\simple.c $(dlllib)
$(CC) $(CFLAGS) -I. -DIMPORT -o posix sample\posix.c $(dlllib)
$(CC) $(CFLAGS) -I. -DIMPORT -o names sample\names.c $(dlllib)

View file

@ -1,84 +0,0 @@
#define STDC_HEADERS 1
#define HAVE_SYS_TYPES_H 1
#define HAVE_SYS_STAT_H 1
#define HAVE_STDLIB_H 1
#define HAVE_STRING_H 1
#define HAVE_MEMORY_H 1
#define HAVE_FLOAT_H 1
#define HAVE_OFF_T 1
#define SIZEOF_INT 4
#define SIZEOF_SHORT 2
#define SIZEOF_LONG 4
#define SIZEOF_LONG_LONG 0
#define SIZEOF___INT64 8
#define SIZEOF_OFF_T 4
#define SIZEOF_VOIDP 4
#define SIZEOF_FLOAT 4
#define SIZEOF_DOUBLE 8
#define HAVE_PROTOTYPES 1
#define TOKEN_PASTE(x,y) x##y
#define HAVE_STDARG_PROTOTYPES 1
#ifndef NORETURN
#if _MSC_VER > 1100
#define NORETURN(x) __declspec(noreturn) x
#else
#define NORETURN(x) x
#endif
#endif
#define HAVE_DECL_SYS_NERR 1
#define STDC_HEADERS 1
#define HAVE_STDLIB_H 1
#define HAVE_STRING_H 1
#define HAVE_LIMITS_H 1
#define HAVE_FCNTL_H 1
#define HAVE_SYS_UTIME_H 1
#define HAVE_MEMORY_H 1
#define uid_t int
#define gid_t int
#define HAVE_STRUCT_STAT_ST_RDEV 1
#define HAVE_ST_RDEV 1
#define GETGROUPS_T int
#define RETSIGTYPE void
#define HAVE_ALLOCA 1
#define HAVE_DUP2 1
#define HAVE_MEMCMP 1
#define HAVE_MEMMOVE 1
#define HAVE_MKDIR 1
#define HAVE_STRCASECMP 1
#define HAVE_STRNCASECMP 1
#define HAVE_STRERROR 1
#define HAVE_STRFTIME 1
#define HAVE_STRCHR 1
#define HAVE_STRSTR 1
#define HAVE_STRTOD 1
#define HAVE_STRTOL 1
#define HAVE_STRTOUL 1
#define HAVE_FLOCK 1
#define HAVE_VSNPRINTF 1
#define HAVE_FINITE 1
#define HAVE_FMOD 1
#define HAVE_FREXP 1
#define HAVE_HYPOT 1
#define HAVE_MODF 1
#define HAVE_WAITPID 1
#define HAVE_CHSIZE 1
#define HAVE_TIMES 1
#define HAVE__SETJMP 1
#define HAVE_TELLDIR 1
#define HAVE_SEEKDIR 1
#define HAVE_MKTIME 1
#define HAVE_COSH 1
#define HAVE_SINH 1
#define HAVE_TANH 1
#define HAVE_EXECVE 1
#define HAVE_TZNAME 1
#define HAVE_DAYLIGHT 1
#define SETPGRP_VOID 1
#define inline __inline
#define NEED_IO_SEEK_BETWEEN_RW 1
#define RSHIFT(x,y) ((x)>>(int)y)
#define FILE_COUNT _cnt
#define FILE_READPTR _ptr
#define DEFAULT_KCODE KCODE_NONE
#define DLEXT ".so"
#define DLEXT2 ".dll"

View file

@ -37,22 +37,22 @@ ZEND_EXTERN_MODULE_GLOBALS(mbstring)
/* {{{ static void php_mb_regex_free_cache() */
static void php_mb_regex_free_cache(php_mb_regex_t **pre)
{
php_mb_regex_free(*pre);
onig_free(*pre);
}
/* }}} */
/* {{{ _php_mb_regex_globals_ctor */
void _php_mb_regex_globals_ctor(zend_mbstring_globals *pglobals TSRMLS_DC)
{
MBSTRG(default_mbctype) = REGCODE_EUCJP;
MBSTRG(current_mbctype) = REGCODE_EUCJP;
MBSTRG(default_mbctype) = ONIG_ENCODING_EUC_JP;
MBSTRG(current_mbctype) = ONIG_ENCODING_EUC_JP;
zend_hash_init(&(MBSTRG(ht_rc)), 0, NULL, (void (*)(void *)) php_mb_regex_free_cache, 1);
MBSTRG(search_str) = (zval*) NULL;
MBSTRG(search_re) = (php_mb_regex_t*)NULL;
MBSTRG(search_pos) = 0;
MBSTRG(search_regs) = (php_mb_reg_region*)NULL;
MBSTRG(regex_default_options) = RE_OPTION_POSIXLINE;
MBSTRG(regex_default_syntax) = REG_SYNTAX_RUBY;
MBSTRG(search_regs) = (OnigRegion*)NULL;
MBSTRG(regex_default_options) = ONIG_OPTION_MULTILINE | ONIG_OPTION_SINGLELINE;
MBSTRG(regex_default_syntax) = ONIG_SYNTAX_RUBY;
}
/* }}} */
@ -96,8 +96,8 @@ PHP_RSHUTDOWN_FUNCTION(mb_regex)
MBSTRG(search_pos) = 0;
if (MBSTRG(search_regs) != NULL) {
php_mb_regex_region_free(MBSTRG(search_regs), 1);
MBSTRG(search_regs) = (php_mb_reg_region *)NULL;
onig_region_free(MBSTRG(search_regs), 1);
MBSTRG(search_regs) = (OnigRegion *)NULL;
}
zend_hash_clean(&MBSTRG(ht_rc));
@ -108,53 +108,152 @@ PHP_RSHUTDOWN_FUNCTION(mb_regex)
/*
* encoding name resolver
*/
/* {{{ php_mb_regex_name2mbctype */
php_mb_reg_char_encoding php_mb_regex_name2mbctype(const char *pname)
{
php_mb_reg_char_encoding mbctype;
mbctype = REGCODE_UNDEF;
if (pname != NULL) {
if (strcasecmp("EUC-JP", pname) == 0
|| strcasecmp("X-EUC-JP", pname) == 0
|| strcasecmp("UJIS", pname) == 0
|| strcasecmp("EUCJP", pname) == 0
|| strcasecmp("EUC_JP", pname) == 0
|| strcasecmp("EUCJP-WIN", pname) == 0) {
mbctype = REGCODE_EUCJP;
} else if (strcasecmp("UTF-8", pname) == 0
|| strcasecmp("UTF8", pname) == 0) {
mbctype = REGCODE_UTF8;
} else if (strcasecmp("SJIS", pname) == 0
|| strcasecmp("CP932", pname) == 0
|| strcasecmp("MS932", pname) == 0
|| strcasecmp("SHIFT_JIS", pname) == 0
|| strcasecmp("SJIS-WIN", pname) == 0) {
mbctype = REGCODE_SJIS;
} else if (strcasecmp("ASCII", pname) == 0) {
mbctype = REGCODE_ASCII;
/* {{{ encoding name map */
/*
 * One entry per regex encoding.
 *
 * `names` packs every accepted spelling into a single literal: each alias is
 * NUL-terminated and the list ends with an empty string (the literal's own
 * terminating NUL).  Read as an ordinary C string, `names` is therefore just
 * the first alias, which php_mb_regex_mbctype2name() returns as the name of
 * the encoding.  The table is terminated by an entry whose `names` is NULL.
 */
typedef struct _php_mb_regex_enc_name_map_t {
	const char *names;
	OnigEncoding code;
} php_mb_regex_enc_name_map_t;

php_mb_regex_enc_name_map_t enc_name_map[] ={
	{
		/* "EUC_JP" was accepted by the previous strcasecmp() chain and must stay accepted */
		"EUC-JP\0EUCJP\0X-EUC-JP\0UJIS\0EUC_JP\0EUCJP-WIN\0",
		ONIG_ENCODING_EUC_JP
	},
	{
		"UTF-8\0UTF8\0",
		ONIG_ENCODING_UTF8
	},
	{
		"SJIS\0CP932\0MS932\0SHIFT_JIS\0SJIS-WIN\0WINDOWS-31J\0",
		ONIG_ENCODING_SJIS
	},
	{
		"BIG5\0BIG-5\0BIGFIVE\0CN-BIG5\0BIG-FIVE\0",
		ONIG_ENCODING_BIG5
	},
	{
		"EUC-CN\0EUCCN\0EUC_CN\0GB-2312\0GB2312\0",
		ONIG_ENCODING_EUC_CN
	},
	{
		"EUC-TW\0EUCTW\0EUC_TW\0",
		ONIG_ENCODING_EUC_TW
	},
	{
		"EUC-KR\0EUCKR\0EUC_KR\0",
		ONIG_ENCODING_EUC_KR
	},
	{
		"KOI8\0KOI-8\0",
		ONIG_ENCODING_KOI8
	},
	{
		"KOI8R\0KOI8-R\0KOI-8R\0",
		ONIG_ENCODING_KOI8_R
	},
	{
		"ISO-8859-1\0ISO8859-1\0ISO_8859_1\0ISO8859_1\0",
		ONIG_ENCODING_ISO_8859_1
	},
	{
		"ISO-8859-2\0ISO8859-2\0ISO_8859_2\0ISO8859_2\0",
		ONIG_ENCODING_ISO_8859_2
	},
	{
		"ISO-8859-3\0ISO8859-3\0ISO_8859_3\0ISO8859_3\0",
		ONIG_ENCODING_ISO_8859_3
	},
	{
		"ISO-8859-4\0ISO8859-4\0ISO_8859_4\0ISO8859_4\0",
		ONIG_ENCODING_ISO_8859_4
	},
	{
		"ISO-8859-5\0ISO8859-5\0ISO_8859_5\0ISO8859_5\0",
		ONIG_ENCODING_ISO_8859_5
	},
	{
		"ISO-8859-6\0ISO8859-6\0ISO_8859_6\0ISO8859_6\0",
		ONIG_ENCODING_ISO_8859_6
	},
	{
		"ISO-8859-7\0ISO8859-7\0ISO_8859_7\0ISO8859_7\0",
		ONIG_ENCODING_ISO_8859_7
	},
	{
		"ISO-8859-8\0ISO8859-8\0ISO_8859_8\0ISO8859_8\0",
		ONIG_ENCODING_ISO_8859_8
	},
	{
		"ISO-8859-9\0ISO8859-9\0ISO_8859_9\0ISO8859_9\0",
		ONIG_ENCODING_ISO_8859_9
	},
	{
		"ISO-8859-10\0ISO8859-10\0ISO_8859_10\0ISO8859_10\0",
		ONIG_ENCODING_ISO_8859_10
	},
	{
		"ISO-8859-11\0ISO8859-11\0ISO_8859_11\0ISO8859_11\0",
		ONIG_ENCODING_ISO_8859_11
	},
	{
		"ISO-8859-13\0ISO8859-13\0ISO_8859_13\0ISO8859_13\0",
		ONIG_ENCODING_ISO_8859_13
	},
	{
		"ISO-8859-14\0ISO8859-14\0ISO_8859_14\0ISO8859_14\0",
		ONIG_ENCODING_ISO_8859_14
	},
	{
		"ISO-8859-15\0ISO8859-15\0ISO_8859_15\0ISO8859_15\0",
		ONIG_ENCODING_ISO_8859_15
	},
	{
		"ISO-8859-16\0ISO8859-16\0ISO_8859_16\0ISO8859_16\0",
		ONIG_ENCODING_ISO_8859_16
	},
	{
		"ASCII\0US-ASCII\0US_ASCII\0ISO646\0",
		ONIG_ENCODING_ASCII
	},
	/* sentinel: ends iteration over the table */
	{ NULL, ONIG_ENCODING_UNDEF }
};
/* }}} */
/* {{{ php_mb_regex_name2mbctype */
/*
 * Resolve an encoding name to its OnigEncoding.
 *
 * pname is compared case-insensitively (strcasecmp) against every alias of
 * every enc_name_map entry; the code of the first entry with a matching
 * alias is returned.  Returns ONIG_ENCODING_UNDEF when pname is NULL or no
 * alias matches.
 */
OnigEncoding php_mb_regex_name2mbctype(const char *pname)
{
const char *p;
php_mb_regex_enc_name_map_t *mapping;
if (pname == NULL) {
return ONIG_ENCODING_UNDEF;
}
/* the table ends at the entry whose names pointer is NULL */
for (mapping = enc_name_map; mapping->names != NULL; mapping++) {
/* names is a run of NUL-terminated aliases; an empty alias ends the run,
 * so step over each alias plus its terminator */
for (p = mapping->names; *p != '\0'; p += (strlen(p) + 1)) {
if (strcasecmp(p, pname) == 0) {
return mapping->code;
}
}
}
return mbctype;
return ONIG_ENCODING_UNDEF;
}
/* }}} */
/* {{{ php_mbregex_mbctype2name */
const char *php_mb_regex_mbctype2name(php_mb_reg_char_encoding mbctype)
const char *php_mb_regex_mbctype2name(OnigEncoding mbctype)
{
const char *p = NULL;
php_mb_regex_enc_name_map_t *mapping;
if (mbctype == REGCODE_EUCJP) {
p = "EUC-JP";
} else if(mbctype == REGCODE_UTF8) {
p = "UTF-8";
} else if(mbctype == REGCODE_SJIS) {
p = "SJIS";
} else if(mbctype == REGCODE_ASCII) {
p = "ascii";
for (mapping = enc_name_map; mapping->names != NULL; mapping++) {
if (mapping->code == mbctype) {
return mapping->names;
}
}
return p;
return NULL;
}
/* }}} */
@ -162,18 +261,18 @@ const char *php_mb_regex_mbctype2name(php_mb_reg_char_encoding mbctype)
* regex cache
*/
/* {{{ php_mbregex_compile_pattern */
static php_mb_regex_t *php_mbregex_compile_pattern(const char *pattern, int patlen, php_mb_reg_option_type options, php_mb_reg_char_encoding enc, php_mb_reg_syntax_type *syntax TSRMLS_DC)
static php_mb_regex_t *php_mbregex_compile_pattern(const char *pattern, int patlen, OnigOptionType options, OnigEncoding enc, OnigSyntaxType *syntax TSRMLS_DC)
{
int err_code = 0;
int found = 0;
php_mb_regex_t *retval = NULL, **rc = NULL;
php_mb_reg_error_info err_info;
UChar err_str[REG_MAX_ERROR_MESSAGE_LEN];
OnigErrorInfo err_info;
UChar err_str[ONIG_MAX_ERROR_MESSAGE_LEN];
found = zend_hash_find(&MBSTRG(ht_rc), (char *)pattern, patlen+1, (void **) &rc);
if (found == FAILURE || (*rc)->options != options || (*rc)->enc != enc || (*rc)->syntax != syntax) {
if ((err_code = php_mb_regex_new(&retval, (UChar *)pattern, (UChar *)(pattern + patlen), options, enc, syntax, &err_info)) != REG_NORMAL) {
php_mb_regex_error_code_to_str(err_str, err_code, err_info);
if ((err_code = onig_new(&retval, (UChar *)pattern, (UChar *)(pattern + patlen), options, enc, syntax, &err_info)) != ONIG_NORMAL) {
onig_error_code_to_str(err_str, err_code, err_info);
php_error_docref(NULL TSRMLS_CC, E_WARNING, "mbregex compile err: %s", err_str);
retval = NULL;
goto out;
@ -188,14 +287,14 @@ out:
/* }}} */
/* {{{ _php_mb_regex_get_option_string */
static size_t _php_mb_regex_get_option_string(char *str, size_t len, php_mb_reg_option_type option, php_mb_reg_syntax_type *syntax)
static size_t _php_mb_regex_get_option_string(char *str, size_t len, OnigOptionType option, OnigSyntaxType *syntax)
{
size_t len_left = len;
size_t len_req = 0;
char *p = str;
char c;
if ((option & RE_OPTION_IGNORECASE) != 0) {
if ((option & ONIG_OPTION_IGNORECASE) != 0) {
if (len_left > 0) {
--len_left;
*(p++) = 'i';
@ -203,7 +302,7 @@ static size_t _php_mb_regex_get_option_string(char *str, size_t len, php_mb_reg_
++len_req;
}
if ((option & RE_OPTION_EXTENDED) != 0) {
if ((option & ONIG_OPTION_EXTEND) != 0) {
if (len_left > 0) {
--len_left;
*(p++) = 'x';
@ -211,14 +310,15 @@ static size_t _php_mb_regex_get_option_string(char *str, size_t len, php_mb_reg_
++len_req;
}
if ((option & RE_OPTION_POSIXLINE) == RE_OPTION_POSIXLINE) {
if ((option & (ONIG_OPTION_MULTILINE | ONIG_OPTION_SINGLELINE)) ==
(ONIG_OPTION_MULTILINE | ONIG_OPTION_SINGLELINE)) {
if (len_left > 0) {
--len_left;
*(p++) = 'p';
}
++len_req;
} else {
if ((option & RE_OPTION_MULTILINE) != 0) {
if ((option & ONIG_OPTION_MULTILINE) != 0) {
if (len_left > 0) {
--len_left;
*(p++) = 'm';
@ -226,7 +326,7 @@ static size_t _php_mb_regex_get_option_string(char *str, size_t len, php_mb_reg_
++len_req;
}
if ((option & RE_OPTION_SINGLELINE) != 0) {
if ((option & ONIG_OPTION_SINGLELINE) != 0) {
if (len_left > 0) {
--len_left;
*(p++) = 's';
@ -234,14 +334,14 @@ static size_t _php_mb_regex_get_option_string(char *str, size_t len, php_mb_reg_
++len_req;
}
}
if ((option & RE_OPTION_LONGEST) != 0) {
if ((option & ONIG_OPTION_FIND_LONGEST) != 0) {
if (len_left > 0) {
--len_left;
*(p++) = 'l';
}
++len_req;
}
if ((option & REG_OPTION_FIND_NOT_EMPTY) != 0) {
if ((option & ONIG_OPTION_FIND_NOT_EMPTY) != 0) {
if (len_left > 0) {
--len_left;
*(p++) = 'n';
@ -251,21 +351,21 @@ static size_t _php_mb_regex_get_option_string(char *str, size_t len, php_mb_reg_
c = 0;
if (syntax == REG_SYNTAX_JAVA) {
if (syntax == ONIG_SYNTAX_JAVA) {
c = 'j';
} else if (syntax == REG_SYNTAX_GNU_REGEX) {
} else if (syntax == ONIG_SYNTAX_GNU_REGEX) {
c = 'u';
} else if (syntax == REG_SYNTAX_GREP) {
} else if (syntax == ONIG_SYNTAX_GREP) {
c = 'g';
} else if (syntax == REG_SYNTAX_EMACS) {
} else if (syntax == ONIG_SYNTAX_EMACS) {
c = 'c';
} else if (syntax == REG_SYNTAX_RUBY) {
} else if (syntax == ONIG_SYNTAX_RUBY) {
c = 'r';
} else if (syntax == REG_SYNTAX_PERL) {
} else if (syntax == ONIG_SYNTAX_PERL) {
c = 'z';
} else if (syntax == REG_SYNTAX_POSIX_BASIC) {
} else if (syntax == ONIG_SYNTAX_POSIX_BASIC) {
c = 'b';
} else if (syntax == REG_SYNTAX_POSIX_EXTENDED) {
} else if (syntax == ONIG_SYNTAX_POSIX_EXTENDED) {
c = 'd';
}
@ -293,13 +393,13 @@ static size_t _php_mb_regex_get_option_string(char *str, size_t len, php_mb_reg_
/* {{{ _php_mb_regex_init_options */
static void
_php_mb_regex_init_options(const char *parg, int narg, php_mb_reg_option_type *option, php_mb_reg_syntax_type **syntax, int *eval)
_php_mb_regex_init_options(const char *parg, int narg, OnigOptionType *option, OnigSyntaxType **syntax, int *eval)
{
int n;
char c;
int optm = 0;
*syntax = REG_SYNTAX_RUBY;
*syntax = ONIG_SYNTAX_RUBY;
if (parg != NULL) {
n = 0;
@ -307,49 +407,49 @@ _php_mb_regex_init_options(const char *parg, int narg, php_mb_reg_option_type *o
c = parg[n++];
switch (c) {
case 'i':
optm |= RE_OPTION_IGNORECASE;
optm |= ONIG_OPTION_IGNORECASE;
break;
case 'x':
optm |= RE_OPTION_EXTENDED;
optm |= ONIG_OPTION_EXTEND;
break;
case 'm':
optm |= RE_OPTION_MULTILINE;
optm |= ONIG_OPTION_MULTILINE;
break;
case 's':
optm |= RE_OPTION_SINGLELINE;
optm |= ONIG_OPTION_SINGLELINE;
break;
case 'p':
optm |= RE_OPTION_POSIXLINE;
optm |= ONIG_OPTION_MULTILINE | ONIG_OPTION_SINGLELINE;
break;
case 'l':
optm |= RE_OPTION_LONGEST;
optm |= ONIG_OPTION_FIND_LONGEST;
break;
case 'n':
optm |= REG_OPTION_FIND_NOT_EMPTY;
optm |= ONIG_OPTION_FIND_NOT_EMPTY;
break;
case 'j':
*syntax = REG_SYNTAX_JAVA;
*syntax = ONIG_SYNTAX_JAVA;
break;
case 'u':
*syntax = REG_SYNTAX_GNU_REGEX;
*syntax = ONIG_SYNTAX_GNU_REGEX;
break;
case 'g':
*syntax = REG_SYNTAX_GREP;
*syntax = ONIG_SYNTAX_GREP;
break;
case 'c':
*syntax = REG_SYNTAX_EMACS;
*syntax = ONIG_SYNTAX_EMACS;
break;
case 'r':
*syntax = REG_SYNTAX_RUBY;
*syntax = ONIG_SYNTAX_RUBY;
break;
case 'z':
*syntax = REG_SYNTAX_PERL;
*syntax = ONIG_SYNTAX_PERL;
break;
case 'b':
*syntax = REG_SYNTAX_POSIX_BASIC;
*syntax = ONIG_SYNTAX_POSIX_BASIC;
break;
case 'd':
*syntax = REG_SYNTAX_POSIX_EXTENDED;
*syntax = ONIG_SYNTAX_POSIX_EXTENDED;
break;
case 'e':
if (eval != NULL) *eval = 1;
@ -372,7 +472,7 @@ _php_mb_regex_init_options(const char *parg, int narg, php_mb_reg_option_type *o
PHP_FUNCTION(mb_regex_encoding)
{
zval **arg1;
php_mb_reg_char_encoding mbctype;
OnigEncoding mbctype;
if (ZEND_NUM_ARGS() == 0) {
const char *retval = php_mb_regex_mbctype2name(MBSTRG(current_mbctype));
@ -406,8 +506,9 @@ static void _php_mb_regex_ereg_exec(INTERNAL_FUNCTION_PARAMETERS, int icase)
char *string;
int string_len;
php_mb_regex_t *re;
php_mb_reg_region *regs = NULL;
int i, match_len, option, beg, end;
OnigRegion *regs = NULL;
int i, match_len, beg, end;
OnigOptionType options;
char *str;
array = NULL;
@ -416,9 +517,9 @@ static void _php_mb_regex_ereg_exec(INTERNAL_FUNCTION_PARAMETERS, int icase)
RETURN_FALSE;
}
option = MBSTRG(regex_default_options);
options = MBSTRG(regex_default_options);
if (icase) {
option |= RE_OPTION_IGNORECASE;
options |= ONIG_OPTION_IGNORECASE;
}
/* compile the regular expression from the supplied regex */
@ -433,16 +534,16 @@ static void _php_mb_regex_ereg_exec(INTERNAL_FUNCTION_PARAMETERS, int icase)
arg_pattern = &tmp;
/* don't bother doing an extended regex with just a number */
}
re = php_mbregex_compile_pattern(Z_STRVAL_P(arg_pattern), Z_STRLEN_P(arg_pattern), option, MBSTRG(current_mbctype), MBSTRG(regex_default_syntax) TSRMLS_CC);
re = php_mbregex_compile_pattern(Z_STRVAL_P(arg_pattern), Z_STRLEN_P(arg_pattern), options, MBSTRG(current_mbctype), MBSTRG(regex_default_syntax) TSRMLS_CC);
if (re == NULL) {
RETVAL_FALSE;
goto out;
}
regs = php_mb_regex_region_new();
regs = onig_region_new();
/* actually execute the regular expression */
if (php_mb_regex_search(re, (UChar *)string, (UChar *)(string + string_len), string, (UChar *)(string + string_len), regs, 0) < 0) {
if (onig_search(re, (UChar *)string, (UChar *)(string + string_len), string, (UChar *)(string + string_len), regs, 0) < 0) {
RETVAL_FALSE;
goto out;
}
@ -471,7 +572,7 @@ static void _php_mb_regex_ereg_exec(INTERNAL_FUNCTION_PARAMETERS, int icase)
RETVAL_LONG(match_len);
out:
if (regs != NULL) {
php_mb_regex_region_free(regs, 1);
onig_region_free(regs, 1);
}
if (arg_pattern == &tmp) {
zval_dtor(&tmp);
@ -496,7 +597,7 @@ PHP_FUNCTION(mb_eregi)
/* }}} */
/* {{{ _php_mb_regex_ereg_replace_exec */
static void _php_mb_regex_ereg_replace_exec(INTERNAL_FUNCTION_PARAMETERS, int option)
static void _php_mb_regex_ereg_replace_exec(INTERNAL_FUNCTION_PARAMETERS, OnigOptionType options)
{
zval *arg_pattern_zval;
@ -511,8 +612,8 @@ static void _php_mb_regex_ereg_replace_exec(INTERNAL_FUNCTION_PARAMETERS, int op
char *p;
php_mb_regex_t *re;
php_mb_reg_syntax_type *syntax;
php_mb_reg_region *regs = NULL;
OnigSyntaxType *syntax;
OnigRegion *regs = NULL;
smart_str out_buf = { 0 };
smart_str eval_buf = { 0 };
smart_str *pbuf;
@ -547,9 +648,9 @@ static void _php_mb_regex_ereg_replace_exec(INTERNAL_FUNCTION_PARAMETERS, int op
}
if (option_str != NULL) {
_php_mb_regex_init_options(option_str, option_str_len, &option, &syntax, &eval);
_php_mb_regex_init_options(option_str, option_str_len, &options, &syntax, &eval);
} else {
option |= MBSTRG(regex_default_options);
options |= MBSTRG(regex_default_options);
syntax = MBSTRG(regex_default_syntax);
}
}
@ -566,7 +667,7 @@ static void _php_mb_regex_ereg_replace_exec(INTERNAL_FUNCTION_PARAMETERS, int op
arg_pattern_len = 1;
}
/* create regex pattern buffer */
re = php_mbregex_compile_pattern(arg_pattern, arg_pattern_len, option, MBSTRG(current_mbctype), syntax TSRMLS_CC);
re = php_mbregex_compile_pattern(arg_pattern, arg_pattern_len, options, MBSTRG(current_mbctype), syntax TSRMLS_CC);
if (re == NULL) {
RETURN_FALSE;
}
@ -583,12 +684,12 @@ static void _php_mb_regex_ereg_replace_exec(INTERNAL_FUNCTION_PARAMETERS, int op
err = 0;
pos = string;
string_lim = (UChar*)(string + string_len);
regs = php_mb_regex_region_new();
regs = onig_region_new();
while (err >= 0) {
err = php_mb_regex_search(re, (UChar *)string, (UChar *)string_lim, pos, (UChar *)string_lim, regs, 0);
err = onig_search(re, (UChar *)string, (UChar *)string_lim, pos, (UChar *)string_lim, regs, 0);
if (err <= -2) {
UChar err_str[REG_MAX_ERROR_MESSAGE_LEN];
php_mb_regex_error_code_to_str(err_str, err);
UChar err_str[ONIG_MAX_ERROR_MESSAGE_LEN];
onig_error_code_to_str(err_str, err);
php_error_docref(NULL TSRMLS_CC, E_WARNING, "mbregex search failure in php_mbereg_replace_exec(): %s", err_str);
break;
}
@ -651,14 +752,14 @@ static void _php_mb_regex_ereg_replace_exec(INTERNAL_FUNCTION_PARAMETERS, int op
smart_str_appendl(&out_buf, pos, string_lim - pos);
}
}
php_mb_regex_region_free(regs, 0);
onig_region_free(regs, 0);
}
if (description) {
efree(description);
}
if (regs != NULL) {
php_mb_regex_region_free(regs, 1);
onig_region_free(regs, 1);
}
smart_str_free(&eval_buf);
@ -684,7 +785,7 @@ PHP_FUNCTION(mb_ereg_replace)
Case insensitive replace regular expression for multibyte string */
PHP_FUNCTION(mb_eregi_replace)
{
_php_mb_regex_ereg_replace_exec(INTERNAL_FUNCTION_PARAM_PASSTHRU, RE_OPTION_IGNORECASE);
_php_mb_regex_ereg_replace_exec(INTERNAL_FUNCTION_PARAM_PASSTHRU, ONIG_OPTION_IGNORECASE);
}
/* }}} */
@ -695,7 +796,7 @@ PHP_FUNCTION(mb_split)
char *arg_pattern;
int arg_pattern_len;
php_mb_regex_t *re;
php_mb_reg_region *regs = NULL;
OnigRegion *regs = NULL;
char *string;
UChar *pos;
int string_len;
@ -720,10 +821,10 @@ PHP_FUNCTION(mb_split)
pos = (UChar *)string;
err = 0;
regs = php_mb_regex_region_new();
regs = onig_region_new();
/* churn through str, generating array entries as we go */
while ((--count != 0) &&
(err = php_mb_regex_search(re, (UChar *)string, (UChar *)(string + string_len), pos, (UChar *)(string + string_len), regs, 0)) >= 0) {
(err = onig_search(re, (UChar *)string, (UChar *)(string + string_len), pos, (UChar *)(string + string_len), regs, 0)) >= 0) {
if (regs->beg[0] == regs->end[0]) {
php_error_docref(NULL TSRMLS_CC, E_WARNING, "Empty regular expression");
break;
@ -744,15 +845,15 @@ PHP_FUNCTION(mb_split)
if (count < 0) {
count = 0;
}
php_mb_regex_region_free(regs, 0);
onig_region_free(regs, 0);
}
php_mb_regex_region_free(regs, 1);
onig_region_free(regs, 1);
/* see if we encountered an error */
if (err <= -2) {
UChar err_str[REG_MAX_ERROR_MESSAGE_LEN];
php_mb_regex_error_code_to_str(err_str, err);
UChar err_str[ONIG_MAX_ERROR_MESSAGE_LEN];
onig_error_code_to_str(err_str, err);
php_error_docref(NULL TSRMLS_CC, E_WARNING, "mbregex search failure in mbsplit(): %s", err_str);
zval_dtor(return_value);
RETURN_FALSE;
@ -779,7 +880,7 @@ PHP_FUNCTION(mb_ereg_match)
int string_len;
php_mb_regex_t *re;
php_mb_reg_syntax_type *syntax;
OnigSyntaxType *syntax;
int option = 0, err;
{
@ -805,7 +906,7 @@ PHP_FUNCTION(mb_ereg_match)
}
/* match */
err = php_mb_regex_match(re, (UChar *)string, (UChar *)(string + string_len), (UChar *)string, NULL, 0);
err = onig_match(re, (UChar *)string, (UChar *)(string + string_len), (UChar *)string, NULL, 0);
if (err >= 0) {
RETVAL_TRUE;
} else {
@ -822,7 +923,7 @@ _php_mb_regex_ereg_search_exec(INTERNAL_FUNCTION_PARAMETERS, int mode)
zval **arg_pattern, **arg_options;
int n, i, err, pos, len, beg, end, option;
UChar *str;
php_mb_reg_syntax_type *syntax;
OnigSyntaxType *syntax;
option = MBSTRG(regex_default_options);
switch (ZEND_NUM_ARGS()) {
@ -873,17 +974,17 @@ _php_mb_regex_ereg_search_exec(INTERNAL_FUNCTION_PARAMETERS, int mode)
}
if (MBSTRG(search_regs)) {
php_mb_regex_region_free(MBSTRG(search_regs), 1);
onig_region_free(MBSTRG(search_regs), 1);
}
MBSTRG(search_regs) = php_mb_regex_region_new();
MBSTRG(search_regs) = onig_region_new();
err = php_mb_regex_search(MBSTRG(search_re), str, str + len, str + pos, str + len, MBSTRG(search_regs), 0);
if (err == REG_MISMATCH) {
err = onig_search(MBSTRG(search_re), str, str + len, str + pos, str + len, MBSTRG(search_regs), 0);
if (err == ONIG_MISMATCH) {
MBSTRG(search_pos) = len;
RETVAL_FALSE;
} else if (err <= -2) {
UChar err_str[REG_MAX_ERROR_MESSAGE_LEN];
php_mb_regex_error_code_to_str(err_str, err);
UChar err_str[ONIG_MAX_ERROR_MESSAGE_LEN];
onig_error_code_to_str(err_str, err);
php_error_docref(NULL TSRMLS_CC, E_WARNING, "mbregex search failure in mbregex_search(): %s", err_str);
RETVAL_FALSE;
} else {
@ -924,8 +1025,8 @@ _php_mb_regex_ereg_search_exec(INTERNAL_FUNCTION_PARAMETERS, int mode)
}
if (err < 0) {
php_mb_regex_region_free(MBSTRG(search_regs), 1);
MBSTRG(search_regs) = (php_mb_reg_region *)NULL;
onig_region_free(MBSTRG(search_regs), 1);
MBSTRG(search_regs) = (OnigRegion *)NULL;
}
}
/* }}} */
@ -959,7 +1060,7 @@ PHP_FUNCTION(mb_ereg_search_regs)
PHP_FUNCTION(mb_ereg_search_init)
{
zval **arg_str, **arg_pattern, **arg_options;
php_mb_reg_syntax_type *syntax = NULL;
OnigSyntaxType *syntax = NULL;
int option;
option = MBSTRG(regex_default_options);
@ -1008,8 +1109,8 @@ PHP_FUNCTION(mb_ereg_search_init)
MBSTRG(search_pos) = 0;
if (MBSTRG(search_regs) != NULL) {
php_mb_regex_region_free(MBSTRG(search_regs), 1);
MBSTRG(search_regs) = (php_mb_reg_region *) NULL;
onig_region_free(MBSTRG(search_regs), 1);
MBSTRG(search_regs) = (OnigRegion *) NULL;
}
RETURN_TRUE;
@ -1076,7 +1177,7 @@ PHP_FUNCTION(mb_ereg_search_setpos)
/* }}} */
/* {{{ php_mb_regex_set_options */
void php_mb_regex_set_options(php_mb_reg_option_type options, php_mb_reg_syntax_type *syntax, php_mb_reg_option_type *prev_options, php_mb_reg_syntax_type **prev_syntax TSRMLS_DC)
void php_mb_regex_set_options(OnigOptionType options, OnigSyntaxType *syntax, OnigOptionType *prev_options, OnigSyntaxType **prev_syntax TSRMLS_DC)
{
if (prev_options != NULL) {
*prev_options = MBSTRG(regex_default_options);
@ -1093,8 +1194,8 @@ void php_mb_regex_set_options(php_mb_reg_option_type options, php_mb_reg_syntax_
Set or get the default options for mbregex functions */
PHP_FUNCTION(mb_regex_set_options)
{
php_mb_reg_option_type opt;
php_mb_reg_syntax_type *syntax;
OnigOptionType opt;
OnigSyntaxType *syntax;
char *string = NULL;
int string_len;
char buf[16];

View file

@ -29,16 +29,16 @@
/* {{{ PHP_MBREGEX_GLOBALS */
#define PHP_MBREGEX_GLOBALS \
php_mb_reg_char_encoding default_mbctype; \
php_mb_reg_char_encoding current_mbctype; \
OnigEncoding default_mbctype; \
OnigEncoding current_mbctype; \
HashTable ht_rc; \
zval *search_str; \
zval *search_str_val; \
unsigned int search_pos; \
php_mb_regex_t *search_re; \
struct php_mb_re_registers *search_regs; \
int regex_default_options; \
php_mb_reg_syntax_type *regex_default_syntax;
OnigRegion *search_regs; \
OnigOptionType regex_default_options; \
OnigSyntaxType *regex_default_syntax;
/* }}} */
/* {{{ PHP_MBREGEX_FUNCTION_ENTRIES */
@ -83,10 +83,10 @@ PHP_MSHUTDOWN_FUNCTION(mb_regex);
PHP_RINIT_FUNCTION(mb_regex);
PHP_RSHUTDOWN_FUNCTION(mb_regex);
void _php_mb_regex_globals_ctor(zend_mbstring_globals_ptr pglobals TSRMLS_DC);
void php_mb_regex_set_options(php_mb_reg_option_type options, php_mb_reg_syntax_type *syntax, php_mb_reg_option_type *prev_options, php_mb_reg_syntax_type **prev_syntax TSRMLS_DC);
void php_mb_regex_set_options(OnigOptionType options, OnigSyntaxType *syntax, OnigOptionType *prev_options, OnigSyntaxType **prev_syntax TSRMLS_DC);
void _php_mb_regex_globals_dtor(zend_mbstring_globals_ptr pglobals TSRMLS_DC);
php_mb_reg_char_encoding php_mb_regex_name2mbctype(const char *pname);
const char *php_mb_regex_mbctype2name(php_mb_reg_char_encoding mbctype);
OnigEncoding php_mb_regex_name2mbctype(const char *pname);
const char *php_mb_regex_mbctype2name(OnigEncoding mbctype);
PHP_FUNCTION(mb_regex_encoding);
PHP_FUNCTION(mb_ereg);