Patch core for PCRE2 support

RFC https://wiki.php.net/rfc/pcre2-migration
This commit is contained in:
Anatol Belski 2017-10-12 12:48:36 +02:00
parent fd463cfbad
commit a5bc5aed71
186 changed files with 39246 additions and 125222 deletions

1
.gitignore vendored
View file

@ -225,3 +225,4 @@ ext/sqlite3/tests/phpsql*
!ext/fileinfo/libmagic.patch
!ext/mbstring/oniguruma.patch
!scripts/dev/generate-phpt/tests/*.php
!ext/pcre/pcre2lib/config.h

View file

@ -471,11 +471,11 @@ file_replace(struct magic_set *ms, const char *pat, const char *rep)
pcre_cache_entry *pce;
zend_string *res;
zend_string *repl;
int rep_cnt = 0;
size_t rep_cnt = 0;
(void)setlocale(LC_CTYPE, "C");
opts |= PCRE_MULTILINE;
opts |= PCRE2_MULTILINE;
convert_libmagic_pattern(&patt, (char*)pat, strlen(pat), opts);
if ((pce = pcre_get_compiled_regex_cache(Z_STR(patt))) == NULL) {
zval_ptr_dtor(&patt);

View file

@ -412,9 +412,9 @@ flush:
private int
check_fmt(struct magic_set *ms, struct magic *m)
{
pcre *pce;
int re_options, rv = -1;
pcre_extra *re_extra;
pcre2_code *pce;
uint32_t re_options, capture_count;
int rv = -1;
zend_string *pattern;
if (strchr(m->desc, '%') == NULL)
@ -422,10 +422,14 @@ check_fmt(struct magic_set *ms, struct magic *m)
(void)setlocale(LC_CTYPE, "C");
pattern = zend_string_init("~%[-0-9.]*s~", sizeof("~%[-0-9.]*s~") - 1, 0);
if ((pce = pcre_get_compiled_regex(pattern, &re_extra, &re_options)) == NULL) {
if ((pce = pcre_get_compiled_regex(pattern, &capture_count, &re_options)) == NULL) {
rv = -1;
} else {
rv = !pcre_exec(pce, re_extra, m->desc, strlen(m->desc), 0, re_options, NULL, 0);
pcre2_match_data *match_data = php_pcre_create_match_data(capture_count, pce);
if (match_data) {
rv = pcre2_match(pce, m->desc, strlen(m->desc), 0, re_options, match_data, php_pcre_mctx()) > 0;
php_pcre_free_match_data(match_data);
}
}
zend_string_release(pattern);
(void)setlocale(LC_CTYPE, "");
@ -1725,10 +1729,10 @@ convert_libmagic_pattern(zval *pattern, char *val, int len, int options)
}
ZSTR_VAL(t)[j++] = '~';
if (options & PCRE_CASELESS)
if (options & PCRE2_CASELESS)
ZSTR_VAL(t)[j++] = 'i';
if (options & PCRE_MULTILINE)
if (options & PCRE2_MULTILINE)
ZSTR_VAL(t)[j++] = 'm';
ZSTR_VAL(t)[j]='\0';
@ -1901,10 +1905,10 @@ magiccheck(struct magic_set *ms, struct magic *m)
int options = 0;
pcre_cache_entry *pce;
options |= PCRE_MULTILINE;
options |= PCRE2_MULTILINE;
if (m->str_flags & STRING_IGNORE_CASE) {
options |= PCRE_CASELESS;
options |= PCRE2_CASELESS;
}
convert_libmagic_pattern(&pattern, (char *)m->value.s, m->vallen, options);

View file

@ -428,11 +428,10 @@ void php_filter_validate_regexp(PHP_INPUT_FILTER_PARAM_DECL) /* {{{ */
zval *option_val;
zend_string *regexp;
int regexp_set;
pcre *re = NULL;
pcre_extra *pcre_extra = NULL;
int preg_options = 0;
int ovector[3];
int matches;
pcre2_code *re = NULL;
pcre2_match_data *match_data = NULL;
uint32_t preg_options, capture_count;
int rc;
/* Parse options */
FETCH_STR_OPTION(regexp, "regexp");
@ -442,14 +441,19 @@ void php_filter_validate_regexp(PHP_INPUT_FILTER_PARAM_DECL) /* {{{ */
RETURN_VALIDATION_FAILED
}
re = pcre_get_compiled_regex(regexp, &pcre_extra, &preg_options);
re = pcre_get_compiled_regex(regexp, &capture_count, &preg_options);
if (!re) {
RETURN_VALIDATION_FAILED
}
matches = pcre_exec(re, NULL, Z_STRVAL_P(value), (int)Z_STRLEN_P(value), 0, 0, ovector, 3);
match_data = php_pcre_create_match_data(capture_count, re);
if (!match_data) {
RETURN_VALIDATION_FAILED
}
rc = pcre2_match(re, Z_STRVAL_P(value), Z_STRLEN_P(value), 0, preg_options, match_data, php_pcre_mctx());
php_pcre_free_match_data(match_data);
/* 0 means that the vector is too small to hold all the captured substring offsets */
if (matches < 0) {
if (rc < 0) {
RETURN_VALIDATION_FAILED
}
}
@ -599,12 +603,11 @@ void php_filter_validate_email(PHP_INPUT_FILTER_PARAM_DECL) /* {{{ */
* Feel free to use and redistribute this code. But please keep this copyright notice.
*
*/
pcre *re = NULL;
pcre_extra *pcre_extra = NULL;
int preg_options = 0;
int ovector[150]; /* Needs to be a multiple of 3 */
int matches;
pcre2_code *re = NULL;
pcre2_match_data *match_data = NULL;
uint32_t preg_options = 0, capture_count;
zend_string *sregexp;
int rc;
const char regexp0[] = "/^(?!(?:(?:\\x22?\\x5C[\\x00-\\x7E]\\x22?)|(?:\\x22?[^\\x5C\\x22]\\x22?)){255,})(?!(?:(?:\\x22?\\x5C[\\x00-\\x7E]\\x22?)|(?:\\x22?[^\\x5C\\x22]\\x22?)){65,}@)(?:(?:[\\x21\\x23-\\x27\\x2A\\x2B\\x2D\\x2F-\\x39\\x3D\\x3F\\x5E-\\x7E\\pL\\pN]+)|(?:\\x22(?:[\\x01-\\x08\\x0B\\x0C\\x0E-\\x1F\\x21\\x23-\\x5B\\x5D-\\x7F\\pL\\pN]|(?:\\x5C[\\x00-\\x7F]))*\\x22))(?:\\.(?:(?:[\\x21\\x23-\\x27\\x2A\\x2B\\x2D\\x2F-\\x39\\x3D\\x3F\\x5E-\\x7E\\pL\\pN]+)|(?:\\x22(?:[\\x01-\\x08\\x0B\\x0C\\x0E-\\x1F\\x21\\x23-\\x5B\\x5D-\\x7F\\pL\\pN]|(?:\\x5C[\\x00-\\x7F]))*\\x22)))*@(?:(?:(?!.*[^.]{64,})(?:(?:(?:xn--)?[a-z0-9]+(?:-+[a-z0-9]+)*\\.){1,126}){1,}(?:(?:[a-z][a-z0-9]*)|(?:(?:xn--)[a-z0-9]+))(?:-+[a-z0-9]+)*)|(?:\\[(?:(?:IPv6:(?:(?:[a-f0-9]{1,4}(?::[a-f0-9]{1,4}){7})|(?:(?!(?:.*[a-f0-9][:\\]]){7,})(?:[a-f0-9]{1,4}(?::[a-f0-9]{1,4}){0,5})?::(?:[a-f0-9]{1,4}(?::[a-f0-9]{1,4}){0,5})?)))|(?:(?:IPv6:(?:(?:[a-f0-9]{1,4}(?::[a-f0-9]{1,4}){5}:)|(?:(?!(?:.*[a-f0-9]:){5,})(?:[a-f0-9]{1,4}(?::[a-f0-9]{1,4}){0,3})?::(?:[a-f0-9]{1,4}(?::[a-f0-9]{1,4}){0,3}:)?)))?(?:(?:25[0-5])|(?:2[0-4][0-9])|(?:1[0-9]{2})|(?:[1-9]?[0-9]))(?:\\.(?:(?:25[0-5])|(?:2[0-4][0-9])|(?:1[0-9]{2})|(?:[1-9]?[0-9]))){3}))\\]))$/iDu";
const char regexp1[] = "/^(?!(?:(?:\\x22?\\x5C[\\x00-\\x7E]\\x22?)|(?:\\x22?[^\\x5C\\x22]\\x22?)){255,})(?!(?:(?:\\x22?\\x5C[\\x00-\\x7E]\\x22?)|(?:\\x22?[^\\x5C\\x22]\\x22?)){65,}@)(?:(?:[\\x21\\x23-\\x27\\x2A\\x2B\\x2D\\x2F-\\x39\\x3D\\x3F\\x5E-\\x7E]+)|(?:\\x22(?:[\\x01-\\x08\\x0B\\x0C\\x0E-\\x1F\\x21\\x23-\\x5B\\x5D-\\x7F]|(?:\\x5C[\\x00-\\x7F]))*\\x22))(?:\\.(?:(?:[\\x21\\x23-\\x27\\x2A\\x2B\\x2D\\x2F-\\x39\\x3D\\x3F\\x5E-\\x7E]+)|(?:\\x22(?:[\\x01-\\x08\\x0B\\x0C\\x0E-\\x1F\\x21\\x23-\\x5B\\x5D-\\x7F]|(?:\\x5C[\\x00-\\x7F]))*\\x22)))*@(?:(?:(?!.*[^.]{64,})(?:(?:(?:xn--)?[a-z0-9]+(?:-+[a-z0-9]+)*\\.){1,126}){1,}(?:(?:[a-z][a-z0-9]*)|(?:(?:xn--)[a-z0-9]+))(?:-+[a-z0-9]+)*)|(?:\\[(?:(?:IPv6:(?:(?:[a-f0-9]{1,4}(?::[a-f0-9]{1,4}){7})|(?:(?!(?:.*[a-f0-9][:\\]]){7,})(?:[a-f0-9]{1,4}(?::[a-f0-9]{1,4}){0,5})?::(?:[a-f0-9]{1,4}(?::[a-f0-9]{1,4}){0,5})?)))|(?:(?:IPv6:(?:(?:[a-f0-9]{1,4}(?::[a-f0-9]{1,4}){5}:)|(?:(?!(?:.*[a-f0-9]:){5,})(?:[a-f0-9]{1,4}(?::[a-f0-9]{1,4}){0,3})?::(?:[a-f0-9]{1,4}(?::[a-f0-9]{1,4}){0,3}:)?)))?(?:(?:25[0-5])|(?:2[0-4][0-9])|(?:1[0-9]{2})|(?:[1-9]?[0-9]))(?:\\.(?:(?:25[0-5])|(?:2[0-4][0-9])|(?:1[0-9]{2})|(?:[1-9]?[0-9]))){3}))\\]))$/iD";
const char *regexp;
@ -624,16 +627,21 @@ void php_filter_validate_email(PHP_INPUT_FILTER_PARAM_DECL) /* {{{ */
}
sregexp = zend_string_init(regexp, regexp_len, 0);
re = pcre_get_compiled_regex(sregexp, &pcre_extra, &preg_options);
re = pcre_get_compiled_regex(sregexp, &capture_count, &preg_options);
if (!re) {
zend_string_release(sregexp);
RETURN_VALIDATION_FAILED
}
zend_string_release(sregexp);
matches = pcre_exec(re, NULL, Z_STRVAL_P(value), (int)Z_STRLEN_P(value), 0, 0, ovector, 3);
match_data = php_pcre_create_match_data(capture_count, re);
if (!match_data) {
RETURN_VALIDATION_FAILED
}
rc = pcre2_match(re, Z_STRVAL_P(value), Z_STRLEN_P(value), 0, preg_options, match_data, php_pcre_mctx());
php_pcre_free_match_data(match_data);
/* 0 means that the vector is too small to hold all the captured substring offsets */
if (matches < 0) {
if (rc < 0) {
RETURN_VALIDATION_FAILED
}

View file

@ -43,7 +43,7 @@
#define ZEND_BLACKLIST_BLOCK_SIZE 32
struct _zend_regexp_list {
pcre *re;
pcre2_code *re;
zend_regexp_list *next;
};
@ -73,10 +73,12 @@ static void blacklist_report_regexp_error(const char *pcre_error, int pcre_error
static void zend_accel_blacklist_update_regexp(zend_blacklist *blacklist)
{
const char *pcre_error;
int i, pcre_error_offset;
PCRE2_UCHAR pcre_error[256];
int i, errnumber;
PCRE2_SIZE pcre_error_offset;
zend_regexp_list **regexp_list_it, *it;
char regexp[12*1024], *p, *end, *c, *backtrack = NULL;
pcre2_compile_context *cctx = php_pcre_cctx();
if (blacklist->pos == 0) {
/* we have no blacklist to talk about */
@ -177,8 +179,9 @@ static void zend_accel_blacklist_update_regexp(zend_blacklist *blacklist)
}
it->next = NULL;
if ((it->re = pcre_compile(regexp, PCRE_NO_AUTO_CAPTURE, &pcre_error, &pcre_error_offset, 0)) == NULL) {
if ((it->re = pcre2_compile(regexp, PCRE2_ZERO_TERMINATED, PCRE2_NO_AUTO_CAPTURE, &errnumber, &pcre_error_offset, cctx)) == NULL) {
free(it);
pcre2_get_error_message(errnumber, pcre_error, sizeof(pcre_error));
blacklist_report_regexp_error(pcre_error, pcre_error_offset);
return;
}
@ -207,7 +210,7 @@ void zend_accel_blacklist_shutdown(zend_blacklist *blacklist)
if (blacklist->regexp_list) {
zend_regexp_list *temp, *it = blacklist->regexp_list;
while (it) {
pcre_free(it->re);
pcre2_code_free(it->re);
temp = it;
it = it->next;
free(temp);
@ -338,15 +341,24 @@ zend_bool zend_accel_blacklist_is_blacklisted(zend_blacklist *blacklist, char *v
{
int ret = 0;
zend_regexp_list *regexp_list_it = blacklist->regexp_list;
pcre2_match_context *mctx = php_pcre_mctx();
if (regexp_list_it == NULL) {
return 0;
}
while (regexp_list_it != NULL) {
if (pcre_exec(regexp_list_it->re, NULL, verify_path, strlen(verify_path), 0, 0, NULL, 0) >= 0) {
pcre2_match_data *match_data = php_pcre_create_match_data(0, regexp_list_it->re);
if (!match_data) {
/* Alloc failed, but next one could still come through and match. */
continue;
}
int rc = pcre2_match(regexp_list_it->re, verify_path, strlen(verify_path), 0, 0, match_data, mctx);
if (rc >= 0) {
ret = 1;
php_pcre_free_match_data(match_data);
break;
}
php_pcre_free_match_data(match_data);
regexp_list_it = regexp_list_it->next;
}
return ret;

View file

@ -2,14 +2,16 @@
// vim:ft=javascript
EXTENSION("pcre", "php_pcre.c", false /* never shared */,
"-Iext/pcre/pcrelib -DZEND_ENABLE_STATIC_TSRMLS_CACHE=1");
ADD_SOURCES("ext/pcre/pcrelib", "pcre_chartables.c pcre_ucd.c pcre_compile.c pcre_config.c pcre_exec.c pcre_fullinfo.c pcre_get.c pcre_globals.c pcre_maketables.c pcre_newline.c pcre_ord2utf8.c pcre_refcount.c pcre_study.c pcre_tables.c pcre_valid_utf8.c pcre_version.c pcre_xclass.c pcre_jit_compile.c", "pcre");
"-Iext/pcre/pcre2lib -DZEND_ENABLE_STATIC_TSRMLS_CACHE=1");
ADD_SOURCES("ext/pcre/pcre2lib", "pcre2_auto_possess.c pcre2_chartables.c pcre2_compile.c pcre2_config.c pcre2_context.c pcre2_dfa_match.c pcre2_error.c pcre2_jit_compile.c pcre2_maketables.c pcre2_match.c pcre2_match_data.c pcre2_newline.c pcre2_ord2utf.c pcre2_pattern_info.c pcre2_serialize.c pcre2_string_utils.c pcre2_study.c pcre2_substitute.c pcre2_substring.c pcre2_tables.c pcre2_ucd.c pcre2_valid_utf.c pcre2_xclass.c pcre2_find_bracket.c pcre2_convert.c ", "pcre");
ADD_DEF_FILE("ext\\pcre\\php_pcre.def");
AC_DEFINE('HAVE_BUNDLED_PCRE', 1, 'Using bundled PCRE library');
AC_DEFINE('HAVE_PCRE', 1, 'Have PCRE library');
AC_DEFINE('PCRE2_CODE_UNIT_WIDTH', 8, 'Have PCRE library');
AC_DEFINE("PCRE2_STATIC", 1, "");
PHP_PCRE="yes";
PHP_INSTALL_HEADERS("ext/pcre", "php_pcre.h pcrelib/");
PHP_INSTALL_HEADERS("ext/pcre", "php_pcre.h pcre2lib/");
ADD_FLAG("CFLAGS_PCRE", " /D HAVE_CONFIG_H");
ARG_WITH("pcre-jit", "Enable PCRE JIT support", "yes");

View file

@ -14,66 +14,67 @@ PHP_ARG_WITH(pcre-jit,,[ --with-pcre-jit Enable PCRE JIT functionality
if test "$PHP_PCRE_REGEX" != "yes" && test "$PHP_PCRE_REGEX" != "no"; then
AC_MSG_CHECKING([for PCRE headers location])
for i in $PHP_PCRE_REGEX $PHP_PCRE_REGEX/include $PHP_PCRE_REGEX/include/pcre $PHP_PCRE_REGEX/local/include; do
test -f $i/pcre.h && PCRE_INCDIR=$i
test -f $i/pcre2.h && PCRE_INCDIR=$i
done
if test -z "$PCRE_INCDIR"; then
AC_MSG_ERROR([Could not find pcre.h in $PHP_PCRE_REGEX])
AC_MSG_ERROR([Could not find pcre2.h in $PHP_PCRE_REGEX])
fi
AC_MSG_RESULT([$PCRE_INCDIR])
AC_MSG_CHECKING([for PCRE library location])
for j in $PHP_PCRE_REGEX $PHP_PCRE_REGEX/$PHP_LIBDIR; do
test -f $j/libpcre.a || test -f $j/libpcre.$SHLIB_SUFFIX_NAME && PCRE_LIBDIR=$j
test -f $j/libpcre2.a || test -f $j/libpcre2.$SHLIB_SUFFIX_NAME && PCRE_LIBDIR=$j
done
if test -z "$PCRE_LIBDIR" ; then
AC_MSG_ERROR([Could not find libpcre.(a|$SHLIB_SUFFIX_NAME) in $PHP_PCRE_REGEX])
AC_MSG_ERROR([Could not find libpcre2.(a|$SHLIB_SUFFIX_NAME) in $PHP_PCRE_REGEX])
fi
AC_MSG_RESULT([$PCRE_LIBDIR])
changequote({,})
pcre_major=`grep PCRE_MAJOR $PCRE_INCDIR/pcre.h | sed -e 's/[^0-9]//g'`
pcre_minor=`grep PCRE_MINOR $PCRE_INCDIR/pcre.h | sed -e 's/[^0-9]//g'`
pcre_major=`grep PCRE2_MAJOR $PCRE_INCDIR/pcre2.h | sed -e 's/[^0-9]//g'`
pcre_minor=`grep PCRE2_MINOR $PCRE_INCDIR/pcre2.h | sed -e 's/[^0-9]//g'`
changequote([,])
pcre_minor_length=`echo "$pcre_minor" | wc -c | sed -e 's/[^0-9]//g'`
if test "$pcre_minor_length" -eq 2 ; then
pcre_minor="$pcre_minor"0
fi
pcre_version=$pcre_major$pcre_minor
if test "$pcre_version" -lt 660; then
AC_MSG_ERROR([The PCRE extension requires PCRE library version >= 6.6])
if test "$pcre_version" -lt 1030; then
AC_MSG_ERROR([The PCRE extension requires PCRE library version >= 10.30])
fi
PHP_CHECK_LIBRARY(pcre, pcre_jit_exec,
PHP_CHECK_LIBRARY(pcre2, pcre2_jit_exec,
[
AC_DEFINE(HAVE_PCRE_JIT_SUPPORT, 1, [ ])
],[
],[
-L$PCRE_LIBDIR
])
PHP_ADD_LIBRARY_WITH_PATH(pcre, $PCRE_LIBDIR)
PHP_ADD_LIBRARY_WITH_PATH(pcre2, $PCRE_LIBDIR)
AC_DEFINE(HAVE_PCRE, 1, [ ])
AC_DEFINE(PCRE2_CODE_UNIT_WIDTH, 8, [ ])
PHP_ADD_INCLUDE($PCRE_INCDIR)
PHP_NEW_EXTENSION(pcre, php_pcre.c, no,, -DZEND_ENABLE_STATIC_TSRMLS_CACHE=1)
PHP_INSTALL_HEADERS([ext/pcre], [php_pcre.h])
else
AC_MSG_CHECKING([for PCRE library to use])
AC_MSG_RESULT([bundled])
pcrelib_sources="pcrelib/pcre_chartables.c pcrelib/pcre_ucd.c \
pcrelib/pcre_compile.c pcrelib/pcre_config.c pcrelib/pcre_exec.c \
pcrelib/pcre_fullinfo.c pcrelib/pcre_get.c pcrelib/pcre_globals.c \
pcrelib/pcre_maketables.c pcrelib/pcre_newline.c \
pcrelib/pcre_ord2utf8.c pcrelib/pcre_refcount.c pcrelib/pcre_study.c \
pcrelib/pcre_tables.c pcrelib/pcre_valid_utf8.c \
pcrelib/pcre_version.c pcrelib/pcre_xclass.c \
pcrelib/pcre_jit_compile.c"
PHP_PCRE_CFLAGS="-DHAVE_CONFIG_H -I@ext_srcdir@/pcrelib -DZEND_ENABLE_STATIC_TSRMLS_CACHE=1"
pcrelib_sources="pcre2lib/pcre2_auto_possess.c pcre2lib/pcre2_chartables.c pcre2lib/pcre2_compile.c \
pcre2lib/pcre2_config.c pcre2lib/pcre2_context.c pcre2lib/pcre2_dfa_match.c pcre2lib/pcre2_error.c \
pcre2lib/pcre2_jit_compile.c pcre2lib/pcre2_maketables.c pcre2lib/pcre2_match.c pcre2lib/pcre2_match_data.c \
pcre2lib/pcre2_newline.c pcre2lib/pcre2_ord2utf.c pcre2lib/pcre2_pattern_info.c pcre2lib/pcre2_serialize.c \
pcre2lib/pcre2_string_utils.c pcre2lib/pcre2_study.c pcre2lib/pcre2_substitute.c pcre2lib/pcre2_substring.c \
pcre2lib/pcre2_tables.c pcre2lib/pcre2_ucd.c pcre2lib/pcre2_valid_utf.c pcre2lib/pcre2_xclass.c \
pcre2lib/pcre2_find_bracket.c pcre2lib/pcre2_convert.c"
PHP_PCRE_CFLAGS="-DHAVE_CONFIG_H -I@ext_srcdir@/pcre2lib -DZEND_ENABLE_STATIC_TSRMLS_CACHE=1"
PHP_NEW_EXTENSION(pcre, $pcrelib_sources php_pcre.c, no,,$PHP_PCRE_CFLAGS)
PHP_ADD_BUILD_DIR($ext_builddir/pcrelib)
PHP_INSTALL_HEADERS([ext/pcre], [php_pcre.h pcrelib/])
PHP_ADD_BUILD_DIR($ext_builddir/pcre2lib)
PHP_INSTALL_HEADERS([ext/pcre], [php_pcre.h pcre2lib/])
AC_DEFINE(HAVE_BUNDLED_PCRE, 1, [ ])
AC_DEFINE(PCRE2_CODE_UNIT_WIDTH, 8, [ ])
if test "$PHP_PCRE_REGEX" != "no"; then
AC_MSG_CHECKING([whether to enable PCRE JIT functionality])

105
ext/pcre/pcre2lib/config.h Normal file
View file

@ -0,0 +1,105 @@
#include <php_compat.h>
#ifdef PHP_WIN32
# include <config.w32.h>
#else
# include <php_config.h>
#endif
#undef PACKAGE_NAME
#undef PACKAGE_VERSION
#undef PACKAGE_TARNAME
#undef PACKAGE_STRING
#define SUPPORT_UNICODE 1
#define SUPPORT_PCRE2_8 1
#if defined(__GNUC__) && __GNUC__ >= 4
# ifdef __cplusplus
# define PCRE2_EXP_DECL extern "C" __attribute__ ((visibility("default")))
# else
# define PCRE2_EXP_DECL extern __attribute__ ((visibility("default")))
# endif
# define PCRE2_EXP_DEFN __attribute__ ((visibility("default")))
#endif
/* Define to any value for valgrind support to find invalid memory reads. */
#if HAVE_PCRE_VALGRIND_SUPPORT
#define SUPPORT_VALGRIND 1
#endif
/* Define to any value to enable support for Just-In-Time compiling. */
#if HAVE_PCRE_JIT_SUPPORT
#define SUPPORT_JIT
#endif
/* This limits the amount of memory that pcre2_match() may use while matching
a pattern. The value is in kilobytes. */
#ifndef HEAP_LIMIT
#define HEAP_LIMIT 20000000
#endif
/* The value of PARENS_NEST_LIMIT specifies the maximum depth of nested
parentheses (of any kind) in a pattern. This limits the amount of system
stack that is used while compiling a pattern. */
#ifndef PARENS_NEST_LIMIT
#define PARENS_NEST_LIMIT 250
#endif
/* The value of MATCH_LIMIT determines the default number of times the
pcre2_match() function can record a backtrack position during a single
matching attempt. There is a runtime interface for setting a different
limit. The limit exists in order to catch runaway regular expressions that
take for ever to determine that they do not match. The default is set very
large so that it does not accidentally catch legitimate cases. */
#ifndef MATCH_LIMIT
#define MATCH_LIMIT 10000000
#endif
/* The above limit applies to all backtracks, whether or not they are nested.
In some environments it is desirable to limit the nesting of backtracking
(that is, the depth of tree that is searched) more strictly, in order to
restrict the maximum amount of heap memory that is used. The value of
MATCH_LIMIT_DEPTH provides this facility. To have any useful effect, it
must be less than the value of MATCH_LIMIT. The default is to use the same
value as MATCH_LIMIT. There is a runtime method for setting a different
limit. */
#ifndef MATCH_LIMIT_DEPTH
#define MATCH_LIMIT_DEPTH MATCH_LIMIT
#endif
/* This limit is parameterized just in case anybody ever wants to change it.
Care must be taken if it is increased, because it guards against integer
overflow caused by enormously large patterns. */
#ifndef MAX_NAME_COUNT
#define MAX_NAME_COUNT 10000
#endif
/* This limit is parameterized just in case anybody ever wants to change it.
Care must be taken if it is increased, because it guards against integer
overflow caused by enormously large patterns. */
#ifndef MAX_NAME_SIZE
#define MAX_NAME_SIZE 32
#endif
/* Defining NEVER_BACKSLASH_C locks out the use of \C in all patterns. */
/* #undef NEVER_BACKSLASH_C */
/* The value of NEWLINE_DEFAULT determines the default newline character
sequence. PCRE2 client programs can override this by selecting other values
at run time. The valid values are 1 (CR), 2 (LF), 3 (CRLF), 4 (ANY), 5
(ANYCRLF), and 6 (NUL). */
#ifndef NEWLINE_DEFAULT
#define NEWLINE_DEFAULT 2
#endif
/* The value of LINK_SIZE determines the number of bytes used to store links
as offsets within the compiled regex. The default is 2, which allows for
compiled patterns up to 64K long. This covers the vast majority of cases.
However, PCRE2 can also be compiled to use 3 or 4 bytes instead. This
allows for longer patterns in extreme cases. */
#ifndef LINK_SIZE
#define LINK_SIZE 2
#endif

View file

@ -6,7 +6,8 @@
and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel
Copyright (c) 1997-2012 University of Cambridge
Original API code Copyright (c) 1997-2012 University of Cambridge
New API code Copyright (c) 2016 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
@ -39,9 +40,9 @@ POSSIBILITY OF SUCH DAMAGE.
/* This is a freestanding support program to generate a file containing
character tables for PCRE. The tables are built according to the current
locale. Now that pcre_maketables is a function visible to the outside world, we
make use of its code from here in order to be consistent. */
character tables for PCRE2. The tables are built according to the current
locale using the pcre2_maketables() function, which is part of the PCRE2 API.
*/
#ifdef HAVE_CONFIG_H
#include "config.h"
@ -52,11 +53,11 @@ make use of its code from here in order to be consistent. */
#include <string.h>
#include <locale.h>
#include "pcre_internal.h"
#define DFTABLES /* pcre_maketables.c notices this */
#include "pcre_maketables.c"
#define PCRE2_CODE_UNIT_WIDTH 0 /* Must be set, but not relevant here */
#include "pcre2_internal.h"
#define DFTABLES /* pcre2_maketables.c notices this */
#include "pcre2_maketables.c"
int main(int argc, char **argv)
{
@ -81,7 +82,7 @@ if (argc < i + 1)
return 1;
}
tables = pcre_maketables();
tables = maketables();
base_of_tables = tables;
f = fopen(argv[i], "wb");
@ -100,15 +101,8 @@ fprintf(f,
"*************************************************/\n\n"
"/* This file was automatically written by the dftables auxiliary\n"
"program. It contains character tables that are used when no external\n"
"tables are passed to PCRE by the application that calls it. The tables\n"
"are used only for characters whose code values are less than 256.\n\n");
fprintf(f,
"The following #includes are present because without them gcc 4.x may remove\n"
"the array definition from the final binary if PCRE is built into a static\n"
"library and dead code stripping is activated. This leads to link errors.\n"
"Pulling in the header ensures that the array gets flagged as \"someone\n"
"outside this compilation unit might reference this\" and so it will always\n"
"be supplied to the linker. */\n\n");
"tables are passed to PCRE2 by the application that calls it. The tables\n"
"are used only for characters whose code values are less than 256. */\n\n");
/* Force config.h in z/OS */
@ -120,14 +114,22 @@ fprintf(f,
"#endif\n\n");
#endif
fprintf(f,
"/* The following #includes are present because without them gcc 4.x may remove\n"
"the array definition from the final binary if PCRE2 is built into a static\n"
"library and dead code stripping is activated. This leads to link errors.\n"
"Pulling in the header ensures that the array gets flagged as \"someone\n"
"outside this compilation unit might reference this\" and so it will always\n"
"be supplied to the linker. */\n\n");
fprintf(f,
"#ifdef HAVE_CONFIG_H\n"
"#include \"config.h\"\n"
"#endif\n\n"
"#include \"pcre_internal.h\"\n\n");
"#include \"pcre2_internal.h\"\n\n");
fprintf(f,
"const pcre_uint8 PRIV(default_tables)[] = {\n\n"
"const uint8_t PRIV(default_tables)[] = {\n\n"
"/* This table is a lower casing table. */\n\n");
fprintf(f, " ");
@ -202,7 +204,7 @@ if (isprint(i-8)) fprintf(f, " %c -", i-8);
else fprintf(f, "%3d-", i-8);
if (isprint(i-1)) fprintf(f, " %c ", i-1);
else fprintf(f, "%3d", i-1);
fprintf(f, " */\n\n/* End of pcre_chartables.c */\n");
fprintf(f, " */\n\n/* End of pcre2_chartables.c */\n");
fclose(f);
free((void *)base_of_tables);

850
ext/pcre/pcre2lib/pcre2.h Normal file
View file

@ -0,0 +1,850 @@
/*************************************************
* Perl-Compatible Regular Expressions *
*************************************************/
/* This is the public header file for the PCRE library, second API, to be
#included by applications that call PCRE2 functions.
Copyright (c) 2016-2017 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the University of Cambridge nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
-----------------------------------------------------------------------------
*/
#ifndef PCRE2_H_IDEMPOTENT_GUARD
#define PCRE2_H_IDEMPOTENT_GUARD
/* The current PCRE version information. */
#define PCRE2_MAJOR 10
#define PCRE2_MINOR 30
#define PCRE2_PRERELEASE
#define PCRE2_DATE 2017-08-14
/* When an application links to a PCRE DLL in Windows, the symbols that are
imported have to be identified as such. When building PCRE2, the appropriate
export setting is defined in pcre2_internal.h, which includes this file. So we
don't change existing definitions of PCRE2_EXP_DECL. */
#if defined(_WIN32) && !defined(PCRE2_STATIC)
# ifndef PCRE2_EXP_DECL
# define PCRE2_EXP_DECL extern __declspec(dllimport)
# endif
#endif
/* By default, we use the standard "extern" declarations. */
#ifndef PCRE2_EXP_DECL
# ifdef __cplusplus
# define PCRE2_EXP_DECL extern "C"
# else
# define PCRE2_EXP_DECL extern
# endif
#endif
/* When compiling with the MSVC compiler, it is sometimes necessary to include
a "calling convention" before exported function names. (This is secondhand
information; I know nothing about MSVC myself). For example, something like
void __cdecl function(....)
might be needed. In order so make this easy, all the exported functions have
PCRE2_CALL_CONVENTION just before their names. It is rarely needed; if not
set, we ensure here that it has no effect. */
#ifndef PCRE2_CALL_CONVENTION
#define PCRE2_CALL_CONVENTION
#endif
/* Have to include limits.h, stdlib.h and stdint.h to ensure that size_t and
uint8_t, UCHAR_MAX, etc are defined. */
#include <limits.h>
#include <stdlib.h>
#include <stdint.h>
/* Allow for C++ users compiling this directly. */
#ifdef __cplusplus
extern "C" {
#endif
/* The following option bits can be passed to pcre2_compile(), pcre2_match(),
or pcre2_dfa_match(). PCRE2_NO_UTF_CHECK affects only the function to which it
is passed. Put these bits at the most significant end of the options word so
others can be added next to them */
#define PCRE2_ANCHORED 0x80000000u
#define PCRE2_NO_UTF_CHECK 0x40000000u
#define PCRE2_ENDANCHORED 0x20000000u
/* The following option bits can be passed only to pcre2_compile(). However,
they may affect compilation, JIT compilation, and/or interpretive execution.
The following tags indicate which:
C alters what is compiled by pcre2_compile()
J alters what is compiled by pcre2_jit_compile()
M is inspected during pcre2_match() execution
D is inspected during pcre2_dfa_match() execution
*/
#define PCRE2_ALLOW_EMPTY_CLASS 0x00000001u /* C */
#define PCRE2_ALT_BSUX 0x00000002u /* C */
#define PCRE2_AUTO_CALLOUT 0x00000004u /* C */
#define PCRE2_CASELESS 0x00000008u /* C */
#define PCRE2_DOLLAR_ENDONLY 0x00000010u /* J M D */
#define PCRE2_DOTALL 0x00000020u /* C */
#define PCRE2_DUPNAMES 0x00000040u /* C */
#define PCRE2_EXTENDED 0x00000080u /* C */
#define PCRE2_FIRSTLINE 0x00000100u /* J M D */
#define PCRE2_MATCH_UNSET_BACKREF 0x00000200u /* C J M */
#define PCRE2_MULTILINE 0x00000400u /* C */
#define PCRE2_NEVER_UCP 0x00000800u /* C */
#define PCRE2_NEVER_UTF 0x00001000u /* C */
#define PCRE2_NO_AUTO_CAPTURE 0x00002000u /* C */
#define PCRE2_NO_AUTO_POSSESS 0x00004000u /* C */
#define PCRE2_NO_DOTSTAR_ANCHOR 0x00008000u /* C */
#define PCRE2_NO_START_OPTIMIZE 0x00010000u /* J M D */
#define PCRE2_UCP 0x00020000u /* C J M D */
#define PCRE2_UNGREEDY 0x00040000u /* C */
#define PCRE2_UTF 0x00080000u /* C J M D */
#define PCRE2_NEVER_BACKSLASH_C 0x00100000u /* C */
#define PCRE2_ALT_CIRCUMFLEX 0x00200000u /* J M D */
#define PCRE2_ALT_VERBNAMES 0x00400000u /* C */
#define PCRE2_USE_OFFSET_LIMIT 0x00800000u /* J M D */
#define PCRE2_EXTENDED_MORE 0x01000000u /* C */
#define PCRE2_LITERAL 0x02000000u /* C */
/* An additional compile options word is available in the compile context. */
#define PCRE2_EXTRA_ALLOW_SURROGATE_ESCAPES 0x00000001u /* C */
#define PCRE2_EXTRA_BAD_ESCAPE_IS_LITERAL 0x00000002u /* C */
#define PCRE2_EXTRA_MATCH_WORD 0x00000004u /* C */
#define PCRE2_EXTRA_MATCH_LINE 0x00000008u /* C */
/* These are for pcre2_jit_compile(). */
#define PCRE2_JIT_COMPLETE 0x00000001u /* For full matching */
#define PCRE2_JIT_PARTIAL_SOFT 0x00000002u
#define PCRE2_JIT_PARTIAL_HARD 0x00000004u
/* These are for pcre2_match(), pcre2_dfa_match(), and pcre2_jit_match(). Note
that PCRE2_ANCHORED and PCRE2_NO_UTF_CHECK can also be passed to these
functions (though pcre2_jit_match() ignores the latter since it bypasses all
sanity checks). */
#define PCRE2_NOTBOL 0x00000001u
#define PCRE2_NOTEOL 0x00000002u
#define PCRE2_NOTEMPTY 0x00000004u /* ) These two must be kept */
#define PCRE2_NOTEMPTY_ATSTART 0x00000008u /* ) adjacent to each other. */
#define PCRE2_PARTIAL_SOFT 0x00000010u
#define PCRE2_PARTIAL_HARD 0x00000020u
/* These are additional options for pcre2_dfa_match(). */
#define PCRE2_DFA_RESTART 0x00000040u
#define PCRE2_DFA_SHORTEST 0x00000080u
/* These are additional options for pcre2_substitute(), which passes any others
through to pcre2_match(). */
#define PCRE2_SUBSTITUTE_GLOBAL 0x00000100u
#define PCRE2_SUBSTITUTE_EXTENDED 0x00000200u
#define PCRE2_SUBSTITUTE_UNSET_EMPTY 0x00000400u
#define PCRE2_SUBSTITUTE_UNKNOWN_UNSET 0x00000800u
#define PCRE2_SUBSTITUTE_OVERFLOW_LENGTH 0x00001000u
/* A further option for pcre2_match(), not allowed for pcre2_dfa_match(),
ignored for pcre2_jit_match(). */
#define PCRE2_NO_JIT 0x00002000u
/* Options for pcre2_pattern_convert(). */
#define PCRE2_CONVERT_UTF 0x00000001u
#define PCRE2_CONVERT_NO_UTF_CHECK 0x00000002u
#define PCRE2_CONVERT_POSIX_BASIC 0x00000004u
#define PCRE2_CONVERT_POSIX_EXTENDED 0x00000008u
#define PCRE2_CONVERT_GLOB 0x00000010u
#define PCRE2_CONVERT_GLOB_NO_WILD_SEPARATOR 0x00000030u
#define PCRE2_CONVERT_GLOB_NO_STARSTAR 0x00000050u
/* Newline and \R settings, for use in compile contexts. The newline values
must be kept in step with values set in config.h and both sets must all be
greater than zero. */
#define PCRE2_NEWLINE_CR 1
#define PCRE2_NEWLINE_LF 2
#define PCRE2_NEWLINE_CRLF 3
#define PCRE2_NEWLINE_ANY 4
#define PCRE2_NEWLINE_ANYCRLF 5
#define PCRE2_NEWLINE_NUL 6
#define PCRE2_BSR_UNICODE 1
#define PCRE2_BSR_ANYCRLF 2
/* Error codes: no match and partial match are "expected" errors. */
#define PCRE2_ERROR_NOMATCH (-1)
#define PCRE2_ERROR_PARTIAL (-2)
/* Error codes for UTF-8 validity checks */
#define PCRE2_ERROR_UTF8_ERR1 (-3)
#define PCRE2_ERROR_UTF8_ERR2 (-4)
#define PCRE2_ERROR_UTF8_ERR3 (-5)
#define PCRE2_ERROR_UTF8_ERR4 (-6)
#define PCRE2_ERROR_UTF8_ERR5 (-7)
#define PCRE2_ERROR_UTF8_ERR6 (-8)
#define PCRE2_ERROR_UTF8_ERR7 (-9)
#define PCRE2_ERROR_UTF8_ERR8 (-10)
#define PCRE2_ERROR_UTF8_ERR9 (-11)
#define PCRE2_ERROR_UTF8_ERR10 (-12)
#define PCRE2_ERROR_UTF8_ERR11 (-13)
#define PCRE2_ERROR_UTF8_ERR12 (-14)
#define PCRE2_ERROR_UTF8_ERR13 (-15)
#define PCRE2_ERROR_UTF8_ERR14 (-16)
#define PCRE2_ERROR_UTF8_ERR15 (-17)
#define PCRE2_ERROR_UTF8_ERR16 (-18)
#define PCRE2_ERROR_UTF8_ERR17 (-19)
#define PCRE2_ERROR_UTF8_ERR18 (-20)
#define PCRE2_ERROR_UTF8_ERR19 (-21)
#define PCRE2_ERROR_UTF8_ERR20 (-22)
#define PCRE2_ERROR_UTF8_ERR21 (-23)
/* Error codes for UTF-16 validity checks */
#define PCRE2_ERROR_UTF16_ERR1 (-24)
#define PCRE2_ERROR_UTF16_ERR2 (-25)
#define PCRE2_ERROR_UTF16_ERR3 (-26)
/* Error codes for UTF-32 validity checks */
#define PCRE2_ERROR_UTF32_ERR1 (-27)
#define PCRE2_ERROR_UTF32_ERR2 (-28)
/* Error codes for pcre2[_dfa]_match(), substring extraction functions, context
functions, and serializing functions. They are in numerical order. Originally
they were in alphabetical order too, but now that PCRE2 is released, the
numbers must not be changed. */
#define PCRE2_ERROR_BADDATA (-29)
#define PCRE2_ERROR_MIXEDTABLES (-30) /* Name was changed */
#define PCRE2_ERROR_BADMAGIC (-31)
#define PCRE2_ERROR_BADMODE (-32)
#define PCRE2_ERROR_BADOFFSET (-33)
#define PCRE2_ERROR_BADOPTION (-34)
#define PCRE2_ERROR_BADREPLACEMENT (-35)
#define PCRE2_ERROR_BADUTFOFFSET (-36)
#define PCRE2_ERROR_CALLOUT (-37) /* Never used by PCRE2 itself */
#define PCRE2_ERROR_DFA_BADRESTART (-38)
#define PCRE2_ERROR_DFA_RECURSE (-39)
#define PCRE2_ERROR_DFA_UCOND (-40)
#define PCRE2_ERROR_DFA_UFUNC (-41)
#define PCRE2_ERROR_DFA_UITEM (-42)
#define PCRE2_ERROR_DFA_WSSIZE (-43)
#define PCRE2_ERROR_INTERNAL (-44)
#define PCRE2_ERROR_JIT_BADOPTION (-45)
#define PCRE2_ERROR_JIT_STACKLIMIT (-46)
#define PCRE2_ERROR_MATCHLIMIT (-47)
#define PCRE2_ERROR_NOMEMORY (-48)
#define PCRE2_ERROR_NOSUBSTRING (-49)
#define PCRE2_ERROR_NOUNIQUESUBSTRING (-50)
#define PCRE2_ERROR_NULL (-51)
#define PCRE2_ERROR_RECURSELOOP (-52)
#define PCRE2_ERROR_DEPTHLIMIT (-53)
#define PCRE2_ERROR_RECURSIONLIMIT (-53) /* Obsolete synonym */
#define PCRE2_ERROR_UNAVAILABLE (-54)
#define PCRE2_ERROR_UNSET (-55)
#define PCRE2_ERROR_BADOFFSETLIMIT (-56)
#define PCRE2_ERROR_BADREPESCAPE (-57)
#define PCRE2_ERROR_REPMISSINGBRACE (-58)
#define PCRE2_ERROR_BADSUBSTITUTION (-59)
#define PCRE2_ERROR_BADSUBSPATTERN (-60)
#define PCRE2_ERROR_TOOMANYREPLACE (-61)
#define PCRE2_ERROR_BADSERIALIZEDDATA (-62)
#define PCRE2_ERROR_HEAPLIMIT (-63)
#define PCRE2_ERROR_CONVERT_SYNTAX (-64)
/* Request types for pcre2_pattern_info() */
#define PCRE2_INFO_ALLOPTIONS 0
#define PCRE2_INFO_ARGOPTIONS 1
#define PCRE2_INFO_BACKREFMAX 2
#define PCRE2_INFO_BSR 3
#define PCRE2_INFO_CAPTURECOUNT 4
#define PCRE2_INFO_FIRSTCODEUNIT 5
#define PCRE2_INFO_FIRSTCODETYPE 6
#define PCRE2_INFO_FIRSTBITMAP 7
#define PCRE2_INFO_HASCRORLF 8
#define PCRE2_INFO_JCHANGED 9
#define PCRE2_INFO_JITSIZE 10
#define PCRE2_INFO_LASTCODEUNIT 11
#define PCRE2_INFO_LASTCODETYPE 12
#define PCRE2_INFO_MATCHEMPTY 13
#define PCRE2_INFO_MATCHLIMIT 14
#define PCRE2_INFO_MAXLOOKBEHIND 15
#define PCRE2_INFO_MINLENGTH 16
#define PCRE2_INFO_NAMECOUNT 17
#define PCRE2_INFO_NAMEENTRYSIZE 18
#define PCRE2_INFO_NAMETABLE 19
#define PCRE2_INFO_NEWLINE 20
#define PCRE2_INFO_DEPTHLIMIT 21
#define PCRE2_INFO_RECURSIONLIMIT 21 /* Obsolete synonym */
#define PCRE2_INFO_SIZE 22
#define PCRE2_INFO_HASBACKSLASHC 23
#define PCRE2_INFO_FRAMESIZE 24
#define PCRE2_INFO_HEAPLIMIT 25
/* Request types for pcre2_config(). */
#define PCRE2_CONFIG_BSR 0
#define PCRE2_CONFIG_JIT 1
#define PCRE2_CONFIG_JITTARGET 2
#define PCRE2_CONFIG_LINKSIZE 3
#define PCRE2_CONFIG_MATCHLIMIT 4
#define PCRE2_CONFIG_NEWLINE 5
#define PCRE2_CONFIG_PARENSLIMIT 6
#define PCRE2_CONFIG_DEPTHLIMIT 7
#define PCRE2_CONFIG_RECURSIONLIMIT 7 /* Obsolete synonym */
#define PCRE2_CONFIG_STACKRECURSE 8 /* Obsolete */
#define PCRE2_CONFIG_UNICODE 9
#define PCRE2_CONFIG_UNICODE_VERSION 10
#define PCRE2_CONFIG_VERSION 11
#define PCRE2_CONFIG_HEAPLIMIT 12
/* Types for code units in patterns and subject strings. */
typedef uint8_t PCRE2_UCHAR8;
typedef uint16_t PCRE2_UCHAR16;
typedef uint32_t PCRE2_UCHAR32;
typedef const PCRE2_UCHAR8 *PCRE2_SPTR8;
typedef const PCRE2_UCHAR16 *PCRE2_SPTR16;
typedef const PCRE2_UCHAR32 *PCRE2_SPTR32;
/* The PCRE2_SIZE type is used for all string lengths and offsets in PCRE2,
including pattern offsets for errors and subject offsets after a match. We
define special values to indicate zero-terminated strings and unset offsets in
the offset vector (ovector). */
#define PCRE2_SIZE size_t
#define PCRE2_SIZE_MAX SIZE_MAX
#define PCRE2_ZERO_TERMINATED (~(PCRE2_SIZE)0)
#define PCRE2_UNSET (~(PCRE2_SIZE)0)
/* Generic types for opaque structures and JIT callback functions. These
declarations are defined in a macro that is expanded for each width later. */
#define PCRE2_TYPES_LIST \
struct pcre2_real_general_context; \
typedef struct pcre2_real_general_context pcre2_general_context; \
\
struct pcre2_real_compile_context; \
typedef struct pcre2_real_compile_context pcre2_compile_context; \
\
struct pcre2_real_match_context; \
typedef struct pcre2_real_match_context pcre2_match_context; \
\
struct pcre2_real_convert_context; \
typedef struct pcre2_real_convert_context pcre2_convert_context; \
\
struct pcre2_real_code; \
typedef struct pcre2_real_code pcre2_code; \
\
struct pcre2_real_match_data; \
typedef struct pcre2_real_match_data pcre2_match_data; \
\
struct pcre2_real_jit_stack; \
typedef struct pcre2_real_jit_stack pcre2_jit_stack; \
\
typedef pcre2_jit_stack *(*pcre2_jit_callback)(void *);
/* The structure for passing out data via the pcre_callout_function. We use a
structure so that new fields can be added on the end in future versions,
without changing the API of the function, thereby allowing old clients to work
without modification. Define the generic version in a macro; the width-specific
versions are generated from this macro below. */
#define PCRE2_STRUCTURE_LIST \
typedef struct pcre2_callout_block { \
uint32_t version; /* Identifies version of block */ \
/* ------------------------ Version 0 ------------------------------- */ \
uint32_t callout_number; /* Number compiled into pattern */ \
uint32_t capture_top; /* Max current capture */ \
uint32_t capture_last; /* Most recently closed capture */ \
PCRE2_SIZE *offset_vector; /* The offset vector */ \
PCRE2_SPTR mark; /* Pointer to current mark or NULL */ \
PCRE2_SPTR subject; /* The subject being matched */ \
PCRE2_SIZE subject_length; /* The length of the subject */ \
PCRE2_SIZE start_match; /* Offset to start of this match attempt */ \
PCRE2_SIZE current_position; /* Where we currently are in the subject */ \
PCRE2_SIZE pattern_position; /* Offset to next item in the pattern */ \
PCRE2_SIZE next_item_length; /* Length of next item in the pattern */ \
/* ------------------- Added for Version 1 -------------------------- */ \
PCRE2_SIZE callout_string_offset; /* Offset to string within pattern */ \
PCRE2_SIZE callout_string_length; /* Length of string compiled into pattern */ \
PCRE2_SPTR callout_string; /* String compiled into pattern */ \
/* ------------------------------------------------------------------ */ \
} pcre2_callout_block; \
\
typedef struct pcre2_callout_enumerate_block { \
uint32_t version; /* Identifies version of block */ \
/* ------------------------ Version 0 ------------------------------- */ \
PCRE2_SIZE pattern_position; /* Offset to next item in the pattern */ \
PCRE2_SIZE next_item_length; /* Length of next item in the pattern */ \
uint32_t callout_number; /* Number compiled into pattern */ \
PCRE2_SIZE callout_string_offset; /* Offset to string within pattern */ \
PCRE2_SIZE callout_string_length; /* Length of string compiled into pattern */ \
PCRE2_SPTR callout_string; /* String compiled into pattern */ \
/* ------------------------------------------------------------------ */ \
} pcre2_callout_enumerate_block;
/* List the generic forms of all other functions in macros, which will be
expanded for each width below. Start with functions that give general
information. */
#define PCRE2_GENERAL_INFO_FUNCTIONS \
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION pcre2_config(uint32_t, void *);
/* Functions for manipulating contexts. */
#define PCRE2_GENERAL_CONTEXT_FUNCTIONS \
PCRE2_EXP_DECL pcre2_general_context PCRE2_CALL_CONVENTION \
*pcre2_general_context_copy(pcre2_general_context *); \
PCRE2_EXP_DECL pcre2_general_context PCRE2_CALL_CONVENTION \
*pcre2_general_context_create(void *(*)(PCRE2_SIZE, void *), \
void (*)(void *, void *), void *); \
PCRE2_EXP_DECL void PCRE2_CALL_CONVENTION \
pcre2_general_context_free(pcre2_general_context *);
#define PCRE2_COMPILE_CONTEXT_FUNCTIONS \
PCRE2_EXP_DECL pcre2_compile_context PCRE2_CALL_CONVENTION \
*pcre2_compile_context_copy(pcre2_compile_context *); \
PCRE2_EXP_DECL pcre2_compile_context PCRE2_CALL_CONVENTION \
*pcre2_compile_context_create(pcre2_general_context *);\
PCRE2_EXP_DECL void PCRE2_CALL_CONVENTION \
pcre2_compile_context_free(pcre2_compile_context *); \
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
pcre2_set_bsr(pcre2_compile_context *, uint32_t); \
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
pcre2_set_character_tables(pcre2_compile_context *, const unsigned char *); \
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
pcre2_set_compile_extra_options(pcre2_compile_context *, uint32_t); \
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
pcre2_set_max_pattern_length(pcre2_compile_context *, PCRE2_SIZE); \
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
pcre2_set_newline(pcre2_compile_context *, uint32_t); \
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
pcre2_set_parens_nest_limit(pcre2_compile_context *, uint32_t); \
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
pcre2_set_compile_recursion_guard(pcre2_compile_context *, \
int (*)(uint32_t, void *), void *);
#define PCRE2_MATCH_CONTEXT_FUNCTIONS \
PCRE2_EXP_DECL pcre2_match_context PCRE2_CALL_CONVENTION \
*pcre2_match_context_copy(pcre2_match_context *); \
PCRE2_EXP_DECL pcre2_match_context PCRE2_CALL_CONVENTION \
*pcre2_match_context_create(pcre2_general_context *); \
PCRE2_EXP_DECL void PCRE2_CALL_CONVENTION \
pcre2_match_context_free(pcre2_match_context *); \
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
pcre2_set_callout(pcre2_match_context *, \
int (*)(pcre2_callout_block *, void *), void *); \
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
pcre2_set_depth_limit(pcre2_match_context *, uint32_t); \
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
pcre2_set_heap_limit(pcre2_match_context *, uint32_t); \
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
pcre2_set_match_limit(pcre2_match_context *, uint32_t); \
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
pcre2_set_offset_limit(pcre2_match_context *, PCRE2_SIZE); \
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
pcre2_set_recursion_limit(pcre2_match_context *, uint32_t); \
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
pcre2_set_recursion_memory_management(pcre2_match_context *, \
void *(*)(PCRE2_SIZE, void *), void (*)(void *, void *), void *);
#define PCRE2_CONVERT_CONTEXT_FUNCTIONS \
PCRE2_EXP_DECL pcre2_convert_context PCRE2_CALL_CONVENTION \
*pcre2_convert_context_copy(pcre2_convert_context *); \
PCRE2_EXP_DECL pcre2_convert_context PCRE2_CALL_CONVENTION \
*pcre2_convert_context_create(pcre2_general_context *); \
PCRE2_EXP_DECL void PCRE2_CALL_CONVENTION \
pcre2_convert_context_free(pcre2_convert_context *); \
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
pcre2_set_glob_escape(pcre2_convert_context *, uint32_t); \
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
pcre2_set_glob_separator(pcre2_convert_context *, uint32_t);
/* Functions concerned with compiling a pattern to PCRE internal code. */
#define PCRE2_COMPILE_FUNCTIONS \
PCRE2_EXP_DECL pcre2_code PCRE2_CALL_CONVENTION \
*pcre2_compile(PCRE2_SPTR, PCRE2_SIZE, uint32_t, int *, PCRE2_SIZE *, \
pcre2_compile_context *); \
PCRE2_EXP_DECL void PCRE2_CALL_CONVENTION \
pcre2_code_free(pcre2_code *); \
PCRE2_EXP_DECL pcre2_code PCRE2_CALL_CONVENTION \
*pcre2_code_copy(const pcre2_code *); \
PCRE2_EXP_DECL pcre2_code PCRE2_CALL_CONVENTION \
*pcre2_code_copy_with_tables(const pcre2_code *);
/* Functions that give information about a compiled pattern. */
#define PCRE2_PATTERN_INFO_FUNCTIONS \
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
pcre2_pattern_info(const pcre2_code *, uint32_t, void *); \
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
pcre2_callout_enumerate(const pcre2_code *, \
int (*)(pcre2_callout_enumerate_block *, void *), void *);
/* Functions for running a match and inspecting the result. */
#define PCRE2_MATCH_FUNCTIONS \
PCRE2_EXP_DECL pcre2_match_data PCRE2_CALL_CONVENTION \
*pcre2_match_data_create(uint32_t, pcre2_general_context *); \
PCRE2_EXP_DECL pcre2_match_data PCRE2_CALL_CONVENTION \
*pcre2_match_data_create_from_pattern(const pcre2_code *, \
pcre2_general_context *); \
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
pcre2_dfa_match(const pcre2_code *, PCRE2_SPTR, PCRE2_SIZE, PCRE2_SIZE, \
uint32_t, pcre2_match_data *, pcre2_match_context *, int *, PCRE2_SIZE); \
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
pcre2_match(const pcre2_code *, PCRE2_SPTR, PCRE2_SIZE, PCRE2_SIZE, \
uint32_t, pcre2_match_data *, pcre2_match_context *); \
PCRE2_EXP_DECL void PCRE2_CALL_CONVENTION \
pcre2_match_data_free(pcre2_match_data *); \
PCRE2_EXP_DECL PCRE2_SPTR PCRE2_CALL_CONVENTION \
pcre2_get_mark(pcre2_match_data *); \
PCRE2_EXP_DECL uint32_t PCRE2_CALL_CONVENTION \
pcre2_get_ovector_count(pcre2_match_data *); \
PCRE2_EXP_DECL PCRE2_SIZE PCRE2_CALL_CONVENTION \
*pcre2_get_ovector_pointer(pcre2_match_data *); \
PCRE2_EXP_DECL PCRE2_SIZE PCRE2_CALL_CONVENTION \
pcre2_get_startchar(pcre2_match_data *);
/* Convenience functions for handling matched substrings. */
#define PCRE2_SUBSTRING_FUNCTIONS \
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
pcre2_substring_copy_byname(pcre2_match_data *, PCRE2_SPTR, PCRE2_UCHAR *, \
PCRE2_SIZE *); \
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
pcre2_substring_copy_bynumber(pcre2_match_data *, uint32_t, PCRE2_UCHAR *, \
PCRE2_SIZE *); \
PCRE2_EXP_DECL void PCRE2_CALL_CONVENTION \
pcre2_substring_free(PCRE2_UCHAR *); \
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
pcre2_substring_get_byname(pcre2_match_data *, PCRE2_SPTR, PCRE2_UCHAR **, \
PCRE2_SIZE *); \
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
pcre2_substring_get_bynumber(pcre2_match_data *, uint32_t, PCRE2_UCHAR **, \
PCRE2_SIZE *); \
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
pcre2_substring_length_byname(pcre2_match_data *, PCRE2_SPTR, PCRE2_SIZE *); \
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
pcre2_substring_length_bynumber(pcre2_match_data *, uint32_t, PCRE2_SIZE *); \
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
pcre2_substring_nametable_scan(const pcre2_code *, PCRE2_SPTR, PCRE2_SPTR *, \
PCRE2_SPTR *); \
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
pcre2_substring_number_from_name(const pcre2_code *, PCRE2_SPTR); \
PCRE2_EXP_DECL void PCRE2_CALL_CONVENTION \
pcre2_substring_list_free(PCRE2_SPTR *); \
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
pcre2_substring_list_get(pcre2_match_data *, PCRE2_UCHAR ***, PCRE2_SIZE **);
/* Functions for serializing / deserializing compiled patterns. */
#define PCRE2_SERIALIZE_FUNCTIONS \
PCRE2_EXP_DECL int32_t PCRE2_CALL_CONVENTION \
pcre2_serialize_encode(const pcre2_code **, int32_t, uint8_t **, \
PCRE2_SIZE *, pcre2_general_context *); \
PCRE2_EXP_DECL int32_t PCRE2_CALL_CONVENTION \
pcre2_serialize_decode(pcre2_code **, int32_t, const uint8_t *, \
pcre2_general_context *); \
PCRE2_EXP_DECL int32_t PCRE2_CALL_CONVENTION \
pcre2_serialize_get_number_of_codes(const uint8_t *); \
PCRE2_EXP_DECL void PCRE2_CALL_CONVENTION \
pcre2_serialize_free(uint8_t *);
/* Convenience function for match + substitute. */
#define PCRE2_SUBSTITUTE_FUNCTION \
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
pcre2_substitute(const pcre2_code *, PCRE2_SPTR, PCRE2_SIZE, PCRE2_SIZE, \
uint32_t, pcre2_match_data *, pcre2_match_context *, PCRE2_SPTR, \
PCRE2_SIZE, PCRE2_UCHAR *, PCRE2_SIZE *);
/* Functions for converting pattern source strings. */
#define PCRE2_CONVERT_FUNCTIONS \
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
pcre2_pattern_convert(PCRE2_SPTR, PCRE2_SIZE, uint32_t, PCRE2_UCHAR **, \
PCRE2_SIZE *, pcre2_convert_context *); \
PCRE2_EXP_DECL void PCRE2_CALL_CONVENTION \
pcre2_converted_pattern_free(PCRE2_UCHAR *);
/* Functions for JIT processing */
#define PCRE2_JIT_FUNCTIONS \
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
pcre2_jit_compile(pcre2_code *, uint32_t); \
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
pcre2_jit_match(const pcre2_code *, PCRE2_SPTR, PCRE2_SIZE, PCRE2_SIZE, \
uint32_t, pcre2_match_data *, pcre2_match_context *); \
PCRE2_EXP_DECL void PCRE2_CALL_CONVENTION \
pcre2_jit_free_unused_memory(pcre2_general_context *); \
PCRE2_EXP_DECL pcre2_jit_stack PCRE2_CALL_CONVENTION \
*pcre2_jit_stack_create(PCRE2_SIZE, PCRE2_SIZE, pcre2_general_context *); \
PCRE2_EXP_DECL void PCRE2_CALL_CONVENTION \
pcre2_jit_stack_assign(pcre2_match_context *, pcre2_jit_callback, void *); \
PCRE2_EXP_DECL void PCRE2_CALL_CONVENTION \
pcre2_jit_stack_free(pcre2_jit_stack *);
/* Other miscellaneous functions. */
#define PCRE2_OTHER_FUNCTIONS \
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
pcre2_get_error_message(int, PCRE2_UCHAR *, PCRE2_SIZE); \
PCRE2_EXP_DECL const uint8_t PCRE2_CALL_CONVENTION \
*pcre2_maketables(pcre2_general_context *); \
/* Define macros that generate width-specific names from generic versions. The
three-level macro scheme is necessary to get the macros expanded when we want
them to be. First we get the width from PCRE2_LOCAL_WIDTH, which is used for
generating three versions of everything below. After that, PCRE2_SUFFIX will be
re-defined to use PCRE2_CODE_UNIT_WIDTH, for use when macros such as
pcre2_compile are called by application code. */
#define PCRE2_JOIN(a,b) a ## b
#define PCRE2_GLUE(a,b) PCRE2_JOIN(a,b)
#define PCRE2_SUFFIX(a) PCRE2_GLUE(a,PCRE2_LOCAL_WIDTH)
/* Data types */
#define PCRE2_UCHAR PCRE2_SUFFIX(PCRE2_UCHAR)
#define PCRE2_SPTR PCRE2_SUFFIX(PCRE2_SPTR)
#define pcre2_code PCRE2_SUFFIX(pcre2_code_)
#define pcre2_jit_callback PCRE2_SUFFIX(pcre2_jit_callback_)
#define pcre2_jit_stack PCRE2_SUFFIX(pcre2_jit_stack_)
#define pcre2_real_code PCRE2_SUFFIX(pcre2_real_code_)
#define pcre2_real_general_context PCRE2_SUFFIX(pcre2_real_general_context_)
#define pcre2_real_compile_context PCRE2_SUFFIX(pcre2_real_compile_context_)
#define pcre2_real_convert_context PCRE2_SUFFIX(pcre2_real_convert_context_)
#define pcre2_real_match_context PCRE2_SUFFIX(pcre2_real_match_context_)
#define pcre2_real_jit_stack PCRE2_SUFFIX(pcre2_real_jit_stack_)
#define pcre2_real_match_data PCRE2_SUFFIX(pcre2_real_match_data_)
/* Data blocks */
#define pcre2_callout_block PCRE2_SUFFIX(pcre2_callout_block_)
#define pcre2_callout_enumerate_block PCRE2_SUFFIX(pcre2_callout_enumerate_block_)
#define pcre2_general_context PCRE2_SUFFIX(pcre2_general_context_)
#define pcre2_compile_context PCRE2_SUFFIX(pcre2_compile_context_)
#define pcre2_convert_context PCRE2_SUFFIX(pcre2_convert_context_)
#define pcre2_match_context PCRE2_SUFFIX(pcre2_match_context_)
#define pcre2_match_data PCRE2_SUFFIX(pcre2_match_data_)
/* Functions: the complete list in alphabetical order */
#define pcre2_callout_enumerate PCRE2_SUFFIX(pcre2_callout_enumerate_)
#define pcre2_code_copy PCRE2_SUFFIX(pcre2_code_copy_)
#define pcre2_code_copy_with_tables PCRE2_SUFFIX(pcre2_code_copy_with_tables_)
#define pcre2_code_free PCRE2_SUFFIX(pcre2_code_free_)
#define pcre2_compile PCRE2_SUFFIX(pcre2_compile_)
#define pcre2_compile_context_copy PCRE2_SUFFIX(pcre2_compile_context_copy_)
#define pcre2_compile_context_create PCRE2_SUFFIX(pcre2_compile_context_create_)
#define pcre2_compile_context_free PCRE2_SUFFIX(pcre2_compile_context_free_)
#define pcre2_config PCRE2_SUFFIX(pcre2_config_)
#define pcre2_convert_context_copy PCRE2_SUFFIX(pcre2_convert_context_copy_)
#define pcre2_convert_context_create PCRE2_SUFFIX(pcre2_convert_context_create_)
#define pcre2_convert_context_free PCRE2_SUFFIX(pcre2_convert_context_free_)
#define pcre2_converted_pattern_free PCRE2_SUFFIX(pcre2_converted_pattern_free_)
#define pcre2_dfa_match PCRE2_SUFFIX(pcre2_dfa_match_)
#define pcre2_general_context_copy PCRE2_SUFFIX(pcre2_general_context_copy_)
#define pcre2_general_context_create PCRE2_SUFFIX(pcre2_general_context_create_)
#define pcre2_general_context_free PCRE2_SUFFIX(pcre2_general_context_free_)
#define pcre2_get_error_message PCRE2_SUFFIX(pcre2_get_error_message_)
#define pcre2_get_mark PCRE2_SUFFIX(pcre2_get_mark_)
#define pcre2_get_ovector_pointer PCRE2_SUFFIX(pcre2_get_ovector_pointer_)
#define pcre2_get_ovector_count PCRE2_SUFFIX(pcre2_get_ovector_count_)
#define pcre2_get_startchar PCRE2_SUFFIX(pcre2_get_startchar_)
#define pcre2_jit_compile PCRE2_SUFFIX(pcre2_jit_compile_)
#define pcre2_jit_match PCRE2_SUFFIX(pcre2_jit_match_)
#define pcre2_jit_free_unused_memory PCRE2_SUFFIX(pcre2_jit_free_unused_memory_)
#define pcre2_jit_stack_assign PCRE2_SUFFIX(pcre2_jit_stack_assign_)
#define pcre2_jit_stack_create PCRE2_SUFFIX(pcre2_jit_stack_create_)
#define pcre2_jit_stack_free PCRE2_SUFFIX(pcre2_jit_stack_free_)
#define pcre2_maketables PCRE2_SUFFIX(pcre2_maketables_)
#define pcre2_match PCRE2_SUFFIX(pcre2_match_)
#define pcre2_match_context_copy PCRE2_SUFFIX(pcre2_match_context_copy_)
#define pcre2_match_context_create PCRE2_SUFFIX(pcre2_match_context_create_)
#define pcre2_match_context_free PCRE2_SUFFIX(pcre2_match_context_free_)
#define pcre2_match_data_create PCRE2_SUFFIX(pcre2_match_data_create_)
#define pcre2_match_data_create_from_pattern PCRE2_SUFFIX(pcre2_match_data_create_from_pattern_)
#define pcre2_match_data_free PCRE2_SUFFIX(pcre2_match_data_free_)
#define pcre2_pattern_convert PCRE2_SUFFIX(pcre2_pattern_convert_)
#define pcre2_pattern_info PCRE2_SUFFIX(pcre2_pattern_info_)
#define pcre2_serialize_decode PCRE2_SUFFIX(pcre2_serialize_decode_)
#define pcre2_serialize_encode PCRE2_SUFFIX(pcre2_serialize_encode_)
#define pcre2_serialize_free PCRE2_SUFFIX(pcre2_serialize_free_)
#define pcre2_serialize_get_number_of_codes PCRE2_SUFFIX(pcre2_serialize_get_number_of_codes_)
#define pcre2_set_bsr PCRE2_SUFFIX(pcre2_set_bsr_)
#define pcre2_set_callout PCRE2_SUFFIX(pcre2_set_callout_)
#define pcre2_set_character_tables PCRE2_SUFFIX(pcre2_set_character_tables_)
#define pcre2_set_compile_extra_options PCRE2_SUFFIX(pcre2_set_compile_extra_options_)
#define pcre2_set_compile_recursion_guard PCRE2_SUFFIX(pcre2_set_compile_recursion_guard_)
#define pcre2_set_depth_limit PCRE2_SUFFIX(pcre2_set_depth_limit_)
#define pcre2_set_glob_escape PCRE2_SUFFIX(pcre2_set_glob_escape_)
#define pcre2_set_glob_separator PCRE2_SUFFIX(pcre2_set_glob_separator_)
#define pcre2_set_heap_limit PCRE2_SUFFIX(pcre2_set_heap_limit_)
#define pcre2_set_match_limit PCRE2_SUFFIX(pcre2_set_match_limit_)
#define pcre2_set_max_pattern_length PCRE2_SUFFIX(pcre2_set_max_pattern_length_)
#define pcre2_set_newline PCRE2_SUFFIX(pcre2_set_newline_)
#define pcre2_set_parens_nest_limit PCRE2_SUFFIX(pcre2_set_parens_nest_limit_)
#define pcre2_set_offset_limit PCRE2_SUFFIX(pcre2_set_offset_limit_)
#define pcre2_substitute PCRE2_SUFFIX(pcre2_substitute_)
#define pcre2_substring_copy_byname PCRE2_SUFFIX(pcre2_substring_copy_byname_)
#define pcre2_substring_copy_bynumber PCRE2_SUFFIX(pcre2_substring_copy_bynumber_)
#define pcre2_substring_free PCRE2_SUFFIX(pcre2_substring_free_)
#define pcre2_substring_get_byname PCRE2_SUFFIX(pcre2_substring_get_byname_)
#define pcre2_substring_get_bynumber PCRE2_SUFFIX(pcre2_substring_get_bynumber_)
#define pcre2_substring_length_byname PCRE2_SUFFIX(pcre2_substring_length_byname_)
#define pcre2_substring_length_bynumber PCRE2_SUFFIX(pcre2_substring_length_bynumber_)
#define pcre2_substring_list_get PCRE2_SUFFIX(pcre2_substring_list_get_)
#define pcre2_substring_list_free PCRE2_SUFFIX(pcre2_substring_list_free_)
#define pcre2_substring_nametable_scan PCRE2_SUFFIX(pcre2_substring_nametable_scan_)
#define pcre2_substring_number_from_name PCRE2_SUFFIX(pcre2_substring_number_from_name_)
/* Keep this old function name for backwards compatibility */
#define pcre2_set_recursion_limit PCRE2_SUFFIX(pcre2_set_recursion_limit_)
/* Keep this obsolete function for backwards compatibility: it is now a noop. */
#define pcre2_set_recursion_memory_management PCRE2_SUFFIX(pcre2_set_recursion_memory_management_)
/* Now generate all three sets of width-specific structures and function
prototypes. */
#define PCRE2_TYPES_STRUCTURES_AND_FUNCTIONS \
PCRE2_TYPES_LIST \
PCRE2_STRUCTURE_LIST \
PCRE2_GENERAL_INFO_FUNCTIONS \
PCRE2_GENERAL_CONTEXT_FUNCTIONS \
PCRE2_COMPILE_CONTEXT_FUNCTIONS \
PCRE2_CONVERT_CONTEXT_FUNCTIONS \
PCRE2_CONVERT_FUNCTIONS \
PCRE2_MATCH_CONTEXT_FUNCTIONS \
PCRE2_COMPILE_FUNCTIONS \
PCRE2_PATTERN_INFO_FUNCTIONS \
PCRE2_MATCH_FUNCTIONS \
PCRE2_SUBSTRING_FUNCTIONS \
PCRE2_SERIALIZE_FUNCTIONS \
PCRE2_SUBSTITUTE_FUNCTION \
PCRE2_JIT_FUNCTIONS \
PCRE2_OTHER_FUNCTIONS
#define PCRE2_LOCAL_WIDTH 8
PCRE2_TYPES_STRUCTURES_AND_FUNCTIONS
#undef PCRE2_LOCAL_WIDTH
#define PCRE2_LOCAL_WIDTH 16
PCRE2_TYPES_STRUCTURES_AND_FUNCTIONS
#undef PCRE2_LOCAL_WIDTH
#define PCRE2_LOCAL_WIDTH 32
PCRE2_TYPES_STRUCTURES_AND_FUNCTIONS
#undef PCRE2_LOCAL_WIDTH
/* Undefine the list macros; they are no longer needed. */
#undef PCRE2_TYPES_LIST
#undef PCRE2_STRUCTURE_LIST
#undef PCRE2_GENERAL_INFO_FUNCTIONS
#undef PCRE2_GENERAL_CONTEXT_FUNCTIONS
#undef PCRE2_COMPILE_CONTEXT_FUNCTIONS
#undef PCRE2_CONVERT_CONTEXT_FUNCTIONS
#undef PCRE2_MATCH_CONTEXT_FUNCTIONS
#undef PCRE2_COMPILE_FUNCTIONS
#undef PCRE2_PATTERN_INFO_FUNCTIONS
#undef PCRE2_MATCH_FUNCTIONS
#undef PCRE2_SUBSTRING_FUNCTIONS
#undef PCRE2_SERIALIZE_FUNCTIONS
#undef PCRE2_SUBSTITUTE_FUNCTION
#undef PCRE2_JIT_FUNCTIONS
#undef PCRE2_OTHER_FUNCTIONS
#undef PCRE2_TYPES_STRUCTURES_AND_FUNCTIONS
/* PCRE2_CODE_UNIT_WIDTH must be defined. If it is 8, 16, or 32, redefine
PCRE2_SUFFIX to use it. If it is 0, undefine the other macros and make
PCRE2_SUFFIX a no-op. Otherwise, generate an error. */
#undef PCRE2_SUFFIX
#ifndef PCRE2_CODE_UNIT_WIDTH
#error PCRE2_CODE_UNIT_WIDTH must be defined before including pcre2.h.
#error Use 8, 16, or 32; or 0 for a multi-width application.
#else /* PCRE2_CODE_UNIT_WIDTH is defined */
#if PCRE2_CODE_UNIT_WIDTH == 8 || \
PCRE2_CODE_UNIT_WIDTH == 16 || \
PCRE2_CODE_UNIT_WIDTH == 32
#define PCRE2_SUFFIX(a) PCRE2_GLUE(a, PCRE2_CODE_UNIT_WIDTH)
#elif PCRE2_CODE_UNIT_WIDTH == 0
#undef PCRE2_JOIN
#undef PCRE2_GLUE
#define PCRE2_SUFFIX(a) a
#else
#error PCRE2_CODE_UNIT_WIDTH must be 0, 8, 16, or 32.
#endif
#endif /* PCRE2_CODE_UNIT_WIDTH is defined */
#ifdef __cplusplus
} /* extern "C" */
#endif
#endif /* PCRE2_H_IDEMPOTENT_GUARD */
/* End of pcre2.h */

File diff suppressed because it is too large Load diff

View file

@ -3,28 +3,30 @@
*************************************************/
/* This file contains character tables that are used when no external tables
are passed to PCRE by the application that calls it. The tables are used only
are passed to PCRE2 by the application that calls it. The tables are used only
for characters whose code values are less than 256.
This is a default version of the tables that assumes ASCII encoding. A program
called dftables (which is distributed with PCRE) can be used to build
called dftables (which is distributed with PCRE2) can be used to build
alternative versions of this file. This is necessary if you are running in an
EBCDIC environment, or if you want to default to a different encoding, for
example ISO-8859-1. When dftables is run, it creates these tables in the
current locale. If PCRE is configured with --enable-rebuild-chartables, this
current locale. If PCRE2 is configured with --enable-rebuild-chartables, this
happens automatically.
The following #includes are present because without them gcc 4.x may remove the
array definition from the final binary if PCRE is built into a static library
array definition from the final binary if PCRE2 is built into a static library
and dead code stripping is activated. This leads to link errors. Pulling in the
header ensures that the array gets flagged as "someone outside this compilation
unit might reference this" and so it will always be supplied to the linker. */
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
#include "pcre_internal.h"
#include "pcre2_internal.h"
const pcre_uint8 PRIV(default_tables)[] = {
const uint8_t PRIV(default_tables)[] = {
/* This table is a lower casing table. */
@ -193,4 +195,4 @@ graph, print, punct, and cntrl. Other classes are built from combinations. */
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 240-247 */
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00};/* 248-255 */
/* End of pcre_chartables.c */
/* End of pcre2_chartables.c */

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,222 @@
/*************************************************
* Perl-Compatible Regular Expressions *
*************************************************/
/* PCRE is a library of functions to support regular expressions whose syntax
and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel
Original API code Copyright (c) 1997-2012 University of Cambridge
New API code Copyright (c) 2016-2017 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the University of Cambridge nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
-----------------------------------------------------------------------------
*/
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
/* Save the configured link size, which is in bytes. In 16-bit and 32-bit modes
its value gets changed by pcre2_internal.h to be in code units. */
static int configured_link_size = LINK_SIZE;
#include "pcre2_internal.h"
/* These macros are the standard way of turning unquoted text into C strings.
They allow macros like PCRE2_MAJOR to be defined without quotes, which is
convenient for user programs that want to test their values. */
#define STRING(a) # a
#define XSTRING(s) STRING(s)
/*************************************************
* Return info about what features are configured *
*************************************************/
/* If where is NULL, the length of memory required is returned.
Arguments:
what what information is required
where where to put the information
Returns: 0 if a numerical value is returned
>= 0 if a string value
PCRE2_ERROR_BADOPTION if "where" not recognized
or JIT target requested when JIT not enabled
*/
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
pcre2_config(uint32_t what, void *where)
{
if (where == NULL) /* Requests a length */
{
switch(what)
{
default:
return PCRE2_ERROR_BADOPTION;
case PCRE2_CONFIG_BSR:
case PCRE2_CONFIG_HEAPLIMIT:
case PCRE2_CONFIG_JIT:
case PCRE2_CONFIG_LINKSIZE:
case PCRE2_CONFIG_MATCHLIMIT:
case PCRE2_CONFIG_DEPTHLIMIT:
case PCRE2_CONFIG_NEWLINE:
case PCRE2_CONFIG_PARENSLIMIT:
case PCRE2_CONFIG_STACKRECURSE: /* Obsolete */
case PCRE2_CONFIG_UNICODE:
return sizeof(uint32_t);
/* These are handled below */
case PCRE2_CONFIG_JITTARGET:
case PCRE2_CONFIG_UNICODE_VERSION:
case PCRE2_CONFIG_VERSION:
break;
}
}
switch (what)
{
default:
return PCRE2_ERROR_BADOPTION;
case PCRE2_CONFIG_BSR:
#ifdef BSR_ANYCRLF
*((uint32_t *)where) = PCRE2_BSR_ANYCRLF;
#else
*((uint32_t *)where) = PCRE2_BSR_UNICODE;
#endif
break;
case PCRE2_CONFIG_HEAPLIMIT:
*((uint32_t *)where) = HEAP_LIMIT;
break;
case PCRE2_CONFIG_JIT:
#ifdef SUPPORT_JIT
*((uint32_t *)where) = 1;
#else
*((uint32_t *)where) = 0;
#endif
break;
case PCRE2_CONFIG_JITTARGET:
#ifdef SUPPORT_JIT
{
const char *v = PRIV(jit_get_target)();
return (int)(1 + ((where == NULL)?
strlen(v) : PRIV(strcpy_c8)((PCRE2_UCHAR *)where, v)));
}
#else
return PCRE2_ERROR_BADOPTION;
#endif
case PCRE2_CONFIG_LINKSIZE:
*((uint32_t *)where) = (uint32_t)configured_link_size;
break;
case PCRE2_CONFIG_MATCHLIMIT:
*((uint32_t *)where) = MATCH_LIMIT;
break;
case PCRE2_CONFIG_DEPTHLIMIT:
*((uint32_t *)where) = MATCH_LIMIT_DEPTH;
break;
case PCRE2_CONFIG_NEWLINE:
*((uint32_t *)where) = NEWLINE_DEFAULT;
break;
case PCRE2_CONFIG_PARENSLIMIT:
*((uint32_t *)where) = PARENS_NEST_LIMIT;
break;
/* This is now obsolete. The stack is no longer used via recursion for
handling backtracking in pcre2_match(). */
case PCRE2_CONFIG_STACKRECURSE:
*((uint32_t *)where) = 0;
break;
case PCRE2_CONFIG_UNICODE_VERSION:
{
#if defined SUPPORT_UNICODE
const char *v = PRIV(unicode_version);
#else
const char *v = "Unicode not supported";
#endif
return (int)(1 + ((where == NULL)?
strlen(v) : PRIV(strcpy_c8)((PCRE2_UCHAR *)where, v)));
}
break;
case PCRE2_CONFIG_UNICODE:
#if defined SUPPORT_UNICODE
*((uint32_t *)where) = 1;
#else
*((uint32_t *)where) = 0;
#endif
break;
/* The hackery in setting "v" below is to cope with the case when
PCRE2_PRERELEASE is set to an empty string (which it is for real releases).
If the second alternative is used in this case, it does not leave a space
before the date. On the other hand, if all four macros are put into a single
XSTRING when PCRE2_PRERELEASE is not empty, an unwanted space is inserted.
There are problems using an "obvious" approach like this:
XSTRING(PCRE2_MAJOR) "." XSTRING(PCRE_MINOR)
XSTRING(PCRE2_PRERELEASE) " " XSTRING(PCRE_DATE)
because, when PCRE2_PRERELEASE is empty, this leads to an attempted expansion
of STRING(). The C standard states: "If (before argument substitution) any
argument consists of no preprocessing tokens, the behavior is undefined." It
turns out the gcc treats this case as a single empty string - which is what
we really want - but Visual C grumbles about the lack of an argument for the
macro. Unfortunately, both are within their rights. As there seems to be no
way to test for a macro's value being empty at compile time, we have to
resort to a runtime test. */
case PCRE2_CONFIG_VERSION:
{
const char *v = (XSTRING(Z PCRE2_PRERELEASE)[1] == 0)?
XSTRING(PCRE2_MAJOR.PCRE2_MINOR PCRE2_DATE) :
XSTRING(PCRE2_MAJOR.PCRE2_MINOR) XSTRING(PCRE2_PRERELEASE PCRE2_DATE);
return (int)(1 + ((where == NULL)?
strlen(v) : PRIV(strcpy_c8)((PCRE2_UCHAR *)where, v)));
}
}
return 0;
}
/* End of pcre2_config.c */

View file

@ -0,0 +1,476 @@
/*************************************************
* Perl-Compatible Regular Expressions *
*************************************************/
/* PCRE is a library of functions to support regular expressions whose syntax
and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel
Original API code Copyright (c) 1997-2012 University of Cambridge
New API code Copyright (c) 2016-2017 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the University of Cambridge nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
-----------------------------------------------------------------------------
*/
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
#include "pcre2_internal.h"
/*************************************************
* Default malloc/free functions *
*************************************************/
/* Ignore the "user data" argument in each case. */
static void *default_malloc(size_t size, void *data)
{
(void)data;
return malloc(size);
}
static void default_free(void *block, void *data)
{
(void)data;
free(block);
}
/*************************************************
* Get a block and save memory control *
*************************************************/
/* This internal function is called to get a block of memory in which the
memory control data is to be stored at the start for future use.
Arguments:
size amount of memory required
memctl pointer to a memctl block or NULL
Returns: pointer to memory or NULL on failure
*/
extern void *
PRIV(memctl_malloc)(size_t size, pcre2_memctl *memctl)
{
pcre2_memctl *newmemctl;
void *yield = (memctl == NULL)? malloc(size) :
memctl->malloc(size, memctl->memory_data);
if (yield == NULL) return NULL;
newmemctl = (pcre2_memctl *)yield;
if (memctl == NULL)
{
newmemctl->malloc = default_malloc;
newmemctl->free = default_free;
newmemctl->memory_data = NULL;
}
else *newmemctl = *memctl;
return yield;
}
/*************************************************
* Create and initialize contexts *
*************************************************/
/* Initializing for compile and match contexts is done in separate, private
functions so that these can be called from functions such as pcre2_compile()
when an external context is not supplied. The initializing functions have an
option to set up default memory management. */
PCRE2_EXP_DEFN pcre2_general_context * PCRE2_CALL_CONVENTION
pcre2_general_context_create(void *(*private_malloc)(size_t, void *),
void (*private_free)(void *, void *), void *memory_data)
{
pcre2_general_context *gcontext;
if (private_malloc == NULL) private_malloc = default_malloc;
if (private_free == NULL) private_free = default_free;
gcontext = private_malloc(sizeof(pcre2_real_general_context), memory_data);
if (gcontext == NULL) return NULL;
gcontext->memctl.malloc = private_malloc;
gcontext->memctl.free = private_free;
gcontext->memctl.memory_data = memory_data;
return gcontext;
}
/* A default compile context is set up to save having to initialize at run time
when no context is supplied to the compile function. */
const pcre2_compile_context PRIV(default_compile_context) = {
{ default_malloc, default_free, NULL }, /* Default memory handling */
NULL, /* Stack guard */
NULL, /* Stack guard data */
PRIV(default_tables), /* Character tables */
PCRE2_UNSET, /* Max pattern length */
BSR_DEFAULT, /* Backslash R default */
NEWLINE_DEFAULT, /* Newline convention */
PARENS_NEST_LIMIT, /* As it says */
0 }; /* Extra options */
/* The create function copies the default into the new memory, but must
override the default memory handling functions if a gcontext was provided. */
PCRE2_EXP_DEFN pcre2_compile_context * PCRE2_CALL_CONVENTION
pcre2_compile_context_create(pcre2_general_context *gcontext)
{
pcre2_compile_context *ccontext = PRIV(memctl_malloc)(
sizeof(pcre2_real_compile_context), (pcre2_memctl *)gcontext);
if (ccontext == NULL) return NULL;
*ccontext = PRIV(default_compile_context);
if (gcontext != NULL)
*((pcre2_memctl *)ccontext) = *((pcre2_memctl *)gcontext);
return ccontext;
}
/* A default match context is set up to save having to initialize at run time
when no context is supplied to a match function. */
const pcre2_match_context PRIV(default_match_context) = {
{ default_malloc, default_free, NULL },
#ifdef SUPPORT_JIT
NULL,
NULL,
#endif
NULL,
NULL,
PCRE2_UNSET, /* Offset limit */
HEAP_LIMIT,
MATCH_LIMIT,
MATCH_LIMIT_DEPTH };
/* The create function copies the default into the new memory, but must
override the default memory handling functions if a gcontext was provided. */
PCRE2_EXP_DEFN pcre2_match_context * PCRE2_CALL_CONVENTION
pcre2_match_context_create(pcre2_general_context *gcontext)
{
pcre2_match_context *mcontext = PRIV(memctl_malloc)(
sizeof(pcre2_real_match_context), (pcre2_memctl *)gcontext);
if (mcontext == NULL) return NULL;
*mcontext = PRIV(default_match_context);
if (gcontext != NULL)
*((pcre2_memctl *)mcontext) = *((pcre2_memctl *)gcontext);
return mcontext;
}
/* A default convert context is set up to save having to initialize at run time
when no context is supplied to the convert function. */
const pcre2_convert_context PRIV(default_convert_context) = {
{ default_malloc, default_free, NULL }, /* Default memory handling */
#ifdef _WIN32
CHAR_BACKSLASH, /* Default path separator */
CHAR_GRAVE_ACCENT /* Default escape character */
#else /* Not Windows */
CHAR_SLASH, /* Default path separator */
CHAR_BACKSLASH /* Default escape character */
#endif
};
/* The create function copies the default into the new memory, but must
override the default memory handling functions if a gcontext was provided. */
PCRE2_EXP_DEFN pcre2_convert_context * PCRE2_CALL_CONVENTION
pcre2_convert_context_create(pcre2_general_context *gcontext)
{
pcre2_convert_context *ccontext = PRIV(memctl_malloc)(
sizeof(pcre2_real_convert_context), (pcre2_memctl *)gcontext);
if (ccontext == NULL) return NULL;
*ccontext = PRIV(default_convert_context);
if (gcontext != NULL)
*((pcre2_memctl *)ccontext) = *((pcre2_memctl *)gcontext);
return ccontext;
}
/*************************************************
* Context copy functions *
*************************************************/
PCRE2_EXP_DEFN pcre2_general_context * PCRE2_CALL_CONVENTION
pcre2_general_context_copy(pcre2_general_context *gcontext)
{
pcre2_general_context *new =
gcontext->memctl.malloc(sizeof(pcre2_real_general_context),
gcontext->memctl.memory_data);
if (new == NULL) return NULL;
memcpy(new, gcontext, sizeof(pcre2_real_general_context));
return new;
}
PCRE2_EXP_DEFN pcre2_compile_context * PCRE2_CALL_CONVENTION
pcre2_compile_context_copy(pcre2_compile_context *ccontext)
{
pcre2_compile_context *new =
ccontext->memctl.malloc(sizeof(pcre2_real_compile_context),
ccontext->memctl.memory_data);
if (new == NULL) return NULL;
memcpy(new, ccontext, sizeof(pcre2_real_compile_context));
return new;
}
PCRE2_EXP_DEFN pcre2_match_context * PCRE2_CALL_CONVENTION
pcre2_match_context_copy(pcre2_match_context *mcontext)
{
pcre2_match_context *new =
mcontext->memctl.malloc(sizeof(pcre2_real_match_context),
mcontext->memctl.memory_data);
if (new == NULL) return NULL;
memcpy(new, mcontext, sizeof(pcre2_real_match_context));
return new;
}
PCRE2_EXP_DEFN pcre2_convert_context * PCRE2_CALL_CONVENTION
pcre2_convert_context_copy(pcre2_convert_context *ccontext)
{
pcre2_convert_context *new =
ccontext->memctl.malloc(sizeof(pcre2_real_convert_context),
ccontext->memctl.memory_data);
if (new == NULL) return NULL;
memcpy(new, ccontext, sizeof(pcre2_real_convert_context));
return new;
}
/*************************************************
* Context free functions *
*************************************************/
PCRE2_EXP_DEFN void PCRE2_CALL_CONVENTION
pcre2_general_context_free(pcre2_general_context *gcontext)
{
if (gcontext != NULL)
gcontext->memctl.free(gcontext, gcontext->memctl.memory_data);
}
PCRE2_EXP_DEFN void PCRE2_CALL_CONVENTION
pcre2_compile_context_free(pcre2_compile_context *ccontext)
{
if (ccontext != NULL)
ccontext->memctl.free(ccontext, ccontext->memctl.memory_data);
}
PCRE2_EXP_DEFN void PCRE2_CALL_CONVENTION
pcre2_match_context_free(pcre2_match_context *mcontext)
{
if (mcontext != NULL)
mcontext->memctl.free(mcontext, mcontext->memctl.memory_data);
}
PCRE2_EXP_DEFN void PCRE2_CALL_CONVENTION
pcre2_convert_context_free(pcre2_convert_context *ccontext)
{
if (ccontext != NULL)
ccontext->memctl.free(ccontext, ccontext->memctl.memory_data);
}
/*************************************************
* Set values in contexts *
*************************************************/
/* All these functions return 0 for success or PCRE2_ERROR_BADDATA if invalid
data is given. Only some of the functions are able to test the validity of the
data. */
/* ------------ Compile context ------------ */
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
pcre2_set_character_tables(pcre2_compile_context *ccontext,
const unsigned char *tables)
{
ccontext->tables = tables;
return 0;
}
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
pcre2_set_bsr(pcre2_compile_context *ccontext, uint32_t value)
{
switch(value)
{
case PCRE2_BSR_ANYCRLF:
case PCRE2_BSR_UNICODE:
ccontext->bsr_convention = value;
return 0;
default:
return PCRE2_ERROR_BADDATA;
}
}
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
pcre2_set_max_pattern_length(pcre2_compile_context *ccontext, PCRE2_SIZE length)
{
ccontext->max_pattern_length = length;
return 0;
}
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
pcre2_set_newline(pcre2_compile_context *ccontext, uint32_t newline)
{
switch(newline)
{
case PCRE2_NEWLINE_CR:
case PCRE2_NEWLINE_LF:
case PCRE2_NEWLINE_CRLF:
case PCRE2_NEWLINE_ANY:
case PCRE2_NEWLINE_ANYCRLF:
case PCRE2_NEWLINE_NUL:
ccontext->newline_convention = newline;
return 0;
default:
return PCRE2_ERROR_BADDATA;
}
}
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
pcre2_set_parens_nest_limit(pcre2_compile_context *ccontext, uint32_t limit)
{
ccontext->parens_nest_limit = limit;
return 0;
}
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
pcre2_set_compile_extra_options(pcre2_compile_context *ccontext, uint32_t options)
{
ccontext->extra_options = options;
return 0;
}
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
pcre2_set_compile_recursion_guard(pcre2_compile_context *ccontext,
int (*guard)(uint32_t, void *), void *user_data)
{
ccontext->stack_guard = guard;
ccontext->stack_guard_data = user_data;
return 0;
}
/* ------------ Match context ------------ */
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
pcre2_set_callout(pcre2_match_context *mcontext,
int (*callout)(pcre2_callout_block *, void *), void *callout_data)
{
mcontext->callout = callout;
mcontext->callout_data = callout_data;
return 0;
}
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
pcre2_set_heap_limit(pcre2_match_context *mcontext, uint32_t limit)
{
mcontext->heap_limit = limit;
return 0;
}
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
pcre2_set_match_limit(pcre2_match_context *mcontext, uint32_t limit)
{
mcontext->match_limit = limit;
return 0;
}
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
pcre2_set_depth_limit(pcre2_match_context *mcontext, uint32_t limit)
{
mcontext->depth_limit = limit;
return 0;
}
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
pcre2_set_offset_limit(pcre2_match_context *mcontext, PCRE2_SIZE limit)
{
mcontext->offset_limit = limit;
return 0;
}
/* This function became obsolete at release 10.30. It is kept as a synonym for
backwards compatibility. */
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
pcre2_set_recursion_limit(pcre2_match_context *mcontext, uint32_t limit)
{
return pcre2_set_depth_limit(mcontext, limit);
}
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
pcre2_set_recursion_memory_management(pcre2_match_context *mcontext,
void *(*mymalloc)(size_t, void *), void (*myfree)(void *, void *),
void *mydata)
{
(void)mcontext;
(void)mymalloc;
(void)myfree;
(void)mydata;
return 0;
}
/* ------------ Convert context ------------ */
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
pcre2_set_glob_separator(pcre2_convert_context *ccontext, uint32_t separator)
{
if (separator != CHAR_SLASH && separator != CHAR_BACKSLASH &&
separator != CHAR_DOT) return PCRE2_ERROR_BADDATA;
ccontext->glob_separator = separator;
return 0;
}
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
pcre2_set_glob_escape(pcre2_convert_context *ccontext, uint32_t escape)
{
if (escape > 255 || (escape != 0 && !ispunct(escape)))
return PCRE2_ERROR_BADDATA;
ccontext->glob_escape = escape;
return 0;
}
/* End of pcre2_context.c */

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,329 @@
/*************************************************
* Perl-Compatible Regular Expressions *
*************************************************/
/* PCRE is a library of functions to support regular expressions whose syntax
and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel
Original API code Copyright (c) 1997-2012 University of Cambridge
New API code Copyright (c) 2016-2017 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the University of Cambridge nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
-----------------------------------------------------------------------------
*/
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
#include "pcre2_internal.h"
#define STRING(a) # a
#define XSTRING(s) STRING(s)
/* The texts of compile-time error messages. Compile-time error numbers start
at COMPILE_ERROR_BASE (100).
This used to be a table of strings, but in order to reduce the number of
relocations needed when a shared library is loaded dynamically, it is now one
long string. We cannot use a table of offsets, because the lengths of inserts
such as XSTRING(MAX_NAME_SIZE) are not known. Instead,
pcre2_get_error_message() counts through to the one it wants - this isn't a
performance issue because these strings are used only when there is an error.
Each substring ends with \0 to insert a null character. This includes the final
substring, so that the whole string ends with \0\0, which can be detected when
counting through. */
static const unsigned char compile_error_texts[] =
"no error\0"
"\\ at end of pattern\0"
"\\c at end of pattern\0"
"unrecognized character follows \\\0"
"numbers out of order in {} quantifier\0"
/* 5 */
"number too big in {} quantifier\0"
"missing terminating ] for character class\0"
"invalid escape sequence in character class\0"
"range out of order in character class\0"
"quantifier does not follow a repeatable item\0"
/* 10 */
"internal error: unexpected repeat\0"
"unrecognized character after (? or (?-\0"
"POSIX named classes are supported only within a class\0"
"POSIX collating elements are not supported\0"
"missing closing parenthesis\0"
/* 15 */
"reference to non-existent subpattern\0"
"pattern passed as NULL\0"
"unrecognised compile-time option bit(s)\0"
"missing ) after (?# comment\0"
"parentheses are too deeply nested\0"
/* 20 */
"regular expression is too large\0"
"failed to allocate heap memory\0"
"unmatched closing parenthesis\0"
"internal error: code overflow\0"
"missing closing parenthesis for condition\0"
/* 25 */
"lookbehind assertion is not fixed length\0"
"a relative value of zero is not allowed\0"
"conditional group contains more than two branches\0"
"assertion expected after (?( or (?(?C)\0"
"digit expected after (?+ or (?-\0"
/* 30 */
"unknown POSIX class name\0"
"internal error in pcre2_study(): should not occur\0"
"this version of PCRE2 does not have Unicode support\0"
"parentheses are too deeply nested (stack check)\0"
"character code point value in \\x{} or \\o{} is too large\0"
/* 35 */
"lookbehind is too complicated\0"
"\\C is not allowed in a lookbehind assertion in UTF-" XSTRING(PCRE2_CODE_UNIT_WIDTH) " mode\0"
"PCRE does not support \\L, \\l, \\N{name}, \\U, or \\u\0"
"number after (?C is greater than 255\0"
"closing parenthesis for (?C expected\0"
/* 40 */
"invalid escape sequence in (*VERB) name\0"
"unrecognized character after (?P\0"
"syntax error in subpattern name (missing terminator)\0"
"two named subpatterns have the same name (PCRE2_DUPNAMES not set)\0"
"group name must start with a non-digit\0"
/* 45 */
"this version of PCRE2 does not have support for \\P, \\p, or \\X\0"
"malformed \\P or \\p sequence\0"
"unknown property name after \\P or \\p\0"
"subpattern name is too long (maximum " XSTRING(MAX_NAME_SIZE) " characters)\0"
"too many named subpatterns (maximum " XSTRING(MAX_NAME_COUNT) ")\0"
/* 50 */
"invalid range in character class\0"
"octal value is greater than \\377 in 8-bit non-UTF-8 mode\0"
"internal error: overran compiling workspace\0"
"internal error: previously-checked referenced subpattern not found\0"
"DEFINE group contains more than one branch\0"
/* 55 */
"missing opening brace after \\o\0"
"internal error: unknown newline setting\0"
"\\g is not followed by a braced, angle-bracketed, or quoted name/number or by a plain number\0"
"(?R (recursive pattern call) must be followed by a closing parenthesis\0"
"an argument is not allowed for (*ACCEPT), (*FAIL), or (*COMMIT)\0"
/* 60 */
"(*VERB) not recognized or malformed\0"
"group number is too big\0"
"subpattern name expected\0"
"internal error: parsed pattern overflow\0"
"non-octal character in \\o{} (closing brace missing?)\0"
/* 65 */
"different names for subpatterns of the same number are not allowed\0"
"(*MARK) must have an argument\0"
"non-hex character in \\x{} (closing brace missing?)\0"
#ifndef EBCDIC
"\\c must be followed by a printable ASCII character\0"
#else
"\\c must be followed by a letter or one of [\\]^_?\0"
#endif
"\\k is not followed by a braced, angle-bracketed, or quoted name\0"
/* 70 */
"internal error: unknown meta code in check_lookbehinds()\0"
"\\N is not supported in a class\0"
"callout string is too long\0"
"disallowed Unicode code point (>= 0xd800 && <= 0xdfff)\0"
"using UTF is disabled by the application\0"
/* 75 */
"using UCP is disabled by the application\0"
"name is too long in (*MARK), (*PRUNE), (*SKIP), or (*THEN)\0"
"character code point value in \\u.... sequence is too large\0"
"digits missing in \\x{} or \\o{}\0"
"syntax error or number too big in (?(VERSION condition\0"
/* 80 */
"internal error: unknown opcode in auto_possessify()\0"
"missing terminating delimiter for callout with string argument\0"
"unrecognized string delimiter follows (?C\0"
"using \\C is disabled by the application\0"
"(?| and/or (?J: or (?x: parentheses are too deeply nested\0"
/* 85 */
"using \\C is disabled in this PCRE2 library\0"
"regular expression is too complicated\0"
"lookbehind assertion is too long\0"
"pattern string is longer than the limit set by the application\0"
"internal error: unknown code in parsed pattern\0"
/* 90 */
"internal error: bad code value in parsed_skip()\0"
"PCRE2_EXTRA_ALLOW_SURROGATE_ESCAPES is not allowed in UTF-16 mode\0"
"invalid option bits with PCRE2_LITERAL\0"
;
/* Match-time and UTF error texts are in the same format. */
static const unsigned char match_error_texts[] =
"no error\0"
"no match\0"
"partial match\0"
"UTF-8 error: 1 byte missing at end\0"
"UTF-8 error: 2 bytes missing at end\0"
/* 5 */
"UTF-8 error: 3 bytes missing at end\0"
"UTF-8 error: 4 bytes missing at end\0"
"UTF-8 error: 5 bytes missing at end\0"
"UTF-8 error: byte 2 top bits not 0x80\0"
"UTF-8 error: byte 3 top bits not 0x80\0"
/* 10 */
"UTF-8 error: byte 4 top bits not 0x80\0"
"UTF-8 error: byte 5 top bits not 0x80\0"
"UTF-8 error: byte 6 top bits not 0x80\0"
"UTF-8 error: 5-byte character is not allowed (RFC 3629)\0"
"UTF-8 error: 6-byte character is not allowed (RFC 3629)\0"
/* 15 */
"UTF-8 error: code points greater than 0x10ffff are not defined\0"
"UTF-8 error: code points 0xd800-0xdfff are not defined\0"
"UTF-8 error: overlong 2-byte sequence\0"
"UTF-8 error: overlong 3-byte sequence\0"
"UTF-8 error: overlong 4-byte sequence\0"
/* 20 */
"UTF-8 error: overlong 5-byte sequence\0"
"UTF-8 error: overlong 6-byte sequence\0"
"UTF-8 error: isolated byte with 0x80 bit set\0"
"UTF-8 error: illegal byte (0xfe or 0xff)\0"
"UTF-16 error: missing low surrogate at end\0"
/* 25 */
"UTF-16 error: invalid low surrogate\0"
"UTF-16 error: isolated low surrogate\0"
"UTF-32 error: code points 0xd800-0xdfff are not defined\0"
"UTF-32 error: code points greater than 0x10ffff are not defined\0"
"bad data value\0"
/* 30 */
"patterns do not all use the same character tables\0"
"magic number missing\0"
"pattern compiled in wrong mode: 8/16/32-bit error\0"
"bad offset value\0"
"bad option value\0"
/* 35 */
"invalid replacement string\0"
"bad offset into UTF string\0"
"callout error code\0" /* Never returned by PCRE2 itself */
"invalid data in workspace for DFA restart\0"
"too much recursion for DFA matching\0"
/* 40 */
"backreference condition or recursion test is not supported for DFA matching\0"
"function is not supported for DFA matching\0"
"pattern contains an item that is not supported for DFA matching\0"
"workspace size exceeded in DFA matching\0"
"internal error - pattern overwritten?\0"
/* 45 */
"bad JIT option\0"
"JIT stack limit reached\0"
"match limit exceeded\0"
"no more memory\0"
"unknown substring\0"
/* 50 */
"non-unique substring name\0"
"NULL argument passed\0"
"nested recursion at the same subject position\0"
"matching depth limit exceeded\0"
"requested value is not available\0"
/* 55 */
"requested value is not set\0"
"offset limit set without PCRE2_USE_OFFSET_LIMIT\0"
"bad escape sequence in replacement string\0"
"expected closing curly bracket in replacement string\0"
"bad substitution in replacement string\0"
/* 60 */
"match with end before start is not supported\0"
"too many replacements (more than INT_MAX)\0"
"bad serialized data\0"
"heap limit exceeded\0"
"invalid syntax\0"
;
/*************************************************
* Return error message *
*************************************************/
/* This function copies an error message into a buffer whose units are of an
appropriate width. Error numbers are positive for compile-time errors, and
negative for match-time errors (except for UTF errors), but the numbers are all
distinct.
Arguments:
enumber error number
buffer where to put the message (zero terminated)
size size of the buffer in code units
Returns: length of message if all is well
negative on error
*/
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
pcre2_get_error_message(int enumber, PCRE2_UCHAR *buffer, PCRE2_SIZE size)
{
const unsigned char *message;
PCRE2_SIZE i;
int n;
if (size == 0) return PCRE2_ERROR_NOMEMORY;
if (enumber >= COMPILE_ERROR_BASE) /* Compile error */
{
message = compile_error_texts;
n = enumber - COMPILE_ERROR_BASE;
}
else if (enumber < 0) /* Match or UTF error */
{
message = match_error_texts;
n = -enumber;
}
else /* Invalid error number */
{
message = (unsigned char *)"\0"; /* Empty message list */
n = 1;
}
for (; n > 0; n--)
{
while (*message++ != CHAR_NUL) {};
if (*message == CHAR_NUL) return PCRE2_ERROR_BADDATA;
}
for (i = 0; *message != 0; i++)
{
if (i >= size - 1)
{
buffer[i] = 0; /* Terminate partial message */
return PCRE2_ERROR_NOMEMORY;
}
buffer[i] = *message++;
}
buffer[i] = 0;
return (int)i;
}
/* End of pcre2_error.c */

View file

@ -0,0 +1,218 @@
/*************************************************
* Perl-Compatible Regular Expressions *
*************************************************/
/* PCRE is a library of functions to support regular expressions whose syntax
and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel
Original API code Copyright (c) 1997-2012 University of Cambridge
New API code Copyright (c) 2016 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the University of Cambridge nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
-----------------------------------------------------------------------------
*/
/* This module contains a single function that scans through a compiled pattern
until it finds a capturing bracket with the given number, or, if the number is
negative, an instance of OP_REVERSE for a lookbehind. The function is called
from pcre2_compile.c and also from pcre2_study.c when finding the minimum
matching length. */
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
#include "pcre2_internal.h"
/*************************************************
* Scan compiled regex for specific bracket *
*************************************************/
/*
Arguments:
code points to start of expression
utf TRUE in UTF mode
number the required bracket number or negative to find a lookbehind
Returns: pointer to the opcode for the bracket, or NULL if not found
*/
PCRE2_SPTR
PRIV(find_bracket)(PCRE2_SPTR code, BOOL utf, int number)
{
for (;;)
{
PCRE2_UCHAR c = *code;
if (c == OP_END) return NULL;
/* XCLASS is used for classes that cannot be represented just by a bit map.
This includes negated single high-valued characters. CALLOUT_STR is used for
callouts with string arguments. In both cases the length in the table is
zero; the actual length is stored in the compiled code. */
if (c == OP_XCLASS) code += GET(code, 1);
else if (c == OP_CALLOUT_STR) code += GET(code, 1 + 2*LINK_SIZE);
/* Handle lookbehind */
else if (c == OP_REVERSE)
{
if (number < 0) return (PCRE2_UCHAR *)code;
code += PRIV(OP_lengths)[c];
}
/* Handle capturing bracket */
else if (c == OP_CBRA || c == OP_SCBRA ||
c == OP_CBRAPOS || c == OP_SCBRAPOS)
{
int n = (int)GET2(code, 1+LINK_SIZE);
if (n == number) return (PCRE2_UCHAR *)code;
code += PRIV(OP_lengths)[c];
}
/* Otherwise, we can get the item's length from the table, except that for
repeated character types, we have to test for \p and \P, which have an extra
two bytes of parameters, and for MARK/PRUNE/SKIP/THEN with an argument, we
must add in its length. */
else
{
switch(c)
{
case OP_TYPESTAR:
case OP_TYPEMINSTAR:
case OP_TYPEPLUS:
case OP_TYPEMINPLUS:
case OP_TYPEQUERY:
case OP_TYPEMINQUERY:
case OP_TYPEPOSSTAR:
case OP_TYPEPOSPLUS:
case OP_TYPEPOSQUERY:
if (code[1] == OP_PROP || code[1] == OP_NOTPROP) code += 2;
break;
case OP_TYPEUPTO:
case OP_TYPEMINUPTO:
case OP_TYPEEXACT:
case OP_TYPEPOSUPTO:
if (code[1 + IMM2_SIZE] == OP_PROP || code[1 + IMM2_SIZE] == OP_NOTPROP)
code += 2;
break;
case OP_MARK:
case OP_PRUNE_ARG:
case OP_SKIP_ARG:
case OP_THEN_ARG:
code += code[1];
break;
}
/* Add in the fixed length from the table */
code += PRIV(OP_lengths)[c];
/* In UTF-8 and UTF-16 modes, opcodes that are followed by a character may be
followed by a multi-byte character. The length in the table is a minimum, so
we have to arrange to skip the extra bytes. */
#ifdef MAYBE_UTF_MULTI
if (utf) switch(c)
{
case OP_CHAR:
case OP_CHARI:
case OP_NOT:
case OP_NOTI:
case OP_EXACT:
case OP_EXACTI:
case OP_NOTEXACT:
case OP_NOTEXACTI:
case OP_UPTO:
case OP_UPTOI:
case OP_NOTUPTO:
case OP_NOTUPTOI:
case OP_MINUPTO:
case OP_MINUPTOI:
case OP_NOTMINUPTO:
case OP_NOTMINUPTOI:
case OP_POSUPTO:
case OP_POSUPTOI:
case OP_NOTPOSUPTO:
case OP_NOTPOSUPTOI:
case OP_STAR:
case OP_STARI:
case OP_NOTSTAR:
case OP_NOTSTARI:
case OP_MINSTAR:
case OP_MINSTARI:
case OP_NOTMINSTAR:
case OP_NOTMINSTARI:
case OP_POSSTAR:
case OP_POSSTARI:
case OP_NOTPOSSTAR:
case OP_NOTPOSSTARI:
case OP_PLUS:
case OP_PLUSI:
case OP_NOTPLUS:
case OP_NOTPLUSI:
case OP_MINPLUS:
case OP_MINPLUSI:
case OP_NOTMINPLUS:
case OP_NOTMINPLUSI:
case OP_POSPLUS:
case OP_POSPLUSI:
case OP_NOTPOSPLUS:
case OP_NOTPOSPLUSI:
case OP_QUERY:
case OP_QUERYI:
case OP_NOTQUERY:
case OP_NOTQUERYI:
case OP_MINQUERY:
case OP_MINQUERYI:
case OP_NOTMINQUERY:
case OP_NOTMINQUERYI:
case OP_POSQUERY:
case OP_POSQUERYI:
case OP_NOTPOSQUERY:
case OP_NOTPOSQUERYI:
if (HAS_EXTRALEN(code[-1])) code += GET_EXTRALEN(code[-1]);
break;
}
#else
(void)(utf); /* Keep compiler happy by referencing function argument */
#endif /* MAYBE_UTF_MULTI */
}
}
}
/* End of pcre2_find_bracket.c */

View file

@ -0,0 +1,896 @@
/*************************************************
* Perl-Compatible Regular Expressions *
*************************************************/
/* PCRE is a library of functions to support regular expressions whose syntax
and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel
Original API code Copyright (c) 1997-2012 University of Cambridge
New API code Copyright (c) 2016-2017 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the University of Cambridge nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
-----------------------------------------------------------------------------
*/
/* This module contains mode-dependent macro and structure definitions. The
file is #included by pcre2_internal.h if PCRE2_CODE_UNIT_WIDTH is defined.
These mode-dependent items are kept in a separate file so that they can also be
#included multiple times for different code unit widths by pcre2test in order
to have access to the hidden structures at all supported widths.
Some of the mode-dependent macros are required at different widths for
different parts of the pcre2test code (in particular, the included
pcre_printint.c file). We undefine them here so that they can be re-defined for
multiple inclusions. Not all of these are used in pcre2test, but it's easier
just to undefine them all. */
#undef ACROSSCHAR
#undef BACKCHAR
#undef BYTES2CU
#undef CHMAX_255
#undef CU2BYTES
#undef FORWARDCHAR
#undef FORWARDCHARTEST
#undef GET
#undef GET2
#undef GETCHAR
#undef GETCHARINC
#undef GETCHARINCTEST
#undef GETCHARLEN
#undef GETCHARLENTEST
#undef GETCHARTEST
#undef GET_EXTRALEN
#undef HAS_EXTRALEN
#undef IMM2_SIZE
#undef MAX_255
#undef MAX_MARK
#undef MAX_PATTERN_SIZE
#undef MAX_UTF_SINGLE_CU
#undef NOT_FIRSTCU
#undef PUT
#undef PUT2
#undef PUT2INC
#undef PUTCHAR
#undef PUTINC
#undef TABLE_GET
/* -------------------------- MACROS ----------------------------- */
/* PCRE keeps offsets in its compiled code as at least 16-bit quantities
(always stored in big-endian order in 8-bit mode) by default. These are used,
for example, to link from the start of a subpattern to its alternatives and its
end. The use of 16 bits per offset limits the size of an 8-bit compiled regex
to around 64K, which is big enough for almost everybody. However, I received a
request for an even bigger limit. For this reason, and also to make the code
easier to maintain, the storing and loading of offsets from the compiled code
unit string is now handled by the macros that are defined here.
The macros are controlled by the value of LINK_SIZE. This defaults to 2, but
values of 3 or 4 are also supported. */
/* ------------------- 8-bit support ------------------ */
#if PCRE2_CODE_UNIT_WIDTH == 8
#if LINK_SIZE == 2
#define PUT(a,n,d) \
(a[n] = (PCRE2_UCHAR)((d) >> 8)), \
(a[(n)+1] = (PCRE2_UCHAR)((d) & 255))
#define GET(a,n) \
(unsigned int)(((a)[n] << 8) | (a)[(n)+1])
#define MAX_PATTERN_SIZE (1 << 16)
#elif LINK_SIZE == 3
#define PUT(a,n,d) \
(a[n] = (PCRE2_UCHAR)((d) >> 16)), \
(a[(n)+1] = (PCRE2_UCHAR)((d) >> 8)), \
(a[(n)+2] = (PCRE2_UCHAR)((d) & 255))
#define GET(a,n) \
(unsigned int)(((a)[n] << 16) | ((a)[(n)+1] << 8) | (a)[(n)+2])
#define MAX_PATTERN_SIZE (1 << 24)
#elif LINK_SIZE == 4
#define PUT(a,n,d) \
(a[n] = (PCRE2_UCHAR)((d) >> 24)), \
(a[(n)+1] = (PCRE2_UCHAR)((d) >> 16)), \
(a[(n)+2] = (PCRE2_UCHAR)((d) >> 8)), \
(a[(n)+3] = (PCRE2_UCHAR)((d) & 255))
#define GET(a,n) \
(unsigned int)(((a)[n] << 24) | ((a)[(n)+1] << 16) | ((a)[(n)+2] << 8) | (a)[(n)+3])
#define MAX_PATTERN_SIZE (1 << 30) /* Keep it positive */
#else
#error LINK_SIZE must be 2, 3, or 4
#endif
/* ------------------- 16-bit support ------------------ */
#elif PCRE2_CODE_UNIT_WIDTH == 16
#if LINK_SIZE == 2
#undef LINK_SIZE
#define LINK_SIZE 1
#define PUT(a,n,d) \
(a[n] = (PCRE2_UCHAR)(d))
#define GET(a,n) \
(a[n])
#define MAX_PATTERN_SIZE (1 << 16)
#elif LINK_SIZE == 3 || LINK_SIZE == 4
#undef LINK_SIZE
#define LINK_SIZE 2
#define PUT(a,n,d) \
(a[n] = (PCRE2_UCHAR)((d) >> 16)), \
(a[(n)+1] = (PCRE2_UCHAR)((d) & 65535))
#define GET(a,n) \
(unsigned int)(((a)[n] << 16) | (a)[(n)+1])
#define MAX_PATTERN_SIZE (1 << 30) /* Keep it positive */
#else
#error LINK_SIZE must be 2, 3, or 4
#endif
/* ------------------- 32-bit support ------------------ */
#elif PCRE2_CODE_UNIT_WIDTH == 32
#undef LINK_SIZE
#define LINK_SIZE 1
#define PUT(a,n,d) \
(a[n] = (d))
#define GET(a,n) \
(a[n])
#define MAX_PATTERN_SIZE (1 << 30) /* Keep it positive */
#else
#error Unsupported compiling mode
#endif
/* --------------- Other mode-specific macros ----------------- */
/* PCRE uses some other (at least) 16-bit quantities that do not change when
the size of offsets changes. There are used for repeat counts and for other
things such as capturing parenthesis numbers in back references.
Define the number of code units required to hold a 16-bit count/offset, and
macros to load and store such a value. For reasons that I do not understand,
the expression in the 8-bit GET2 macro is treated by gcc as a signed
expression, even when a is declared as unsigned. It seems that any kind of
arithmetic results in a signed value. Hence the cast. */
#if PCRE2_CODE_UNIT_WIDTH == 8
#define IMM2_SIZE 2
#define GET2(a,n) (unsigned int)(((a)[n] << 8) | (a)[(n)+1])
#define PUT2(a,n,d) a[n] = (d) >> 8, a[(n)+1] = (d) & 255
#else /* Code units are 16 or 32 bits */
#define IMM2_SIZE 1
#define GET2(a,n) a[n]
#define PUT2(a,n,d) a[n] = d
#endif
/* Other macros that are different for 8-bit mode. The MAX_255 macro checks
whether its argument, which is assumed to be one code unit, is less than 256.
The CHMAX_255 macro does not assume one code unit. The maximum length of a MARK
name must fit in one code unit; currently it is set to 255 or 65535. The
TABLE_GET macro is used to access elements of tables containing exactly 256
items. When code points can be greater than 255, a check is needed before
accessing these tables. */
#if PCRE2_CODE_UNIT_WIDTH == 8
#define MAX_255(c) TRUE
#define MAX_MARK ((1u << 8) - 1)
#ifdef SUPPORT_UNICODE
#define SUPPORT_WIDE_CHARS
#define CHMAX_255(c) ((c) <= 255u)
#else
#define CHMAX_255(c) TRUE
#endif /* SUPPORT_UNICODE */
#define TABLE_GET(c, table, default) ((table)[c])
#else /* Code units are 16 or 32 bits */
#define CHMAX_255(c) ((c) <= 255u)
#define MAX_255(c) ((c) <= 255u)
#define MAX_MARK ((1u << 16) - 1)
#define SUPPORT_WIDE_CHARS
#define TABLE_GET(c, table, default) (MAX_255(c)? ((table)[c]):(default))
#endif
/* ----------------- Character-handling macros ----------------- */
/* There is a proposed future special "UTF-21" mode, in which only the lowest
21 bits of a 32-bit character are interpreted as UTF, with the remaining 11
high-order bits available to the application for other uses. In preparation for
the future implementation of this mode, there are macros that load a data item
and, if in this special mode, mask it to 21 bits. These macros all have names
starting with UCHAR21. In all other modes, including the normal 32-bit
library, the macros all have the same simple definitions. When the new mode is
implemented, it is expected that these definitions will be varied appropriately
using #ifdef when compiling the library that supports the special mode. */
#define UCHAR21(eptr) (*(eptr))
#define UCHAR21TEST(eptr) (*(eptr))
#define UCHAR21INC(eptr) (*(eptr)++)
#define UCHAR21INCTEST(eptr) (*(eptr)++)
/* When UTF encoding is being used, a character is no longer just a single
byte in 8-bit mode or a single short in 16-bit mode. The macros for character
handling generate simple sequences when used in the basic mode, and more
complicated ones for UTF characters. GETCHARLENTEST and other macros are not
used when UTF is not supported. To make sure they can never even appear when
UTF support is omitted, we don't even define them. */
#ifndef SUPPORT_UNICODE
/* #define MAX_UTF_SINGLE_CU */
/* #define HAS_EXTRALEN(c) */
/* #define GET_EXTRALEN(c) */
/* #define NOT_FIRSTCU(c) */
#define GETCHAR(c, eptr) c = *eptr;
#define GETCHARTEST(c, eptr) c = *eptr;
#define GETCHARINC(c, eptr) c = *eptr++;
#define GETCHARINCTEST(c, eptr) c = *eptr++;
#define GETCHARLEN(c, eptr, len) c = *eptr;
#define PUTCHAR(c, p) (*p = c, 1)
/* #define GETCHARLENTEST(c, eptr, len) */
/* #define BACKCHAR(eptr) */
/* #define FORWARDCHAR(eptr) */
/* #define FORWARCCHARTEST(eptr,end) */
/* #define ACROSSCHAR(condition, eptr, action) */
#else /* SUPPORT_UNICODE */
/* ------------------- 8-bit support ------------------ */
#if PCRE2_CODE_UNIT_WIDTH == 8
#define MAYBE_UTF_MULTI /* UTF chars may use multiple code units */
/* The largest UTF code point that can be encoded as a single code unit. */
#define MAX_UTF_SINGLE_CU 127
/* Tests whether the code point needs extra characters to decode. */
#define HAS_EXTRALEN(c) HASUTF8EXTRALEN(c)
/* Returns with the additional number of characters if IS_MULTICHAR(c) is TRUE.
Otherwise it has an undefined behaviour. */
#define GET_EXTRALEN(c) (PRIV(utf8_table4)[(c) & 0x3fu])
/* Returns TRUE, if the given value is not the first code unit of a UTF
sequence. */
#define NOT_FIRSTCU(c) (((c) & 0xc0u) == 0x80u)
/* Get the next UTF-8 character, not advancing the pointer. This is called when
we know we are in UTF-8 mode. */
#define GETCHAR(c, eptr) \
c = *eptr; \
if (c >= 0xc0u) GETUTF8(c, eptr);
/* Get the next UTF-8 character, testing for UTF-8 mode, and not advancing the
pointer. */
#define GETCHARTEST(c, eptr) \
c = *eptr; \
if (utf && c >= 0xc0u) GETUTF8(c, eptr);
/* Get the next UTF-8 character, advancing the pointer. This is called when we
know we are in UTF-8 mode. */
#define GETCHARINC(c, eptr) \
c = *eptr++; \
if (c >= 0xc0u) GETUTF8INC(c, eptr);
/* Get the next character, testing for UTF-8 mode, and advancing the pointer.
This is called when we don't know if we are in UTF-8 mode. */
#define GETCHARINCTEST(c, eptr) \
c = *eptr++; \
if (utf && c >= 0xc0u) GETUTF8INC(c, eptr);
/* Get the next UTF-8 character, not advancing the pointer, incrementing length
if there are extra bytes. This is called when we know we are in UTF-8 mode. */
#define GETCHARLEN(c, eptr, len) \
c = *eptr; \
if (c >= 0xc0u) GETUTF8LEN(c, eptr, len);
/* Get the next UTF-8 character, testing for UTF-8 mode, not advancing the
pointer, incrementing length if there are extra bytes. This is called when we
do not know if we are in UTF-8 mode. */
#define GETCHARLENTEST(c, eptr, len) \
c = *eptr; \
if (utf && c >= 0xc0u) GETUTF8LEN(c, eptr, len);
/* If the pointer is not at the start of a character, move it back until
it is. This is called only in UTF-8 mode - we don't put a test within the macro
because almost all calls are already within a block of UTF-8 only code. */
#define BACKCHAR(eptr) while((*eptr & 0xc0u) == 0x80u) eptr--
/* Same as above, just in the other direction. */
#define FORWARDCHAR(eptr) while((*eptr & 0xc0u) == 0x80u) eptr++
#define FORWARDCHARTEST(eptr,end) while(eptr < end && (*eptr & 0xc0u) == 0x80u) eptr++
/* Same as above, but it allows a fully customizable form. */
#define ACROSSCHAR(condition, eptr, action) \
while((condition) && ((eptr) & 0xc0u) == 0x80u) action
/* Deposit a character into memory, returning the number of code units. */
#define PUTCHAR(c, p) ((utf && c > MAX_UTF_SINGLE_CU)? \
PRIV(ord2utf)(c,p) : (*p = c, 1))
/* ------------------- 16-bit support ------------------ */
#elif PCRE2_CODE_UNIT_WIDTH == 16
#define MAYBE_UTF_MULTI /* UTF chars may use multiple code units */
/* The largest UTF code point that can be encoded as a single code unit. */
#define MAX_UTF_SINGLE_CU 65535
/* Tests whether the code point needs extra characters to decode. */
#define HAS_EXTRALEN(c) (((c) & 0xfc00u) == 0xd800u)
/* Returns with the additional number of characters if IS_MULTICHAR(c) is TRUE.
Otherwise it has an undefined behaviour. */
#define GET_EXTRALEN(c) 1
/* Returns TRUE, if the given value is not the first code unit of a UTF
sequence. */
#define NOT_FIRSTCU(c) (((c) & 0xfc00u) == 0xdc00u)
/* Base macro to pick up the low surrogate of a UTF-16 character, not
advancing the pointer. */
#define GETUTF16(c, eptr) \
{ c = (((c & 0x3ffu) << 10) | (eptr[1] & 0x3ffu)) + 0x10000u; }
/* Get the next UTF-16 character, not advancing the pointer. This is called when
we know we are in UTF-16 mode. */
#define GETCHAR(c, eptr) \
c = *eptr; \
if ((c & 0xfc00u) == 0xd800u) GETUTF16(c, eptr);
/* Get the next UTF-16 character, testing for UTF-16 mode, and not advancing the
pointer. */
#define GETCHARTEST(c, eptr) \
c = *eptr; \
if (utf && (c & 0xfc00u) == 0xd800u) GETUTF16(c, eptr);
/* Base macro to pick up the low surrogate of a UTF-16 character, advancing
the pointer. */
#define GETUTF16INC(c, eptr) \
{ c = (((c & 0x3ffu) << 10) | (*eptr++ & 0x3ffu)) + 0x10000u; }
/* Get the next UTF-16 character, advancing the pointer. This is called when we
know we are in UTF-16 mode. */
#define GETCHARINC(c, eptr) \
c = *eptr++; \
if ((c & 0xfc00u) == 0xd800u) GETUTF16INC(c, eptr);
/* Get the next character, testing for UTF-16 mode, and advancing the pointer.
This is called when we don't know if we are in UTF-16 mode. */
#define GETCHARINCTEST(c, eptr) \
c = *eptr++; \
if (utf && (c & 0xfc00u) == 0xd800u) GETUTF16INC(c, eptr);
/* Base macro to pick up the low surrogate of a UTF-16 character, not
advancing the pointer, incrementing the length. */
#define GETUTF16LEN(c, eptr, len) \
{ c = (((c & 0x3ffu) << 10) | (eptr[1] & 0x3ffu)) + 0x10000u; len++; }
/* Get the next UTF-16 character, not advancing the pointer, incrementing
length if there is a low surrogate. This is called when we know we are in
UTF-16 mode. */
#define GETCHARLEN(c, eptr, len) \
c = *eptr; \
if ((c & 0xfc00u) == 0xd800u) GETUTF16LEN(c, eptr, len);
/* Get the next UTF-816character, testing for UTF-16 mode, not advancing the
pointer, incrementing length if there is a low surrogate. This is called when
we do not know if we are in UTF-16 mode. */
#define GETCHARLENTEST(c, eptr, len) \
c = *eptr; \
if (utf && (c & 0xfc00u) == 0xd800u) GETUTF16LEN(c, eptr, len);
/* If the pointer is not at the start of a character, move it back until
it is. This is called only in UTF-16 mode - we don't put a test within the
macro because almost all calls are already within a block of UTF-16 only
code. */
#define BACKCHAR(eptr) if ((*eptr & 0xfc00u) == 0xdc00u) eptr--
/* Same as above, just in the other direction. */
#define FORWARDCHAR(eptr) if ((*eptr & 0xfc00u) == 0xdc00u) eptr++
#define FORWARDCHARTEST(eptr,end) if (eptr < end && (*eptr & 0xfc00u) == 0xdc00u) eptr++
/* Same as above, but it allows a fully customizable form. */
#define ACROSSCHAR(condition, eptr, action) \
if ((condition) && ((eptr) & 0xfc00u) == 0xdc00u) action
/* Deposit a character into memory, returning the number of code units. */
#define PUTCHAR(c, p) ((utf && c > MAX_UTF_SINGLE_CU)? \
PRIV(ord2utf)(c,p) : (*p = c, 1))
/* ------------------- 32-bit support ------------------ */
#else
/* These are trivial for the 32-bit library, since all UTF-32 characters fit
into one PCRE2_UCHAR unit. */
#define MAX_UTF_SINGLE_CU (0x10ffffu)
#define HAS_EXTRALEN(c) (0)
#define GET_EXTRALEN(c) (0)
#define NOT_FIRSTCU(c) (0)
/* Get the next UTF-32 character, not advancing the pointer. This is called when
we know we are in UTF-32 mode. */
#define GETCHAR(c, eptr) \
c = *(eptr);
/* Get the next UTF-32 character, testing for UTF-32 mode, and not advancing the
pointer. */
#define GETCHARTEST(c, eptr) \
c = *(eptr);
/* Get the next UTF-32 character, advancing the pointer. This is called when we
know we are in UTF-32 mode. */
#define GETCHARINC(c, eptr) \
c = *((eptr)++);
/* Get the next character, testing for UTF-32 mode, and advancing the pointer.
This is called when we don't know if we are in UTF-32 mode. */
#define GETCHARINCTEST(c, eptr) \
c = *((eptr)++);
/* Get the next UTF-32 character, not advancing the pointer, not incrementing
length (since all UTF-32 is of length 1). This is called when we know we are in
UTF-32 mode. */
#define GETCHARLEN(c, eptr, len) \
GETCHAR(c, eptr)
/* Get the next UTF-32character, testing for UTF-32 mode, not advancing the
pointer, not incrementing the length (since all UTF-32 is of length 1).
This is called when we do not know if we are in UTF-32 mode. */
#define GETCHARLENTEST(c, eptr, len) \
GETCHARTEST(c, eptr)
/* If the pointer is not at the start of a character, move it back until
it is. This is called only in UTF-32 mode - we don't put a test within the
macro because almost all calls are already within a block of UTF-32 only
code.
These are all no-ops since all UTF-32 characters fit into one pcre_uchar. */
#define BACKCHAR(eptr) do { } while (0)
/* Same as above, just in the other direction. */
#define FORWARDCHAR(eptr) do { } while (0)
#define FORWARDCHARTEST(eptr,end) do { } while (0)
/* Same as above, but it allows a fully customizable form. */
#define ACROSSCHAR(condition, eptr, action) do { } while (0)
/* Deposit a character into memory, returning the number of code units. */
#define PUTCHAR(c, p) (*p = c, 1)
#endif /* UTF-32 character handling */
#endif /* SUPPORT_UNICODE */
/* Mode-dependent macros that have the same definition in all modes. */
#define CU2BYTES(x) ((x)*((PCRE2_CODE_UNIT_WIDTH/8)))
#define BYTES2CU(x) ((x)/((PCRE2_CODE_UNIT_WIDTH/8)))
#define PUTINC(a,n,d) PUT(a,n,d), a += LINK_SIZE
#define PUT2INC(a,n,d) PUT2(a,n,d), a += IMM2_SIZE
/* ----------------------- HIDDEN STRUCTURES ----------------------------- */
/* NOTE: All these structures *must* start with a pcre2_memctl structure. The
code that uses them is simpler because it assumes this. */
/* The real general context structure. At present it holds only data for custom
memory control. */
typedef struct pcre2_real_general_context {
pcre2_memctl memctl;
} pcre2_real_general_context;
/* The real compile context structure */
typedef struct pcre2_real_compile_context {
pcre2_memctl memctl;
int (*stack_guard)(uint32_t, void *);
void *stack_guard_data;
const uint8_t *tables;
PCRE2_SIZE max_pattern_length;
uint16_t bsr_convention;
uint16_t newline_convention;
uint32_t parens_nest_limit;
uint32_t extra_options;
} pcre2_real_compile_context;
/* The real match context structure. */
typedef struct pcre2_real_match_context {
pcre2_memctl memctl;
#ifdef SUPPORT_JIT
pcre2_jit_callback jit_callback;
void *jit_callback_data;
#endif
int (*callout)(pcre2_callout_block *, void *);
void *callout_data;
PCRE2_SIZE offset_limit;
uint32_t heap_limit;
uint32_t match_limit;
uint32_t depth_limit;
} pcre2_real_match_context;
/* The real convert context structure. */
typedef struct pcre2_real_convert_context {
pcre2_memctl memctl;
uint32_t glob_separator;
uint32_t glob_escape;
} pcre2_real_convert_context;
/* The real compiled code structure. The type for the blocksize field is
defined specially because it is required in pcre2_serialize_decode() when
copying the size from possibly unaligned memory into a variable of the same
type. Use a macro rather than a typedef to avoid compiler warnings when this
file is included multiple times by pcre2test. LOOKBEHIND_MAX specifies the
largest lookbehind that is supported. (OP_REVERSE in a pattern has a 16-bit
argument in 8-bit and 16-bit modes, so we need no more than a 16-bit field
here.) */
#undef CODE_BLOCKSIZE_TYPE
#define CODE_BLOCKSIZE_TYPE size_t
#undef LOOKBEHIND_MAX
#define LOOKBEHIND_MAX UINT16_MAX
typedef struct pcre2_real_code {
pcre2_memctl memctl; /* Memory control fields */
const uint8_t *tables; /* The character tables */
void *executable_jit; /* Pointer to JIT code */
uint8_t start_bitmap[32]; /* Bitmap for starting code unit < 256 */
CODE_BLOCKSIZE_TYPE blocksize; /* Total (bytes) that was malloc-ed */
uint32_t magic_number; /* Paranoid and endianness check */
uint32_t compile_options; /* Options passed to pcre2_compile() */
uint32_t overall_options; /* Options after processing the pattern */
uint32_t flags; /* Various state flags */
uint32_t limit_heap; /* Limit set in the pattern */
uint32_t limit_match; /* Limit set in the pattern */
uint32_t limit_depth; /* Limit set in the pattern */
uint32_t first_codeunit; /* Starting code unit */
uint32_t last_codeunit; /* This codeunit must be seen */
uint16_t bsr_convention; /* What \R matches */
uint16_t newline_convention; /* What is a newline? */
uint16_t max_lookbehind; /* Longest lookbehind (characters) */
uint16_t minlength; /* Minimum length of match */
uint16_t top_bracket; /* Highest numbered group */
uint16_t top_backref; /* Highest numbered back reference */
uint16_t name_entry_size; /* Size (code units) of table entries */
uint16_t name_count; /* Number of name entries in the table */
} pcre2_real_code;
/* The real match data structure. Define ovector large so that array bound
checkers don't grumble. Memory for this structure is obtained by calling
pcre2_match_data_create(), which sets the size as the offset of ovector plus
pairs of elements for each capturing group. (See also the heapframe structure
below.) */
typedef struct pcre2_real_match_data {
pcre2_memctl memctl;
const pcre2_real_code *code; /* The pattern used for the match */
PCRE2_SPTR subject; /* The subject that was matched */
PCRE2_SPTR mark; /* Pointer to last mark */
PCRE2_SIZE leftchar; /* Offset to leftmost code unit */
PCRE2_SIZE rightchar; /* Offset to rightmost code unit */
PCRE2_SIZE startchar; /* Offset to starting code unit */
uint16_t matchedby; /* Type of match (normal, JIT, DFA) */
uint16_t oveccount; /* Number of pairs */
int rc; /* The return code from the match */
PCRE2_SIZE ovector[10000];/* The first field */
} pcre2_real_match_data;
/* ----------------------- PRIVATE STRUCTURES ----------------------------- */
/* These structures are not needed for pcre2test. */
#ifndef PCRE2_PCRE2TEST
/* Structures for checking for mutual recursion when scanning compiled or
parsed code. */
typedef struct recurse_check {
struct recurse_check *prev;
PCRE2_SPTR group;
} recurse_check;
typedef struct parsed_recurse_check {
struct parsed_recurse_check *prev;
uint32_t *groupptr;
} parsed_recurse_check;
/* Structure for building a cache when filling in recursion offsets. */
typedef struct recurse_cache {
PCRE2_SPTR group;
int groupnumber;
} recurse_cache;
/* Structure for maintaining a chain of pointers to the currently incomplete
branches, for testing for left recursion while compiling. */
typedef struct branch_chain {
struct branch_chain *outer;
PCRE2_UCHAR *current_branch;
} branch_chain;
/* Structure for building a list of named groups during the first pass of
compiling. */
typedef struct named_group {
PCRE2_SPTR name; /* Points to the name in the pattern */
uint32_t number; /* Group number */
uint16_t length; /* Length of the name */
uint16_t isdup; /* TRUE if a duplicate */
} named_group;
/* Structure for passing "static" information around between the functions
doing the compiling, so that they are thread-safe. */
typedef struct compile_block {
pcre2_real_compile_context *cx; /* Points to the compile context */
const uint8_t *lcc; /* Points to lower casing table */
const uint8_t *fcc; /* Points to case-flipping table */
const uint8_t *cbits; /* Points to character type table */
const uint8_t *ctypes; /* Points to table of type maps */
PCRE2_SPTR start_workspace; /* The start of working space */
PCRE2_SPTR start_code; /* The start of the compiled code */
PCRE2_SPTR start_pattern; /* The start of the pattern */
PCRE2_SPTR end_pattern; /* The end of the pattern */
PCRE2_UCHAR *name_table; /* The name/number table */
PCRE2_SIZE workspace_size; /* Size of workspace */
PCRE2_SIZE small_ref_offset[10]; /* Offsets for \1 to \9 */
PCRE2_SIZE erroroffset; /* Offset of error in pattern */
uint16_t names_found; /* Number of entries so far */
uint16_t name_entry_size; /* Size of each entry */
open_capitem *open_caps; /* Chain of open capture items */
named_group *named_groups; /* Points to vector in pre-compile */
uint32_t named_group_list_size; /* Number of entries in the list */
uint32_t external_options; /* External (initial) options */
uint32_t external_flags; /* External flag bits to be set */
uint32_t bracount; /* Count of capturing parentheses */
uint32_t lastcapture; /* Last capture encountered */
uint32_t *parsed_pattern; /* Parsed pattern buffer */
uint32_t *parsed_pattern_end; /* Parsed pattern should not get here */
uint32_t *groupinfo; /* Group info vector */
uint32_t top_backref; /* Maximum back reference */
uint32_t backref_map; /* Bitmap of low back refs */
uint32_t nltype; /* Newline type */
uint32_t nllen; /* Newline string length */
uint32_t class_range_start; /* Overall class range start */
uint32_t class_range_end; /* Overall class range end */
PCRE2_UCHAR nl[4]; /* Newline string when fixed length */
int max_lookbehind; /* Maximum lookbehind (characters) */
int parens_depth; /* Depth of nested parentheses */
int assert_depth; /* Depth of nested assertions */
int req_varyopt; /* "After variable item" flag for reqbyte */
BOOL had_accept; /* (*ACCEPT) encountered */
BOOL had_pruneorskip; /* (*PRUNE) or (*SKIP) encountered */
BOOL had_recurse; /* Had a recursion or subroutine call */
BOOL dupnames; /* Duplicate names exist */
} compile_block;
/* Structure for keeping the properties of the in-memory stack used
by the JIT matcher. */
typedef struct pcre2_real_jit_stack {
pcre2_memctl memctl;
void* stack;
} pcre2_real_jit_stack;
/* Structure for items in a linked list that represents an explicit recursive
call within the pattern when running pcre_dfa_match(). */
typedef struct dfa_recursion_info {
struct dfa_recursion_info *prevrec;
PCRE2_SPTR subject_position;
uint32_t group_num;
} dfa_recursion_info;
/* Structure for "stack" frames that are used for remembering backtracking
positions during matching. As these are used in a vector, with the ovector item
being extended, the size of the structure must be a multiple of PCRE2_SIZE. The
only way to check this at compile time is to force an error by generating an
array with a negative size. By putting this in a typedef (which is never used),
we don't generate any code when all is well. */
typedef struct heapframe {
/* The first set of fields are variables that have to be preserved over calls
to RRMATCH(), but which do not need to be copied to new frames. */
PCRE2_SPTR ecode; /* The current position in the pattern */
PCRE2_SPTR temp_sptr[2]; /* Used for short-term PCRE_SPTR values */
PCRE2_SIZE length; /* Used for character, string, or code lengths */
PCRE2_SIZE back_frame; /* Amount to subtract on RRETURN */
PCRE2_SIZE temp_size; /* Used for short-term PCRE2_SIZE values */
uint32_t rdepth; /* "Recursion" depth */
uint32_t group_frame_type; /* Type information for group frames */
uint32_t temp_32[4]; /* Used for short-term 32-bit or BOOL values */
uint8_t return_id; /* Where to go on in internal "return" */
uint8_t op; /* Processing opcode */
#if PCRE2_CODE_UNIT_WIDTH == 8
PCRE2_UCHAR occu[6]; /* Used for other case code units */
#elif PCRE2_CODE_UNIT_WIDTH == 16
PCRE2_UCHAR occu[2]; /* Used for other case code units */
#else
PCRE2_UCHAR occu[1]; /* Used for other case code units */
#endif
/* The rest have to be copied from the previous frame whenever a new frame
becomes current. The final field is specified as a large vector so that
runtime array bound checks don't catch references to it. However, for any
specific call to pcre2_match() the memory allocated for each frame structure
allows for exactly the right size ovector for the number of capturing
parentheses. */
PCRE2_SPTR eptr; /* MUST BE FIRST */
PCRE2_SPTR start_match; /* Can be adjusted by \K */
PCRE2_SPTR mark; /* Most recent mark on the success path */
uint32_t current_recurse; /* Current (deepest) recursion number */
uint32_t capture_last; /* Most recent capture */
PCRE2_SIZE last_group_offset; /* Saved offset to most recent group frame */
PCRE2_SIZE offset_top; /* Offset after highest capture */
PCRE2_SIZE ovector[10000]; /* Must be last in the structure */
} heapframe;
typedef char check_heapframe_size[
((sizeof(heapframe) % sizeof(PCRE2_SIZE)) == 0)? (+1):(-1)];
/* Structure for passing "static" information around between the functions
doing traditional NFA matching (pcre2_match() and friends). */
typedef struct match_block {
pcre2_memctl memctl; /* For general use */
PCRE2_SIZE frame_vector_size; /* Size of a backtracking frame */
heapframe *match_frames; /* Points to vector of frames */
heapframe *match_frames_top; /* Points after the end of the vector */
heapframe *stack_frames; /* The original vector on the stack */
PCRE2_SIZE heap_limit; /* As it says */
uint32_t match_limit; /* As it says */
uint32_t match_limit_depth; /* As it says */
uint32_t match_call_count; /* Number of times a new frame is created */
BOOL hitend; /* Hit the end of the subject at some point */
BOOL hasthen; /* Pattern contains (*THEN) */
const uint8_t *lcc; /* Points to lower casing table */
const uint8_t *fcc; /* Points to case-flipping table */
const uint8_t *ctypes; /* Points to table of type maps */
PCRE2_SIZE start_offset; /* The start offset value */
PCRE2_SIZE end_offset_top; /* Highwater mark at end of match */
uint16_t partial; /* PARTIAL options */
uint16_t bsr_convention; /* \R interpretation */
uint16_t name_count; /* Number of names in name table */
uint16_t name_entry_size; /* Size of entry in names table */
PCRE2_SPTR name_table; /* Table of group names */
PCRE2_SPTR start_code; /* For use when recursing */
PCRE2_SPTR start_subject; /* Start of the subject string */
PCRE2_SPTR end_subject; /* End of the subject string */
PCRE2_SPTR end_match_ptr; /* Subject position at end match */
PCRE2_SPTR start_used_ptr; /* Earliest consulted character */
PCRE2_SPTR last_used_ptr; /* Latest consulted character */
PCRE2_SPTR mark; /* Mark pointer to pass back on success */
PCRE2_SPTR nomatch_mark; /* Mark pointer to pass back on failure */
PCRE2_SPTR verb_ecode_ptr; /* For passing back info */
PCRE2_SPTR verb_skip_ptr; /* For passing back a (*SKIP) name */
uint32_t verb_current_recurse; /* Current recurse when (*VERB) happens */
uint32_t moptions; /* Match options */
uint32_t poptions; /* Pattern options */
uint32_t skip_arg_count; /* For counting SKIP_ARGs */
uint32_t ignore_skip_arg; /* For re-run when SKIP arg name not found */
uint32_t nltype; /* Newline type */
uint32_t nllen; /* Newline string length */
PCRE2_UCHAR nl[4]; /* Newline string when fixed */
void *callout_data; /* To pass back to callouts */
int (*callout)(pcre2_callout_block *,void *); /* Callout function or NULL */
} match_block;
/* A similar structure is used for the same purpose by the DFA matching
functions. */
typedef struct dfa_match_block {
pcre2_memctl memctl; /* For general use */
PCRE2_SPTR start_code; /* Start of the compiled pattern */
PCRE2_SPTR start_subject ; /* Start of the subject string */
PCRE2_SPTR end_subject; /* End of subject string */
PCRE2_SPTR start_used_ptr; /* Earliest consulted character */
PCRE2_SPTR last_used_ptr; /* Latest consulted character */
const uint8_t *tables; /* Character tables */
PCRE2_SIZE start_offset; /* The start offset value */
uint32_t match_limit; /* As it says */
uint32_t match_limit_depth; /* As it says */
uint32_t match_call_count; /* Number of calls of internal function */
uint32_t moptions; /* Match options */
uint32_t poptions; /* Pattern options */
uint32_t nltype; /* Newline type */
uint32_t nllen; /* Newline string length */
PCRE2_UCHAR nl[4]; /* Newline string when fixed */
uint16_t bsr_convention; /* \R interpretation */
void *callout_data; /* To pass back to callouts */
int (*callout)(pcre2_callout_block *,void *); /* Callout function or NULL */
dfa_recursion_info *recursive; /* Linked list of recursion data */
} dfa_match_block;
#endif /* PCRE2_PCRE2TEST */
/* End of pcre2_intmodedep.h */

View file

@ -0,0 +1,189 @@
/*************************************************
* Perl-Compatible Regular Expressions *
*************************************************/
/* PCRE is a library of functions to support regular expressions whose syntax
and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel
Original API code Copyright (c) 1997-2012 University of Cambridge
New API code Copyright (c) 2016 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the University of Cambridge nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
-----------------------------------------------------------------------------
*/
#ifndef INCLUDED_FROM_PCRE2_JIT_COMPILE
#error This file must be included from pcre2_jit_compile.c.
#endif
#ifdef SUPPORT_JIT
static SLJIT_NOINLINE int jit_machine_stack_exec(jit_arguments *arguments, jit_function executable_func)
{
sljit_u8 local_space[MACHINE_STACK_SIZE];
struct sljit_stack local_stack;
local_stack.max_limit = local_space;
local_stack.limit = local_space;
local_stack.base = local_space + MACHINE_STACK_SIZE;
local_stack.top = local_space + MACHINE_STACK_SIZE;
arguments->stack = &local_stack;
return executable_func(arguments);
}
#endif
/*************************************************
* Do a JIT pattern match *
*************************************************/
/* This function runs a JIT pattern match.
Arguments:
code points to the compiled expression
subject points to the subject string
length length of subject string (may contain binary zeros)
start_offset where to start in the subject string
options option bits
match_data points to a match_data block
mcontext points to a match context
jit_stack points to a JIT stack
Returns: > 0 => success; value is the number of ovector pairs filled
= 0 => success, but ovector is not big enough
-1 => failed to match (PCRE_ERROR_NOMATCH)
< -1 => some kind of unexpected problem
*/
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
pcre2_jit_match(const pcre2_code *code, PCRE2_SPTR subject, PCRE2_SIZE length,
PCRE2_SIZE start_offset, uint32_t options, pcre2_match_data *match_data,
pcre2_match_context *mcontext)
{
#ifndef SUPPORT_JIT
(void)code;
(void)subject;
(void)length;
(void)start_offset;
(void)options;
(void)match_data;
(void)mcontext;
return PCRE2_ERROR_JIT_BADOPTION;
#else /* SUPPORT_JIT */
pcre2_real_code *re = (pcre2_real_code *)code;
executable_functions *functions = (executable_functions *)re->executable_jit;
pcre2_jit_stack *jit_stack;
uint32_t oveccount = match_data->oveccount;
uint32_t max_oveccount;
union {
void *executable_func;
jit_function call_executable_func;
} convert_executable_func;
jit_arguments arguments;
int rc;
int index = 0;
if ((options & PCRE2_PARTIAL_HARD) != 0)
index = 2;
else if ((options & PCRE2_PARTIAL_SOFT) != 0)
index = 1;
if (functions->executable_funcs[index] == NULL)
return PCRE2_ERROR_JIT_BADOPTION;
/* Sanity checks should be handled by pcre_exec. */
arguments.str = subject + start_offset;
arguments.begin = subject;
arguments.end = subject + length;
arguments.match_data = match_data;
arguments.startchar_ptr = subject;
arguments.mark_ptr = NULL;
arguments.options = options;
if (mcontext != NULL)
{
arguments.callout = mcontext->callout;
arguments.callout_data = mcontext->callout_data;
arguments.offset_limit = mcontext->offset_limit;
arguments.limit_match = (mcontext->match_limit < re->limit_match)?
mcontext->match_limit : re->limit_match;
if (mcontext->jit_callback != NULL)
jit_stack = mcontext->jit_callback(mcontext->jit_callback_data);
else
jit_stack = (pcre2_jit_stack *)mcontext->jit_callback_data;
}
else
{
arguments.callout = NULL;
arguments.callout_data = NULL;
arguments.offset_limit = PCRE2_UNSET;
arguments.limit_match = (MATCH_LIMIT < re->limit_match)?
MATCH_LIMIT : re->limit_match;
jit_stack = NULL;
}
/* JIT only need two offsets for each ovector entry. Hence
the last 1/3 of the ovector will never be touched. */
max_oveccount = functions->top_bracket;
if (oveccount > max_oveccount)
oveccount = max_oveccount;
arguments.oveccount = oveccount << 1;
convert_executable_func.executable_func = functions->executable_funcs[index];
if (jit_stack != NULL)
{
arguments.stack = (struct sljit_stack *)(jit_stack->stack);
rc = convert_executable_func.call_executable_func(&arguments);
}
else
rc = jit_machine_stack_exec(&arguments, convert_executable_func.call_executable_func);
if (rc > (int)oveccount)
rc = 0;
match_data->code = re;
match_data->subject = subject;
match_data->rc = rc;
match_data->startchar = arguments.startchar_ptr - subject;
match_data->leftchar = 0;
match_data->rightchar = 0;
match_data->mark = arguments.mark_ptr;
match_data->matchedby = PCRE2_MATCHEDBY_JIT;
return match_data->rc;
#endif /* SUPPORT_JIT */
}
/* End of pcre2_jit_match.c */

View file

@ -0,0 +1,227 @@
/*************************************************
* Perl-Compatible Regular Expressions *
*************************************************/
/* PCRE is a library of functions to support regular expressions whose syntax
and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel
Original API code Copyright (c) 1997-2012 University of Cambridge
New API code Copyright (c) 2016 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the University of Cambridge nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
-----------------------------------------------------------------------------
*/
#ifndef INCLUDED_FROM_PCRE2_JIT_COMPILE
#error This file must be included from pcre2_jit_compile.c.
#endif
/*************************************************
* Free JIT read-only data *
*************************************************/
void
PRIV(jit_free_rodata)(void *current, void *allocator_data)
{
#ifndef SUPPORT_JIT
(void)current;
(void)allocator_data;
#else /* SUPPORT_JIT */
void *next;
SLJIT_UNUSED_ARG(allocator_data);
while (current != NULL)
{
next = *(void**)current;
SLJIT_FREE(current, allocator_data);
current = next;
}
#endif /* SUPPORT_JIT */
}
/*************************************************
* Free JIT compiled code *
*************************************************/
void
PRIV(jit_free)(void *executable_jit, pcre2_memctl *memctl)
{
#ifndef SUPPORT_JIT
(void)executable_jit;
(void)memctl;
#else /* SUPPORT_JIT */
executable_functions *functions = (executable_functions *)executable_jit;
void *allocator_data = memctl;
int i;
for (i = 0; i < JIT_NUMBER_OF_COMPILE_MODES; i++)
{
if (functions->executable_funcs[i] != NULL)
sljit_free_code(functions->executable_funcs[i]);
PRIV(jit_free_rodata)(functions->read_only_data_heads[i], allocator_data);
}
SLJIT_FREE(functions, allocator_data);
#endif /* SUPPORT_JIT */
}
/*************************************************
* Free unused JIT memory *
*************************************************/
PCRE2_EXP_DEFN void PCRE2_CALL_CONVENTION
pcre2_jit_free_unused_memory(pcre2_general_context *gcontext)
{
#ifndef SUPPORT_JIT
(void)gcontext; /* Suppress warning */
#else /* SUPPORT_JIT */
SLJIT_UNUSED_ARG(gcontext);
sljit_free_unused_memory_exec();
#endif /* SUPPORT_JIT */
}
/*************************************************
* Allocate a JIT stack *
*************************************************/
PCRE2_EXP_DEFN pcre2_jit_stack * PCRE2_CALL_CONVENTION
pcre2_jit_stack_create(size_t startsize, size_t maxsize,
pcre2_general_context *gcontext)
{
#ifndef SUPPORT_JIT
(void)gcontext;
(void)startsize;
(void)maxsize;
return NULL;
#else /* SUPPORT_JIT */
pcre2_jit_stack *jit_stack;
if (startsize < 1 || maxsize < 1)
return NULL;
if (startsize > maxsize)
startsize = maxsize;
startsize = (startsize + STACK_GROWTH_RATE - 1) & ~(STACK_GROWTH_RATE - 1);
maxsize = (maxsize + STACK_GROWTH_RATE - 1) & ~(STACK_GROWTH_RATE - 1);
jit_stack = PRIV(memctl_malloc)(sizeof(pcre2_real_jit_stack), (pcre2_memctl *)gcontext);
if (jit_stack == NULL) return NULL;
jit_stack->stack = sljit_allocate_stack(startsize, maxsize, &jit_stack->memctl);
return jit_stack;
#endif
}
/*************************************************
* Assign a JIT stack to a pattern *
*************************************************/
PCRE2_EXP_DEFN void PCRE2_CALL_CONVENTION
pcre2_jit_stack_assign(pcre2_match_context *mcontext, pcre2_jit_callback callback,
void *callback_data)
{
#ifndef SUPPORT_JIT
(void)mcontext;
(void)callback;
(void)callback_data;
#else /* SUPPORT_JIT */
if (mcontext == NULL) return;
mcontext->jit_callback = callback;
mcontext->jit_callback_data = callback_data;
#endif /* SUPPORT_JIT */
}
/*************************************************
* Free a JIT stack *
*************************************************/
PCRE2_EXP_DEFN void PCRE2_CALL_CONVENTION
pcre2_jit_stack_free(pcre2_jit_stack *jit_stack)
{
#ifndef SUPPORT_JIT
(void)jit_stack;
#else /* SUPPORT_JIT */
if (jit_stack != NULL)
{
sljit_free_stack((struct sljit_stack *)(jit_stack->stack), &jit_stack->memctl);
jit_stack->memctl.free(jit_stack, jit_stack->memctl.memory_data);
}
#endif /* SUPPORT_JIT */
}
/*************************************************
* Get target CPU type *
*************************************************/
const char*
PRIV(jit_get_target)(void)
{
#ifndef SUPPORT_JIT
return "JIT is not supported";
#else /* SUPPORT_JIT */
return sljit_get_platform_name();
#endif /* SUPPORT_JIT */
}
/*************************************************
* Get size of JIT code *
*************************************************/
size_t
PRIV(jit_get_size)(void *executable_jit)
{
#ifndef SUPPORT_JIT
(void)executable_jit;
return 0;
#else /* SUPPORT_JIT */
sljit_uw *executable_sizes = ((executable_functions *)executable_jit)->executable_sizes;
SLJIT_COMPILE_ASSERT(JIT_NUMBER_OF_COMPILE_MODES == 3, number_of_compile_modes_changed);
return executable_sizes[0] + executable_sizes[1] + executable_sizes[2];
#endif
}
/* End of pcre2_jit_misc.c */

View file

@ -6,7 +6,8 @@
and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel
Copyright (c) 1997-2012 University of Cambridge
Original API code Copyright (c) 1997-2012 University of Cambridge
New API code Copyright (c) 2016 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
@ -38,53 +39,53 @@ POSSIBILITY OF SUCH DAMAGE.
*/
/* This module contains the external function pcre_maketables(), which builds
character tables for PCRE in the current locale. The file is compiled on its
own as part of the PCRE library. However, it is also included in the
/* This module contains the external function pcre2_maketables(), which builds
character tables for PCRE2 in the current locale. The file is compiled on its
own as part of the PCRE2 library. However, it is also included in the
compilation of dftables.c, in which case the macro DFTABLES is defined. */
#ifndef DFTABLES
# ifdef HAVE_CONFIG_H
# include "config.h"
# endif
# include "pcre_internal.h"
# include "pcre2_internal.h"
#endif
/*************************************************
* Create PCRE character tables *
* Create PCRE2 character tables *
*************************************************/
/* This function builds a set of character tables for use by PCRE and returns
/* This function builds a set of character tables for use by PCRE2 and returns
a pointer to them. They are build using the ctype functions, and consequently
their contents will depend upon the current locale setting. When compiled as
part of the library, the store is obtained via PUBL(malloc)(), but when
compiled inside dftables, use malloc().
part of the library, the store is obtained via a general context malloc, if
supplied, but when DFTABLES is defined (when compiling the dftables auxiliary
program) malloc() is used, and the function has a different name so as not to
clash with the prototype in pcre2.h.
Arguments: none
Arguments: none when DFTABLES is defined
else a PCRE2 general context or NULL
Returns: pointer to the contiguous block of data
*/
#if defined COMPILE_PCRE8
const unsigned char *
pcre_maketables(void)
#elif defined COMPILE_PCRE16
const unsigned char *
pcre16_maketables(void)
#elif defined COMPILE_PCRE32
const unsigned char *
pcre32_maketables(void)
#endif
#ifdef DFTABLES /* Included in freestanding dftables.c program */
static const uint8_t *maketables(void)
{
unsigned char *yield, *p;
int i;
uint8_t *yield = (uint8_t *)malloc(tables_length);
#ifndef DFTABLES
yield = (unsigned char*)(PUBL(malloc))(tables_length);
#else
yield = (unsigned char*)malloc(tables_length);
#endif
#else /* Not DFTABLES, compiling the library */
PCRE2_EXP_DEFN const uint8_t * PCRE2_CALL_CONVENTION
pcre2_maketables(pcre2_general_context *gcontext)
{
uint8_t *yield = (uint8_t *)((gcontext != NULL)?
gcontext->memctl.malloc(tables_length, gcontext->memctl.memory_data) :
malloc(tables_length));
#endif /* DFTABLES */
int i;
uint8_t *p;
if (yield == NULL) return NULL;
p = yield;
@ -153,4 +154,4 @@ for (i = 0; i < 256; i++)
return yield;
}
/* End of pcre_maketables.c */
/* End of pcre2_maketables.c */

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,147 @@
/*************************************************
* Perl-Compatible Regular Expressions *
*************************************************/
/* PCRE is a library of functions to support regular expressions whose syntax
and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel
Original API code Copyright (c) 1997-2012 University of Cambridge
New API code Copyright (c) 2016-2017 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the University of Cambridge nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
-----------------------------------------------------------------------------
*/
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
#include "pcre2_internal.h"
/*************************************************
* Create a match data block given ovector size *
*************************************************/
/* A minimum of 1 is imposed on the number of ovector pairs. */
PCRE2_EXP_DEFN pcre2_match_data * PCRE2_CALL_CONVENTION
pcre2_match_data_create(uint32_t oveccount, pcre2_general_context *gcontext)
{
pcre2_match_data *yield;
if (oveccount < 1) oveccount = 1;
yield = PRIV(memctl_malloc)(
offsetof(pcre2_match_data, ovector) + 2*oveccount*sizeof(PCRE2_SIZE),
(pcre2_memctl *)gcontext);
if (yield == NULL) return NULL;
yield->oveccount = oveccount;
return yield;
}
/*************************************************
* Create a match data block using pattern data *
*************************************************/
/* If no context is supplied, use the memory allocator from the code. */
PCRE2_EXP_DEFN pcre2_match_data * PCRE2_CALL_CONVENTION
pcre2_match_data_create_from_pattern(const pcre2_code *code,
pcre2_general_context *gcontext)
{
if (gcontext == NULL) gcontext = (pcre2_general_context *)code;
return pcre2_match_data_create(((pcre2_real_code *)code)->top_bracket + 1,
gcontext);
}
/*************************************************
* Free a match data block *
*************************************************/
PCRE2_EXP_DEFN void PCRE2_CALL_CONVENTION
pcre2_match_data_free(pcre2_match_data *match_data)
{
if (match_data != NULL)
match_data->memctl.free(match_data, match_data->memctl.memory_data);
}
/*************************************************
* Get last mark in match *
*************************************************/
PCRE2_EXP_DEFN PCRE2_SPTR PCRE2_CALL_CONVENTION
pcre2_get_mark(pcre2_match_data *match_data)
{
return match_data->mark;
}
/*************************************************
* Get pointer to ovector *
*************************************************/
PCRE2_EXP_DEFN PCRE2_SIZE * PCRE2_CALL_CONVENTION
pcre2_get_ovector_pointer(pcre2_match_data *match_data)
{
return match_data->ovector;
}
/*************************************************
* Get number of ovector slots *
*************************************************/
PCRE2_EXP_DEFN uint32_t PCRE2_CALL_CONVENTION
pcre2_get_ovector_count(pcre2_match_data *match_data)
{
return match_data->oveccount;
}
/*************************************************
* Get starting code unit in match *
*************************************************/
PCRE2_EXP_DEFN PCRE2_SIZE PCRE2_CALL_CONVENTION
pcre2_get_startchar(pcre2_match_data *match_data)
{
return match_data->startchar;
}
/* End of pcre2_match_data.c */

View file

@ -6,7 +6,8 @@
and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel
Copyright (c) 1997-2012 University of Cambridge
Original API code Copyright (c) 1997-2012 University of Cambridge
New API code Copyright (c) 2016 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
@ -41,7 +42,7 @@ POSSIBILITY OF SUCH DAMAGE.
/* This module contains internal functions for testing newlines when more than
one kind of newline is to be recognized. When a newline is found, its length is
returned. In principle, we could implement several newline "types", each
referring to a different set of newline characters. At present, PCRE supports
referring to a different set of newline characters. At present, PCRE2 supports
only NLTYPE_FIXED, which gets handled without these functions, NLTYPE_ANYCRLF,
and NLTYPE_ANY. The full list of Unicode newline characters is taken from
http://unicode.org/unicode/reports/tr18/. */
@ -51,7 +52,7 @@ http://unicode.org/unicode/reports/tr18/. */
#include "config.h"
#endif
#include "pcre_internal.h"
#include "pcre2_internal.h"
@ -59,8 +60,10 @@ http://unicode.org/unicode/reports/tr18/. */
* Check for newline at given position *
*************************************************/
/* It is guaranteed that the initial value of ptr is less than the end of the
string that is being processed.
/* This function is called only via the IS_NEWLINE macro, which does so only
when the newline type is NLTYPE_ANY or NLTYPE_ANYCRLF. The case of a fixed
newline (NLTYPE_FIXED) is handled inline. It is guaranteed that the code unit
pointed to by ptr is less than the end of the string.
Arguments:
ptr pointer to possible newline
@ -73,28 +76,30 @@ Returns: TRUE or FALSE
*/
BOOL
PRIV(is_newline)(PCRE_PUCHAR ptr, int type, PCRE_PUCHAR endptr, int *lenptr,
BOOL utf)
PRIV(is_newline)(PCRE2_SPTR ptr, uint32_t type, PCRE2_SPTR endptr,
uint32_t *lenptr, BOOL utf)
{
pcre_uint32 c;
(void)utf;
#ifdef SUPPORT_UTF
if (utf)
{
GETCHAR(c, ptr);
}
else
#endif /* SUPPORT_UTF */
c = *ptr;
uint32_t c;
/* Note that this function is called only for ANY or ANYCRLF. */
#ifdef SUPPORT_UNICODE
if (utf) { GETCHAR(c, ptr); } else c = *ptr;
#else
(void)utf;
c = *ptr;
#endif /* SUPPORT_UNICODE */
if (type == NLTYPE_ANYCRLF) switch(c)
{
case CHAR_LF: *lenptr = 1; return TRUE;
case CHAR_CR: *lenptr = (ptr < endptr - 1 && ptr[1] == CHAR_LF)? 2 : 1;
return TRUE;
default: return FALSE;
case CHAR_LF:
*lenptr = 1;
return TRUE;
case CHAR_CR:
*lenptr = (ptr < endptr - 1 && ptr[1] == CHAR_LF)? 2 : 1;
return TRUE;
default:
return FALSE;
}
/* NLTYPE_ANY */
@ -106,25 +111,36 @@ else switch(c)
#endif
case CHAR_LF:
case CHAR_VT:
case CHAR_FF: *lenptr = 1; return TRUE;
case CHAR_FF:
*lenptr = 1;
return TRUE;
case CHAR_CR:
*lenptr = (ptr < endptr - 1 && ptr[1] == CHAR_LF)? 2 : 1;
return TRUE;
#ifndef EBCDIC
#ifdef COMPILE_PCRE8
case CHAR_NEL: *lenptr = utf? 2 : 1; return TRUE;
case 0x2028: /* LS */
case 0x2029: *lenptr = 3; return TRUE; /* PS */
#else /* COMPILE_PCRE16 || COMPILE_PCRE32 */
#if PCRE2_CODE_UNIT_WIDTH == 8
case CHAR_NEL:
case 0x2028: /* LS */
case 0x2029: *lenptr = 1; return TRUE; /* PS */
#endif /* COMPILE_PCRE8 */
#endif /* Not EBCDIC */
*lenptr = utf? 2 : 1;
return TRUE;
default: return FALSE;
case 0x2028: /* LS */
case 0x2029: /* PS */
*lenptr = 3;
return TRUE;
#else /* 16-bit or 32-bit code units */
case CHAR_NEL:
case 0x2028: /* LS */
case 0x2029: /* PS */
*lenptr = 1;
return TRUE;
#endif
#endif /* Not EBCDIC */
default:
return FALSE;
}
}
@ -134,8 +150,10 @@ else switch(c)
* Check for newline at previous position *
*************************************************/
/* It is guaranteed that the initial value of ptr is greater than the start of
the string that is being processed.
/* This function is called only via the WAS_NEWLINE macro, which does so only
when the newline type is NLTYPE_ANY or NLTYPE_ANYCRLF. The case of a fixed
newline (NLTYPE_FIXED) is handled inline. It is guaranteed that the initial
value of ptr is greater than the start of the string that is being processed.
Arguments:
ptr pointer to possible newline
@ -148,23 +166,23 @@ Returns: TRUE or FALSE
*/
BOOL
PRIV(was_newline)(PCRE_PUCHAR ptr, int type, PCRE_PUCHAR startptr, int *lenptr,
BOOL utf)
PRIV(was_newline)(PCRE2_SPTR ptr, uint32_t type, PCRE2_SPTR startptr,
uint32_t *lenptr, BOOL utf)
{
pcre_uint32 c;
(void)utf;
uint32_t c;
ptr--;
#ifdef SUPPORT_UTF
#ifdef SUPPORT_UNICODE
if (utf)
{
BACKCHAR(ptr);
GETCHAR(c, ptr);
}
else
#endif /* SUPPORT_UTF */
c = *ptr;
/* Note that this function is called only for ANY or ANYCRLF. */
else c = *ptr;
#else
(void)utf;
c = *ptr;
#endif /* SUPPORT_UNICODE */
if (type == NLTYPE_ANYCRLF) switch(c)
{
@ -172,8 +190,12 @@ if (type == NLTYPE_ANYCRLF) switch(c)
*lenptr = (ptr > startptr && ptr[-1] == CHAR_CR)? 2 : 1;
return TRUE;
case CHAR_CR: *lenptr = 1; return TRUE;
default: return FALSE;
case CHAR_CR:
*lenptr = 1;
return TRUE;
default:
return FALSE;
}
/* NLTYPE_ANY */
@ -189,22 +211,33 @@ else switch(c)
#endif
case CHAR_VT:
case CHAR_FF:
case CHAR_CR: *lenptr = 1; return TRUE;
case CHAR_CR:
*lenptr = 1;
return TRUE;
#ifndef EBCDIC
#ifdef COMPILE_PCRE8
case CHAR_NEL: *lenptr = utf? 2 : 1; return TRUE;
case 0x2028: /* LS */
case 0x2029: *lenptr = 3; return TRUE; /* PS */
#else /* COMPILE_PCRE16 || COMPILE_PCRE32 */
#if PCRE2_CODE_UNIT_WIDTH == 8
case CHAR_NEL:
case 0x2028: /* LS */
case 0x2029: *lenptr = 1; return TRUE; /* PS */
#endif /* COMPILE_PCRE8 */
#endif /* NotEBCDIC */
*lenptr = utf? 2 : 1;
return TRUE;
default: return FALSE;
case 0x2028: /* LS */
case 0x2029: /* PS */
*lenptr = 3;
return TRUE;
#else /* 16-bit or 32-bit code units */
case CHAR_NEL:
case 0x2028: /* LS */
case 0x2029: /* PS */
*lenptr = 1;
return TRUE;
#endif
#endif /* Not EBCDIC */
default:
return FALSE;
}
}
/* End of pcre_newline.c */
/* End of pcre2_newline.c */

View file

@ -6,7 +6,8 @@
and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel
Copyright (c) 1997-2012 University of Cambridge
Original API code Copyright (c) 1997-2012 University of Cambridge
New API code Copyright (c) 2016 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
@ -38,39 +39,51 @@ POSSIBILITY OF SUCH DAMAGE.
*/
/* This file contains a private PCRE function that converts an ordinal
character value into a UTF8 string. */
/* This file contains a function that converts a Unicode character code point
into a UTF string. The behaviour is different for each code unit width. */
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
#define COMPILE_PCRE8
#include "pcre2_internal.h"
/* If SUPPORT_UNICODE is not defined, this function will never be called.
Supply a dummy function because some compilers do not like empty source
modules. */
#ifndef SUPPORT_UNICODE
unsigned int
PRIV(ord2utf)(uint32_t cvalue, PCRE2_UCHAR *buffer)
{
(void)(cvalue);
(void)(buffer);
return 0;
}
#else /* SUPPORT_UNICODE */
#include "pcre_internal.h"
/*************************************************
* Convert character value to UTF-8 *
* Convert code point to UTF *
*************************************************/
/* This function takes an integer value in the range 0 - 0x10ffff
and encodes it as a UTF-8 character in 1 to 4 pcre_uchars.
/*
Arguments:
cvalue the character value
buffer pointer to buffer for result - at least 6 pcre_uchars long
buffer pointer to buffer for result
Returns: number of characters placed in the buffer
Returns: number of code units placed in the buffer
*/
unsigned
int
PRIV(ord2utf)(pcre_uint32 cvalue, pcre_uchar *buffer)
unsigned int
PRIV(ord2utf)(uint32_t cvalue, PCRE2_UCHAR *buffer)
{
#ifdef SUPPORT_UTF
register int i, j;
/* Convert to UTF-8 */
#if PCRE2_CODE_UNIT_WIDTH == 8
int i, j;
for (i = 0; i < PRIV(utf8_table1_size); i++)
if ((int)cvalue <= PRIV(utf8_table1)[i]) break;
buffer += i;
@ -82,13 +95,26 @@ for (j = i; j > 0; j--)
*buffer = PRIV(utf8_table2)[i] | cvalue;
return i + 1;
/* Convert to UTF-16 */
#elif PCRE2_CODE_UNIT_WIDTH == 16
if (cvalue <= 0xffff)
{
*buffer = (PCRE2_UCHAR)cvalue;
return 1;
}
cvalue -= 0x10000;
*buffer++ = 0xd800 | (cvalue >> 10);
*buffer = 0xdc00 | (cvalue & 0x3ff);
return 2;
/* Convert to UTF-32 */
#else
(void)(cvalue); /* Keep compiler happy; this function won't ever be */
(void)(buffer); /* called when SUPPORT_UTF is not defined. */
return 0;
*buffer = (PCRE2_UCHAR)cvalue;
return 1;
#endif
}
#endif /* SUPPORT_UNICODE */
/* End of pcre_ord2utf8.c */
/* End of pcre_ord2utf.c */

View file

@ -0,0 +1,426 @@
/*************************************************
* Perl-Compatible Regular Expressions *
*************************************************/
/* PCRE is a library of functions to support regular expressions whose syntax
and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel
Original API code Copyright (c) 1997-2012 University of Cambridge
New API code Copyright (c) 2016-2017 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the University of Cambridge nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
-----------------------------------------------------------------------------
*/
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
#include "pcre2_internal.h"
/*************************************************
* Return info about compiled pattern *
*************************************************/
/*
Arguments:
code points to compiled code
what what information is required
where where to put the information; if NULL, return length
Returns: 0 when data returned
> 0 when length requested
< 0 on error or unset value
*/
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
pcre2_pattern_info(const pcre2_code *code, uint32_t what, void *where)
{
const pcre2_real_code *re = (pcre2_real_code *)code;
if (where == NULL) /* Requests field length */
{
switch(what)
{
case PCRE2_INFO_ALLOPTIONS:
case PCRE2_INFO_ARGOPTIONS:
case PCRE2_INFO_BACKREFMAX:
case PCRE2_INFO_BSR:
case PCRE2_INFO_CAPTURECOUNT:
case PCRE2_INFO_DEPTHLIMIT:
case PCRE2_INFO_FIRSTCODETYPE:
case PCRE2_INFO_FIRSTCODEUNIT:
case PCRE2_INFO_HASBACKSLASHC:
case PCRE2_INFO_HASCRORLF:
case PCRE2_INFO_HEAPLIMIT:
case PCRE2_INFO_JCHANGED:
case PCRE2_INFO_LASTCODETYPE:
case PCRE2_INFO_LASTCODEUNIT:
case PCRE2_INFO_MATCHEMPTY:
case PCRE2_INFO_MATCHLIMIT:
case PCRE2_INFO_MAXLOOKBEHIND:
case PCRE2_INFO_MINLENGTH:
case PCRE2_INFO_NAMEENTRYSIZE:
case PCRE2_INFO_NAMECOUNT:
case PCRE2_INFO_NEWLINE:
return sizeof(uint32_t);
case PCRE2_INFO_FIRSTBITMAP:
return sizeof(const uint8_t *);
case PCRE2_INFO_JITSIZE:
case PCRE2_INFO_SIZE:
case PCRE2_INFO_FRAMESIZE:
return sizeof(size_t);
case PCRE2_INFO_NAMETABLE:
return sizeof(PCRE2_SPTR);
}
}
if (re == NULL) return PCRE2_ERROR_NULL;
/* Check that the first field in the block is the magic number. If it is not,
return with PCRE2_ERROR_BADMAGIC. */
if (re->magic_number != MAGIC_NUMBER) return PCRE2_ERROR_BADMAGIC;
/* Check that this pattern was compiled in the correct bit mode */
if ((re->flags & (PCRE2_CODE_UNIT_WIDTH/8)) == 0) return PCRE2_ERROR_BADMODE;
switch(what)
{
case PCRE2_INFO_ALLOPTIONS:
*((uint32_t *)where) = re->overall_options;
break;
case PCRE2_INFO_ARGOPTIONS:
*((uint32_t *)where) = re->compile_options;
break;
case PCRE2_INFO_BACKREFMAX:
*((uint32_t *)where) = re->top_backref;
break;
case PCRE2_INFO_BSR:
*((uint32_t *)where) = re->bsr_convention;
break;
case PCRE2_INFO_CAPTURECOUNT:
*((uint32_t *)where) = re->top_bracket;
break;
case PCRE2_INFO_DEPTHLIMIT:
*((uint32_t *)where) = re->limit_depth;
if (re->limit_depth == UINT32_MAX) return PCRE2_ERROR_UNSET;
break;
case PCRE2_INFO_FIRSTCODETYPE:
*((uint32_t *)where) = ((re->flags & PCRE2_FIRSTSET) != 0)? 1 :
((re->flags & PCRE2_STARTLINE) != 0)? 2 : 0;
break;
case PCRE2_INFO_FIRSTCODEUNIT:
*((uint32_t *)where) = ((re->flags & PCRE2_FIRSTSET) != 0)?
re->first_codeunit : 0;
break;
case PCRE2_INFO_FIRSTBITMAP:
*((const uint8_t **)where) = ((re->flags & PCRE2_FIRSTMAPSET) != 0)?
&(re->start_bitmap[0]) : NULL;
break;
case PCRE2_INFO_FRAMESIZE:
*((size_t *)where) = offsetof(heapframe, ovector) +
re->top_bracket * 2 * sizeof(PCRE2_SIZE);
break;
case PCRE2_INFO_HASBACKSLASHC:
*((uint32_t *)where) = (re->flags & PCRE2_HASBKC) != 0;
break;
case PCRE2_INFO_HASCRORLF:
*((uint32_t *)where) = (re->flags & PCRE2_HASCRORLF) != 0;
break;
case PCRE2_INFO_HEAPLIMIT:
*((uint32_t *)where) = re->limit_heap;
if (re->limit_heap == UINT32_MAX) return PCRE2_ERROR_UNSET;
break;
case PCRE2_INFO_JCHANGED:
*((uint32_t *)where) = (re->flags & PCRE2_JCHANGED) != 0;
break;
case PCRE2_INFO_JITSIZE:
#ifdef SUPPORT_JIT
*((size_t *)where) = (re->executable_jit != NULL)?
PRIV(jit_get_size)(re->executable_jit) : 0;
#else
*((size_t *)where) = 0;
#endif
break;
case PCRE2_INFO_LASTCODETYPE:
*((uint32_t *)where) = ((re->flags & PCRE2_LASTSET) != 0)? 1 : 0;
break;
case PCRE2_INFO_LASTCODEUNIT:
*((uint32_t *)where) = ((re->flags & PCRE2_LASTSET) != 0)?
re->last_codeunit : 0;
break;
case PCRE2_INFO_MATCHEMPTY:
*((uint32_t *)where) = (re->flags & PCRE2_MATCH_EMPTY) != 0;
break;
case PCRE2_INFO_MATCHLIMIT:
*((uint32_t *)where) = re->limit_match;
if (re->limit_match == UINT32_MAX) return PCRE2_ERROR_UNSET;
break;
case PCRE2_INFO_MAXLOOKBEHIND:
*((uint32_t *)where) = re->max_lookbehind;
break;
case PCRE2_INFO_MINLENGTH:
*((uint32_t *)where) = re->minlength;
break;
case PCRE2_INFO_NAMEENTRYSIZE:
*((uint32_t *)where) = re->name_entry_size;
break;
case PCRE2_INFO_NAMECOUNT:
*((uint32_t *)where) = re->name_count;
break;
case PCRE2_INFO_NAMETABLE:
*((PCRE2_SPTR *)where) = (PCRE2_SPTR)((char *)re + sizeof(pcre2_real_code));
break;
case PCRE2_INFO_NEWLINE:
*((uint32_t *)where) = re->newline_convention;
break;
case PCRE2_INFO_SIZE:
*((size_t *)where) = re->blocksize;
break;
default: return PCRE2_ERROR_BADOPTION;
}
return 0;
}
/*************************************************
* Callout enumerator *
*************************************************/
/*
Arguments:
code points to compiled code
callback function called for each callout block
callout_data user data passed to the callback
Returns: 0 when successfully completed
< 0 on local error
!= 0 for callback error
*/
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
pcre2_callout_enumerate(const pcre2_code *code,
int (*callback)(pcre2_callout_enumerate_block *, void *), void *callout_data)
{
pcre2_real_code *re = (pcre2_real_code *)code;
pcre2_callout_enumerate_block cb;
PCRE2_SPTR cc;
#ifdef SUPPORT_UNICODE
BOOL utf;
#endif
if (re == NULL) return PCRE2_ERROR_NULL;
#ifdef SUPPORT_UNICODE
utf = (re->overall_options & PCRE2_UTF) != 0;
#endif
/* Check that the first field in the block is the magic number. If it is not,
return with PCRE2_ERROR_BADMAGIC. */
if (re->magic_number != MAGIC_NUMBER) return PCRE2_ERROR_BADMAGIC;
/* Check that this pattern was compiled in the correct bit mode */
if ((re->flags & (PCRE2_CODE_UNIT_WIDTH/8)) == 0) return PCRE2_ERROR_BADMODE;
cb.version = 0;
cc = (PCRE2_SPTR)((uint8_t *)re + sizeof(pcre2_real_code))
+ re->name_count * re->name_entry_size;
while (TRUE)
{
int rc;
switch (*cc)
{
case OP_END:
return 0;
case OP_CHAR:
case OP_CHARI:
case OP_NOT:
case OP_NOTI:
case OP_STAR:
case OP_MINSTAR:
case OP_PLUS:
case OP_MINPLUS:
case OP_QUERY:
case OP_MINQUERY:
case OP_UPTO:
case OP_MINUPTO:
case OP_EXACT:
case OP_POSSTAR:
case OP_POSPLUS:
case OP_POSQUERY:
case OP_POSUPTO:
case OP_STARI:
case OP_MINSTARI:
case OP_PLUSI:
case OP_MINPLUSI:
case OP_QUERYI:
case OP_MINQUERYI:
case OP_UPTOI:
case OP_MINUPTOI:
case OP_EXACTI:
case OP_POSSTARI:
case OP_POSPLUSI:
case OP_POSQUERYI:
case OP_POSUPTOI:
case OP_NOTSTAR:
case OP_NOTMINSTAR:
case OP_NOTPLUS:
case OP_NOTMINPLUS:
case OP_NOTQUERY:
case OP_NOTMINQUERY:
case OP_NOTUPTO:
case OP_NOTMINUPTO:
case OP_NOTEXACT:
case OP_NOTPOSSTAR:
case OP_NOTPOSPLUS:
case OP_NOTPOSQUERY:
case OP_NOTPOSUPTO:
case OP_NOTSTARI:
case OP_NOTMINSTARI:
case OP_NOTPLUSI:
case OP_NOTMINPLUSI:
case OP_NOTQUERYI:
case OP_NOTMINQUERYI:
case OP_NOTUPTOI:
case OP_NOTMINUPTOI:
case OP_NOTEXACTI:
case OP_NOTPOSSTARI:
case OP_NOTPOSPLUSI:
case OP_NOTPOSQUERYI:
case OP_NOTPOSUPTOI:
cc += PRIV(OP_lengths)[*cc];
#ifdef SUPPORT_UNICODE
if (utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
#endif
break;
case OP_TYPESTAR:
case OP_TYPEMINSTAR:
case OP_TYPEPLUS:
case OP_TYPEMINPLUS:
case OP_TYPEQUERY:
case OP_TYPEMINQUERY:
case OP_TYPEUPTO:
case OP_TYPEMINUPTO:
case OP_TYPEEXACT:
case OP_TYPEPOSSTAR:
case OP_TYPEPOSPLUS:
case OP_TYPEPOSQUERY:
case OP_TYPEPOSUPTO:
cc += PRIV(OP_lengths)[*cc];
#ifdef SUPPORT_UNICODE
if (cc[-1] == OP_PROP || cc[-1] == OP_NOTPROP) cc += 2;
#endif
break;
#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8
case OP_XCLASS:
cc += GET(cc, 1);
break;
#endif
case OP_MARK:
case OP_PRUNE_ARG:
case OP_SKIP_ARG:
case OP_THEN_ARG:
cc += PRIV(OP_lengths)[*cc] + cc[1];
break;
case OP_CALLOUT:
cb.pattern_position = GET(cc, 1);
cb.next_item_length = GET(cc, 1 + LINK_SIZE);
cb.callout_number = cc[1 + 2*LINK_SIZE];
cb.callout_string_offset = 0;
cb.callout_string_length = 0;
cb.callout_string = NULL;
rc = callback(&cb, callout_data);
if (rc != 0) return rc;
cc += PRIV(OP_lengths)[*cc];
break;
case OP_CALLOUT_STR:
cb.pattern_position = GET(cc, 1);
cb.next_item_length = GET(cc, 1 + LINK_SIZE);
cb.callout_number = 0;
cb.callout_string_offset = GET(cc, 1 + 3*LINK_SIZE);
cb.callout_string_length =
GET(cc, 1 + 2*LINK_SIZE) - (1 + 4*LINK_SIZE) - 2;
cb.callout_string = cc + (1 + 4*LINK_SIZE) + 1;
rc = callback(&cb, callout_data);
if (rc != 0) return rc;
cc += GET(cc, 1 + 2*LINK_SIZE);
break;
default:
cc += PRIV(OP_lengths)[*cc];
break;
}
}
}
/* End of pcre2_pattern_info.c */

View file

@ -6,7 +6,8 @@
and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel
Copyright (c) 1997-2012 University of Cambridge
Original API code Copyright (c) 1997-2012 University of Cambridge
New API code Copyright (c) 2016-2017 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
@ -40,104 +41,102 @@ POSSIBILITY OF SUCH DAMAGE.
/* This module contains a PCRE private debugging function for printing out the
internal form of a compiled regular expression, along with some supporting
local functions. This source file is used in two places:
local functions. This source file is #included in pcre2test.c at each supported
code unit width, with PCRE2_SUFFIX set appropriately, just like the functions
that comprise the library. It can also optionally be included in
pcre2_compile.c for detailed debugging in error situations. */
(1) It is #included by pcre_compile.c when it is compiled in debugging mode
(PCRE_DEBUG defined in pcre_internal.h). It is not included in production
compiles. In this case PCRE_INCLUDED is defined.
(2) It is also compiled separately and linked with pcretest.c, which can be
asked to print out a compiled regex for debugging purposes. */
/* Tables of operator names. The same 8-bit table is used for all code unit
widths, so it must be defined only once. The list itself is defined in
pcre2_internal.h, which is #included by pcre2test before this file. */
#ifndef PCRE_INCLUDED
#ifdef HAVE_CONFIG_H
#include "config.h"
#ifndef OP_LISTS_DEFINED
static const char *OP_names[] = { OP_NAME_LIST };
#define OP_LISTS_DEFINED
#endif
/* For pcretest program. */
#define PRIV(name) name
/* The functions and tables herein must all have mode-dependent names. */
/* We have to include pcre_internal.h because we need the internal info for
displaying the results of pcre_study() and we also need to know about the
internal macros, structures, and other internal data values; pcretest has
"inside information" compared to a program that strictly follows the PCRE API.
#define OP_lengths PCRE2_SUFFIX(OP_lengths_)
#define get_ucpname PCRE2_SUFFIX(get_ucpname_)
#define pcre2_printint PCRE2_SUFFIX(pcre2_printint_)
#define print_char PCRE2_SUFFIX(print_char_)
#define print_custring PCRE2_SUFFIX(print_custring_)
#define print_custring_bylen PCRE2_SUFFIX(print_custring_bylen_)
#define print_prop PCRE2_SUFFIX(print_prop_)
Although pcre_internal.h does itself include pcre.h, we explicitly include it
here before pcre_internal.h so that the PCRE_EXP_xxx macros get set
appropriately for an application, not for building PCRE. */
/* Table of sizes for the fixed-length opcodes. It's defined in a macro so that
the definition is next to the definition of the opcodes in pcre2_internal.h.
The contents of the table are, however, mode-dependent. */
#include "pcre.h"
#include "pcre_internal.h"
/* These are the funtions that are contained within. It doesn't seem worth
having a separate .h file just for this. */
#endif /* PCRE_INCLUDED */
#ifdef PCRE_INCLUDED
static /* Keep the following function as private. */
#endif
#if defined COMPILE_PCRE8
void pcre_printint(pcre *external_re, FILE *f, BOOL print_lengths);
#elif defined COMPILE_PCRE16
void pcre16_printint(pcre *external_re, FILE *f, BOOL print_lengths);
#elif defined COMPILE_PCRE32
void pcre32_printint(pcre *external_re, FILE *f, BOOL print_lengths);
#endif
/* Macro that decides whether a character should be output as a literal or in
hexadecimal. We don't use isprint() because that can vary from system to system
(even without the use of locales) and we want the output always to be the same,
for testing purposes. */
#ifdef EBCDIC
#define PRINTABLE(c) ((c) >= 64 && (c) < 255)
#else
#define PRINTABLE(c) ((c) >= 32 && (c) < 127)
#endif
/* The table of operator names. */
static const char *priv_OP_names[] = { OP_NAME_LIST };
/* This table of operator lengths is not actually used by the working code,
but its size is needed for a check that ensures it is the correct size for the
number of opcodes (thus catching update omissions). */
static const pcre_uint8 priv_OP_lengths[] = { OP_LENGTHS };
static const uint8_t OP_lengths[] = { OP_LENGTHS };
/*************************************************
* Print single- or multi-byte character *
* Print one character from a string *
*************************************************/
/* In UTF mode the character may occupy more than one code unit.
Arguments:
f file to write to
ptr pointer to first code unit of the character
utf TRUE if string is UTF (will be FALSE if UTF is not supported)
Returns: number of additional code units used
*/
static unsigned int
print_char(FILE *f, pcre_uchar *ptr, BOOL utf)
print_char(FILE *f, PCRE2_SPTR ptr, BOOL utf)
{
pcre_uint32 c = *ptr;
uint32_t c = *ptr;
BOOL one_code_unit = !utf;
#ifndef SUPPORT_UTF
(void)utf; /* Avoid compiler warning */
if (PRINTABLE(c)) fprintf(f, "%c", (char)c);
else if (c <= 0x80) fprintf(f, "\\x%02x", c);
else fprintf(f, "\\x{%x}", c);
return 0;
/* If UTF is supported and requested, check for a valid single code unit. */
#ifdef SUPPORT_UNICODE
if (utf)
{
#if PCRE2_CODE_UNIT_WIDTH == 8
one_code_unit = c < 0x80;
#elif PCRE2_CODE_UNIT_WIDTH == 16
one_code_unit = (c & 0xfc00) != 0xd800;
#else
one_code_unit = (c & 0xfffff800u) != 0xd800u;
#endif /* CODE_UNIT_WIDTH */
}
#endif /* SUPPORT_UNICODE */
#if defined COMPILE_PCRE8
/* Handle a valid one-code-unit character at any width. */
if (!utf || (c & 0xc0) != 0xc0)
if (one_code_unit)
{
if (PRINTABLE(c)) fprintf(f, "%c", (char)c);
else if (c < 0x80) fprintf(f, "\\x%02x", c);
else fprintf(f, "\\x{%02x}", c);
return 0;
}
/* Code for invalid UTF code units and multi-unit UTF characters is different
for each width. If UTF is not supported, control should never get here, but we
need a return statement to keep the compiler happy. */
#ifndef SUPPORT_UNICODE
return 0;
#else
/* Malformed UTF-8 should occur only if the sanity check has been turned off.
Rather than swallow random bytes, just stop if we hit a bad one. Print it with
\X instead of \x as an indication. */
#if PCRE2_CODE_UNIT_WIDTH == 8
if ((c & 0xc0) != 0xc0)
{
fprintf(f, "\\X{%x}", c); /* Invalid starting byte */
return 0;
}
else
{
int i;
@ -146,198 +145,181 @@ else
c = (c & PRIV(utf8_table3)[a]) << s;
for (i = 1; i <= a; i++)
{
/* This is a check for malformed UTF-8; it should only occur if the sanity
check has been turned off. Rather than swallow random bytes, just stop if
we hit a bad one. Print it with \X instead of \x as an indication. */
if ((ptr[i] & 0xc0) != 0x80)
{
fprintf(f, "\\X{%x}", c);
fprintf(f, "\\X{%x}", c); /* Invalid secondary byte */
return i - 1;
}
/* The byte is OK */
s -= 6;
c |= (ptr[i] & 0x3f) << s;
}
fprintf(f, "\\x{%x}", c);
return a;
}
}
#endif /* PCRE2_CODE_UNIT_WIDTH == 8 */
#elif defined COMPILE_PCRE16
/* UTF-16: rather than swallow a low surrogate, just stop if we hit a bad one.
Print it with \X instead of \x as an indication. */
if (!utf || (c & 0xfc00) != 0xd800)
#if PCRE2_CODE_UNIT_WIDTH == 16
if ((ptr[1] & 0xfc00) != 0xdc00)
{
if (PRINTABLE(c)) fprintf(f, "%c", (char)c);
else if (c <= 0x80) fprintf(f, "\\x%02x", c);
else fprintf(f, "\\x{%02x}", c);
return 0;
}
else
{
/* This is a check for malformed UTF-16; it should only occur if the sanity
check has been turned off. Rather than swallow a low surrogate, just stop if
we hit a bad one. Print it with \X instead of \x as an indication. */
if ((ptr[1] & 0xfc00) != 0xdc00)
{
fprintf(f, "\\X{%x}", c);
return 0;
}
c = (((c & 0x3ff) << 10) | (ptr[1] & 0x3ff)) + 0x10000;
fprintf(f, "\\x{%x}", c);
return 1;
}
#elif defined COMPILE_PCRE32
if (!utf || (c & 0xfffff800u) != 0xd800u)
{
if (PRINTABLE(c)) fprintf(f, "%c", (char)c);
else if (c <= 0x80) fprintf(f, "\\x%02x", c);
else fprintf(f, "\\x{%x}", c);
return 0;
}
else
{
/* This is a check for malformed UTF-32; it should only occur if the sanity
check has been turned off. Rather than swallow a surrogate, just stop if
we hit one. Print it with \X instead of \x as an indication. */
fprintf(f, "\\X{%x}", c);
return 0;
}
c = (((c & 0x3ff) << 10) | (ptr[1] & 0x3ff)) + 0x10000;
fprintf(f, "\\x{%x}", c);
return 1;
#endif /* PCRE2_CODE_UNIT_WIDTH == 16 */
#endif /* COMPILE_PCRE[8|16|32] */
/* For UTF-32 we get here only for a malformed code unit, which should only
occur if the sanity check has been turned off. Print it with \X instead of \x
as an indication. */
#endif /* SUPPORT_UTF */
#if PCRE2_CODE_UNIT_WIDTH == 32
fprintf(f, "\\X{%x}", c);
return 0;
#endif /* PCRE2_CODE_UNIT_WIDTH == 32 */
#endif /* SUPPORT_UNICODE */
}
/*************************************************
* Print uchar string (regardless of utf) *
* Print string as a list of code units *
*************************************************/
/* These take no account of UTF as they always print each individual code unit.
The string is zero-terminated for print_custring(); the length is given for
print_custring_bylen().
Arguments:
f file to write to
ptr point to the string
len length for print_custring_bylen()
Returns: nothing
*/
static void
print_puchar(FILE *f, PCRE_PUCHAR ptr)
print_custring(FILE *f, PCRE2_SPTR ptr)
{
while (*ptr != '\0')
{
register pcre_uint32 c = *ptr++;
uint32_t c = *ptr++;
if (PRINTABLE(c)) fprintf(f, "%c", c); else fprintf(f, "\\x{%x}", c);
}
}
static void
print_custring_bylen(FILE *f, PCRE2_SPTR ptr, PCRE2_UCHAR len)
{
for (; len > 0; len--)
{
uint32_t c = *ptr++;
if (PRINTABLE(c)) fprintf(f, "%c", c); else fprintf(f, "\\x{%x}", c);
}
}
/*************************************************
* Find Unicode property name *
*************************************************/
/* When there is no UTF/UCP support, the table of names does not exist. This
function should not be called in such configurations, because a pattern that
tries to use Unicode properties won't compile. Rather than put lots of #ifdefs
into the main code, however, we just put one into this function. */
static const char *
get_ucpname(unsigned int ptype, unsigned int pvalue)
{
#ifdef SUPPORT_UCP
#ifdef SUPPORT_UNICODE
int i;
for (i = PRIV(utt_size) - 1; i >= 0; i--)
{
if (ptype == PRIV(utt)[i].type && pvalue == PRIV(utt)[i].value) break;
}
return (i >= 0)? PRIV(utt_names) + PRIV(utt)[i].name_offset : "??";
#else
/* It gets harder and harder to shut off unwanted compiler warnings. */
ptype = ptype * pvalue;
return (ptype == pvalue)? "??" : "??";
#endif
#else /* No UTF support */
(void)ptype;
(void)pvalue;
return "??";
#endif /* SUPPORT_UNICODE */
}
/*************************************************
* Print Unicode property value *
*************************************************/
/* "Normal" properties can be printed from tables. The PT_CLIST property is a
pseudo-property that contains a pointer to a list of case-equivalent
characters. This is used only when UCP support is available and UTF mode is
selected. It should never occur otherwise, but just in case it does, have
something ready to print. */
characters.
Arguments:
f file to write to
code pointer in the compiled code
before text to print before
after text to print after
Returns: nothing
*/
static void
print_prop(FILE *f, pcre_uchar *code, const char *before, const char *after)
print_prop(FILE *f, PCRE2_SPTR code, const char *before, const char *after)
{
if (code[1] != PT_CLIST)
{
fprintf(f, "%s%s %s%s", before, priv_OP_names[*code], get_ucpname(code[1],
fprintf(f, "%s%s %s%s", before, OP_names[*code], get_ucpname(code[1],
code[2]), after);
}
else
{
const char *not = (*code == OP_PROP)? "" : "not ";
#ifndef SUPPORT_UCP
fprintf(f, "%s%sclist %d%s", before, not, code[2], after);
#else
const pcre_uint32 *p = PRIV(ucd_caseless_sets) + code[2];
const uint32_t *p = PRIV(ucd_caseless_sets) + code[2];
fprintf (f, "%s%sclist", before, not);
while (*p < NOTACHAR) fprintf(f, " %04x", *p++);
fprintf(f, "%s", after);
#endif
}
}
/*************************************************
* Print compiled regex *
* Print compiled pattern *
*************************************************/
/* Make this function work for a regex with integers either byte order.
However, we assume that what we are passed is a compiled regex. The
print_lengths flag controls whether offsets and lengths of items are printed.
They can be turned off from pcretest so that automatic tests on bytecode can be
written that do not depend on the value of LINK_SIZE. */
/* The print_lengths flag controls whether offsets and lengths of items are
printed. Lenths can be turned off from pcre2test so that automatic tests on
bytecode can be written that do not depend on the value of LINK_SIZE.
#ifdef PCRE_INCLUDED
static /* Keep the following function as private. */
#endif
#if defined COMPILE_PCRE8
void
pcre_printint(pcre *external_re, FILE *f, BOOL print_lengths)
#elif defined COMPILE_PCRE16
void
pcre16_printint(pcre *external_re, FILE *f, BOOL print_lengths)
#elif defined COMPILE_PCRE32
void
pcre32_printint(pcre *external_re, FILE *f, BOOL print_lengths)
#endif
Arguments:
re a compiled pattern
f the file to write to
print_lengths show various lengths
Returns: nothing
*/
static void
pcre2_printint(pcre2_code *re, FILE *f, BOOL print_lengths)
{
REAL_PCRE *re = (REAL_PCRE *)external_re;
pcre_uchar *codestart, *code;
BOOL utf;
PCRE2_SPTR codestart, nametable, code;
uint32_t nesize = re->name_entry_size;
BOOL utf = (re->overall_options & PCRE2_UTF) != 0;
unsigned int options = re->options;
int offset = re->name_table_offset;
int count = re->name_count;
int size = re->name_entry_size;
if (re->magic_number != MAGIC_NUMBER)
{
offset = ((offset << 8) & 0xff00) | ((offset >> 8) & 0xff);
count = ((count << 8) & 0xff00) | ((count >> 8) & 0xff);
size = ((size << 8) & 0xff00) | ((size >> 8) & 0xff);
options = ((options << 24) & 0xff000000) |
((options << 8) & 0x00ff0000) |
((options >> 8) & 0x0000ff00) |
((options >> 24) & 0x000000ff);
}
code = codestart = (pcre_uchar *)re + offset + count * size;
/* PCRE_UTF(16|32) have the same value as PCRE_UTF8. */
utf = (options & PCRE_UTF8) != 0;
nametable = (PCRE2_SPTR)((uint8_t *)re + sizeof(pcre2_real_code));
code = codestart = nametable + re->name_count * re->name_entry_size;
for(;;)
{
pcre_uchar *ccode;
PCRE2_SPTR ccode;
uint32_t c;
int i;
const char *flag = " ";
pcre_uint32 c;
unsigned int extra = 0;
if (print_lengths)
@ -356,13 +338,13 @@ for(;;)
case OP_TABLE_LENGTH:
case OP_TABLE_LENGTH +
((sizeof(priv_OP_names)/sizeof(const char *) == OP_TABLE_LENGTH) &&
(sizeof(priv_OP_lengths) == OP_TABLE_LENGTH)):
break;
((sizeof(OP_names)/sizeof(const char *) == OP_TABLE_LENGTH) &&
(sizeof(OP_lengths) == OP_TABLE_LENGTH)):
return;
/* ========================================================================== */
case OP_END:
fprintf(f, " %s\n", priv_OP_names[*code]);
fprintf(f, " %s\n", OP_names[*code]);
fprintf(f, "------------------------------------------------------------------\n");
return;
@ -394,7 +376,7 @@ for(;;)
case OP_SCBRAPOS:
if (print_lengths) fprintf(f, "%3d ", GET(code, 1));
else fprintf(f, " ");
fprintf(f, "%s %d", priv_OP_names[*code], GET2(code, 1+LINK_SIZE));
fprintf(f, "%s %d", OP_names[*code], GET2(code, 1+LINK_SIZE));
break;
case OP_BRA:
@ -411,29 +393,27 @@ for(;;)
case OP_ASSERTBACK:
case OP_ASSERTBACK_NOT:
case OP_ONCE:
case OP_ONCE_NC:
case OP_COND:
case OP_SCOND:
case OP_REVERSE:
if (print_lengths) fprintf(f, "%3d ", GET(code, 1));
else fprintf(f, " ");
fprintf(f, "%s", priv_OP_names[*code]);
fprintf(f, "%s", OP_names[*code]);
break;
case OP_CLOSE:
fprintf(f, " %s %d", priv_OP_names[*code], GET2(code, 1));
fprintf(f, " %s %d", OP_names[*code], GET2(code, 1));
break;
case OP_CREF:
fprintf(f, "%3d %s", GET2(code,1), priv_OP_names[*code]);
fprintf(f, "%3d %s", GET2(code,1), OP_names[*code]);
break;
case OP_DNCREF:
{
pcre_uchar *entry = (pcre_uchar *)re + offset + (GET2(code, 1) * size) +
IMM2_SIZE;
PCRE2_SPTR entry = nametable + (GET2(code, 1) * nesize) + IMM2_SIZE;
fprintf(f, " %s Cond ref <", flag);
print_puchar(f, entry);
print_custring(f, entry);
fprintf(f, ">%d", GET2(code, 1 + IMM2_SIZE));
}
break;
@ -448,16 +428,19 @@ for(;;)
case OP_DNRREF:
{
pcre_uchar *entry = (pcre_uchar *)re + offset + (GET2(code, 1) * size) +
IMM2_SIZE;
PCRE2_SPTR entry = nametable + (GET2(code, 1) * nesize) + IMM2_SIZE;
fprintf(f, " %s Cond recurse <", flag);
print_puchar(f, entry);
print_custring(f, entry);
fprintf(f, ">%d", GET2(code, 1 + IMM2_SIZE));
}
break;
case OP_DEF:
fprintf(f, " Cond def");
case OP_FALSE:
fprintf(f, " Cond false");
break;
case OP_TRUE:
fprintf(f, " Cond true");
break;
case OP_STARI:
@ -490,6 +473,7 @@ for(;;)
case OP_TYPEMINQUERY:
case OP_TYPEPOSQUERY:
fprintf(f, " %s ", flag);
if (*code >= OP_TYPESTAR)
{
if (code[1] == OP_PROP || code[1] == OP_NOTPROP)
@ -497,10 +481,10 @@ for(;;)
print_prop(f, code + 1, "", " ");
extra = 2;
}
else fprintf(f, "%s", priv_OP_names[code[1]]);
else fprintf(f, "%s", OP_names[code[1]]);
}
else extra = print_char(f, code+1, utf);
fprintf(f, "%s", priv_OP_names[*code]);
fprintf(f, "%s", OP_names[*code]);
break;
case OP_EXACTI:
@ -531,7 +515,7 @@ for(;;)
print_prop(f, code + IMM2_SIZE + 1, " ", " ");
extra = 2;
}
else fprintf(f, " %s", priv_OP_names[code[1 + IMM2_SIZE]]);
else fprintf(f, " %s", OP_names[code[1 + IMM2_SIZE]]);
fprintf(f, "{");
if (*code != OP_TYPEEXACT) fprintf(f, "0,");
fprintf(f, "%d}", GET2(code,1));
@ -571,7 +555,7 @@ for(;;)
case OP_NOTPOSQUERY:
fprintf(f, " %s [^", flag);
extra = print_char(f, code + 1, utf);
fprintf(f, "]%s", priv_OP_names[*code]);
fprintf(f, "]%s", OP_names[*code]);
break;
case OP_NOTEXACTI:
@ -598,7 +582,7 @@ for(;;)
case OP_RECURSE:
if (print_lengths) fprintf(f, "%3d ", GET(code, 1));
else fprintf(f, " ");
fprintf(f, "%s", priv_OP_names[*code]);
fprintf(f, "%s", OP_names[*code]);
break;
case OP_REFI:
@ -606,7 +590,7 @@ for(;;)
/* Fall through */
case OP_REF:
fprintf(f, " %s \\%d", flag, GET2(code,1));
ccode = code + priv_OP_lengths[*code];
ccode = code + OP_lengths[*code];
goto CLASS_REF_REPEAT;
case OP_DNREFI:
@ -614,18 +598,32 @@ for(;;)
/* Fall through */
case OP_DNREF:
{
pcre_uchar *entry = (pcre_uchar *)re + offset + (GET2(code, 1) * size) +
IMM2_SIZE;
PCRE2_SPTR entry = nametable + (GET2(code, 1) * nesize) + IMM2_SIZE;
fprintf(f, " %s \\k<", flag);
print_puchar(f, entry);
print_custring(f, entry);
fprintf(f, ">%d", GET2(code, 1 + IMM2_SIZE));
}
ccode = code + priv_OP_lengths[*code];
ccode = code + OP_lengths[*code];
goto CLASS_REF_REPEAT;
case OP_CALLOUT:
fprintf(f, " %s %d %d %d", priv_OP_names[*code], code[1], GET(code,2),
GET(code, 2 + LINK_SIZE));
fprintf(f, " %s %d %d %d", OP_names[*code], code[1 + 2*LINK_SIZE],
GET(code, 1), GET(code, 1 + LINK_SIZE));
break;
case OP_CALLOUT_STR:
c = code[1 + 4*LINK_SIZE];
fprintf(f, " %s %c", OP_names[*code], c);
extra = GET(code, 1 + 2*LINK_SIZE);
print_custring_bylen(f, code + 2 + 4*LINK_SIZE, extra - 3 - 4*LINK_SIZE);
for (i = 0; PRIV(callout_start_delims)[i] != 0; i++)
if (c == PRIV(callout_start_delims)[i])
{
c = PRIV(callout_end_delims)[i];
break;
}
fprintf(f, "%c %d %d %d", c, GET(code, 1 + 3*LINK_SIZE), GET(code, 1),
GET(code, 1 + LINK_SIZE));
break;
case OP_PROP:
@ -641,12 +639,11 @@ for(;;)
case OP_NCLASS:
case OP_XCLASS:
{
int i;
unsigned int min, max;
BOOL printmap;
BOOL invertmap = FALSE;
pcre_uint8 *map;
pcre_uint8 inverted_map[32];
uint8_t *map;
uint8_t inverted_map[32];
fprintf(f, " [");
@ -672,7 +669,7 @@ for(;;)
if (printmap)
{
map = (pcre_uint8 *)ccode;
map = (uint8_t *)ccode;
if (invertmap)
{
for (i = 0; i < 32; i++) inverted_map[i] = ~map[i];
@ -699,14 +696,14 @@ for(;;)
i = j;
}
}
ccode += 32 / sizeof(pcre_uchar);
ccode += 32 / sizeof(PCRE2_UCHAR);
}
/* For an XCLASS there is always some additional data */
if (*code == OP_XCLASS)
{
pcre_uchar ch;
PCRE2_UCHAR ch;
while ((ch = *ccode++) != XCL_END)
{
BOOL not = FALSE;
@ -776,8 +773,8 @@ for(;;)
case OP_CRPOSSTAR:
case OP_CRPOSPLUS:
case OP_CRPOSQUERY:
fprintf(f, "%s", priv_OP_names[*ccode]);
extra += priv_OP_lengths[*ccode];
fprintf(f, "%s", OP_names[*ccode]);
extra += OP_lengths[*ccode];
break;
case OP_CRRANGE:
@ -789,7 +786,7 @@ for(;;)
else fprintf(f, "{%u,%u}", min, max);
if (*ccode == OP_CRMINRANGE) fprintf(f, "?");
else if (*ccode == OP_CRPOSRANGE) fprintf(f, "+");
extra += priv_OP_lengths[*ccode];
extra += OP_lengths[*ccode];
break;
/* Do nothing if it's not a repeat; this code stops picky compilers
@ -805,13 +802,13 @@ for(;;)
case OP_PRUNE_ARG:
case OP_SKIP_ARG:
case OP_THEN_ARG:
fprintf(f, " %s ", priv_OP_names[*code]);
print_puchar(f, code + 2);
fprintf(f, " %s ", OP_names[*code]);
print_custring_bylen(f, code + 2, code[1]);
extra += code[1];
break;
case OP_THEN:
fprintf(f, " %s", priv_OP_names[*code]);
fprintf(f, " %s", OP_names[*code]);
break;
case OP_CIRCM:
@ -822,13 +819,13 @@ for(;;)
/* Anything else is just an item with no data, but possibly a flag. */
default:
fprintf(f, " %s %s", flag, priv_OP_names[*code]);
fprintf(f, " %s %s", flag, OP_names[*code]);
break;
}
code += priv_OP_lengths[*code] + extra;
code += OP_lengths[*code] + extra;
fprintf(f, "\n");
}
}
/* End of pcre_printint.src */
/* End of pcre2_printint.c */

View file

@ -0,0 +1,268 @@
/*************************************************
* Perl-Compatible Regular Expressions *
*************************************************/
/* PCRE is a library of functions to support regular expressions whose syntax
and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel
Original API code Copyright (c) 1997-2012 University of Cambridge
New API code Copyright (c) 2016-2017 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the University of Cambridge nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
-----------------------------------------------------------------------------
*/
/* This module contains functions for serializing and deserializing
a sequence of compiled codes. */
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
#include "pcre2_internal.h"
/* Magic number to provide a small check against being handed junk. */
#define SERIALIZED_DATA_MAGIC 0x50523253u
/* Deserialization is limited to the current PCRE version and
character width. */
#define SERIALIZED_DATA_VERSION \
((PCRE2_MAJOR) | ((PCRE2_MINOR) << 16))
#define SERIALIZED_DATA_CONFIG \
(sizeof(PCRE2_UCHAR) | ((sizeof(void*)) << 8) | ((sizeof(PCRE2_SIZE)) << 16))
/*************************************************
* Serialize compiled patterns *
*************************************************/
PCRE2_EXP_DEFN int32_t PCRE2_CALL_CONVENTION
pcre2_serialize_encode(const pcre2_code **codes, int32_t number_of_codes,
uint8_t **serialized_bytes, PCRE2_SIZE *serialized_size,
pcre2_general_context *gcontext)
{
uint8_t *bytes;
uint8_t *dst_bytes;
int32_t i;
PCRE2_SIZE total_size;
const pcre2_real_code *re;
const uint8_t *tables;
pcre2_serialized_data *data;
const pcre2_memctl *memctl = (gcontext != NULL) ?
&gcontext->memctl : &PRIV(default_compile_context).memctl;
if (codes == NULL || serialized_bytes == NULL || serialized_size == NULL)
return PCRE2_ERROR_NULL;
if (number_of_codes <= 0) return PCRE2_ERROR_BADDATA;
/* Compute total size. */
total_size = sizeof(pcre2_serialized_data) + tables_length;
tables = NULL;
for (i = 0; i < number_of_codes; i++)
{
if (codes[i] == NULL) return PCRE2_ERROR_NULL;
re = (const pcre2_real_code *)(codes[i]);
if (re->magic_number != MAGIC_NUMBER) return PCRE2_ERROR_BADMAGIC;
if (tables == NULL)
tables = re->tables;
else if (tables != re->tables)
return PCRE2_ERROR_MIXEDTABLES;
total_size += re->blocksize;
}
/* Initialize the byte stream. */
bytes = memctl->malloc(total_size + sizeof(pcre2_memctl), memctl->memory_data);
if (bytes == NULL) return PCRE2_ERROR_NOMEMORY;
/* The controller is stored as a hidden parameter. */
memcpy(bytes, memctl, sizeof(pcre2_memctl));
bytes += sizeof(pcre2_memctl);
data = (pcre2_serialized_data *)bytes;
data->magic = SERIALIZED_DATA_MAGIC;
data->version = SERIALIZED_DATA_VERSION;
data->config = SERIALIZED_DATA_CONFIG;
data->number_of_codes = number_of_codes;
/* Copy all compiled code data. */
dst_bytes = bytes + sizeof(pcre2_serialized_data);
memcpy(dst_bytes, tables, tables_length);
dst_bytes += tables_length;
for (i = 0; i < number_of_codes; i++)
{
re = (const pcre2_real_code *)(codes[i]);
memcpy(dst_bytes, (char *)re, re->blocksize);
dst_bytes += re->blocksize;
}
*serialized_bytes = bytes;
*serialized_size = total_size;
return number_of_codes;
}
/*************************************************
* Deserialize compiled patterns *
*************************************************/
PCRE2_EXP_DEFN int32_t PCRE2_CALL_CONVENTION
pcre2_serialize_decode(pcre2_code **codes, int32_t number_of_codes,
const uint8_t *bytes, pcre2_general_context *gcontext)
{
const pcre2_serialized_data *data = (const pcre2_serialized_data *)bytes;
const pcre2_memctl *memctl = (gcontext != NULL) ?
&gcontext->memctl : &PRIV(default_compile_context).memctl;
const uint8_t *src_bytes;
pcre2_real_code *dst_re;
uint8_t *tables;
int32_t i, j;
/* Sanity checks. */
if (data == NULL || codes == NULL) return PCRE2_ERROR_NULL;
if (number_of_codes <= 0) return PCRE2_ERROR_BADDATA;
if (data->number_of_codes <= 0) return PCRE2_ERROR_BADSERIALIZEDDATA;
if (data->magic != SERIALIZED_DATA_MAGIC) return PCRE2_ERROR_BADMAGIC;
if (data->version != SERIALIZED_DATA_VERSION) return PCRE2_ERROR_BADMODE;
if (data->config != SERIALIZED_DATA_CONFIG) return PCRE2_ERROR_BADMODE;
if (number_of_codes > data->number_of_codes)
number_of_codes = data->number_of_codes;
src_bytes = bytes + sizeof(pcre2_serialized_data);
/* Decode tables. The reference count for the tables is stored immediately
following them. */
tables = memctl->malloc(tables_length + sizeof(PCRE2_SIZE), memctl->memory_data);
if (tables == NULL) return PCRE2_ERROR_NOMEMORY;
memcpy(tables, src_bytes, tables_length);
*(PCRE2_SIZE *)(tables + tables_length) = number_of_codes;
src_bytes += tables_length;
/* Decode the byte stream. We must not try to read the size from the compiled
code block in the stream, because it might be unaligned, which causes errors on
hardware such as Sparc-64 that doesn't like unaligned memory accesses. The type
of the blocksize field is given its own name to ensure that it is the same here
as in the block. */
for (i = 0; i < number_of_codes; i++)
{
CODE_BLOCKSIZE_TYPE blocksize;
memcpy(&blocksize, src_bytes + offsetof(pcre2_real_code, blocksize),
sizeof(CODE_BLOCKSIZE_TYPE));
if (blocksize <= sizeof(pcre2_real_code))
return PCRE2_ERROR_BADSERIALIZEDDATA;
/* The allocator provided by gcontext replaces the original one. */
dst_re = (pcre2_real_code *)PRIV(memctl_malloc)(blocksize,
(pcre2_memctl *)gcontext);
if (dst_re == NULL)
{
memctl->free(tables, memctl->memory_data);
for (j = 0; j < i; j++)
{
memctl->free(codes[j], memctl->memory_data);
codes[j] = NULL;
}
return PCRE2_ERROR_NOMEMORY;
}
/* The new allocator must be preserved. */
memcpy(((uint8_t *)dst_re) + sizeof(pcre2_memctl),
src_bytes + sizeof(pcre2_memctl), blocksize - sizeof(pcre2_memctl));
if (dst_re->magic_number != MAGIC_NUMBER ||
dst_re->name_entry_size > MAX_NAME_SIZE + IMM2_SIZE + 1 ||
dst_re->name_count > MAX_NAME_COUNT)
{
memctl->free(dst_re, memctl->memory_data);
return PCRE2_ERROR_BADSERIALIZEDDATA;
}
/* At the moment only one table is supported. */
dst_re->tables = tables;
dst_re->executable_jit = NULL;
dst_re->flags |= PCRE2_DEREF_TABLES;
codes[i] = dst_re;
src_bytes += blocksize;
}
return number_of_codes;
}
/*************************************************
* Get the number of serialized patterns *
*************************************************/
PCRE2_EXP_DEFN int32_t PCRE2_CALL_CONVENTION
pcre2_serialize_get_number_of_codes(const uint8_t *bytes)
{
const pcre2_serialized_data *data = (const pcre2_serialized_data *)bytes;
if (data == NULL) return PCRE2_ERROR_NULL;
if (data->magic != SERIALIZED_DATA_MAGIC) return PCRE2_ERROR_BADMAGIC;
if (data->version != SERIALIZED_DATA_VERSION) return PCRE2_ERROR_BADMODE;
if (data->config != SERIALIZED_DATA_CONFIG) return PCRE2_ERROR_BADMODE;
return data->number_of_codes;
}
/*************************************************
* Free the allocated stream *
*************************************************/
PCRE2_EXP_DEFN void PCRE2_CALL_CONVENTION
pcre2_serialize_free(uint8_t *bytes)
{
if (bytes != NULL)
{
pcre2_memctl *memctl = (pcre2_memctl *)(bytes - sizeof(pcre2_memctl));
memctl->free(memctl, memctl->memory_data);
}
}
/* End of pcre2_serialize.c */

View file

@ -0,0 +1,201 @@
/*************************************************
* Perl-Compatible Regular Expressions *
*************************************************/
/* PCRE is a library of functions to support regular expressions whose syntax
and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel
Original API code Copyright (c) 1997-2012 University of Cambridge
New API code Copyright (c) 2016 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the University of Cambridge nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
-----------------------------------------------------------------------------
*/
/* This module contains internal functions for comparing and finding the length
of strings. These are used instead of strcmp() etc because the standard
functions work only on 8-bit data. */
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
#include "pcre2_internal.h"
/*************************************************
* Compare two zero-terminated PCRE2 strings *
*************************************************/
/*
Arguments:
str1 first string
str2 second string
Returns: 0, 1, or -1
*/
int
PRIV(strcmp)(PCRE2_SPTR str1, PCRE2_SPTR str2)
{
PCRE2_UCHAR c1, c2;
while (*str1 != '\0' || *str2 != '\0')
{
c1 = *str1++;
c2 = *str2++;
if (c1 != c2) return ((c1 > c2) << 1) - 1;
}
return 0;
}
/*************************************************
* Compare zero-terminated PCRE2 & 8-bit strings *
*************************************************/
/* As the 8-bit string is almost always a literal, its type is specified as
const char *.
Arguments:
str1 first string
str2 second string
Returns: 0, 1, or -1
*/
int
PRIV(strcmp_c8)(PCRE2_SPTR str1, const char *str2)
{
PCRE2_UCHAR c1, c2;
while (*str1 != '\0' || *str2 != '\0')
{
c1 = *str1++;
c2 = *str2++;
if (c1 != c2) return ((c1 > c2) << 1) - 1;
}
return 0;
}
/*************************************************
* Compare two PCRE2 strings, given a length *
*************************************************/
/*
Arguments:
str1 first string
str2 second string
len the length
Returns: 0, 1, or -1
*/
int
PRIV(strncmp)(PCRE2_SPTR str1, PCRE2_SPTR str2, size_t len)
{
PCRE2_UCHAR c1, c2;
for (; len > 0; len--)
{
c1 = *str1++;
c2 = *str2++;
if (c1 != c2) return ((c1 > c2) << 1) - 1;
}
return 0;
}
/*************************************************
* Compare PCRE2 string to 8-bit string by length *
*************************************************/
/* As the 8-bit string is almost always a literal, its type is specified as
const char *.
Arguments:
str1 first string
str2 second string
len the length
Returns: 0, 1, or -1
*/
int
PRIV(strncmp_c8)(PCRE2_SPTR str1, const char *str2, size_t len)
{
PCRE2_UCHAR c1, c2;
for (; len > 0; len--)
{
c1 = *str1++;
c2 = *str2++;
if (c1 != c2) return ((c1 > c2) << 1) - 1;
}
return 0;
}
/*************************************************
* Find the length of a PCRE2 string *
*************************************************/
/*
Argument: the string
Returns: the length
*/
PCRE2_SIZE
PRIV(strlen)(PCRE2_SPTR str)
{
PCRE2_SIZE c = 0;
while (*str++ != 0) c++;
return c;
}
/*************************************************
* Copy 8-bit 0-terminated string to PCRE2 string *
*************************************************/
/* Arguments:
str1 buffer to receive the string
str2 8-bit string to be copied
Returns: the number of code units used (excluding trailing zero)
*/
PCRE2_SIZE
PRIV(strcpy_c8)(PCRE2_UCHAR *str1, const char *str2)
{
PCRE2_UCHAR *t = str1;
while (*str2 != 0) *t++ = *str2++;
*t = 0;
return t - str1;
}
/* End of pcre2_string_utils.c */

View file

@ -0,0 +1,858 @@
/*************************************************
* Perl-Compatible Regular Expressions *
*************************************************/
/* PCRE is a library of functions to support regular expressions whose syntax
and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel
Original API code Copyright (c) 1997-2012 University of Cambridge
New API code Copyright (c) 2016 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the University of Cambridge nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
-----------------------------------------------------------------------------
*/
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
#include "pcre2_internal.h"
#define PTR_STACK_SIZE 20
#define SUBSTITUTE_OPTIONS \
(PCRE2_SUBSTITUTE_EXTENDED|PCRE2_SUBSTITUTE_GLOBAL| \
PCRE2_SUBSTITUTE_OVERFLOW_LENGTH|PCRE2_SUBSTITUTE_UNKNOWN_UNSET| \
PCRE2_SUBSTITUTE_UNSET_EMPTY)
/*************************************************
* Find end of substitute text *
*************************************************/
/* In extended mode, we recognize ${name:+set text:unset text} and similar
constructions. This requires the identification of unescaped : and }
characters. This function scans for such. It must deal with nested ${
constructions. The pointer to the text is updated, either to the required end
character, or to where an error was detected.
Arguments:
code points to the compiled expression (for options)
ptrptr points to the pointer to the start of the text (updated)
ptrend end of the whole string
last TRUE if the last expected string (only } recognized)
Returns: 0 on success
negative error code on failure
*/
static int
find_text_end(const pcre2_code *code, PCRE2_SPTR *ptrptr, PCRE2_SPTR ptrend,
BOOL last)
{
int rc = 0;
uint32_t nestlevel = 0;
BOOL literal = FALSE;
PCRE2_SPTR ptr = *ptrptr;
for (; ptr < ptrend; ptr++)
{
if (literal)
{
if (ptr[0] == CHAR_BACKSLASH && ptr < ptrend - 1 && ptr[1] == CHAR_E)
{
literal = FALSE;
ptr += 1;
}
}
else if (*ptr == CHAR_RIGHT_CURLY_BRACKET)
{
if (nestlevel == 0) goto EXIT;
nestlevel--;
}
else if (*ptr == CHAR_COLON && !last && nestlevel == 0) goto EXIT;
else if (*ptr == CHAR_DOLLAR_SIGN)
{
if (ptr < ptrend - 1 && ptr[1] == CHAR_LEFT_CURLY_BRACKET)
{
nestlevel++;
ptr += 1;
}
}
else if (*ptr == CHAR_BACKSLASH)
{
int erc;
int errorcode;
uint32_t ch;
if (ptr < ptrend - 1) switch (ptr[1])
{
case CHAR_L:
case CHAR_l:
case CHAR_U:
case CHAR_u:
ptr += 1;
continue;
}
ptr += 1; /* Must point after \ */
erc = PRIV(check_escape)(&ptr, ptrend, &ch, &errorcode,
code->overall_options, FALSE, NULL);
ptr -= 1; /* Back to last code unit of escape */
if (errorcode != 0)
{
rc = errorcode;
goto EXIT;
}
switch(erc)
{
case 0: /* Data character */
case ESC_E: /* Isolated \E is ignored */
break;
case ESC_Q:
literal = TRUE;
break;
default:
rc = PCRE2_ERROR_BADREPESCAPE;
goto EXIT;
}
}
}
rc = PCRE2_ERROR_REPMISSINGBRACE; /* Terminator not found */
EXIT:
*ptrptr = ptr;
return rc;
}
/*************************************************
* Match and substitute *
*************************************************/
/* This function applies a compiled re to a subject string and creates a new
string with substitutions. The first 7 arguments are the same as for
pcre2_match(). Either string length may be PCRE2_ZERO_TERMINATED.
Arguments:
code points to the compiled expression
subject points to the subject string
length length of subject string (may contain binary zeros)
start_offset where to start in the subject string
options option bits
match_data points to a match_data block, or is NULL
context points a PCRE2 context
replacement points to the replacement string
rlength length of replacement string
buffer where to put the substituted string
blength points to length of buffer; updated to length of string
Returns: >= 0 number of substitutions made
< 0 an error code
PCRE2_ERROR_BADREPLACEMENT means invalid use of $
*/
/* This macro checks for space in the buffer before copying into it. On
overflow, either give an error immediately, or keep on, accumulating the
length. */
#define CHECKMEMCPY(from,length) \
if (!overflowed && lengthleft < length) \
{ \
if ((suboptions & PCRE2_SUBSTITUTE_OVERFLOW_LENGTH) == 0) goto NOROOM; \
overflowed = TRUE; \
extra_needed = length - lengthleft; \
} \
else if (overflowed) \
{ \
extra_needed += length; \
} \
else \
{ \
memcpy(buffer + buff_offset, from, CU2BYTES(length)); \
buff_offset += length; \
lengthleft -= length; \
}
/* Here's the function */
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
pcre2_substitute(const pcre2_code *code, PCRE2_SPTR subject, PCRE2_SIZE length,
PCRE2_SIZE start_offset, uint32_t options, pcre2_match_data *match_data,
pcre2_match_context *mcontext, PCRE2_SPTR replacement, PCRE2_SIZE rlength,
PCRE2_UCHAR *buffer, PCRE2_SIZE *blength)
{
int rc;
int subs;
int forcecase = 0;
int forcecasereset = 0;
uint32_t ovector_count;
uint32_t goptions = 0;
uint32_t suboptions;
BOOL match_data_created = FALSE;
BOOL literal = FALSE;
BOOL overflowed = FALSE;
#ifdef SUPPORT_UNICODE
BOOL utf = (code->overall_options & PCRE2_UTF) != 0;
#endif
PCRE2_UCHAR temp[6];
PCRE2_SPTR ptr;
PCRE2_SPTR repend;
PCRE2_SIZE extra_needed = 0;
PCRE2_SIZE buff_offset, buff_length, lengthleft, fraglength;
PCRE2_SIZE *ovector;
buff_offset = 0;
lengthleft = buff_length = *blength;
*blength = PCRE2_UNSET;
/* Partial matching is not valid. */
if ((options & (PCRE2_PARTIAL_HARD|PCRE2_PARTIAL_SOFT)) != 0)
return PCRE2_ERROR_BADOPTION;
/* If no match data block is provided, create one. */
if (match_data == NULL)
{
pcre2_general_context *gcontext = (mcontext == NULL)?
(pcre2_general_context *)code :
(pcre2_general_context *)mcontext;
match_data = pcre2_match_data_create_from_pattern(code, gcontext);
if (match_data == NULL) return PCRE2_ERROR_NOMEMORY;
match_data_created = TRUE;
}
ovector = pcre2_get_ovector_pointer(match_data);
ovector_count = pcre2_get_ovector_count(match_data);
/* Find lengths of zero-terminated strings and the end of the replacement. */
if (length == PCRE2_ZERO_TERMINATED) length = PRIV(strlen)(subject);
if (rlength == PCRE2_ZERO_TERMINATED) rlength = PRIV(strlen)(replacement);
repend = replacement + rlength;
/* Check UTF replacement string if necessary. */
#ifdef SUPPORT_UNICODE
if (utf && (options & PCRE2_NO_UTF_CHECK) == 0)
{
rc = PRIV(valid_utf)(replacement, rlength, &(match_data->rightchar));
if (rc != 0)
{
match_data->leftchar = 0;
goto EXIT;
}
}
#endif /* SUPPORT_UNICODE */
/* Save the substitute options and remove them from the match options. */
suboptions = options & SUBSTITUTE_OPTIONS;
options &= ~SUBSTITUTE_OPTIONS;
/* Copy up to the start offset */
if (start_offset > length)
{
match_data->leftchar = 0;
rc = PCRE2_ERROR_BADOFFSET;
goto EXIT;
}
CHECKMEMCPY(subject, start_offset);
/* Loop for global substituting. */
subs = 0;
do
{
PCRE2_SPTR ptrstack[PTR_STACK_SIZE];
uint32_t ptrstackptr = 0;
rc = pcre2_match(code, subject, length, start_offset, options|goptions,
match_data, mcontext);
#ifdef SUPPORT_UNICODE
if (utf) options |= PCRE2_NO_UTF_CHECK; /* Only need to check once */
#endif
/* Any error other than no match returns the error code. No match when not
doing the special after-empty-match global rematch, or when at the end of the
subject, breaks the global loop. Otherwise, advance the starting point by one
character, copying it to the output, and try again. */
if (rc < 0)
{
PCRE2_SIZE save_start;
if (rc != PCRE2_ERROR_NOMATCH) goto EXIT;
if (goptions == 0 || start_offset >= length) break;
/* Advance by one code point. Then, if CRLF is a valid newline sequence and
we have advanced into the middle of it, advance one more code point. In
other words, do not start in the middle of CRLF, even if CR and LF on their
own are valid newlines. */
save_start = start_offset++;
if (subject[start_offset-1] == CHAR_CR &&
code->newline_convention != PCRE2_NEWLINE_CR &&
code->newline_convention != PCRE2_NEWLINE_LF &&
start_offset < length &&
subject[start_offset] == CHAR_LF)
start_offset++;
/* Otherwise, in UTF mode, advance past any secondary code points. */
else if ((code->overall_options & PCRE2_UTF) != 0)
{
#if PCRE2_CODE_UNIT_WIDTH == 8
while (start_offset < length && (subject[start_offset] & 0xc0) == 0x80)
start_offset++;
#elif PCRE2_CODE_UNIT_WIDTH == 16
while (start_offset < length &&
(subject[start_offset] & 0xfc00) == 0xdc00)
start_offset++;
#endif
}
/* Copy what we have advanced past, reset the special global options, and
continue to the next match. */
fraglength = start_offset - save_start;
CHECKMEMCPY(subject + save_start, fraglength);
goptions = 0;
continue;
}
/* Handle a successful match. Matches that use \K to end before they start
are not supported. */
if (ovector[1] < ovector[0])
{
rc = PCRE2_ERROR_BADSUBSPATTERN;
goto EXIT;
}
/* Count substitutions with a paranoid check for integer overflow; surely no
real call to this function would ever hit this! */
if (subs == INT_MAX)
{
rc = PCRE2_ERROR_TOOMANYREPLACE;
goto EXIT;
}
subs++;
/* Copy the text leading up to the match. */
if (rc == 0) rc = ovector_count;
fraglength = ovector[0] - start_offset;
CHECKMEMCPY(subject + start_offset, fraglength);
/* Process the replacement string. Literal mode is set by \Q, but only in
extended mode when backslashes are being interpreted. In extended mode we
must handle nested substrings that are to be reprocessed. */
ptr = replacement;
for (;;)
{
uint32_t ch;
unsigned int chlen;
/* If at the end of a nested substring, pop the stack. */
if (ptr >= repend)
{
if (ptrstackptr <= 0) break; /* End of replacement string */
repend = ptrstack[--ptrstackptr];
ptr = ptrstack[--ptrstackptr];
continue;
}
/* Handle the next character */
if (literal)
{
if (ptr[0] == CHAR_BACKSLASH && ptr < repend - 1 && ptr[1] == CHAR_E)
{
literal = FALSE;
ptr += 2;
continue;
}
goto LOADLITERAL;
}
/* Not in literal mode. */
if (*ptr == CHAR_DOLLAR_SIGN)
{
int group, n;
uint32_t special = 0;
BOOL inparens;
BOOL star;
PCRE2_SIZE sublength;
PCRE2_SPTR text1_start = NULL;
PCRE2_SPTR text1_end = NULL;
PCRE2_SPTR text2_start = NULL;
PCRE2_SPTR text2_end = NULL;
PCRE2_UCHAR next;
PCRE2_UCHAR name[33];
if (++ptr >= repend) goto BAD;
if ((next = *ptr) == CHAR_DOLLAR_SIGN) goto LOADLITERAL;
group = -1;
n = 0;
inparens = FALSE;
star = FALSE;
if (next == CHAR_LEFT_CURLY_BRACKET)
{
if (++ptr >= repend) goto BAD;
next = *ptr;
inparens = TRUE;
}
if (next == CHAR_ASTERISK)
{
if (++ptr >= repend) goto BAD;
next = *ptr;
star = TRUE;
}
if (!star && next >= CHAR_0 && next <= CHAR_9)
{
group = next - CHAR_0;
while (++ptr < repend)
{
next = *ptr;
if (next < CHAR_0 || next > CHAR_9) break;
group = group * 10 + next - CHAR_0;
/* A check for a number greater than the hightest captured group
is sufficient here; no need for a separate overflow check. If unknown
groups are to be treated as unset, just skip over any remaining
digits and carry on. */
if (group > code->top_bracket)
{
if ((suboptions & PCRE2_SUBSTITUTE_UNKNOWN_UNSET) != 0)
{
while (++ptr < repend && *ptr >= CHAR_0 && *ptr <= CHAR_9);
break;
}
else
{
rc = PCRE2_ERROR_NOSUBSTRING;
goto PTREXIT;
}
}
}
}
else
{
const uint8_t *ctypes = code->tables + ctypes_offset;
while (MAX_255(next) && (ctypes[next] & ctype_word) != 0)
{
name[n++] = next;
if (n > 32) goto BAD;
if (++ptr >= repend) break;
next = *ptr;
}
if (n == 0) goto BAD;
name[n] = 0;
}
/* In extended mode we recognize ${name:+set text:unset text} and
${name:-default text}. */
if (inparens)
{
if ((suboptions & PCRE2_SUBSTITUTE_EXTENDED) != 0 &&
!star && ptr < repend - 2 && next == CHAR_COLON)
{
special = *(++ptr);
if (special != CHAR_PLUS && special != CHAR_MINUS)
{
rc = PCRE2_ERROR_BADSUBSTITUTION;
goto PTREXIT;
}
text1_start = ++ptr;
rc = find_text_end(code, &ptr, repend, special == CHAR_MINUS);
if (rc != 0) goto PTREXIT;
text1_end = ptr;
if (special == CHAR_PLUS && *ptr == CHAR_COLON)
{
text2_start = ++ptr;
rc = find_text_end(code, &ptr, repend, TRUE);
if (rc != 0) goto PTREXIT;
text2_end = ptr;
}
}
else
{
if (ptr >= repend || *ptr != CHAR_RIGHT_CURLY_BRACKET)
{
rc = PCRE2_ERROR_REPMISSINGBRACE;
goto PTREXIT;
}
}
ptr++;
}
/* Have found a syntactically correct group number or name, or *name.
Only *MARK is currently recognized. */
if (star)
{
if (PRIV(strcmp_c8)(name, STRING_MARK) == 0)
{
PCRE2_SPTR mark = pcre2_get_mark(match_data);
if (mark != NULL)
{
PCRE2_SPTR mark_start = mark;
while (*mark != 0) mark++;
fraglength = mark - mark_start;
CHECKMEMCPY(mark_start, fraglength);
}
}
else goto BAD;
}
/* Substitute the contents of a group. We don't use substring_copy
functions any more, in order to support case forcing. */
else
{
PCRE2_SPTR subptr, subptrend;
/* Find a number for a named group. In case there are duplicate names,
search for the first one that is set. If the name is not found when
PCRE2_SUBSTITUTE_UNKNOWN_EMPTY is set, set the group number to a
non-existent group. */
if (group < 0)
{
PCRE2_SPTR first, last, entry;
rc = pcre2_substring_nametable_scan(code, name, &first, &last);
if (rc == PCRE2_ERROR_NOSUBSTRING &&
(suboptions & PCRE2_SUBSTITUTE_UNKNOWN_UNSET) != 0)
{
group = code->top_bracket + 1;
}
else
{
if (rc < 0) goto PTREXIT;
for (entry = first; entry <= last; entry += rc)
{
uint32_t ng = GET2(entry, 0);
if (ng < ovector_count)
{
if (group < 0) group = ng; /* First in ovector */
if (ovector[ng*2] != PCRE2_UNSET)
{
group = ng; /* First that is set */
break;
}
}
}
/* If group is still negative, it means we did not find a group
that is in the ovector. Just set the first group. */
if (group < 0) group = GET2(first, 0);
}
}
/* We now have a group that is identified by number. Find the length of
the captured string. If a group in a non-special substitution is unset
when PCRE2_SUBSTITUTE_UNSET_EMPTY is set, substitute nothing. */
rc = pcre2_substring_length_bynumber(match_data, group, &sublength);
if (rc < 0)
{
if (rc == PCRE2_ERROR_NOSUBSTRING &&
(suboptions & PCRE2_SUBSTITUTE_UNKNOWN_UNSET) != 0)
{
rc = PCRE2_ERROR_UNSET;
}
if (rc != PCRE2_ERROR_UNSET) goto PTREXIT; /* Non-unset errors */
if (special == 0) /* Plain substitution */
{
if ((suboptions & PCRE2_SUBSTITUTE_UNSET_EMPTY) != 0) continue;
goto PTREXIT; /* Else error */
}
}
/* If special is '+' we have a 'set' and possibly an 'unset' text,
both of which are reprocessed when used. If special is '-' we have a
default text for when the group is unset; it must be reprocessed. */
if (special != 0)
{
if (special == CHAR_MINUS)
{
if (rc == 0) goto LITERAL_SUBSTITUTE;
text2_start = text1_start;
text2_end = text1_end;
}
if (ptrstackptr >= PTR_STACK_SIZE) goto BAD;
ptrstack[ptrstackptr++] = ptr;
ptrstack[ptrstackptr++] = repend;
if (rc == 0)
{
ptr = text1_start;
repend = text1_end;
}
else
{
ptr = text2_start;
repend = text2_end;
}
continue;
}
/* Otherwise we have a literal substitution of a group's contents. */
LITERAL_SUBSTITUTE:
subptr = subject + ovector[group*2];
subptrend = subject + ovector[group*2 + 1];
/* Substitute a literal string, possibly forcing alphabetic case. */
while (subptr < subptrend)
{
GETCHARINCTEST(ch, subptr);
if (forcecase != 0)
{
#ifdef SUPPORT_UNICODE
if (utf)
{
uint32_t type = UCD_CHARTYPE(ch);
if (PRIV(ucp_gentype)[type] == ucp_L &&
type != ((forcecase > 0)? ucp_Lu : ucp_Ll))
ch = UCD_OTHERCASE(ch);
}
else
#endif
{
if (((code->tables + cbits_offset +
((forcecase > 0)? cbit_upper:cbit_lower)
)[ch/8] & (1 << (ch%8))) == 0)
ch = (code->tables + fcc_offset)[ch];
}
forcecase = forcecasereset;
}
#ifdef SUPPORT_UNICODE
if (utf) chlen = PRIV(ord2utf)(ch, temp); else
#endif
{
temp[0] = ch;
chlen = 1;
}
CHECKMEMCPY(temp, chlen);
}
}
}
/* Handle an escape sequence in extended mode. We can use check_escape()
to process \Q, \E, \c, \o, \x and \ followed by non-alphanumerics, but
the case-forcing escapes are not supported in pcre2_compile() so must be
recognized here. */
else if ((suboptions & PCRE2_SUBSTITUTE_EXTENDED) != 0 &&
*ptr == CHAR_BACKSLASH)
{
int errorcode;
if (ptr < repend - 1) switch (ptr[1])
{
case CHAR_L:
forcecase = forcecasereset = -1;
ptr += 2;
continue;
case CHAR_l:
forcecase = -1;
forcecasereset = 0;
ptr += 2;
continue;
case CHAR_U:
forcecase = forcecasereset = 1;
ptr += 2;
continue;
case CHAR_u:
forcecase = 1;
forcecasereset = 0;
ptr += 2;
continue;
default:
break;
}
ptr++; /* Point after \ */
rc = PRIV(check_escape)(&ptr, repend, &ch, &errorcode,
code->overall_options, FALSE, NULL);
if (errorcode != 0) goto BADESCAPE;
switch(rc)
{
case ESC_E:
forcecase = forcecasereset = 0;
continue;
case ESC_Q:
literal = TRUE;
continue;
case 0: /* Data character */
goto LITERAL;
default:
goto BADESCAPE;
}
}
/* Handle a literal code unit */
else
{
LOADLITERAL:
GETCHARINCTEST(ch, ptr); /* Get character value, increment pointer */
LITERAL:
if (forcecase != 0)
{
#ifdef SUPPORT_UNICODE
if (utf)
{
uint32_t type = UCD_CHARTYPE(ch);
if (PRIV(ucp_gentype)[type] == ucp_L &&
type != ((forcecase > 0)? ucp_Lu : ucp_Ll))
ch = UCD_OTHERCASE(ch);
}
else
#endif
{
if (((code->tables + cbits_offset +
((forcecase > 0)? cbit_upper:cbit_lower)
)[ch/8] & (1 << (ch%8))) == 0)
ch = (code->tables + fcc_offset)[ch];
}
forcecase = forcecasereset;
}
#ifdef SUPPORT_UNICODE
if (utf) chlen = PRIV(ord2utf)(ch, temp); else
#endif
{
temp[0] = ch;
chlen = 1;
}
CHECKMEMCPY(temp, chlen);
} /* End handling a literal code unit */
} /* End of loop for scanning the replacement. */
/* The replacement has been copied to the output. Update the start offset to
point to the rest of the subject string. If we matched an empty string,
do the magic for global matches. */
start_offset = ovector[1];
goptions = (ovector[0] != ovector[1])? 0 :
PCRE2_ANCHORED|PCRE2_NOTEMPTY_ATSTART;
} while ((suboptions & PCRE2_SUBSTITUTE_GLOBAL) != 0); /* Repeat "do" loop */
/* Copy the rest of the subject. */
fraglength = length - start_offset;
CHECKMEMCPY(subject + start_offset, fraglength);
temp[0] = 0;
CHECKMEMCPY(temp , 1);
/* If overflowed is set it means the PCRE2_SUBSTITUTE_OVERFLOW_LENGTH is set,
and matching has carried on after a full buffer, in order to compute the length
needed. Otherwise, an overflow generates an immediate error return. */
if (overflowed)
{
rc = PCRE2_ERROR_NOMEMORY;
*blength = buff_length + extra_needed;
}
/* After a successful execution, return the number of substitutions and set the
length of buffer used, excluding the trailing zero. */
else
{
rc = subs;
*blength = buff_offset - 1;
}
EXIT:
if (match_data_created) pcre2_match_data_free(match_data);
else match_data->rc = rc;
return rc;
NOROOM:
rc = PCRE2_ERROR_NOMEMORY;
goto EXIT;
BAD:
rc = PCRE2_ERROR_BADREPLACEMENT;
goto PTREXIT;
BADESCAPE:
rc = PCRE2_ERROR_BADREPESCAPE;
PTREXIT:
*blength = (PCRE2_SIZE)(ptr - replacement);
goto EXIT;
}
/* End of pcre2_substitute.c */

View file

@ -0,0 +1,542 @@
/*************************************************
* Perl-Compatible Regular Expressions *
*************************************************/
/* PCRE is a library of functions to support regular expressions whose syntax
and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel
Original API code Copyright (c) 1997-2012 University of Cambridge
New API code Copyright (c) 2016 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the University of Cambridge nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
-----------------------------------------------------------------------------
*/
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
#include "pcre2_internal.h"
/*************************************************
* Copy named captured string to given buffer *
*************************************************/
/* This function copies a single captured substring into a given buffer,
identifying it by name. If the regex permits duplicate names, the first
substring that is set is chosen.
Arguments:
match_data points to the match data
stringname the name of the required substring
buffer where to put the substring
sizeptr the size of the buffer, updated to the size of the substring
Returns: if successful: zero
if not successful, a negative error code:
(1) an error from nametable_scan()
(2) an error from copy_bynumber()
(3) PCRE2_ERROR_UNAVAILABLE: no group is in ovector
(4) PCRE2_ERROR_UNSET: all named groups in ovector are unset
*/
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
pcre2_substring_copy_byname(pcre2_match_data *match_data, PCRE2_SPTR stringname,
PCRE2_UCHAR *buffer, PCRE2_SIZE *sizeptr)
{
PCRE2_SPTR first, last, entry;
int failrc, entrysize;
if (match_data->matchedby == PCRE2_MATCHEDBY_DFA_INTERPRETER)
return PCRE2_ERROR_DFA_UFUNC;
entrysize = pcre2_substring_nametable_scan(match_data->code, stringname,
&first, &last);
if (entrysize < 0) return entrysize;
failrc = PCRE2_ERROR_UNAVAILABLE;
for (entry = first; entry <= last; entry += entrysize)
{
uint32_t n = GET2(entry, 0);
if (n < match_data->oveccount)
{
if (match_data->ovector[n*2] != PCRE2_UNSET)
return pcre2_substring_copy_bynumber(match_data, n, buffer, sizeptr);
failrc = PCRE2_ERROR_UNSET;
}
}
return failrc;
}
/*************************************************
* Copy numbered captured string to given buffer *
*************************************************/
/* This function copies a single captured substring into a given buffer,
identifying it by number.
Arguments:
match_data points to the match data
stringnumber the number of the required substring
buffer where to put the substring
sizeptr the size of the buffer, updated to the size of the substring
Returns: if successful: 0
if not successful, a negative error code:
PCRE2_ERROR_NOMEMORY: buffer too small
PCRE2_ERROR_NOSUBSTRING: no such substring
PCRE2_ERROR_UNAVAILABLE: ovector too small
PCRE2_ERROR_UNSET: substring is not set
*/
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
pcre2_substring_copy_bynumber(pcre2_match_data *match_data,
uint32_t stringnumber, PCRE2_UCHAR *buffer, PCRE2_SIZE *sizeptr)
{
int rc;
PCRE2_SIZE size;
rc = pcre2_substring_length_bynumber(match_data, stringnumber, &size);
if (rc < 0) return rc;
if (size + 1 > *sizeptr) return PCRE2_ERROR_NOMEMORY;
memcpy(buffer, match_data->subject + match_data->ovector[stringnumber*2],
CU2BYTES(size));
buffer[size] = 0;
*sizeptr = size;
return 0;
}
/*************************************************
* Extract named captured string *
*************************************************/
/* This function copies a single captured substring, identified by name, into
new memory. If the regex permits duplicate names, the first substring that is
set is chosen.
Arguments:
match_data pointer to match_data
stringname the name of the required substring
stringptr where to put the pointer to the new memory
sizeptr where to put the length of the substring
Returns: if successful: zero
if not successful, a negative value:
(1) an error from nametable_scan()
(2) an error from get_bynumber()
(3) PCRE2_ERROR_UNAVAILABLE: no group is in ovector
(4) PCRE2_ERROR_UNSET: all named groups in ovector are unset
*/
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
pcre2_substring_get_byname(pcre2_match_data *match_data,
PCRE2_SPTR stringname, PCRE2_UCHAR **stringptr, PCRE2_SIZE *sizeptr)
{
PCRE2_SPTR first, last, entry;
int failrc, entrysize;
if (match_data->matchedby == PCRE2_MATCHEDBY_DFA_INTERPRETER)
return PCRE2_ERROR_DFA_UFUNC;
entrysize = pcre2_substring_nametable_scan(match_data->code, stringname,
&first, &last);
if (entrysize < 0) return entrysize;
failrc = PCRE2_ERROR_UNAVAILABLE;
for (entry = first; entry <= last; entry += entrysize)
{
uint32_t n = GET2(entry, 0);
if (n < match_data->oveccount)
{
if (match_data->ovector[n*2] != PCRE2_UNSET)
return pcre2_substring_get_bynumber(match_data, n, stringptr, sizeptr);
failrc = PCRE2_ERROR_UNSET;
}
}
return failrc;
}
/*************************************************
* Extract captured string to new memory *
*************************************************/
/* This function copies a single captured substring into a piece of new
memory.
Arguments:
match_data points to match data
stringnumber the number of the required substring
stringptr where to put a pointer to the new memory
sizeptr where to put the size of the substring
Returns: if successful: 0
if not successful, a negative error code:
PCRE2_ERROR_NOMEMORY: failed to get memory
PCRE2_ERROR_NOSUBSTRING: no such substring
PCRE2_ERROR_UNAVAILABLE: ovector too small
PCRE2_ERROR_UNSET: substring is not set
*/
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
pcre2_substring_get_bynumber(pcre2_match_data *match_data,
uint32_t stringnumber, PCRE2_UCHAR **stringptr, PCRE2_SIZE *sizeptr)
{
int rc;
PCRE2_SIZE size;
PCRE2_UCHAR *yield;
rc = pcre2_substring_length_bynumber(match_data, stringnumber, &size);
if (rc < 0) return rc;
yield = PRIV(memctl_malloc)(sizeof(pcre2_memctl) +
(size + 1)*PCRE2_CODE_UNIT_WIDTH, (pcre2_memctl *)match_data);
if (yield == NULL) return PCRE2_ERROR_NOMEMORY;
yield = (PCRE2_UCHAR *)(((char *)yield) + sizeof(pcre2_memctl));
memcpy(yield, match_data->subject + match_data->ovector[stringnumber*2],
CU2BYTES(size));
yield[size] = 0;
*stringptr = yield;
*sizeptr = size;
return 0;
}
/*************************************************
* Free memory obtained by get_substring *
*************************************************/
/*
Argument: the result of a previous pcre2_substring_get_byxxx()
Returns: nothing
*/
PCRE2_EXP_DEFN void PCRE2_CALL_CONVENTION
pcre2_substring_free(PCRE2_UCHAR *string)
{
if (string != NULL)
{
pcre2_memctl *memctl = (pcre2_memctl *)((char *)string - sizeof(pcre2_memctl));
memctl->free(memctl, memctl->memory_data);
}
}
/*************************************************
* Get length of a named substring *
*************************************************/
/* This function returns the length of a named captured substring. If the regex
permits duplicate names, the first substring that is set is chosen.
Arguments:
match_data pointer to match data
stringname the name of the required substring
sizeptr where to put the length
Returns: 0 if successful, else a negative error number
*/
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
pcre2_substring_length_byname(pcre2_match_data *match_data,
PCRE2_SPTR stringname, PCRE2_SIZE *sizeptr)
{
PCRE2_SPTR first, last, entry;
int failrc, entrysize;
if (match_data->matchedby == PCRE2_MATCHEDBY_DFA_INTERPRETER)
return PCRE2_ERROR_DFA_UFUNC;
entrysize = pcre2_substring_nametable_scan(match_data->code, stringname,
&first, &last);
if (entrysize < 0) return entrysize;
failrc = PCRE2_ERROR_UNAVAILABLE;
for (entry = first; entry <= last; entry += entrysize)
{
uint32_t n = GET2(entry, 0);
if (n < match_data->oveccount)
{
if (match_data->ovector[n*2] != PCRE2_UNSET)
return pcre2_substring_length_bynumber(match_data, n, sizeptr);
failrc = PCRE2_ERROR_UNSET;
}
}
return failrc;
}
/*************************************************
* Get length of a numbered substring *
*************************************************/
/* This function returns the length of a captured substring. If the start is
beyond the end (which can happen when \K is used in an assertion), it sets the
length to zero.
Arguments:
match_data pointer to match data
stringnumber the number of the required substring
sizeptr where to put the length, if not NULL
Returns: if successful: 0
if not successful, a negative error code:
PCRE2_ERROR_NOSUBSTRING: no such substring
PCRE2_ERROR_UNAVAILABLE: ovector is too small
PCRE2_ERROR_UNSET: substring is not set
*/
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
pcre2_substring_length_bynumber(pcre2_match_data *match_data,
uint32_t stringnumber, PCRE2_SIZE *sizeptr)
{
PCRE2_SIZE left, right;
int count = match_data->rc;
if (count == PCRE2_ERROR_PARTIAL)
{
if (stringnumber > 0) return PCRE2_ERROR_PARTIAL;
count = 0;
}
else if (count < 0) return count; /* Match failed */
if (match_data->matchedby != PCRE2_MATCHEDBY_DFA_INTERPRETER)
{
if (stringnumber > match_data->code->top_bracket)
return PCRE2_ERROR_NOSUBSTRING;
if (stringnumber >= match_data->oveccount)
return PCRE2_ERROR_UNAVAILABLE;
if (match_data->ovector[stringnumber*2] == PCRE2_UNSET)
return PCRE2_ERROR_UNSET;
}
else /* Matched using pcre2_dfa_match() */
{
if (stringnumber >= match_data->oveccount) return PCRE2_ERROR_UNAVAILABLE;
if (count != 0 && stringnumber >= (uint32_t)count) return PCRE2_ERROR_UNSET;
}
left = match_data->ovector[stringnumber*2];
right = match_data->ovector[stringnumber*2+1];
if (sizeptr != NULL) *sizeptr = (left > right)? 0 : right - left;
return 0;
}
/*************************************************
* Extract all captured strings to new memory *
*************************************************/
/* This function gets one chunk of memory and builds a list of pointers and all
the captured substrings in it. A NULL pointer is put on the end of the list.
The substrings are zero-terminated, but also, if the final argument is
non-NULL, a list of lengths is also returned. This allows binary data to be
handled.
Arguments:
match_data points to the match data
listptr set to point to the list of pointers
lengthsptr set to point to the list of lengths (may be NULL)
Returns: if successful: 0
if not successful, a negative error code:
PCRE2_ERROR_NOMEMORY: failed to get memory,
or a match failure code
*/
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
pcre2_substring_list_get(pcre2_match_data *match_data, PCRE2_UCHAR ***listptr,
PCRE2_SIZE **lengthsptr)
{
int i, count, count2;
PCRE2_SIZE size;
PCRE2_SIZE *lensp;
pcre2_memctl *memp;
PCRE2_UCHAR **listp;
PCRE2_UCHAR *sp;
PCRE2_SIZE *ovector;
if ((count = match_data->rc) < 0) return count; /* Match failed */
if (count == 0) count = match_data->oveccount; /* Ovector too small */
count2 = 2*count;
ovector = match_data->ovector;
size = sizeof(pcre2_memctl) + sizeof(PCRE2_UCHAR *); /* For final NULL */
if (lengthsptr != NULL) size += sizeof(PCRE2_SIZE) * count; /* For lengths */
for (i = 0; i < count2; i += 2)
{
size += sizeof(PCRE2_UCHAR *) + CU2BYTES(1);
if (ovector[i+1] > ovector[i]) size += CU2BYTES(ovector[i+1] - ovector[i]);
}
memp = PRIV(memctl_malloc)(size, (pcre2_memctl *)match_data);
if (memp == NULL) return PCRE2_ERROR_NOMEMORY;
*listptr = listp = (PCRE2_UCHAR **)((char *)memp + sizeof(pcre2_memctl));
lensp = (PCRE2_SIZE *)((char *)listp + sizeof(PCRE2_UCHAR *) * (count + 1));
if (lengthsptr == NULL)
{
sp = (PCRE2_UCHAR *)lensp;
lensp = NULL;
}
else
{
*lengthsptr = lensp;
sp = (PCRE2_UCHAR *)((char *)lensp + sizeof(PCRE2_SIZE) * count);
}
for (i = 0; i < count2; i += 2)
{
size = (ovector[i+1] > ovector[i])? (ovector[i+1] - ovector[i]) : 0;
memcpy(sp, match_data->subject + ovector[i], CU2BYTES(size));
*listp++ = sp;
if (lensp != NULL) *lensp++ = size;
sp += size;
*sp++ = 0;
}
*listp = NULL;
return 0;
}
/*************************************************
* Free memory obtained by substring_list_get *
*************************************************/
/*
Argument: the result of a previous pcre2_substring_list_get()
Returns: nothing
*/
PCRE2_EXP_DEFN void PCRE2_CALL_CONVENTION
pcre2_substring_list_free(PCRE2_SPTR *list)
{
if (list != NULL)
{
pcre2_memctl *memctl = (pcre2_memctl *)((char *)list - sizeof(pcre2_memctl));
memctl->free(memctl, memctl->memory_data);
}
}
/*************************************************
* Find (multiple) entries for named string *
*************************************************/
/* This function scans the nametable for a given name, using binary chop. It
returns either two pointers to the entries in the table, or, if no pointers are
given, the number of a unique group with the given name. If duplicate names are
permitted, and the name is not unique, an error is generated.
Arguments:
code the compiled regex
stringname the name whose entries required
firstptr where to put the pointer to the first entry
lastptr where to put the pointer to the last entry
Returns: PCRE2_ERROR_NOSUBSTRING if the name is not found
otherwise, if firstptr and lastptr are NULL:
a group number for a unique substring
else PCRE2_ERROR_NOUNIQUESUBSTRING
otherwise:
the length of each entry, having set firstptr and lastptr
*/
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
pcre2_substring_nametable_scan(const pcre2_code *code, PCRE2_SPTR stringname,
PCRE2_SPTR *firstptr, PCRE2_SPTR *lastptr)
{
uint16_t bot = 0;
uint16_t top = code->name_count;
uint16_t entrysize = code->name_entry_size;
PCRE2_SPTR nametable = (PCRE2_SPTR)((char *)code + sizeof(pcre2_real_code));
while (top > bot)
{
uint16_t mid = (top + bot) / 2;
PCRE2_SPTR entry = nametable + entrysize*mid;
int c = PRIV(strcmp)(stringname, entry + IMM2_SIZE);
if (c == 0)
{
PCRE2_SPTR first;
PCRE2_SPTR last;
PCRE2_SPTR lastentry;
lastentry = nametable + entrysize * (code->name_count - 1);
first = last = entry;
while (first > nametable)
{
if (PRIV(strcmp)(stringname, (first - entrysize + IMM2_SIZE)) != 0) break;
first -= entrysize;
}
while (last < lastentry)
{
if (PRIV(strcmp)(stringname, (last + entrysize + IMM2_SIZE)) != 0) break;
last += entrysize;
}
if (firstptr == NULL) return (first == last)?
(int)GET2(entry, 0) : PCRE2_ERROR_NOUNIQUESUBSTRING;
*firstptr = first;
*lastptr = last;
return entrysize;
}
if (c > 0) bot = mid + 1; else top = mid;
}
return PCRE2_ERROR_NOSUBSTRING;
}
/*************************************************
* Find number for named string *
*************************************************/
/* This function is a convenience wrapper for pcre2_substring_nametable_scan()
when it is known that names are unique. If there are duplicate names, it is not
defined which number is returned.
Arguments:
code the compiled regex
stringname the name whose number is required
Returns: the number of the named parenthesis, or a negative number
PCRE2_ERROR_NOSUBSTRING if not found
PCRE2_ERROR_NOUNIQUESUBSTRING if not unique
*/
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
pcre2_substring_number_from_name(const pcre2_code *code,
PCRE2_SPTR stringname)
{
return pcre2_substring_nametable_scan(code, stringname, NULL, NULL);
}
/* End of pcre2_substring.c */

View file

@ -6,7 +6,8 @@
and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel
Copyright (c) 1997-2017 University of Cambridge
Original API code Copyright (c) 1997-2012 University of Cambridge
New API code Copyright (c) 2016-2017 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
@ -37,47 +38,65 @@ POSSIBILITY OF SUCH DAMAGE.
-----------------------------------------------------------------------------
*/
#ifndef PCRE_INCLUDED
/* This module contains some fixed tables that are used by more than one of the
PCRE code modules. The tables are also #included by the pcretest program, which
uses macros to change their names from _pcre_xxx to xxxx, thereby avoiding name
clashes with the library. */
PCRE2 code modules. The tables are also #included by the pcre2test program,
which uses macros to change their names from _pcre2_xxx to xxxx, thereby
avoiding name clashes with the library. In this case, PCRE2_PCRE2TEST is
defined. */
#ifndef PCRE2_PCRE2TEST /* We're compiling the library */
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
#include "pcre2_internal.h"
#endif /* PCRE2_PCRE2TEST */
#include "pcre_internal.h"
#endif /* PCRE_INCLUDED */
/* Table of sizes for the fixed-length opcodes. It's defined in a macro so that
the definition is next to the definition of the opcodes in pcre_internal.h. */
the definition is next to the definition of the opcodes in pcre2_internal.h.
This is mode-dependent, so is skipped when this file is included by pcre2test. */
const pcre_uint8 PRIV(OP_lengths)[] = { OP_LENGTHS };
#ifndef PCRE2_PCRE2TEST
const uint8_t PRIV(OP_lengths)[] = { OP_LENGTHS };
#endif
/* Tables of horizontal and vertical whitespace characters, suitable for
adding to classes. */
const pcre_uint32 PRIV(hspace_list)[] = { HSPACE_LIST };
const pcre_uint32 PRIV(vspace_list)[] = { VSPACE_LIST };
const uint32_t PRIV(hspace_list)[] = { HSPACE_LIST };
const uint32_t PRIV(vspace_list)[] = { VSPACE_LIST };
/* These tables are the pairs of delimiters that are valid for callout string
arguments. For each starting delimiter there must be a matching ending
delimiter, which in fact is different only for bracket-like delimiters. */
const uint32_t PRIV(callout_start_delims)[] = {
CHAR_GRAVE_ACCENT, CHAR_APOSTROPHE, CHAR_QUOTATION_MARK,
CHAR_CIRCUMFLEX_ACCENT, CHAR_PERCENT_SIGN, CHAR_NUMBER_SIGN,
CHAR_DOLLAR_SIGN, CHAR_LEFT_CURLY_BRACKET, 0 };
const uint32_t PRIV(callout_end_delims[]) = {
CHAR_GRAVE_ACCENT, CHAR_APOSTROPHE, CHAR_QUOTATION_MARK,
CHAR_CIRCUMFLEX_ACCENT, CHAR_PERCENT_SIGN, CHAR_NUMBER_SIGN,
CHAR_DOLLAR_SIGN, CHAR_RIGHT_CURLY_BRACKET, 0 };
/*************************************************
* Tables for UTF-8 support *
*************************************************/
/* These tables are required by pcre2test in 16- or 32-bit mode, as well
as for the library in 8-bit mode, because pcre2test uses UTF-8 internally for
handling wide characters. */
#if defined PCRE2_PCRE2TEST || \
(defined SUPPORT_UNICODE && \
defined PCRE2_CODE_UNIT_WIDTH && \
PCRE2_CODE_UNIT_WIDTH == 8)
/* These are the breakpoints for different numbers of bytes in a UTF-8
character. */
#if (defined SUPPORT_UTF && defined COMPILE_PCRE8) \
|| (defined PCRE_INCLUDED && (defined SUPPORT_PCRE16 || defined SUPPORT_PCRE32))
/* These tables are also required by pcretest in 16- or 32-bit mode. */
const int PRIV(utf8_table1)[] =
{ 0x7f, 0x7ff, 0xffff, 0x1fffff, 0x3ffffff, 0x7fffffff};
@ -92,19 +111,20 @@ const int PRIV(utf8_table3)[] = { 0xff, 0x1f, 0x0f, 0x07, 0x03, 0x01};
/* Table of the number of extra bytes, indexed by the first byte masked with
0x3f. The highest number for a valid UTF-8 first byte is in fact 0x3d. */
const pcre_uint8 PRIV(utf8_table4)[] = {
const uint8_t PRIV(utf8_table4)[] = {
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5 };
#endif /* (SUPPORT_UTF && COMPILE_PCRE8) || (PCRE_INCLUDED && SUPPORT_PCRE[16|32])*/
#endif /* UTF-8 support needed */
#ifdef SUPPORT_UTF
#ifdef SUPPORT_UNICODE
/* Table to translate from particular type value to the general value. */
const pcre_uint32 PRIV(ucp_gentype)[] = {
const uint32_t PRIV(ucp_gentype)[] = {
ucp_C, ucp_C, ucp_C, ucp_C, ucp_C, /* Cc, Cf, Cn, Co, Cs */
ucp_L, ucp_L, ucp_L, ucp_L, ucp_L, /* Ll, Lu, Lm, Lo, Lt */
ucp_M, ucp_M, ucp_M, /* Mc, Me, Mn */
@ -117,18 +137,18 @@ const pcre_uint32 PRIV(ucp_gentype)[] = {
/* This table encodes the rules for finding the end of an extended grapheme
cluster. Every code point has a grapheme break property which is one of the
ucp_gbXX values defined in ucp.h. The 2-dimensional table is indexed by the
properties of two adjacent code points. The left property selects a word from
the table, and the right property selects a bit from that word like this:
ucp_gbXX values defined in pcre2_ucp.h. The 2-dimensional table is indexed by
the properties of two adjacent code points. The left property selects a word
from the table, and the right property selects a bit from that word like this:
ucp_gbtable[left-property] & (1 << right-property)
PRIV(ucp_gbtable)[left-property] & (1 << right-property)
The value is non-zero if a grapheme break is NOT permitted between the relevant
two code points. The breaking rules are as follows:
1. Break at the start and end of text (pretty obviously).
2. Do not break between a CR and LF; otherwise, break before and after
2. Do not break between a CR and LF; otherwise, break before and after
controls.
3. Do not break Hangul syllable sequences, the rules for which are:
@ -137,44 +157,62 @@ two code points. The breaking rules are as follows:
LV or V may be followed by V or T
LVT or T may be followed by T
4. Do not break before extending characters.
4. Do not break before extending characters or zero-width-joiner (ZWJ).
The next two rules are only for extended grapheme clusters (but that's what we
The following rules are only for extended grapheme clusters (but that's what we
are implementing).
5. Do not break before SpacingMarks.
6. Do not break after Prepend characters.
7. Otherwise, break everywhere.
7. Do not break within emoji modifier sequences (E_Base or E_Base_GAZ followed
by E_Modifier). Extend characters are allowed before the modifier; this
cannot be represented in this table, the code has to deal with it.
8. Do not break within emoji zwj sequences (ZWJ followed by Glue_After_Zwj or
E_Base_GAZ).
9. Do not break within emoji flag sequences. That is, do not break between
regional indicator (RI) symbols if there are an odd number of RI characters
before the break point. This table encodes "join RI characters"; the code
has to deal with checking for previous adjoining RIs.
10. Otherwise, break everywhere.
*/
const pcre_uint32 PRIV(ucp_gbtable[]) = {
#define ESZ (1<<ucp_gbExtend)|(1<<ucp_gbSpacingMark)|(1<<ucp_gbZWJ)
const uint32_t PRIV(ucp_gbtable)[] = {
(1<<ucp_gbLF), /* 0 CR */
0, /* 1 LF */
0, /* 2 Control */
(1<<ucp_gbExtend)|(1<<ucp_gbSpacingMark), /* 3 Extend */
(1<<ucp_gbExtend)|(1<<ucp_gbPrepend)| /* 4 Prepend */
(1<<ucp_gbSpacingMark)|(1<<ucp_gbL)|
(1<<ucp_gbV)|(1<<ucp_gbT)|(1<<ucp_gbLV)|
(1<<ucp_gbLVT)|(1<<ucp_gbOther),
(1<<ucp_gbExtend)|(1<<ucp_gbSpacingMark), /* 5 SpacingMark */
(1<<ucp_gbExtend)|(1<<ucp_gbSpacingMark)|(1<<ucp_gbL)| /* 6 L */
(1<<ucp_gbV)|(1<<ucp_gbLV)|(1<<ucp_gbLVT),
(1<<ucp_gbExtend)|(1<<ucp_gbSpacingMark)|(1<<ucp_gbV)| /* 7 V */
(1<<ucp_gbT),
(1<<ucp_gbExtend)|(1<<ucp_gbSpacingMark)|(1<<ucp_gbT), /* 8 T */
(1<<ucp_gbExtend)|(1<<ucp_gbSpacingMark)|(1<<ucp_gbV)| /* 9 LV */
(1<<ucp_gbT),
(1<<ucp_gbExtend)|(1<<ucp_gbSpacingMark)|(1<<ucp_gbT), /* 10 LVT */
ESZ, /* 3 Extend */
ESZ|(1<<ucp_gbPrepend)| /* 4 Prepend */
(1<<ucp_gbL)|(1<<ucp_gbV)|(1<<ucp_gbT)|
(1<<ucp_gbLV)|(1<<ucp_gbLVT)|(1<<ucp_gbOther)|
(1<<ucp_gbRegionalIndicator)|
(1<<ucp_gbE_Base)|(1<<ucp_gbE_Modifier)|
(1<<ucp_gbE_Base_GAZ)|
(1<<ucp_gbZWJ)|(1<<ucp_gbGlue_After_Zwj),
ESZ, /* 5 SpacingMark */
ESZ|(1<<ucp_gbL)|(1<<ucp_gbV)|(1<<ucp_gbLV)| /* 6 L */
(1<<ucp_gbLVT),
ESZ|(1<<ucp_gbV)|(1<<ucp_gbT), /* 7 V */
ESZ|(1<<ucp_gbT), /* 8 T */
ESZ|(1<<ucp_gbV)|(1<<ucp_gbT), /* 9 LV */
ESZ|(1<<ucp_gbT), /* 10 LVT */
(1<<ucp_gbRegionalIndicator), /* 11 RegionalIndicator */
(1<<ucp_gbExtend)|(1<<ucp_gbSpacingMark) /* 12 Other */
ESZ, /* 12 Other */
ESZ|(1<<ucp_gbE_Modifier), /* 13 E_Base */
ESZ, /* 14 E_Modifier */
ESZ|(1<<ucp_gbE_Modifier), /* 15 E_Base_GAZ */
ESZ|(1<<ucp_gbGlue_After_Zwj)|(1<<ucp_gbE_Base_GAZ), /* 16 ZWJ */
ESZ /* 12 Glue_After_Zwj */
};
#undef ESZ
#ifdef SUPPORT_JIT
/* This table reverses PRIV(ucp_gentype). We can save the cost
of a memory load. */
@ -190,7 +228,7 @@ const int PRIV(ucp_typerange)[] = {
};
#endif /* SUPPORT_JIT */
/* The pcre_utt[] table below translates Unicode property names into type and
/* The PRIV(utt)[] table below translates Unicode property names into type and
code values. It is searched by binary chop, so must be in collating sequence of
name. Originally, the table contained pointers to the name strings in the first
field of each entry. However, that leads to a large number of relocations when
@ -207,6 +245,9 @@ version. Like all other character and string literals that are compared against
the regular expression pattern, we must use STR_ macros instead of literal
strings to make sure that UTF-8 support works on EBCDIC platforms. */
#define STRING_Adlam0 STR_A STR_d STR_l STR_a STR_m "\0"
#define STRING_Ahom0 STR_A STR_h STR_o STR_m "\0"
#define STRING_Anatolian_Hieroglyphs0 STR_A STR_n STR_a STR_t STR_o STR_l STR_i STR_a STR_n STR_UNDERSCORE STR_H STR_i STR_e STR_r STR_o STR_g STR_l STR_y STR_p STR_h STR_s "\0"
#define STRING_Any0 STR_A STR_n STR_y "\0"
#define STRING_Arabic0 STR_A STR_r STR_a STR_b STR_i STR_c "\0"
#define STRING_Armenian0 STR_A STR_r STR_m STR_e STR_n STR_i STR_a STR_n "\0"
@ -216,6 +257,7 @@ strings to make sure that UTF-8 support works on EBCDIC platforms. */
#define STRING_Bassa_Vah0 STR_B STR_a STR_s STR_s STR_a STR_UNDERSCORE STR_V STR_a STR_h "\0"
#define STRING_Batak0 STR_B STR_a STR_t STR_a STR_k "\0"
#define STRING_Bengali0 STR_B STR_e STR_n STR_g STR_a STR_l STR_i "\0"
#define STRING_Bhaiksuki0 STR_B STR_h STR_a STR_i STR_k STR_s STR_u STR_k STR_i "\0"
#define STRING_Bopomofo0 STR_B STR_o STR_p STR_o STR_m STR_o STR_f STR_o "\0"
#define STRING_Brahmi0 STR_B STR_r STR_a STR_h STR_m STR_i "\0"
#define STRING_Braille0 STR_B STR_r STR_a STR_i STR_l STR_l STR_e "\0"
@ -254,6 +296,7 @@ strings to make sure that UTF-8 support works on EBCDIC platforms. */
#define STRING_Han0 STR_H STR_a STR_n "\0"
#define STRING_Hangul0 STR_H STR_a STR_n STR_g STR_u STR_l "\0"
#define STRING_Hanunoo0 STR_H STR_a STR_n STR_u STR_n STR_o STR_o "\0"
#define STRING_Hatran0 STR_H STR_a STR_t STR_r STR_a STR_n "\0"
#define STRING_Hebrew0 STR_H STR_e STR_b STR_r STR_e STR_w "\0"
#define STRING_Hiragana0 STR_H STR_i STR_r STR_a STR_g STR_a STR_n STR_a "\0"
#define STRING_Imperial_Aramaic0 STR_I STR_m STR_p STR_e STR_r STR_i STR_a STR_l STR_UNDERSCORE STR_A STR_r STR_a STR_m STR_a STR_i STR_c "\0"
@ -290,6 +333,8 @@ strings to make sure that UTF-8 support works on EBCDIC platforms. */
#define STRING_Malayalam0 STR_M STR_a STR_l STR_a STR_y STR_a STR_l STR_a STR_m "\0"
#define STRING_Mandaic0 STR_M STR_a STR_n STR_d STR_a STR_i STR_c "\0"
#define STRING_Manichaean0 STR_M STR_a STR_n STR_i STR_c STR_h STR_a STR_e STR_a STR_n "\0"
#define STRING_Marchen0 STR_M STR_a STR_r STR_c STR_h STR_e STR_n "\0"
#define STRING_Masaram_Gondi0 STR_M STR_a STR_s STR_a STR_r STR_a STR_m STR_UNDERSCORE STR_G STR_o STR_n STR_d STR_i "\0"
#define STRING_Mc0 STR_M STR_c "\0"
#define STRING_Me0 STR_M STR_e "\0"
#define STRING_Meetei_Mayek0 STR_M STR_e STR_e STR_t STR_e STR_i STR_UNDERSCORE STR_M STR_a STR_y STR_e STR_k "\0"
@ -301,16 +346,20 @@ strings to make sure that UTF-8 support works on EBCDIC platforms. */
#define STRING_Modi0 STR_M STR_o STR_d STR_i "\0"
#define STRING_Mongolian0 STR_M STR_o STR_n STR_g STR_o STR_l STR_i STR_a STR_n "\0"
#define STRING_Mro0 STR_M STR_r STR_o "\0"
#define STRING_Multani0 STR_M STR_u STR_l STR_t STR_a STR_n STR_i "\0"
#define STRING_Myanmar0 STR_M STR_y STR_a STR_n STR_m STR_a STR_r "\0"
#define STRING_N0 STR_N "\0"
#define STRING_Nabataean0 STR_N STR_a STR_b STR_a STR_t STR_a STR_e STR_a STR_n "\0"
#define STRING_Nd0 STR_N STR_d "\0"
#define STRING_New_Tai_Lue0 STR_N STR_e STR_w STR_UNDERSCORE STR_T STR_a STR_i STR_UNDERSCORE STR_L STR_u STR_e "\0"
#define STRING_Newa0 STR_N STR_e STR_w STR_a "\0"
#define STRING_Nko0 STR_N STR_k STR_o "\0"
#define STRING_Nl0 STR_N STR_l "\0"
#define STRING_No0 STR_N STR_o "\0"
#define STRING_Nushu0 STR_N STR_u STR_s STR_h STR_u "\0"
#define STRING_Ogham0 STR_O STR_g STR_h STR_a STR_m "\0"
#define STRING_Ol_Chiki0 STR_O STR_l STR_UNDERSCORE STR_C STR_h STR_i STR_k STR_i "\0"
#define STRING_Old_Hungarian0 STR_O STR_l STR_d STR_UNDERSCORE STR_H STR_u STR_n STR_g STR_a STR_r STR_i STR_a STR_n "\0"
#define STRING_Old_Italic0 STR_O STR_l STR_d STR_UNDERSCORE STR_I STR_t STR_a STR_l STR_i STR_c "\0"
#define STRING_Old_North_Arabian0 STR_O STR_l STR_d STR_UNDERSCORE STR_N STR_o STR_r STR_t STR_h STR_UNDERSCORE STR_A STR_r STR_a STR_b STR_i STR_a STR_n "\0"
#define STRING_Old_Permic0 STR_O STR_l STR_d STR_UNDERSCORE STR_P STR_e STR_r STR_m STR_i STR_c "\0"
@ -318,6 +367,7 @@ strings to make sure that UTF-8 support works on EBCDIC platforms. */
#define STRING_Old_South_Arabian0 STR_O STR_l STR_d STR_UNDERSCORE STR_S STR_o STR_u STR_t STR_h STR_UNDERSCORE STR_A STR_r STR_a STR_b STR_i STR_a STR_n "\0"
#define STRING_Old_Turkic0 STR_O STR_l STR_d STR_UNDERSCORE STR_T STR_u STR_r STR_k STR_i STR_c "\0"
#define STRING_Oriya0 STR_O STR_r STR_i STR_y STR_a "\0"
#define STRING_Osage0 STR_O STR_s STR_a STR_g STR_e "\0"
#define STRING_Osmanya0 STR_O STR_s STR_m STR_a STR_n STR_y STR_a "\0"
#define STRING_P0 STR_P "\0"
#define STRING_Pahawh_Hmong0 STR_P STR_a STR_h STR_a STR_w STR_h STR_UNDERSCORE STR_H STR_m STR_o STR_n STR_g "\0"
@ -342,11 +392,13 @@ strings to make sure that UTF-8 support works on EBCDIC platforms. */
#define STRING_Sharada0 STR_S STR_h STR_a STR_r STR_a STR_d STR_a "\0"
#define STRING_Shavian0 STR_S STR_h STR_a STR_v STR_i STR_a STR_n "\0"
#define STRING_Siddham0 STR_S STR_i STR_d STR_d STR_h STR_a STR_m "\0"
#define STRING_SignWriting0 STR_S STR_i STR_g STR_n STR_W STR_r STR_i STR_t STR_i STR_n STR_g "\0"
#define STRING_Sinhala0 STR_S STR_i STR_n STR_h STR_a STR_l STR_a "\0"
#define STRING_Sk0 STR_S STR_k "\0"
#define STRING_Sm0 STR_S STR_m "\0"
#define STRING_So0 STR_S STR_o "\0"
#define STRING_Sora_Sompeng0 STR_S STR_o STR_r STR_a STR_UNDERSCORE STR_S STR_o STR_m STR_p STR_e STR_n STR_g "\0"
#define STRING_Soyombo0 STR_S STR_o STR_y STR_o STR_m STR_b STR_o "\0"
#define STRING_Sundanese0 STR_S STR_u STR_n STR_d STR_a STR_n STR_e STR_s STR_e "\0"
#define STRING_Syloti_Nagri0 STR_S STR_y STR_l STR_o STR_t STR_i STR_UNDERSCORE STR_N STR_a STR_g STR_r STR_i "\0"
#define STRING_Syriac0 STR_S STR_y STR_r STR_i STR_a STR_c "\0"
@ -357,6 +409,7 @@ strings to make sure that UTF-8 support works on EBCDIC platforms. */
#define STRING_Tai_Viet0 STR_T STR_a STR_i STR_UNDERSCORE STR_V STR_i STR_e STR_t "\0"
#define STRING_Takri0 STR_T STR_a STR_k STR_r STR_i "\0"
#define STRING_Tamil0 STR_T STR_a STR_m STR_i STR_l "\0"
#define STRING_Tangut0 STR_T STR_a STR_n STR_g STR_u STR_t "\0"
#define STRING_Telugu0 STR_T STR_e STR_l STR_u STR_g STR_u "\0"
#define STRING_Thaana0 STR_T STR_h STR_a STR_a STR_n STR_a "\0"
#define STRING_Thai0 STR_T STR_h STR_a STR_i "\0"
@ -373,11 +426,15 @@ strings to make sure that UTF-8 support works on EBCDIC platforms. */
#define STRING_Xwd0 STR_X STR_w STR_d "\0"
#define STRING_Yi0 STR_Y STR_i "\0"
#define STRING_Z0 STR_Z "\0"
#define STRING_Zanabazar_Square0 STR_Z STR_a STR_n STR_a STR_b STR_a STR_z STR_a STR_r STR_UNDERSCORE STR_S STR_q STR_u STR_a STR_r STR_e "\0"
#define STRING_Zl0 STR_Z STR_l "\0"
#define STRING_Zp0 STR_Z STR_p "\0"
#define STRING_Zs0 STR_Z STR_s "\0"
const char PRIV(utt_names)[] =
STRING_Adlam0
STRING_Ahom0
STRING_Anatolian_Hieroglyphs0
STRING_Any0
STRING_Arabic0
STRING_Armenian0
@ -387,6 +444,7 @@ const char PRIV(utt_names)[] =
STRING_Bassa_Vah0
STRING_Batak0
STRING_Bengali0
STRING_Bhaiksuki0
STRING_Bopomofo0
STRING_Brahmi0
STRING_Braille0
@ -425,6 +483,7 @@ const char PRIV(utt_names)[] =
STRING_Han0
STRING_Hangul0
STRING_Hanunoo0
STRING_Hatran0
STRING_Hebrew0
STRING_Hiragana0
STRING_Imperial_Aramaic0
@ -461,6 +520,8 @@ const char PRIV(utt_names)[] =
STRING_Malayalam0
STRING_Mandaic0
STRING_Manichaean0
STRING_Marchen0
STRING_Masaram_Gondi0
STRING_Mc0
STRING_Me0
STRING_Meetei_Mayek0
@ -472,16 +533,20 @@ const char PRIV(utt_names)[] =
STRING_Modi0
STRING_Mongolian0
STRING_Mro0
STRING_Multani0
STRING_Myanmar0
STRING_N0
STRING_Nabataean0
STRING_Nd0
STRING_New_Tai_Lue0
STRING_Newa0
STRING_Nko0
STRING_Nl0
STRING_No0
STRING_Nushu0
STRING_Ogham0
STRING_Ol_Chiki0
STRING_Old_Hungarian0
STRING_Old_Italic0
STRING_Old_North_Arabian0
STRING_Old_Permic0
@ -489,6 +554,7 @@ const char PRIV(utt_names)[] =
STRING_Old_South_Arabian0
STRING_Old_Turkic0
STRING_Oriya0
STRING_Osage0
STRING_Osmanya0
STRING_P0
STRING_Pahawh_Hmong0
@ -513,11 +579,13 @@ const char PRIV(utt_names)[] =
STRING_Sharada0
STRING_Shavian0
STRING_Siddham0
STRING_SignWriting0
STRING_Sinhala0
STRING_Sk0
STRING_Sm0
STRING_So0
STRING_Sora_Sompeng0
STRING_Soyombo0
STRING_Sundanese0
STRING_Syloti_Nagri0
STRING_Syriac0
@ -528,6 +596,7 @@ const char PRIV(utt_names)[] =
STRING_Tai_Viet0
STRING_Takri0
STRING_Tamil0
STRING_Tangut0
STRING_Telugu0
STRING_Thaana0
STRING_Thai0
@ -544,184 +613,201 @@ const char PRIV(utt_names)[] =
STRING_Xwd0
STRING_Yi0
STRING_Z0
STRING_Zanabazar_Square0
STRING_Zl0
STRING_Zp0
STRING_Zs0;
const ucp_type_table PRIV(utt)[] = {
{ 0, PT_ANY, 0 },
{ 4, PT_SC, ucp_Arabic },
{ 11, PT_SC, ucp_Armenian },
{ 20, PT_SC, ucp_Avestan },
{ 28, PT_SC, ucp_Balinese },
{ 37, PT_SC, ucp_Bamum },
{ 43, PT_SC, ucp_Bassa_Vah },
{ 53, PT_SC, ucp_Batak },
{ 59, PT_SC, ucp_Bengali },
{ 67, PT_SC, ucp_Bopomofo },
{ 76, PT_SC, ucp_Brahmi },
{ 83, PT_SC, ucp_Braille },
{ 91, PT_SC, ucp_Buginese },
{ 100, PT_SC, ucp_Buhid },
{ 106, PT_GC, ucp_C },
{ 108, PT_SC, ucp_Canadian_Aboriginal },
{ 128, PT_SC, ucp_Carian },
{ 135, PT_SC, ucp_Caucasian_Albanian },
{ 154, PT_PC, ucp_Cc },
{ 157, PT_PC, ucp_Cf },
{ 160, PT_SC, ucp_Chakma },
{ 167, PT_SC, ucp_Cham },
{ 172, PT_SC, ucp_Cherokee },
{ 181, PT_PC, ucp_Cn },
{ 184, PT_PC, ucp_Co },
{ 187, PT_SC, ucp_Common },
{ 194, PT_SC, ucp_Coptic },
{ 201, PT_PC, ucp_Cs },
{ 204, PT_SC, ucp_Cuneiform },
{ 214, PT_SC, ucp_Cypriot },
{ 222, PT_SC, ucp_Cyrillic },
{ 231, PT_SC, ucp_Deseret },
{ 239, PT_SC, ucp_Devanagari },
{ 250, PT_SC, ucp_Duployan },
{ 259, PT_SC, ucp_Egyptian_Hieroglyphs },
{ 280, PT_SC, ucp_Elbasan },
{ 288, PT_SC, ucp_Ethiopic },
{ 297, PT_SC, ucp_Georgian },
{ 306, PT_SC, ucp_Glagolitic },
{ 317, PT_SC, ucp_Gothic },
{ 324, PT_SC, ucp_Grantha },
{ 332, PT_SC, ucp_Greek },
{ 338, PT_SC, ucp_Gujarati },
{ 347, PT_SC, ucp_Gurmukhi },
{ 356, PT_SC, ucp_Han },
{ 360, PT_SC, ucp_Hangul },
{ 367, PT_SC, ucp_Hanunoo },
{ 375, PT_SC, ucp_Hebrew },
{ 382, PT_SC, ucp_Hiragana },
{ 391, PT_SC, ucp_Imperial_Aramaic },
{ 408, PT_SC, ucp_Inherited },
{ 418, PT_SC, ucp_Inscriptional_Pahlavi },
{ 440, PT_SC, ucp_Inscriptional_Parthian },
{ 463, PT_SC, ucp_Javanese },
{ 472, PT_SC, ucp_Kaithi },
{ 479, PT_SC, ucp_Kannada },
{ 487, PT_SC, ucp_Katakana },
{ 496, PT_SC, ucp_Kayah_Li },
{ 505, PT_SC, ucp_Kharoshthi },
{ 516, PT_SC, ucp_Khmer },
{ 522, PT_SC, ucp_Khojki },
{ 529, PT_SC, ucp_Khudawadi },
{ 539, PT_GC, ucp_L },
{ 541, PT_LAMP, 0 },
{ 544, PT_SC, ucp_Lao },
{ 548, PT_SC, ucp_Latin },
{ 554, PT_SC, ucp_Lepcha },
{ 561, PT_SC, ucp_Limbu },
{ 567, PT_SC, ucp_Linear_A },
{ 576, PT_SC, ucp_Linear_B },
{ 585, PT_SC, ucp_Lisu },
{ 590, PT_PC, ucp_Ll },
{ 593, PT_PC, ucp_Lm },
{ 596, PT_PC, ucp_Lo },
{ 599, PT_PC, ucp_Lt },
{ 602, PT_PC, ucp_Lu },
{ 605, PT_SC, ucp_Lycian },
{ 612, PT_SC, ucp_Lydian },
{ 619, PT_GC, ucp_M },
{ 621, PT_SC, ucp_Mahajani },
{ 630, PT_SC, ucp_Malayalam },
{ 640, PT_SC, ucp_Mandaic },
{ 648, PT_SC, ucp_Manichaean },
{ 659, PT_PC, ucp_Mc },
{ 662, PT_PC, ucp_Me },
{ 665, PT_SC, ucp_Meetei_Mayek },
{ 678, PT_SC, ucp_Mende_Kikakui },
{ 692, PT_SC, ucp_Meroitic_Cursive },
{ 709, PT_SC, ucp_Meroitic_Hieroglyphs },
{ 730, PT_SC, ucp_Miao },
{ 735, PT_PC, ucp_Mn },
{ 738, PT_SC, ucp_Modi },
{ 743, PT_SC, ucp_Mongolian },
{ 753, PT_SC, ucp_Mro },
{ 757, PT_SC, ucp_Myanmar },
{ 765, PT_GC, ucp_N },
{ 767, PT_SC, ucp_Nabataean },
{ 777, PT_PC, ucp_Nd },
{ 780, PT_SC, ucp_New_Tai_Lue },
{ 792, PT_SC, ucp_Nko },
{ 796, PT_PC, ucp_Nl },
{ 799, PT_PC, ucp_No },
{ 802, PT_SC, ucp_Ogham },
{ 808, PT_SC, ucp_Ol_Chiki },
{ 817, PT_SC, ucp_Old_Italic },
{ 828, PT_SC, ucp_Old_North_Arabian },
{ 846, PT_SC, ucp_Old_Permic },
{ 857, PT_SC, ucp_Old_Persian },
{ 869, PT_SC, ucp_Old_South_Arabian },
{ 887, PT_SC, ucp_Old_Turkic },
{ 898, PT_SC, ucp_Oriya },
{ 904, PT_SC, ucp_Osmanya },
{ 912, PT_GC, ucp_P },
{ 914, PT_SC, ucp_Pahawh_Hmong },
{ 927, PT_SC, ucp_Palmyrene },
{ 937, PT_SC, ucp_Pau_Cin_Hau },
{ 949, PT_PC, ucp_Pc },
{ 952, PT_PC, ucp_Pd },
{ 955, PT_PC, ucp_Pe },
{ 958, PT_PC, ucp_Pf },
{ 961, PT_SC, ucp_Phags_Pa },
{ 970, PT_SC, ucp_Phoenician },
{ 981, PT_PC, ucp_Pi },
{ 984, PT_PC, ucp_Po },
{ 987, PT_PC, ucp_Ps },
{ 990, PT_SC, ucp_Psalter_Pahlavi },
{ 1006, PT_SC, ucp_Rejang },
{ 1013, PT_SC, ucp_Runic },
{ 1019, PT_GC, ucp_S },
{ 1021, PT_SC, ucp_Samaritan },
{ 1031, PT_SC, ucp_Saurashtra },
{ 1042, PT_PC, ucp_Sc },
{ 1045, PT_SC, ucp_Sharada },
{ 1053, PT_SC, ucp_Shavian },
{ 1061, PT_SC, ucp_Siddham },
{ 1069, PT_SC, ucp_Sinhala },
{ 1077, PT_PC, ucp_Sk },
{ 1080, PT_PC, ucp_Sm },
{ 1083, PT_PC, ucp_So },
{ 1086, PT_SC, ucp_Sora_Sompeng },
{ 1099, PT_SC, ucp_Sundanese },
{ 1109, PT_SC, ucp_Syloti_Nagri },
{ 1122, PT_SC, ucp_Syriac },
{ 1129, PT_SC, ucp_Tagalog },
{ 1137, PT_SC, ucp_Tagbanwa },
{ 1146, PT_SC, ucp_Tai_Le },
{ 1153, PT_SC, ucp_Tai_Tham },
{ 1162, PT_SC, ucp_Tai_Viet },
{ 1171, PT_SC, ucp_Takri },
{ 1177, PT_SC, ucp_Tamil },
{ 1183, PT_SC, ucp_Telugu },
{ 1190, PT_SC, ucp_Thaana },
{ 1197, PT_SC, ucp_Thai },
{ 1202, PT_SC, ucp_Tibetan },
{ 1210, PT_SC, ucp_Tifinagh },
{ 1219, PT_SC, ucp_Tirhuta },
{ 1227, PT_SC, ucp_Ugaritic },
{ 1236, PT_SC, ucp_Vai },
{ 1240, PT_SC, ucp_Warang_Citi },
{ 1252, PT_ALNUM, 0 },
{ 1256, PT_PXSPACE, 0 },
{ 1260, PT_SPACE, 0 },
{ 1264, PT_UCNC, 0 },
{ 1268, PT_WORD, 0 },
{ 1272, PT_SC, ucp_Yi },
{ 1275, PT_GC, ucp_Z },
{ 1277, PT_PC, ucp_Zl },
{ 1280, PT_PC, ucp_Zp },
{ 1283, PT_PC, ucp_Zs }
{ 0, PT_SC, ucp_Adlam },
{ 6, PT_SC, ucp_Ahom },
{ 11, PT_SC, ucp_Anatolian_Hieroglyphs },
{ 33, PT_ANY, 0 },
{ 37, PT_SC, ucp_Arabic },
{ 44, PT_SC, ucp_Armenian },
{ 53, PT_SC, ucp_Avestan },
{ 61, PT_SC, ucp_Balinese },
{ 70, PT_SC, ucp_Bamum },
{ 76, PT_SC, ucp_Bassa_Vah },
{ 86, PT_SC, ucp_Batak },
{ 92, PT_SC, ucp_Bengali },
{ 100, PT_SC, ucp_Bhaiksuki },
{ 110, PT_SC, ucp_Bopomofo },
{ 119, PT_SC, ucp_Brahmi },
{ 126, PT_SC, ucp_Braille },
{ 134, PT_SC, ucp_Buginese },
{ 143, PT_SC, ucp_Buhid },
{ 149, PT_GC, ucp_C },
{ 151, PT_SC, ucp_Canadian_Aboriginal },
{ 171, PT_SC, ucp_Carian },
{ 178, PT_SC, ucp_Caucasian_Albanian },
{ 197, PT_PC, ucp_Cc },
{ 200, PT_PC, ucp_Cf },
{ 203, PT_SC, ucp_Chakma },
{ 210, PT_SC, ucp_Cham },
{ 215, PT_SC, ucp_Cherokee },
{ 224, PT_PC, ucp_Cn },
{ 227, PT_PC, ucp_Co },
{ 230, PT_SC, ucp_Common },
{ 237, PT_SC, ucp_Coptic },
{ 244, PT_PC, ucp_Cs },
{ 247, PT_SC, ucp_Cuneiform },
{ 257, PT_SC, ucp_Cypriot },
{ 265, PT_SC, ucp_Cyrillic },
{ 274, PT_SC, ucp_Deseret },
{ 282, PT_SC, ucp_Devanagari },
{ 293, PT_SC, ucp_Duployan },
{ 302, PT_SC, ucp_Egyptian_Hieroglyphs },
{ 323, PT_SC, ucp_Elbasan },
{ 331, PT_SC, ucp_Ethiopic },
{ 340, PT_SC, ucp_Georgian },
{ 349, PT_SC, ucp_Glagolitic },
{ 360, PT_SC, ucp_Gothic },
{ 367, PT_SC, ucp_Grantha },
{ 375, PT_SC, ucp_Greek },
{ 381, PT_SC, ucp_Gujarati },
{ 390, PT_SC, ucp_Gurmukhi },
{ 399, PT_SC, ucp_Han },
{ 403, PT_SC, ucp_Hangul },
{ 410, PT_SC, ucp_Hanunoo },
{ 418, PT_SC, ucp_Hatran },
{ 425, PT_SC, ucp_Hebrew },
{ 432, PT_SC, ucp_Hiragana },
{ 441, PT_SC, ucp_Imperial_Aramaic },
{ 458, PT_SC, ucp_Inherited },
{ 468, PT_SC, ucp_Inscriptional_Pahlavi },
{ 490, PT_SC, ucp_Inscriptional_Parthian },
{ 513, PT_SC, ucp_Javanese },
{ 522, PT_SC, ucp_Kaithi },
{ 529, PT_SC, ucp_Kannada },
{ 537, PT_SC, ucp_Katakana },
{ 546, PT_SC, ucp_Kayah_Li },
{ 555, PT_SC, ucp_Kharoshthi },
{ 566, PT_SC, ucp_Khmer },
{ 572, PT_SC, ucp_Khojki },
{ 579, PT_SC, ucp_Khudawadi },
{ 589, PT_GC, ucp_L },
{ 591, PT_LAMP, 0 },
{ 594, PT_SC, ucp_Lao },
{ 598, PT_SC, ucp_Latin },
{ 604, PT_SC, ucp_Lepcha },
{ 611, PT_SC, ucp_Limbu },
{ 617, PT_SC, ucp_Linear_A },
{ 626, PT_SC, ucp_Linear_B },
{ 635, PT_SC, ucp_Lisu },
{ 640, PT_PC, ucp_Ll },
{ 643, PT_PC, ucp_Lm },
{ 646, PT_PC, ucp_Lo },
{ 649, PT_PC, ucp_Lt },
{ 652, PT_PC, ucp_Lu },
{ 655, PT_SC, ucp_Lycian },
{ 662, PT_SC, ucp_Lydian },
{ 669, PT_GC, ucp_M },
{ 671, PT_SC, ucp_Mahajani },
{ 680, PT_SC, ucp_Malayalam },
{ 690, PT_SC, ucp_Mandaic },
{ 698, PT_SC, ucp_Manichaean },
{ 709, PT_SC, ucp_Marchen },
{ 717, PT_SC, ucp_Masaram_Gondi },
{ 731, PT_PC, ucp_Mc },
{ 734, PT_PC, ucp_Me },
{ 737, PT_SC, ucp_Meetei_Mayek },
{ 750, PT_SC, ucp_Mende_Kikakui },
{ 764, PT_SC, ucp_Meroitic_Cursive },
{ 781, PT_SC, ucp_Meroitic_Hieroglyphs },
{ 802, PT_SC, ucp_Miao },
{ 807, PT_PC, ucp_Mn },
{ 810, PT_SC, ucp_Modi },
{ 815, PT_SC, ucp_Mongolian },
{ 825, PT_SC, ucp_Mro },
{ 829, PT_SC, ucp_Multani },
{ 837, PT_SC, ucp_Myanmar },
{ 845, PT_GC, ucp_N },
{ 847, PT_SC, ucp_Nabataean },
{ 857, PT_PC, ucp_Nd },
{ 860, PT_SC, ucp_New_Tai_Lue },
{ 872, PT_SC, ucp_Newa },
{ 877, PT_SC, ucp_Nko },
{ 881, PT_PC, ucp_Nl },
{ 884, PT_PC, ucp_No },
{ 887, PT_SC, ucp_Nushu },
{ 893, PT_SC, ucp_Ogham },
{ 899, PT_SC, ucp_Ol_Chiki },
{ 908, PT_SC, ucp_Old_Hungarian },
{ 922, PT_SC, ucp_Old_Italic },
{ 933, PT_SC, ucp_Old_North_Arabian },
{ 951, PT_SC, ucp_Old_Permic },
{ 962, PT_SC, ucp_Old_Persian },
{ 974, PT_SC, ucp_Old_South_Arabian },
{ 992, PT_SC, ucp_Old_Turkic },
{ 1003, PT_SC, ucp_Oriya },
{ 1009, PT_SC, ucp_Osage },
{ 1015, PT_SC, ucp_Osmanya },
{ 1023, PT_GC, ucp_P },
{ 1025, PT_SC, ucp_Pahawh_Hmong },
{ 1038, PT_SC, ucp_Palmyrene },
{ 1048, PT_SC, ucp_Pau_Cin_Hau },
{ 1060, PT_PC, ucp_Pc },
{ 1063, PT_PC, ucp_Pd },
{ 1066, PT_PC, ucp_Pe },
{ 1069, PT_PC, ucp_Pf },
{ 1072, PT_SC, ucp_Phags_Pa },
{ 1081, PT_SC, ucp_Phoenician },
{ 1092, PT_PC, ucp_Pi },
{ 1095, PT_PC, ucp_Po },
{ 1098, PT_PC, ucp_Ps },
{ 1101, PT_SC, ucp_Psalter_Pahlavi },
{ 1117, PT_SC, ucp_Rejang },
{ 1124, PT_SC, ucp_Runic },
{ 1130, PT_GC, ucp_S },
{ 1132, PT_SC, ucp_Samaritan },
{ 1142, PT_SC, ucp_Saurashtra },
{ 1153, PT_PC, ucp_Sc },
{ 1156, PT_SC, ucp_Sharada },
{ 1164, PT_SC, ucp_Shavian },
{ 1172, PT_SC, ucp_Siddham },
{ 1180, PT_SC, ucp_SignWriting },
{ 1192, PT_SC, ucp_Sinhala },
{ 1200, PT_PC, ucp_Sk },
{ 1203, PT_PC, ucp_Sm },
{ 1206, PT_PC, ucp_So },
{ 1209, PT_SC, ucp_Sora_Sompeng },
{ 1222, PT_SC, ucp_Soyombo },
{ 1230, PT_SC, ucp_Sundanese },
{ 1240, PT_SC, ucp_Syloti_Nagri },
{ 1253, PT_SC, ucp_Syriac },
{ 1260, PT_SC, ucp_Tagalog },
{ 1268, PT_SC, ucp_Tagbanwa },
{ 1277, PT_SC, ucp_Tai_Le },
{ 1284, PT_SC, ucp_Tai_Tham },
{ 1293, PT_SC, ucp_Tai_Viet },
{ 1302, PT_SC, ucp_Takri },
{ 1308, PT_SC, ucp_Tamil },
{ 1314, PT_SC, ucp_Tangut },
{ 1321, PT_SC, ucp_Telugu },
{ 1328, PT_SC, ucp_Thaana },
{ 1335, PT_SC, ucp_Thai },
{ 1340, PT_SC, ucp_Tibetan },
{ 1348, PT_SC, ucp_Tifinagh },
{ 1357, PT_SC, ucp_Tirhuta },
{ 1365, PT_SC, ucp_Ugaritic },
{ 1374, PT_SC, ucp_Vai },
{ 1378, PT_SC, ucp_Warang_Citi },
{ 1390, PT_ALNUM, 0 },
{ 1394, PT_PXSPACE, 0 },
{ 1398, PT_SPACE, 0 },
{ 1402, PT_UCNC, 0 },
{ 1406, PT_WORD, 0 },
{ 1410, PT_SC, ucp_Yi },
{ 1413, PT_GC, ucp_Z },
{ 1415, PT_SC, ucp_Zanabazar_Square },
{ 1432, PT_PC, ucp_Zl },
{ 1435, PT_PC, ucp_Zp },
{ 1438, PT_PC, ucp_Zs }
};
const int PRIV(utt_size) = sizeof(PRIV(utt)) / sizeof(ucp_type_table);
const size_t PRIV(utt_size) = sizeof(PRIV(utt)) / sizeof(ucp_type_table);
#endif /* SUPPORT_UTF */
#endif /* SUPPORT_UNICODE */
/* End of pcre_tables.c */
/* End of pcre2_tables.c */

File diff suppressed because it is too large Load diff

View file

@ -1,9 +1,46 @@
/*************************************************
* Unicode Property Table handler *
* Perl-Compatible Regular Expressions *
*************************************************/
#ifndef _UCP_H
#define _UCP_H
/* PCRE is a library of functions to support regular expressions whose syntax
and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel
Original API code Copyright (c) 1997-2012 University of Cambridge
New API code Copyright (c) 2016 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the University of Cambridge nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
-----------------------------------------------------------------------------
*/
#ifndef PCRE2_UCP_H_IDEMPOTENT_GUARD
#define PCRE2_UCP_H_IDEMPOTENT_GUARD
/* This file contains definitions of the property values that are returned by
the UCD access macros. New values that are added for new releases of Unicode
@ -14,7 +51,7 @@ the same as the values that are generated by the maint/MultiStage2.py script,
where the equivalent property descriptive names are listed in vectors.
ALSO: The specific values of the first two enums are assumed for the table
called catposstab in pcre_compile.c. */
called catposstab in pcre2_compile.c. */
/* These are the general character categories. */
@ -63,9 +100,7 @@ enum {
ucp_Zs /* Space separator */
};
/* These are grapheme break properties. Note that the code for processing them
assumes that the values are less than 16. If more values are added that take
the number to 16 or more, the code will have to be rewritten. */
/* These are grapheme break properties. */
enum {
ucp_gbCR, /* 0 */
@ -80,7 +115,12 @@ enum {
ucp_gbLV, /* 9 Hangul syllable type LV */
ucp_gbLVT, /* 10 Hangul syllable type LVT */
ucp_gbRegionalIndicator, /* 11 */
ucp_gbOther /* 12 */
ucp_gbOther, /* 12 */
ucp_gbE_Base, /* 13 */
ucp_gbE_Modifier, /* 14 */
ucp_gbE_Base_GAZ, /* 15 */
ucp_gbZWJ, /* 16 */
ucp_gbGlue_After_Zwj /* 17 */
};
/* These are the script identifications. */
@ -147,13 +187,13 @@ enum {
ucp_Tifinagh,
ucp_Ugaritic,
ucp_Yi,
/* New for Unicode 5.0: */
/* New for Unicode 5.0 */
ucp_Balinese,
ucp_Cuneiform,
ucp_Nko,
ucp_Phags_Pa,
ucp_Phoenician,
/* New for Unicode 5.1: */
/* New for Unicode 5.1 */
ucp_Carian,
ucp_Cham,
ucp_Kayah_Li,
@ -165,7 +205,7 @@ enum {
ucp_Saurashtra,
ucp_Sundanese,
ucp_Vai,
/* New for Unicode 5.2: */
/* New for Unicode 5.2 */
ucp_Avestan,
ucp_Bamum,
ucp_Egyptian_Hieroglyphs,
@ -181,11 +221,11 @@ enum {
ucp_Samaritan,
ucp_Tai_Tham,
ucp_Tai_Viet,
/* New for Unicode 6.0.0: */
/* New for Unicode 6.0.0 */
ucp_Batak,
ucp_Brahmi,
ucp_Mandaic,
/* New for Unicode 6.1.0: */
/* New for Unicode 6.1.0 */
ucp_Chakma,
ucp_Meroitic_Cursive,
ucp_Meroitic_Hieroglyphs,
@ -193,7 +233,7 @@ enum {
ucp_Sharada,
ucp_Sora_Sompeng,
ucp_Takri,
/* New for Unicode 7.0.0: */
/* New for Unicode 7.0.0 */
ucp_Bassa_Vah,
ucp_Caucasian_Albanian,
ucp_Duployan,
@ -216,9 +256,27 @@ enum {
ucp_Pau_Cin_Hau,
ucp_Siddham,
ucp_Tirhuta,
ucp_Warang_Citi
ucp_Warang_Citi,
/* New for Unicode 8.0.0 */
ucp_Ahom,
ucp_Anatolian_Hieroglyphs,
ucp_Hatran,
ucp_Multani,
ucp_Old_Hungarian,
ucp_SignWriting,
/* New for Unicode 10.0.0 (no update since 8.0.0) */
ucp_Adlam,
ucp_Bhaiksuki,
ucp_Marchen,
ucp_Newa,
ucp_Osage,
ucp_Tangut,
ucp_Masaram_Gondi,
ucp_Nushu,
ucp_Soyombo,
ucp_Zanabazar_Square
};
#endif
#endif /* PCRE2_UCP_H_IDEMPOTENT_GUARD */
/* End of ucp.h */
/* End of pcre2_ucp.h */

View file

@ -6,7 +6,8 @@
and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel
Copyright (c) 1997-2013 University of Cambridge
Original API code Copyright (c) 1997-2012 University of Cambridge
New API code Copyright (c) 2016-2017 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
@ -38,107 +39,131 @@ POSSIBILITY OF SUCH DAMAGE.
*/
/* This module contains an internal function for validating UTF-8 character
strings. */
/* This module contains an internal function for validating UTF character
strings. This file is also #included by the pcre2test program, which uses
macros to change names from _pcre2_xxx to xxxx, thereby avoiding name clashes
with the library. In this case, PCRE2_PCRE2TEST is defined. */
#ifndef PCRE2_PCRE2TEST /* We're compiling the library */
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
#include "pcre2_internal.h"
#endif /* PCRE2_PCRE2TEST */
#ifndef SUPPORT_UNICODE
/*************************************************
* Dummy function when Unicode is not supported *
*************************************************/
/* This function should never be called when Unicode is not supported. */
int
PRIV(valid_utf)(PCRE2_SPTR string, PCRE2_SIZE length, PCRE2_SIZE *erroroffset)
{
(void)string;
(void)length;
(void)erroroffset;
return 0;
}
#else /* UTF is supported */
#include "pcre_internal.h"
/*************************************************
* Validate a UTF-8 string *
* Validate a UTF string *
*************************************************/
/* This function is called (optionally) at the start of compile or match, to
check that a supposed UTF-8 string is actually valid. The early check means
check that a supposed UTF string is actually valid. The early check means
that subsequent code can assume it is dealing with a valid string. The check
can be turned off for maximum performance, but the consequences of supplying an
invalid string are then undefined.
Originally, this function checked according to RFC 2279, allowing for values in
the range 0 to 0x7fffffff, up to 6 bytes long, but ensuring that they were in
the canonical format. Once somebody had pointed out RFC 3629 to me (it
obsoletes 2279), additional restrictions were applied. The values are now
limited to be between 0 and 0x0010ffff, no more than 4 bytes long, and the
subrange 0xd000 to 0xdfff is excluded. However, the format of 5-byte and 6-byte
characters is still checked.
From release 8.13 more information about the details of the error are passed
back in the returned value:
PCRE_UTF8_ERR0 No error
PCRE_UTF8_ERR1 Missing 1 byte at the end of the string
PCRE_UTF8_ERR2 Missing 2 bytes at the end of the string
PCRE_UTF8_ERR3 Missing 3 bytes at the end of the string
PCRE_UTF8_ERR4 Missing 4 bytes at the end of the string
PCRE_UTF8_ERR5 Missing 5 bytes at the end of the string
PCRE_UTF8_ERR6 2nd-byte's two top bits are not 0x80
PCRE_UTF8_ERR7 3rd-byte's two top bits are not 0x80
PCRE_UTF8_ERR8 4th-byte's two top bits are not 0x80
PCRE_UTF8_ERR9 5th-byte's two top bits are not 0x80
PCRE_UTF8_ERR10 6th-byte's two top bits are not 0x80
PCRE_UTF8_ERR11 5-byte character is not permitted by RFC 3629
PCRE_UTF8_ERR12 6-byte character is not permitted by RFC 3629
PCRE_UTF8_ERR13 4-byte character with value > 0x10ffff is not permitted
PCRE_UTF8_ERR14 3-byte character with value 0xd000-0xdfff is not permitted
PCRE_UTF8_ERR15 Overlong 2-byte sequence
PCRE_UTF8_ERR16 Overlong 3-byte sequence
PCRE_UTF8_ERR17 Overlong 4-byte sequence
PCRE_UTF8_ERR18 Overlong 5-byte sequence (won't ever occur)
PCRE_UTF8_ERR19 Overlong 6-byte sequence (won't ever occur)
PCRE_UTF8_ERR20 Isolated 0x80 byte (not within UTF-8 character)
PCRE_UTF8_ERR21 Byte with the illegal value 0xfe or 0xff
PCRE_UTF8_ERR22 Unused (was non-character)
Arguments:
string points to the string
length length of string, or -1 if the string is zero-terminated
length length of string
errp pointer to an error position offset variable
Returns: = 0 if the string is a valid UTF-8 string
> 0 otherwise, setting the offset of the bad character
Returns: == 0 if the string is a valid UTF string
!= 0 otherwise, setting the offset of the bad character
*/
int
PRIV(valid_utf)(PCRE_PUCHAR string, int length, int *erroroffset)
PRIV(valid_utf)(PCRE2_SPTR string, PCRE2_SIZE length, PCRE2_SIZE *erroroffset)
{
#ifdef SUPPORT_UTF
register PCRE_PUCHAR p;
PCRE2_SPTR p;
uint32_t c;
if (length < 0)
{
for (p = string; *p != 0; p++);
length = (int)(p - string);
}
/* ----------------- Check a UTF-8 string ----------------- */
for (p = string; length-- > 0; p++)
#if PCRE2_CODE_UNIT_WIDTH == 8
/* Originally, this function checked according to RFC 2279, allowing for values
in the range 0 to 0x7fffffff, up to 6 bytes long, but ensuring that they were
in the canonical format. Once somebody had pointed out RFC 3629 to me (it
obsoletes 2279), additional restrictions were applied. The values are now
limited to be between 0 and 0x0010ffff, no more than 4 bytes long, and the
subrange 0xd000 to 0xdfff is excluded. However, the format of 5-byte and 6-byte
characters is still checked. Error returns are as follows:
PCRE2_ERROR_UTF8_ERR1 Missing 1 byte at the end of the string
PCRE2_ERROR_UTF8_ERR2 Missing 2 bytes at the end of the string
PCRE2_ERROR_UTF8_ERR3 Missing 3 bytes at the end of the string
PCRE2_ERROR_UTF8_ERR4 Missing 4 bytes at the end of the string
PCRE2_ERROR_UTF8_ERR5 Missing 5 bytes at the end of the string
PCRE2_ERROR_UTF8_ERR6 2nd-byte's two top bits are not 0x80
PCRE2_ERROR_UTF8_ERR7 3rd-byte's two top bits are not 0x80
PCRE2_ERROR_UTF8_ERR8 4th-byte's two top bits are not 0x80
PCRE2_ERROR_UTF8_ERR9 5th-byte's two top bits are not 0x80
PCRE2_ERROR_UTF8_ERR10 6th-byte's two top bits are not 0x80
PCRE2_ERROR_UTF8_ERR11 5-byte character is not permitted by RFC 3629
PCRE2_ERROR_UTF8_ERR12 6-byte character is not permitted by RFC 3629
PCRE2_ERROR_UTF8_ERR13 4-byte character with value > 0x10ffff is not permitted
PCRE2_ERROR_UTF8_ERR14 3-byte character with value 0xd800-0xdfff is not permitted
PCRE2_ERROR_UTF8_ERR15 Overlong 2-byte sequence
PCRE2_ERROR_UTF8_ERR16 Overlong 3-byte sequence
PCRE2_ERROR_UTF8_ERR17 Overlong 4-byte sequence
PCRE2_ERROR_UTF8_ERR18 Overlong 5-byte sequence (won't ever occur)
PCRE2_ERROR_UTF8_ERR19 Overlong 6-byte sequence (won't ever occur)
PCRE2_ERROR_UTF8_ERR20 Isolated 0x80 byte (not within UTF-8 character)
PCRE2_ERROR_UTF8_ERR21 Byte with the illegal value 0xfe or 0xff
*/
for (p = string; length > 0; p++)
{
register pcre_uchar ab, c, d;
uint32_t ab, d;
c = *p;
length--;
if (c < 128) continue; /* ASCII character */
if (c < 0xc0) /* Isolated 10xx xxxx byte */
{
*erroroffset = (int)(p - string);
return PCRE_UTF8_ERR20;
*erroroffset = (PCRE2_SIZE)(p - string);
return PCRE2_ERROR_UTF8_ERR20;
}
if (c >= 0xfe) /* Invalid 0xfe or 0xff bytes */
{
*erroroffset = (int)(p - string);
return PCRE_UTF8_ERR21;
*erroroffset = (PCRE2_SIZE)(p - string);
return PCRE2_ERROR_UTF8_ERR21;
}
ab = PRIV(utf8_table4)[c & 0x3f]; /* Number of additional bytes */
if (length < ab)
ab = PRIV(utf8_table4)[c & 0x3f]; /* Number of additional bytes (1-5) */
if (length < ab) /* Missing bytes */
{
*erroroffset = (int)(p - string); /* Missing bytes */
return ab - length; /* Codes ERR1 to ERR5 */
*erroroffset = (PCRE2_SIZE)(p - string);
switch(ab - length)
{
case 1: return PCRE2_ERROR_UTF8_ERR1;
case 2: return PCRE2_ERROR_UTF8_ERR2;
case 3: return PCRE2_ERROR_UTF8_ERR3;
case 4: return PCRE2_ERROR_UTF8_ERR4;
case 5: return PCRE2_ERROR_UTF8_ERR5;
}
}
length -= ab; /* Length remaining */
@ -147,7 +172,7 @@ for (p = string; length-- > 0; p++)
if (((d = *(++p)) & 0xc0) != 0x80)
{
*erroroffset = (int)(p - string) - 1;
return PCRE_UTF8_ERR6;
return PCRE2_ERROR_UTF8_ERR6;
}
/* For each length, check that the remaining bytes start with the 0x80 bit
@ -162,7 +187,7 @@ for (p = string; length-- > 0; p++)
case 1: if ((c & 0x3e) == 0)
{
*erroroffset = (int)(p - string) - 1;
return PCRE_UTF8_ERR15;
return PCRE2_ERROR_UTF8_ERR15;
}
break;
@ -174,17 +199,17 @@ for (p = string; length-- > 0; p++)
if ((*(++p) & 0xc0) != 0x80) /* Third byte */
{
*erroroffset = (int)(p - string) - 2;
return PCRE_UTF8_ERR7;
return PCRE2_ERROR_UTF8_ERR7;
}
if (c == 0xe0 && (d & 0x20) == 0)
{
*erroroffset = (int)(p - string) - 2;
return PCRE_UTF8_ERR16;
return PCRE2_ERROR_UTF8_ERR16;
}
if (c == 0xed && d >= 0xa0)
{
*erroroffset = (int)(p - string) - 2;
return PCRE_UTF8_ERR14;
return PCRE2_ERROR_UTF8_ERR14;
}
break;
@ -196,22 +221,22 @@ for (p = string; length-- > 0; p++)
if ((*(++p) & 0xc0) != 0x80) /* Third byte */
{
*erroroffset = (int)(p - string) - 2;
return PCRE_UTF8_ERR7;
return PCRE2_ERROR_UTF8_ERR7;
}
if ((*(++p) & 0xc0) != 0x80) /* Fourth byte */
{
*erroroffset = (int)(p - string) - 3;
return PCRE_UTF8_ERR8;
return PCRE2_ERROR_UTF8_ERR8;
}
if (c == 0xf0 && (d & 0x30) == 0)
{
*erroroffset = (int)(p - string) - 3;
return PCRE_UTF8_ERR17;
return PCRE2_ERROR_UTF8_ERR17;
}
if (c > 0xf4 || (c == 0xf4 && d > 0x8f))
{
*erroroffset = (int)(p - string) - 3;
return PCRE_UTF8_ERR13;
return PCRE2_ERROR_UTF8_ERR13;
}
break;
@ -227,22 +252,22 @@ for (p = string; length-- > 0; p++)
if ((*(++p) & 0xc0) != 0x80) /* Third byte */
{
*erroroffset = (int)(p - string) - 2;
return PCRE_UTF8_ERR7;
return PCRE2_ERROR_UTF8_ERR7;
}
if ((*(++p) & 0xc0) != 0x80) /* Fourth byte */
{
*erroroffset = (int)(p - string) - 3;
return PCRE_UTF8_ERR8;
return PCRE2_ERROR_UTF8_ERR8;
}
if ((*(++p) & 0xc0) != 0x80) /* Fifth byte */
{
*erroroffset = (int)(p - string) - 4;
return PCRE_UTF8_ERR9;
return PCRE2_ERROR_UTF8_ERR9;
}
if (c == 0xf8 && (d & 0x38) == 0)
{
*erroroffset = (int)(p - string) - 4;
return PCRE_UTF8_ERR18;
return PCRE2_ERROR_UTF8_ERR18;
}
break;
@ -253,27 +278,27 @@ for (p = string; length-- > 0; p++)
if ((*(++p) & 0xc0) != 0x80) /* Third byte */
{
*erroroffset = (int)(p - string) - 2;
return PCRE_UTF8_ERR7;
return PCRE2_ERROR_UTF8_ERR7;
}
if ((*(++p) & 0xc0) != 0x80) /* Fourth byte */
{
*erroroffset = (int)(p - string) - 3;
return PCRE_UTF8_ERR8;
return PCRE2_ERROR_UTF8_ERR8;
}
if ((*(++p) & 0xc0) != 0x80) /* Fifth byte */
{
*erroroffset = (int)(p - string) - 4;
return PCRE_UTF8_ERR9;
return PCRE2_ERROR_UTF8_ERR9;
}
if ((*(++p) & 0xc0) != 0x80) /* Sixth byte */
{
*erroroffset = (int)(p - string) - 5;
return PCRE_UTF8_ERR10;
return PCRE2_ERROR_UTF8_ERR10;
}
if (c == 0xfc && (d & 0x3c) == 0)
{
*erroroffset = (int)(p - string) - 5;
return PCRE_UTF8_ERR19;
return PCRE2_ERROR_UTF8_ERR19;
}
break;
}
@ -285,17 +310,89 @@ for (p = string; length-- > 0; p++)
if (ab > 3)
{
*erroroffset = (int)(p - string) - ab;
return (ab == 4)? PCRE_UTF8_ERR11 : PCRE_UTF8_ERR12;
return (ab == 4)? PCRE2_ERROR_UTF8_ERR11 : PCRE2_ERROR_UTF8_ERR12;
}
}
return 0;
#else /* Not SUPPORT_UTF */
(void)(string); /* Keep picky compilers happy */
(void)(length);
(void)(erroroffset);
#endif
return PCRE_UTF8_ERR0; /* This indicates success */
/* ----------------- Check a UTF-16 string ----------------- */
#elif PCRE2_CODE_UNIT_WIDTH == 16
/* There's not so much work, nor so many errors, for UTF-16.
PCRE2_ERROR_UTF16_ERR1 Missing low surrogate at the end of the string
PCRE2_ERROR_UTF16_ERR2 Invalid low surrogate
PCRE2_ERROR_UTF16_ERR3 Isolated low surrogate
*/
for (p = string; length > 0; p++)
{
c = *p;
length--;
if ((c & 0xf800) != 0xd800)
{
/* Normal UTF-16 code point. Neither high nor low surrogate. */
}
else if ((c & 0x0400) == 0)
{
/* High surrogate. Must be a followed by a low surrogate. */
if (length == 0)
{
*erroroffset = p - string;
return PCRE2_ERROR_UTF16_ERR1;
}
p++;
length--;
if ((*p & 0xfc00) != 0xdc00)
{
*erroroffset = p - string;
return PCRE2_ERROR_UTF16_ERR2;
}
}
else
{
/* Isolated low surrogate. Always an error. */
*erroroffset = p - string;
return PCRE2_ERROR_UTF16_ERR3;
}
}
return 0;
/* ----------------- Check a UTF-32 string ----------------- */
#else
/* There is very little to do for a UTF-32 string.
PCRE2_ERROR_UTF32_ERR1 Surrogate character
PCRE2_ERROR_UTF32_ERR2 Character > 0x10ffff
*/
for (p = string; length > 0; length--, p++)
{
c = *p;
if ((c & 0xfffff800u) != 0xd800u)
{
/* Normal UTF-32 code point. Neither high nor low surrogate. */
if (c > 0x10ffffu)
{
*erroroffset = p - string;
return PCRE2_ERROR_UTF32_ERR2;
}
}
else
{
/* A surrogate */
*erroroffset = p - string;
return PCRE2_ERROR_UTF32_ERR1;
}
}
return 0;
#endif /* CODE_UNIT_WIDTH */
}
#endif /* SUPPORT_UNICODE */
/* End of pcre_valid_utf8.c */
/* End of pcre2_valid_utf.c */

View file

@ -6,7 +6,8 @@
and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel
Copyright (c) 1997-2013 University of Cambridge
Original API code Copyright (c) 1997-2012 University of Cambridge
New API code Copyright (c) 2016 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
@ -37,46 +38,46 @@ POSSIBILITY OF SUCH DAMAGE.
-----------------------------------------------------------------------------
*/
/* This module contains an internal function that is used to match an extended
class. It is used by both pcre_exec() and pcre_def_exec(). */
class. It is used by pcre2_auto_possessify() and by both pcre2_match() and
pcre2_def_match(). */
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
#include "pcre_internal.h"
#include "pcre2_internal.h"
/*************************************************
* Match character against an XCLASS *
*************************************************/
/* This function is called to match a character against an extended class that
might contain values > 255 and/or Unicode properties.
might contain codepoints above 255 and/or Unicode properties.
Arguments:
c the character
data points to the flag byte of the XCLASS data
data points to the flag code unit of the XCLASS data
utf TRUE if in UTF mode
Returns: TRUE if character matches, else FALSE
*/
BOOL
PRIV(xclass)(pcre_uint32 c, const pcre_uchar *data, BOOL utf)
PRIV(xclass)(uint32_t c, PCRE2_SPTR data, BOOL utf)
{
pcre_uchar t;
PCRE2_UCHAR t;
BOOL negated = (*data & XCL_NOT) != 0;
(void)utf;
#ifdef COMPILE_PCRE8
#if PCRE2_CODE_UNIT_WIDTH == 8
/* In 8 bit mode, this must always be TRUE. Help the compiler to know that. */
utf = TRUE;
#endif
/* Character values < 256 are matched against a bitmap, if one is present. If
not, we still carry on, because there may be ranges that start below 256 in the
/* Code points < 256 are matched against a bitmap, if one is present. If not,
we still carry on, because there may be ranges that start below 256 in the
additional data. */
if (c < 256)
@ -84,37 +85,37 @@ if (c < 256)
if ((*data & XCL_HASPROP) == 0)
{
if ((*data & XCL_MAP) == 0) return negated;
return (((pcre_uint8 *)(data + 1))[c/8] & (1 << (c&7))) != 0;
return (((uint8_t *)(data + 1))[c/8] & (1 << (c&7))) != 0;
}
if ((*data & XCL_MAP) != 0 &&
(((pcre_uint8 *)(data + 1))[c/8] & (1 << (c&7))) != 0)
(((uint8_t *)(data + 1))[c/8] & (1 << (c&7))) != 0)
return !negated; /* char found */
}
/* First skip the bit map if present. Then match against the list of Unicode
properties or large chars or ranges that end with a large char. We won't ever
encounter XCL_PROP or XCL_NOTPROP when UCP support is not compiled. */
encounter XCL_PROP or XCL_NOTPROP when UTF support is not compiled. */
if ((*data++ & XCL_MAP) != 0) data += 32 / sizeof(pcre_uchar);
if ((*data++ & XCL_MAP) != 0) data += 32 / sizeof(PCRE2_UCHAR);
while ((t = *data++) != XCL_END)
{
pcre_uint32 x, y;
uint32_t x, y;
if (t == XCL_SINGLE)
{
#ifdef SUPPORT_UTF
#ifdef SUPPORT_UNICODE
if (utf)
{
GETCHARINC(x, data); /* macro generates multiple statements */
}
else
#endif
x = *data++;
x = *data++;
if (c == x) return !negated;
}
else if (t == XCL_RANGE)
{
#ifdef SUPPORT_UTF
#ifdef SUPPORT_UNICODE
if (utf)
{
GETCHARINC(x, data); /* macro generates multiple statements */
@ -129,7 +130,7 @@ while ((t = *data++) != XCL_END)
if (c >= x && c <= y) return !negated;
}
#ifdef SUPPORT_UCP
#ifdef SUPPORT_UNICODE
else /* XCL_PROP & XCL_NOTPROP */
{
const ucd_record *prop = GET_UCD(c);
@ -259,10 +260,12 @@ while ((t = *data++) != XCL_END)
data += 2;
}
#endif /* SUPPORT_UCP */
#else
(void)utf; /* Avoid compiler warning */
#endif /* SUPPORT_UNICODE */
}
return negated; /* char did not match */
}
/* End of pcre_xclass.c */
/* End of pcre2_xclass.c */

View file

@ -124,10 +124,10 @@
/* SLJIT_REWRITABLE_JUMP is 0x1000. */
#if (defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86)
# define PATCH_MB 0x4
# define PATCH_MW 0x8
# define PATCH_MB 0x4
# define PATCH_MW 0x8
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
# define PATCH_MD 0x10
# define PATCH_MD 0x10
#endif
#endif
@ -1561,6 +1561,7 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_cmov(struct sljit_compile
CHECK_ARGUMENT(FUNCTION_CHECK_IS_REG(dst_reg & ~SLJIT_I32_OP));
if (src != SLJIT_IMM) {
CHECK_ARGUMENT(FUNCTION_CHECK_IS_REG(src));
CHECK_ARGUMENT(srcw == 0);
}
if ((type & 0xff) <= SLJIT_NOT_ZERO)
@ -1586,6 +1587,7 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_cmov(struct sljit_compile
static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_get_local_base(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw, sljit_sw offset)
{
/* Any offset is allowed. */
SLJIT_UNUSED_ARG(offset);
#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)

View file

@ -1225,7 +1225,15 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_cmov(struct sljit_compiler *compil
sljit_s32 dst_reg,
sljit_s32 src, sljit_sw srcw);
/* Copies the base address of SLJIT_SP + offset to dst.
/* Copies the base address of SLJIT_SP + offset to dst. The offset can be
anything to negate the effect of relative addressing. For example if an
array of sljit_sw values is stored on the stack from offset 0x40, and R0
contains the offset of an array item plus 0x120, this item can be
overwritten by two SLJIT instructions:
sljit_get_local_base(compiler, SLJIT_R1, 0, 0x40 - 0x120);
sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM2(SLJIT_R1, SLJIT_R0), 0, SLJIT_IMM, 0x5);
Flags: - (may destroy flags) */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_local_base(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw, sljit_sw offset);

View file

@ -498,12 +498,13 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_has_cpu_feature(sljit_s32 feature_type)
{
sljit_sw fir = 0;
switch (feature_type) {
case SLJIT_HAS_FPU:
#ifdef SLJIT_IS_FPU_AVAILABLE
return SLJIT_IS_FPU_AVAILABLE;
#elif defined(__GNUC__)
sljit_sw fir;
asm ("cfc1 %0, $0" : "=r"(fir));
return (fir >> 22) & 0x1;
#else
@ -517,7 +518,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_has_cpu_feature(sljit_s32 feature_type)
#endif
default:
return 0;
return fir;
}
}

View file

@ -760,7 +760,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return(struct sljit_compiler *comp
(((inst) & ~(INT_ALIGNED | UPDATE_REQ)) | (((flags) & MEM_MASK) <= GPR_REG ? D(reg) : FD(reg)))
#endif
static const sljit_ins data_transfer_insts[64 + 8] = {
static const sljit_ins data_transfer_insts[64 + 16] = {
/* -------- Unsigned -------- */
@ -869,11 +869,20 @@ static const sljit_ins data_transfer_insts[64 + 8] = {
/* d n x s */ HI(31) | LO(727) /* stfdx */,
/* d n x l */ HI(31) | LO(599) /* lfdx */,
/* d w i s */ HI(55) /* stfdu */,
/* d w i l */ HI(51) /* lfdu */,
/* d w x s */ HI(31) | LO(759) /* stfdux */,
/* d w x l */ HI(31) | LO(631) /* lfdux */,
/* s n i s */ HI(52) /* stfs */,
/* s n i l */ HI(48) /* lfs */,
/* s n x s */ HI(31) | LO(663) /* stfsx */,
/* s n x l */ HI(31) | LO(535) /* lfsx */,
/* s w i s */ HI(53) /* stfsu */,
/* s w i l */ HI(49) /* lfsu */,
/* s w x s */ HI(31) | LO(695) /* stfsux */,
/* s w x l */ HI(31) | LO(567) /* lfsux */,
};
#undef ARCH_32_64
@ -1753,7 +1762,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_custom(struct sljit_compiler *c
/* Floating point operators */
/* --------------------------------------------------------------------- */
#define FLOAT_DATA(op) (DOUBLE_DATA | ((op & SLJIT_F32_OP) >> 6))
#define FLOAT_DATA(op) (DOUBLE_DATA | ((op & SLJIT_F32_OP) >> 5))
#define SELECT_FOP(op, single, double) ((op & SLJIT_F32_OP) ? single : double)
#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
@ -2282,16 +2291,15 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *co
FAIL_IF(emit_op_mem2(compiler, input_flags | LOAD_DATA, TMP_REG1, dst, dstw, dst, dstw));
invert = 0;
cr_bit = 0;
switch (type & 0xff) {
case SLJIT_LESS:
case SLJIT_SIG_LESS:
cr_bit = 0;
break;
case SLJIT_GREATER_EQUAL:
case SLJIT_SIG_GREATER_EQUAL:
cr_bit = 0;
invert = 1;
break;

View file

@ -0,0 +1,421 @@
/*
* Stack-less Just-In-Time compiler
*
* Copyright Zoltan Herczeg (hzmester@freemail.hu). All rights reserved.
*
* Redistribution and use in source and binary forms, with or without modification, are
* permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice, this list of
* conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright notice, this list
* of conditions and the following disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
* SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
* TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
/*
This file contains a simple executable memory allocator
It is assumed, that executable code blocks are usually medium (or sometimes
large) memory blocks, and the allocator is not too frequently called (less
optimized than other allocators). Thus, using it as a generic allocator is
not suggested.
How does it work:
Memory is allocated in continuous memory areas called chunks by alloc_chunk()
Chunk format:
[ block ][ block ] ... [ block ][ block terminator ]
All blocks and the block terminator is started with block_header. The block
header contains the size of the previous and the next block. These sizes
can also contain special values.
Block size:
0 - The block is a free_block, with a different size member.
1 - The block is a block terminator.
n - The block is used at the moment, and the value contains its size.
Previous block size:
0 - This is the first block of the memory chunk.
n - The size of the previous block.
Using these size values we can go forward or backward on the block chain.
The unused blocks are stored in a chain list pointed by free_blocks. This
list is useful if we need to find a suitable memory area when the allocator
is called.
When a block is freed, the new free block is connected to its adjacent free
blocks if possible.
[ free block ][ used block ][ free block ]
and "used block" is freed, the three blocks are connected together:
[ one big free block ]
*/
/* --------------------------------------------------------------------- */
/* System (OS) functions */
/* --------------------------------------------------------------------- */
/* 64 KByte. */
#define CHUNK_SIZE 0x10000
struct chunk_header {
void *executable;
int fd;
};
/*
alloc_chunk / free_chunk :
* allocate executable system memory chunks
* the size is always divisible by CHUNK_SIZE
allocator_grab_lock / allocator_release_lock :
* make the allocator thread safe
* can be empty if the OS (or the application) does not support threading
* only the allocator requires this lock, sljit is fully thread safe
as it only uses local variables
*/
#include <fcntl.h>
#ifndef O_NOATIME
#define O_NOATIME 0
#endif
#ifdef __O_TMPFILE
#ifndef O_TMPFILE
#define O_TMPFILE (__O_TMPFILE | O_DIRECTORY)
#endif
#endif
int mkostemp(char *template, int flags);
char *secure_getenv(const char *name);
static SLJIT_INLINE int create_tempfile(void)
{
int fd;
char tmp_name[256];
size_t tmp_name_len;
char *dir;
size_t len;
#ifdef P_tmpdir
len = (P_tmpdir != NULL) ? strlen(P_tmpdir) : 0;
if (len > 0 && len < sizeof(tmp_name)) {
strcpy(tmp_name, P_tmpdir);
tmp_name_len = len;
}
else {
strcpy(tmp_name, "/tmp");
tmp_name_len = 4;
}
#else
strcpy(tmp_name, "/tmp");
tmp_name_len = 4;
#endif
dir = secure_getenv("TMPDIR");
if (dir) {
len = strlen(dir);
if (len > 0 && len < sizeof(tmp_name)) {
strcpy(tmp_name, dir);
tmp_name_len = len;
}
}
SLJIT_ASSERT(tmp_name_len > 0 && tmp_name_len < sizeof(tmp_name));
while (tmp_name_len > 0 && tmp_name[tmp_name_len - 1] == '/') {
tmp_name_len--;
tmp_name[tmp_name_len] = '\0';
}
#ifdef O_TMPFILE
fd = open(tmp_name, O_TMPFILE | O_EXCL | O_RDWR | O_NOATIME | O_CLOEXEC, S_IRUSR | S_IWUSR);
if (fd != -1)
return fd;
#endif
if (tmp_name_len + 7 >= sizeof(tmp_name))
{
return -1;
}
strcpy(tmp_name + tmp_name_len, "/XXXXXX");
fd = mkostemp(tmp_name, O_CLOEXEC | O_NOATIME);
if (fd == -1)
return fd;
if (unlink(tmp_name)) {
close(fd);
return -1;
}
return fd;
}
static SLJIT_INLINE struct chunk_header* alloc_chunk(sljit_uw size)
{
struct chunk_header *retval;
int fd;
fd = create_tempfile();
if (fd == -1)
return NULL;
if (ftruncate(fd, size)) {
close(fd);
return NULL;
}
retval = (struct chunk_header *)mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
if (retval == MAP_FAILED) {
close(fd);
return NULL;
}
retval->executable = mmap(NULL, size, PROT_READ | PROT_EXEC, MAP_SHARED, fd, 0);
if (retval->executable == MAP_FAILED) {
munmap(retval, size);
close(fd);
return NULL;
}
retval->fd = fd;
return retval;
}
static SLJIT_INLINE void free_chunk(void *chunk, sljit_uw size)
{
struct chunk_header *header = ((struct chunk_header *)chunk) - 1;
int fd = header->fd;
munmap(header->executable, size);
munmap(header, size);
close(fd);
}
/* --------------------------------------------------------------------- */
/* Common functions */
/* --------------------------------------------------------------------- */
#define CHUNK_MASK (~(CHUNK_SIZE - 1))
struct block_header {
sljit_uw size;
sljit_uw prev_size;
sljit_sw executable_offset;
};
struct free_block {
struct block_header header;
struct free_block *next;
struct free_block *prev;
sljit_uw size;
};
#define AS_BLOCK_HEADER(base, offset) \
((struct block_header*)(((sljit_u8*)base) + offset))
#define AS_FREE_BLOCK(base, offset) \
((struct free_block*)(((sljit_u8*)base) + offset))
#define MEM_START(base) ((void*)((base) + 1))
#define ALIGN_SIZE(size) (((size) + sizeof(struct block_header) + 7) & ~7)
static struct free_block* free_blocks;
static sljit_uw allocated_size;
static sljit_uw total_size;
static SLJIT_INLINE void sljit_insert_free_block(struct free_block *free_block, sljit_uw size)
{
free_block->header.size = 0;
free_block->size = size;
free_block->next = free_blocks;
free_block->prev = NULL;
if (free_blocks)
free_blocks->prev = free_block;
free_blocks = free_block;
}
static SLJIT_INLINE void sljit_remove_free_block(struct free_block *free_block)
{
if (free_block->next)
free_block->next->prev = free_block->prev;
if (free_block->prev)
free_block->prev->next = free_block->next;
else {
SLJIT_ASSERT(free_blocks == free_block);
free_blocks = free_block->next;
}
}
SLJIT_API_FUNC_ATTRIBUTE void* sljit_malloc_exec(sljit_uw size)
{
struct chunk_header *chunk_header;
struct block_header *header;
struct block_header *next_header;
struct free_block *free_block;
sljit_uw chunk_size;
sljit_sw executable_offset;
allocator_grab_lock();
if (size < (64 - sizeof(struct block_header)))
size = (64 - sizeof(struct block_header));
size = ALIGN_SIZE(size);
free_block = free_blocks;
while (free_block) {
if (free_block->size >= size) {
chunk_size = free_block->size;
if (chunk_size > size + 64) {
/* We just cut a block from the end of the free block. */
chunk_size -= size;
free_block->size = chunk_size;
header = AS_BLOCK_HEADER(free_block, chunk_size);
header->prev_size = chunk_size;
header->executable_offset = free_block->header.executable_offset;
AS_BLOCK_HEADER(header, size)->prev_size = size;
}
else {
sljit_remove_free_block(free_block);
header = (struct block_header*)free_block;
size = chunk_size;
}
allocated_size += size;
header->size = size;
allocator_release_lock();
return MEM_START(header);
}
free_block = free_block->next;
}
chunk_size = sizeof(struct chunk_header) + sizeof(struct block_header);
chunk_size = (chunk_size + size + CHUNK_SIZE - 1) & CHUNK_MASK;
chunk_header = alloc_chunk(chunk_size);
if (!chunk_header) {
allocator_release_lock();
return NULL;
}
executable_offset = (sljit_sw)((sljit_u8*)chunk_header->executable - (sljit_u8*)chunk_header);
chunk_size -= sizeof(struct chunk_header) + sizeof(struct block_header);
total_size += chunk_size;
header = (struct block_header *)(chunk_header + 1);
header->prev_size = 0;
header->executable_offset = executable_offset;
if (chunk_size > size + 64) {
/* Cut the allocated space into a free and a used block. */
allocated_size += size;
header->size = size;
chunk_size -= size;
free_block = AS_FREE_BLOCK(header, size);
free_block->header.prev_size = size;
free_block->header.executable_offset = executable_offset;
sljit_insert_free_block(free_block, chunk_size);
next_header = AS_BLOCK_HEADER(free_block, chunk_size);
}
else {
/* All space belongs to this allocation. */
allocated_size += chunk_size;
header->size = chunk_size;
next_header = AS_BLOCK_HEADER(header, chunk_size);
}
next_header->size = 1;
next_header->prev_size = chunk_size;
next_header->executable_offset = executable_offset;
allocator_release_lock();
return MEM_START(header);
}
SLJIT_API_FUNC_ATTRIBUTE void sljit_free_exec(void* ptr)
{
struct block_header *header;
struct free_block* free_block;
allocator_grab_lock();
header = AS_BLOCK_HEADER(ptr, -(sljit_sw)sizeof(struct block_header));
header = AS_BLOCK_HEADER(header, -header->executable_offset);
allocated_size -= header->size;
/* Connecting free blocks together if possible. */
/* If header->prev_size == 0, free_block will equal to header.
In this case, free_block->header.size will be > 0. */
free_block = AS_FREE_BLOCK(header, -(sljit_sw)header->prev_size);
if (SLJIT_UNLIKELY(!free_block->header.size)) {
free_block->size += header->size;
header = AS_BLOCK_HEADER(free_block, free_block->size);
header->prev_size = free_block->size;
}
else {
free_block = (struct free_block*)header;
sljit_insert_free_block(free_block, header->size);
}
header = AS_BLOCK_HEADER(free_block, free_block->size);
if (SLJIT_UNLIKELY(!header->size)) {
free_block->size += ((struct free_block*)header)->size;
sljit_remove_free_block((struct free_block*)header);
header = AS_BLOCK_HEADER(free_block, free_block->size);
header->prev_size = free_block->size;
}
/* The whole chunk is free. */
if (SLJIT_UNLIKELY(!free_block->header.prev_size && header->size == 1)) {
/* If this block is freed, we still have (allocated_size / 2) free space. */
if (total_size - free_block->size > (allocated_size * 3 / 2)) {
total_size -= free_block->size;
sljit_remove_free_block(free_block);
free_chunk(free_block, free_block->size + sizeof(struct block_header));
}
}
allocator_release_lock();
}
SLJIT_API_FUNC_ATTRIBUTE void sljit_free_unused_memory_exec(void)
{
struct free_block* free_block;
struct free_block* next_free_block;
allocator_grab_lock();
free_block = free_blocks;
while (free_block) {
next_free_block = free_block->next;
if (!free_block->header.prev_size &&
AS_BLOCK_HEADER(free_block, free_block->size)->size == 1) {
total_size -= free_block->size;
sljit_remove_free_block(free_block);
free_chunk(free_block, free_block->size + sizeof(struct block_header));
}
free_block = next_free_block;
}
SLJIT_ASSERT((total_size && free_blocks) || (!total_size && !free_blocks));
allocator_release_lock();
}
SLJIT_API_FUNC_ATTRIBUTE sljit_sw sljit_exec_offset(void* ptr)
{
return ((struct block_header *)(ptr))[-1].executable_offset;
}

View file

@ -1,45 +0,0 @@
THE MAIN PCRE LIBRARY
---------------------
Written by: Philip Hazel
Email local part: ph10
Email domain: cam.ac.uk
University of Cambridge Computing Service,
Cambridge, England.
Copyright (c) 1997-2017 University of Cambridge
All rights reserved
PCRE JUST-IN-TIME COMPILATION SUPPORT
-------------------------------------
Written by: Zoltan Herczeg
Email local part: hzmester
Emain domain: freemail.hu
Copyright(c) 2010-2017 Zoltan Herczeg
All rights reserved.
STACK-LESS JUST-IN-TIME COMPILER
--------------------------------
Written by: Zoltan Herczeg
Email local part: hzmester
Emain domain: freemail.hu
Copyright(c) 2009-2017 Zoltan Herczeg
All rights reserved.
THE C++ WRAPPER LIBRARY
-----------------------
Written by: Google Inc.
Copyright (c) 2007-2012 Google Inc
All rights reserved
####

View file

@ -1,5 +0,0 @@
PCRE LICENCE
Please see the file LICENCE in the PCRE distribution for licensing details.
End

File diff suppressed because it is too large Load diff

View file

@ -1,528 +0,0 @@
Technical Notes about PCRE
--------------------------
These are very rough technical notes that record potentially useful information
about PCRE internals. For information about testing PCRE, see the pcretest
documentation and the comment at the head of the RunTest file.
Historical note 1
-----------------
Many years ago I implemented some regular expression functions to an algorithm
suggested by Martin Richards. These were not Unix-like in form, and were quite
restricted in what they could do by comparison with Perl. The interesting part
about the algorithm was that the amount of space required to hold the compiled
form of an expression was known in advance. The code to apply an expression did
not operate by backtracking, as the original Henry Spencer code and current
Perl code does, but instead checked all possibilities simultaneously by keeping
a list of current states and checking all of them as it advanced through the
subject string. In the terminology of Jeffrey Friedl's book, it was a "DFA
algorithm", though it was not a traditional Finite State Machine (FSM). When
the pattern was all used up, all remaining states were possible matches, and
the one matching the longest subset of the subject string was chosen. This did
not necessarily maximize the individual wild portions of the pattern, as is
expected in Unix and Perl-style regular expressions.
Historical note 2
-----------------
By contrast, the code originally written by Henry Spencer (which was
subsequently heavily modified for Perl) compiles the expression twice: once in
a dummy mode in order to find out how much store will be needed, and then for
real. (The Perl version probably doesn't do this any more; I'm talking about
the original library.) The execution function operates by backtracking and
maximizing (or, optionally, minimizing in Perl) the amount of the subject that
matches individual wild portions of the pattern. This is an "NFA algorithm" in
Friedl's terminology.
OK, here's the real stuff
-------------------------
For the set of functions that form the "basic" PCRE library (which are
unrelated to those mentioned above), I tried at first to invent an algorithm
that used an amount of store bounded by a multiple of the number of characters
in the pattern, to save on compiling time. However, because of the greater
complexity in Perl regular expressions, I couldn't do this. In any case, a
first pass through the pattern is helpful for other reasons.
Support for 16-bit and 32-bit data strings
-------------------------------------------
From release 8.30, PCRE supports 16-bit as well as 8-bit data strings; and from
release 8.32, PCRE supports 32-bit data strings. The library can be compiled
in any combination of 8-bit, 16-bit or 32-bit modes, creating up to three
different libraries. In the description that follows, the word "short" is used
for a 16-bit data quantity, and the word "unit" is used for a quantity that is
a byte in 8-bit mode, a short in 16-bit mode and a 32-bit word in 32-bit mode.
However, so as not to over-complicate the text, the names of PCRE functions are
given in 8-bit form only.
Computing the memory requirement: how it was
--------------------------------------------
Up to and including release 6.7, PCRE worked by running a very degenerate first
pass to calculate a maximum store size, and then a second pass to do the real
compile - which might use a bit less than the predicted amount of memory. The
idea was that this would turn out faster than the Henry Spencer code because
the first pass is degenerate and the second pass can just store stuff straight
into the vector, which it knows is big enough.
Computing the memory requirement: how it is
-------------------------------------------
By the time I was working on a potential 6.8 release, the degenerate first pass
had become very complicated and hard to maintain. Indeed one of the early
things I did for 6.8 was to fix Yet Another Bug in the memory computation. Then
I had a flash of inspiration as to how I could run the real compile function in
a "fake" mode that enables it to compute how much memory it would need, while
actually only ever using a few hundred bytes of working memory, and without too
many tests of the mode that might slow it down. So I refactored the compiling
functions to work this way. This got rid of about 600 lines of source. It
should make future maintenance and development easier. As this was such a major
change, I never released 6.8, instead upping the number to 7.0 (other quite
major changes were also present in the 7.0 release).
A side effect of this work was that the previous limit of 200 on the nesting
depth of parentheses was removed. However, there is a downside: pcre_compile()
runs more slowly than before (30% or more, depending on the pattern) because it
is doing a full analysis of the pattern. My hope was that this would not be a
big issue, and in the event, nobody has commented on it.
At release 8.34, a limit on the nesting depth of parentheses was re-introduced
(default 250, settable at build time) so as to put a limit on the amount of
system stack used by pcre_compile(). This is a safety feature for environments
with small stacks where the patterns are provided by users.
Traditional matching function
-----------------------------
The "traditional", and original, matching function is called pcre_exec(), and
it implements an NFA algorithm, similar to the original Henry Spencer algorithm
and the way that Perl works. This is not surprising, since it is intended to be
as compatible with Perl as possible. This is the function most users of PCRE
will use most of the time. From release 8.20, if PCRE is compiled with
just-in-time (JIT) support, and studying a compiled pattern with JIT is
successful, the JIT code is run instead of the normal pcre_exec() code, but the
result is the same.
Supplementary matching function
-------------------------------
From PCRE 6.0, there is also a supplementary matching function called
pcre_dfa_exec(). This implements a DFA matching algorithm that searches
simultaneously for all possible matches that start at one point in the subject
string. (Going back to my roots: see Historical Note 1 above.) This function
intreprets the same compiled pattern data as pcre_exec(); however, not all the
facilities are available, and those that are do not always work in quite the
same way. See the user documentation for details.
The algorithm that is used for pcre_dfa_exec() is not a traditional FSM,
because it may have a number of states active at one time. More work would be
needed at compile time to produce a traditional FSM where only one state is
ever active at once. I believe some other regex matchers work this way. JIT
support is not available for this kind of matching.
Changeable options
------------------
The /i, /m, or /s options (PCRE_CASELESS, PCRE_MULTILINE, PCRE_DOTALL, and some
others) may change in the middle of patterns. From PCRE 8.13, their processing
is handled entirely at compile time by generating different opcodes for the
different settings. The runtime functions do not need to keep track of an
options state any more.
Format of compiled patterns
---------------------------
The compiled form of a pattern is a vector of unsigned units (bytes in 8-bit
mode, shorts in 16-bit mode, 32-bit words in 32-bit mode), containing items of
variable length. The first unit in an item contains an opcode, and the length
of the item is either implicit in the opcode or contained in the data that
follows it.
In many cases listed below, LINK_SIZE data values are specified for offsets
within the compiled pattern. LINK_SIZE always specifies a number of bytes. The
default value for LINK_SIZE is 2, but PCRE can be compiled to use 3-byte or
4-byte values for these offsets, although this impairs the performance. (3-byte
LINK_SIZE values are available only in 8-bit mode.) Specifing a LINK_SIZE
larger than 2 is necessary only when patterns whose compiled length is greater
than 64K are going to be processed. In this description, we assume the "normal"
compilation options. Data values that are counts (e.g. quantifiers) are two
bytes long in 8-bit mode (most significant byte first), or one unit in 16-bit
and 32-bit modes.
Opcodes with no following data
------------------------------
These items are all just one unit long
OP_END end of pattern
OP_ANY match any one character other than newline
OP_ALLANY match any one character, including newline
OP_ANYBYTE match any single unit, even in UTF-8/16 mode
OP_SOD match start of data: \A
OP_SOM, start of match (subject + offset): \G
OP_SET_SOM, set start of match (\K)
OP_CIRC ^ (start of data)
OP_CIRCM ^ multiline mode (start of data or after newline)
OP_NOT_WORD_BOUNDARY \W
OP_WORD_BOUNDARY \w
OP_NOT_DIGIT \D
OP_DIGIT \d
OP_NOT_HSPACE \H
OP_HSPACE \h
OP_NOT_WHITESPACE \S
OP_WHITESPACE \s
OP_NOT_VSPACE \V
OP_VSPACE \v
OP_NOT_WORDCHAR \W
OP_WORDCHAR \w
OP_EODN match end of data or newline at end: \Z
OP_EOD match end of data: \z
OP_DOLL $ (end of data, or before final newline)
OP_DOLLM $ multiline mode (end of data or before newline)
OP_EXTUNI match an extended Unicode grapheme cluster
OP_ANYNL match any Unicode newline sequence
OP_ASSERT_ACCEPT )
OP_ACCEPT ) These are Perl 5.10's "backtracking control
OP_COMMIT ) verbs". If OP_ACCEPT is inside capturing
OP_FAIL ) parentheses, it may be preceded by one or more
OP_PRUNE ) OP_CLOSE, each followed by a count that
OP_SKIP ) indicates which parentheses must be closed.
OP_THEN )
OP_ASSERT_ACCEPT is used when (*ACCEPT) is encountered within an assertion.
This ends the assertion, not the entire pattern match.
Backtracking control verbs with optional data
---------------------------------------------
(*THEN) without an argument generates the opcode OP_THEN and no following data.
OP_MARK is followed by the mark name, preceded by a one-unit length, and
followed by a binary zero. For (*PRUNE), (*SKIP), and (*THEN) with arguments,
the opcodes OP_PRUNE_ARG, OP_SKIP_ARG, and OP_THEN_ARG are used, with the name
following in the same format as OP_MARK.
Matching literal characters
---------------------------
The OP_CHAR opcode is followed by a single character that is to be matched
casefully. For caseless matching, OP_CHARI is used. In UTF-8 or UTF-16 modes,
the character may be more than one unit long. In UTF-32 mode, characters
are always exactly one unit long.
If there is only one character in a character class, OP_CHAR or OP_CHARI is
used for a positive class, and OP_NOT or OP_NOTI for a negative one (that is,
for something like [^a]).
Repeating single characters
---------------------------
The common repeats (*, +, ?), when applied to a single character, use the
following opcodes, which come in caseful and caseless versions:
Caseful Caseless
OP_STAR OP_STARI
OP_MINSTAR OP_MINSTARI
OP_POSSTAR OP_POSSTARI
OP_PLUS OP_PLUSI
OP_MINPLUS OP_MINPLUSI
OP_POSPLUS OP_POSPLUSI
OP_QUERY OP_QUERYI
OP_MINQUERY OP_MINQUERYI
OP_POSQUERY OP_POSQUERYI
Each opcode is followed by the character that is to be repeated. In ASCII mode,
these are two-unit items; in UTF-8 or UTF-16 modes, the length is variable; in
UTF-32 mode these are one-unit items. Those with "MIN" in their names are the
minimizing versions. Those with "POS" in their names are possessive versions.
Other repeats make use of these opcodes:
Caseful Caseless
OP_UPTO OP_UPTOI
OP_MINUPTO OP_MINUPTOI
OP_POSUPTO OP_POSUPTOI
OP_EXACT OP_EXACTI
Each of these is followed by a count and then the repeated character. OP_UPTO
matches from 0 to the given number. A repeat with a non-zero minimum and a
fixed maximum is coded as an OP_EXACT followed by an OP_UPTO (or OP_MINUPTO or
OPT_POSUPTO).
Another set of matching repeating opcodes (called OP_NOTSTAR, OP_NOTSTARI,
etc.) are used for repeated, negated, single-character classes such as [^a]*.
The normal single-character opcodes (OP_STAR, etc.) are used for repeated
positive single-character classes.
Repeating character types
-------------------------
Repeats of things like \d are done exactly as for single characters, except
that instead of a character, the opcode for the type is stored in the data
unit. The opcodes are:
OP_TYPESTAR
OP_TYPEMINSTAR
OP_TYPEPOSSTAR
OP_TYPEPLUS
OP_TYPEMINPLUS
OP_TYPEPOSPLUS
OP_TYPEQUERY
OP_TYPEMINQUERY
OP_TYPEPOSQUERY
OP_TYPEUPTO
OP_TYPEMINUPTO
OP_TYPEPOSUPTO
OP_TYPEEXACT
Match by Unicode property
-------------------------
OP_PROP and OP_NOTPROP are used for positive and negative matches of a
character by testing its Unicode property (the \p and \P escape sequences).
Each is followed by two units that encode the desired property as a type and a
value. The types are a set of #defines of the form PT_xxx, and the values are
enumerations of the form ucp_xx, defined in the ucp.h source file. The value is
relevant only for PT_GC (General Category), PT_PC (Particular Category), and
PT_SC (Script).
Repeats of these items use the OP_TYPESTAR etc. set of opcodes, followed by
three units: OP_PROP or OP_NOTPROP, and then the desired property type and
value.
Character classes
-----------------
If there is only one character in a class, OP_CHAR or OP_CHARI is used for a
positive class, and OP_NOT or OP_NOTI for a negative one (that is, for
something like [^a]).
A set of repeating opcodes (called OP_NOTSTAR etc.) are used for repeated,
negated, single-character classes. The normal single-character opcodes
(OP_STAR, etc.) are used for repeated positive single-character classes.
When there is more than one character in a class, and all the code points are
less than 256, OP_CLASS is used for a positive class, and OP_NCLASS for a
negative one. In either case, the opcode is followed by a 32-byte (16-short,
8-word) bit map containing a 1 bit for every character that is acceptable. The
bits are counted from the least significant end of each unit. In caseless mode,
bits for both cases are set.
The reason for having both OP_CLASS and OP_NCLASS is so that, in UTF-8/16/32
mode, subject characters with values greater than 255 can be handled correctly.
For OP_CLASS they do not match, whereas for OP_NCLASS they do.
For classes containing characters with values greater than 255 or that contain
\p or \P, OP_XCLASS is used. It optionally uses a bit map if any code points
are less than 256, followed by a list of pairs (for a range) and single
characters. In caseless mode, both cases are explicitly listed.
OP_XCLASS is followed by a unit containing flag bits: XCL_NOT indicates that
this is a negative class, and XCL_MAP indicates that a bit map is present.
There follows the bit map, if XCL_MAP is set, and then a sequence of items
coded as follows:
XCL_END marks the end of the list
XCL_SINGLE one character follows
XCL_RANGE two characters follow
XCL_PROP a Unicode property (type, value) follows
XCL_NOTPROP a Unicode property (type, value) follows
If a range starts with a code point less than 256 and ends with one greater
than 256, an XCL_RANGE item is used, without setting any bits in the bit map.
This means that if no other items in the class set bits in the map, a map is
not needed.
Back references
---------------
OP_REF (caseful) or OP_REFI (caseless) is followed by a count containing the
reference number if the reference is to a unique capturing group (either by
number or by name). When named groups are used, there may be more than one
group with the same name. In this case, a reference by name generates OP_DNREF
or OP_DNREFI. These are followed by two counts: the index (not the byte offset)
in the group name table of the first entry for the requred name, followed by
the number of groups with the same name.
Repeating character classes and back references
-----------------------------------------------
Single-character classes are handled specially (see above). This section
applies to other classes and also to back references. In both cases, the repeat
information follows the base item. The matching code looks at the following
opcode to see if it is one of
OP_CRSTAR
OP_CRMINSTAR
OP_CRPOSSTAR
OP_CRPLUS
OP_CRMINPLUS
OP_CRPOSPLUS
OP_CRQUERY
OP_CRMINQUERY
OP_CRPOSQUERY
OP_CRRANGE
OP_CRMINRANGE
OP_CRPOSRANGE
All but the last three are single-unit items, with no data. The others are
followed by the minimum and maximum repeat counts.
Brackets and alternation
------------------------
A pair of non-capturing round brackets is wrapped round each expression at
compile time, so alternation always happens in the context of brackets.
[Note for North Americans: "bracket" to some English speakers, including
myself, can be round, square, curly, or pointy. Hence this usage rather than
"parentheses".]
Non-capturing brackets use the opcode OP_BRA. Originally PCRE was limited to 99
capturing brackets and it used a different opcode for each one. From release
3.5, the limit was removed by putting the bracket number into the data for
higher-numbered brackets. From release 7.0 all capturing brackets are handled
this way, using the single opcode OP_CBRA.
A bracket opcode is followed by LINK_SIZE bytes which give the offset to the
next alternative OP_ALT or, if there aren't any branches, to the matching
OP_KET opcode. Each OP_ALT is followed by LINK_SIZE bytes giving the offset to
the next one, or to the OP_KET opcode. For capturing brackets, the bracket
number is a count that immediately follows the offset.
OP_KET is used for subpatterns that do not repeat indefinitely, and OP_KETRMIN
and OP_KETRMAX are used for indefinite repetitions, minimally or maximally
respectively (see below for possessive repetitions). All three are followed by
LINK_SIZE bytes giving (as a positive number) the offset back to the matching
bracket opcode.
If a subpattern is quantified such that it is permitted to match zero times, it
is preceded by one of OP_BRAZERO, OP_BRAMINZERO, or OP_SKIPZERO. These are
single-unit opcodes that tell the matcher that skipping the following
subpattern entirely is a valid branch. In the case of the first two, not
skipping the pattern is also valid (greedy and non-greedy). The third is used
when a pattern has the quantifier {0,0}. It cannot be entirely discarded,
because it may be called as a subroutine from elsewhere in the regex.
A subpattern with an indefinite maximum repetition is replicated in the
compiled data its minimum number of times (or once with OP_BRAZERO if the
minimum is zero), with the final copy terminating with OP_KETRMIN or OP_KETRMAX
as appropriate.
A subpattern with a bounded maximum repetition is replicated in a nested
fashion up to the maximum number of times, with OP_BRAZERO or OP_BRAMINZERO
before each replication after the minimum, so that, for example, (abc){2,5} is
compiled as (abc)(abc)((abc)((abc)(abc)?)?)?, except that each bracketed group
has the same number.
When a repeated subpattern has an unbounded upper limit, it is checked to see
whether it could match an empty string. If this is the case, the opcode in the
final replication is changed to OP_SBRA or OP_SCBRA. This tells the matcher
that it needs to check for matching an empty string when it hits OP_KETRMIN or
OP_KETRMAX, and if so, to break the loop.
Possessive brackets
-------------------
When a repeated group (capturing or non-capturing) is marked as possessive by
the "+" notation, e.g. (abc)++, different opcodes are used. Their names all
have POS on the end, e.g. OP_BRAPOS instead of OP_BRA and OP_SCPBRPOS instead
of OP_SCBRA. The end of such a group is marked by OP_KETRPOS. If the minimum
repetition is zero, the group is preceded by OP_BRAPOSZERO.
Once-only (atomic) groups
-------------------------
These are just like other subpatterns, but they start with the opcode
OP_ONCE or OP_ONCE_NC. The former is used when there are no capturing brackets
within the atomic group; the latter when there are. The distinction is needed
for when there is a backtrack to before the group - any captures within the
group must be reset, so it is necessary to retain backtracking points inside
the group even after it is complete in order to do this. When there are no
captures in an atomic group, all the backtracking can be discarded when it is
complete. This is more efficient, and also uses less stack.
The check for matching an empty string in an unbounded repeat is handled
entirely at runtime, so there are just these two opcodes for atomic groups.
Assertions
----------
Forward assertions are also just like other subpatterns, but starting with one
of the opcodes OP_ASSERT or OP_ASSERT_NOT. Backward assertions use the opcodes
OP_ASSERTBACK and OP_ASSERTBACK_NOT, and the first opcode inside the assertion
is OP_REVERSE, followed by a count of the number of characters to move back the
pointer in the subject string. In ASCII mode, the count is a number of units,
but in UTF-8/16 mode each character may occupy more than one unit; in UTF-32
mode each character occupies exactly one unit. A separate count is present in
each alternative of a lookbehind assertion, allowing them to have different
fixed lengths.
Conditional subpatterns
-----------------------
These are like other subpatterns, but they start with the opcode OP_COND, or
OP_SCOND for one that might match an empty string in an unbounded repeat. If
the condition is a back reference, this is stored at the start of the
subpattern using the opcode OP_CREF followed by a count containing the
reference number, provided that the reference is to a unique capturing group.
If the reference was by name and there is more than one group with that name,
OP_DNCREF is used instead. It is followed by two counts: the index in the group
names table, and the number of groups with the same name.
If the condition is "in recursion" (coded as "(?(R)"), or "in recursion of
group x" (coded as "(?(Rx)"), the group number is stored at the start of the
subpattern using the opcode OP_RREF (with a value of zero for "the whole
pattern") or OP_DNRREF (with data as for OP_DNCREF). For a DEFINE condition,
just the single unit OP_DEF is used (it has no associated data). Otherwise, a
conditional subpattern always starts with one of the assertions.
Recursion
---------
Recursion either matches the current regex, or some subexpression. The opcode
OP_RECURSE is followed by aLINK_SIZE value that is the offset to the starting
bracket from the start of the whole pattern. From release 6.5, OP_RECURSE is
automatically wrapped inside OP_ONCE brackets, because otherwise some patterns
broke it. OP_RECURSE is also used for "subroutine" calls, even though they are
not strictly a recursion.
Callout
-------
OP_CALLOUT is followed by one unit of data that holds a callout number in the
range 0 to 254 for manual callouts, or 255 for an automatic callout. In both
cases there follows a count giving the offset in the pattern string to the
start of the following item, and another count giving the length of this item.
These values make is possible for pcretest to output useful tracing information
using automatic callouts.
Philip Hazel
November 2013

View file

@ -1,93 +0,0 @@
PCRE LICENCE
------------
PCRE is a library of functions to support regular expressions whose syntax
and semantics are as close as possible to those of the Perl 5 language.
Release 8 of PCRE is distributed under the terms of the "BSD" licence, as
specified below. The documentation for PCRE, supplied in the "doc"
directory, is distributed under the same terms as the software itself. The data
in the testdata directory is not copyrighted and is in the public domain.
The basic library functions are written in C and are freestanding. Also
included in the distribution is a set of C++ wrapper functions, and a
just-in-time compiler that can be used to optimize pattern matching. These
are both optional features that can be omitted when the library is built.
THE BASIC LIBRARY FUNCTIONS
---------------------------
Written by: Philip Hazel
Email local part: ph10
Email domain: cam.ac.uk
University of Cambridge Computing Service,
Cambridge, England.
Copyright (c) 1997-2017 University of Cambridge
All rights reserved.
PCRE JUST-IN-TIME COMPILATION SUPPORT
-------------------------------------
Written by: Zoltan Herczeg
Email local part: hzmester
Emain domain: freemail.hu
Copyright(c) 2010-2017 Zoltan Herczeg
All rights reserved.
STACK-LESS JUST-IN-TIME COMPILER
--------------------------------
Written by: Zoltan Herczeg
Email local part: hzmester
Emain domain: freemail.hu
Copyright(c) 2009-2017 Zoltan Herczeg
All rights reserved.
THE C++ WRAPPER FUNCTIONS
-------------------------
Contributed by: Google Inc.
Copyright (c) 2007-2012, Google Inc.
All rights reserved.
THE "BSD" LICENCE
-----------------
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the University of Cambridge nor the name of Google
Inc. nor the names of their contributors may be used to endorse or
promote products derived from this software without specific prior
written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
End

View file

@ -1,737 +0,0 @@
News about PCRE releases
------------------------
Release 8.41 13-June-2017
-------------------------
This is a bug-fix release.
Release 8.40 11-January-2017
----------------------------
This is a bug-fix release.
Release 8.39 14-June-2016
-------------------------
Some appropriate PCRE2 JIT improvements have been retro-fitted to PCRE1. Apart
from that, this is another bug-fix release. Note that this library (now called
PCRE1) is now being maintained for bug fixes only. New projects are advised to
use the new PCRE2 libraries.
Release 8.38 23-November-2015
-----------------------------
This is bug-fix release. Note that this library (now called PCRE1) is now being
maintained for bug fixes only. New projects are advised to use the new PCRE2
libraries.
Release 8.37 28-April-2015
--------------------------
This is bug-fix release. Note that this library (now called PCRE1) is now being
maintained for bug fixes only. New projects are advised to use the new PCRE2
libraries.
Release 8.36 26-September-2014
------------------------------
This is primarily a bug-fix release. However, in addition, the Unicode data
tables have been updated to Unicode 7.0.0.
Release 8.35 04-April-2014
--------------------------
There have been performance improvements for classes containing non-ASCII
characters and the "auto-possessification" feature has been extended. Other
minor improvements have been implemented and bugs fixed. There is a new callout
feature to enable applications to do detailed stack checks at compile time, to
avoid running out of stack for deeply nested parentheses. The JIT compiler has
been extended with experimental support for ARM-64, MIPS-64, and PPC-LE.
Release 8.34 15-December-2013
-----------------------------
As well as fixing the inevitable bugs, performance has been improved by
refactoring and extending the amount of "auto-possessification" that PCRE does.
Other notable changes:
. Implemented PCRE_INFO_MATCH_EMPTY, which yields 1 if the pattern can match
an empty string. If it can, pcretest shows this in its information output.
. A back reference to a named subpattern when there is more than one of the
same name now checks them in the order in which they appear in the pattern.
The first one that is set is used for the reference. Previously only the
first one was inspected. This change makes PCRE more compatible with Perl.
. Unicode character properties were updated from Unicode 6.3.0.
. The character VT has been added to the set of characters that match \s and
are generally treated as white space, following this same change in Perl
5.18. There is now no difference between "Perl space" and "POSIX space".
. Perl has changed its handling of \8 and \9. If there is no previously
encountered capturing group of those numbers, they are treated as the
literal characters 8 and 9 instead of a binary zero followed by the
literals. PCRE now does the same.
. Following Perl, added \o{} to specify codepoints in octal, making it
possible to specify values greater than 0777 and also making them
unambiguous.
. In UCP mode, \s was not matching two of the characters that Perl matches,
namely NEL (U+0085) and MONGOLIAN VOWEL SEPARATOR (U+180E), though they
were matched by \h.
. Add JIT support for the 64 bit TileGX architecture.
. Upgraded the handling of the POSIX classes [:graph:], [:print:], and
[:punct:] when PCRE_UCP is set so as to include the same characters as Perl
does in Unicode mode.
. Perl no longer allows group names to start with digits, so I have made this
change also in PCRE.
. Added support for [[:<:]] and [[:>:]] as used in the BSD POSIX library to
mean "start of word" and "end of word", respectively, as a transition aid.
Release 8.33 28-May-2013
--------------------------
A number of bugs are fixed, and some performance improvements have been made.
There are also some new features, of which these are the most important:
. The behaviour of the backtracking verbs has been rationalized and
documented in more detail.
. JIT now supports callouts and all of the backtracking verbs.
. Unicode validation has been updated in the light of Unicode Corrigendum #9,
which points out that "non characters" are not "characters that may not
appear in Unicode strings" but rather "characters that are reserved for
internal use and have only local meaning".
. (*LIMIT_MATCH=d) and (*LIMIT_RECURSION=d) have been added so that the
creator of a pattern can specify lower (but not higher) limits for the
matching process.
. The PCRE_NEVER_UTF option is available to prevent pattern-writers from using
the (*UTF) feature, as this could be a security issue.
Release 8.32 30-November-2012
-----------------------------
This release fixes a number of bugs, but also has some new features. These are
the highlights:
. There is now support for 32-bit character strings and UTF-32. Like the
16-bit support, this is done by compiling a separate 32-bit library.
. \X now matches a Unicode extended grapheme cluster.
. Case-independent matching of Unicode characters that have more than one
"other case" now makes all three (or more) characters equivalent. This
applies, for example, to Greek Sigma, which has two lowercase versions.
. Unicode character properties are updated to Unicode 6.2.0.
. The EBCDIC support, which had decayed, has had a spring clean.
. A number of JIT optimizations have been added, which give faster JIT
execution speed. In addition, a new direct interface to JIT execution is
available. This bypasses some of the sanity checks of pcre_exec() to give a
noticeable speed-up.
. A number of issues in pcregrep have been fixed, making it more compatible
with GNU grep. In particular, --exclude and --include (and variants) apply
to all files now, not just those obtained from scanning a directory
recursively. In Windows environments, the default action for directories is
now "skip" instead of "read" (which provokes an error).
. If the --only-matching (-o) option in pcregrep is specified multiple
times, each one causes appropriate output. For example, -o1 -o2 outputs the
substrings matched by the 1st and 2nd capturing parentheses. A separating
string can be specified by --om-separator (default empty).
. When PCRE is built via Autotools using a version of gcc that has the
"visibility" feature, it is used to hide internal library functions that are
not part of the public API.
Release 8.31 06-July-2012
-------------------------
This is mainly a bug-fixing release, with a small number of developments:
. The JIT compiler now supports partial matching and the (*MARK) and
(*COMMIT) verbs.
. PCRE_INFO_MAXLOOKBEHIND can be used to find the longest lookbehind in a
pattern.
. There should be a performance improvement when using the heap instead of the
stack for recursion.
. pcregrep can now be linked with libedit as an alternative to libreadline.
. pcregrep now has a --file-list option where the list of files to scan is
given as a file.
. pcregrep now recognizes binary files and there are related options.
. The Unicode tables have been updated to 6.1.0.
As always, the full list of changes is in the ChangeLog file.
Release 8.30 04-February-2012
-----------------------------
Release 8.30 introduces a major new feature: support for 16-bit character
strings, compiled as a separate library. There are a few changes to the
8-bit library, in addition to some bug fixes.
. The pcre_info() function, which has been obsolete for over 10 years, has
been removed.
. When a compiled pattern was saved to a file and later reloaded on a host
with different endianness, PCRE used automatically to swap the bytes in some
of the data fields. With the advent of the 16-bit library, where more of this
swapping is needed, it is no longer done automatically. Instead, the bad
endianness is detected and a specific error is given. The user can then call
a new function called pcre_pattern_to_host_byte_order() (or an equivalent
16-bit function) to do the swap.
. In UTF-8 mode, the values 0xd800 to 0xdfff are not legal Unicode
code points and are now faulted. (They are the so-called "surrogates"
that are reserved for coding high values in UTF-16.)
Release 8.21 12-Dec-2011
------------------------
This is almost entirely a bug-fix release. The only new feature is the ability
to obtain the size of the memory used by the JIT compiler.
Release 8.20 21-Oct-2011
------------------------
The main change in this release is the inclusion of Zoltan Herczeg's
just-in-time compiler support, which can be accessed by building PCRE with
--enable-jit. Large performance benefits can be had in many situations. 8.20
also fixes an unfortunate bug that was introduced in 8.13 as well as tidying up
a number of infelicities and differences from Perl.
Release 8.13 16-Aug-2011
------------------------
This is mainly a bug-fix release. There has been a lot of internal refactoring.
The Unicode tables have been updated. The only new feature in the library is
the passing of *MARK information to callouts. Some additions have been made to
pcretest to make testing easier and more comprehensive. There is a new option
for pcregrep to adjust its internal buffer size.
Release 8.12 15-Jan-2011
------------------------
This release fixes some bugs in pcregrep, one of which caused the tests to fail
on 64-bit big-endian systems. There are no changes to the code of the library.
Release 8.11 10-Dec-2010
------------------------
A number of bugs in the library and in pcregrep have been fixed. As always, see
ChangeLog for details. The following are the non-bug-fix changes:
. Added --match-limit and --recursion-limit to pcregrep.
. Added an optional parentheses number to the -o and --only-matching options
of pcregrep.
. Changed the way PCRE_PARTIAL_HARD affects the matching of $, \z, \Z, \b, and
\B.
. Added PCRE_ERROR_SHORTUTF8 to make it possible to distinguish between a
bad UTF-8 sequence and one that is incomplete when using PCRE_PARTIAL_HARD.
. Recognize (*NO_START_OPT) at the start of a pattern to set the PCRE_NO_
START_OPTIMIZE option, which is now allowed at compile time
Release 8.10 25-Jun-2010
------------------------
There are two major additions: support for (*MARK) and friends, and the option
PCRE_UCP, which changes the behaviour of \b, \d, \s, and \w (and their
opposites) so that they make use of Unicode properties. There are also a number
of lesser new features, and several bugs have been fixed. A new option,
--line-buffered, has been added to pcregrep, for use when it is connected to
pipes.
Release 8.02 19-Mar-2010
------------------------
Another bug-fix release.
Release 8.01 19-Jan-2010
------------------------
This is a bug-fix release. Several bugs in the code itself and some bugs and
infelicities in the build system have been fixed.
Release 8.00 19-Oct-09
----------------------
Bugs have been fixed in the library and in pcregrep. There are also some
enhancements. Restrictions on patterns used for partial matching have been
removed, extra information is given for partial matches, the partial matching
process has been improved, and an option to make a partial match override a
full match is available. The "study" process has been enhanced by finding a
lower bound matching length. Groups with duplicate numbers may now have
duplicated names without the use of PCRE_DUPNAMES. However, they may not have
different names. The documentation has been revised to reflect these changes.
The version number has been expanded to 3 digits as it is clear that the rate
of change is not slowing down.
Release 7.9 11-Apr-09
---------------------
Mostly bugfixes and tidies with just a couple of minor functional additions.
Release 7.8 05-Sep-08
---------------------
More bug fixes, plus a performance improvement in Unicode character property
lookup.
Release 7.7 07-May-08
---------------------
This is once again mainly a bug-fix release, but there are a couple of new
features.
Release 7.6 28-Jan-08
---------------------
The main reason for having this release so soon after 7.5 is because it fixes a
potential buffer overflow problem in pcre_compile() when run in UTF-8 mode. In
addition, the CMake configuration files have been brought up to date.
Release 7.5 10-Jan-08
---------------------
This is mainly a bug-fix release. However the ability to link pcregrep with
libz or libbz2 and the ability to link pcretest with libreadline have been
added. Also the --line-offsets and --file-offsets options were added to
pcregrep.
Release 7.4 21-Sep-07
---------------------
The only change of specification is the addition of options to control whether
\R matches any Unicode line ending (the default) or just CR, LF, and CRLF.
Otherwise, the changes are bug fixes and a refactoring to reduce the number of
relocations needed in a shared library. There have also been some documentation
updates, in particular, some more information about using CMake to build PCRE
has been added to the NON-UNIX-USE file.
Release 7.3 28-Aug-07
---------------------
Most changes are bug fixes. Some that are not:
1. There is some support for Perl 5.10's experimental "backtracking control
verbs" such as (*PRUNE).
2. UTF-8 checking is now as per RFC 3629 instead of RFC 2279; this is more
restrictive in the strings it accepts.
3. Checking for potential integer overflow has been made more dynamic, and as a
consequence there is no longer a hard limit on the size of a subpattern that
has a limited repeat count.
4. When CRLF is a valid line-ending sequence, pcre_exec() and pcre_dfa_exec()
no longer advance by two characters instead of one when an unanchored match
fails at CRLF if there are explicit CR or LF matches within the pattern.
This gets rid of some anomalous effects that previously occurred.
5. Some PCRE-specific settings for varying the newline options at the start of
a pattern have been added.
Release 7.2 19-Jun-07
---------------------
WARNING: saved patterns that were compiled by earlier versions of PCRE must be
recompiled for use with 7.2 (necessitated by the addition of \K, \h, \H, \v,
and \V).
Correction to the notes for 7.1: the note about shared libraries for Windows is
wrong. Previously, three libraries were built, but each could function
independently. For example, the pcreposix library also included all the
functions from the basic pcre library. The change is that the three libraries
are no longer independent. They are like the Unix libraries. To use the
pcreposix functions, for example, you need to link with both the pcreposix and
the basic pcre library.
Some more features from Perl 5.10 have been added:
(?-n) and (?+n) relative references for recursion and subroutines.
(?(-n) and (?(+n) relative references as conditions.
\k{name} and \g{name} are synonyms for \k<name>.
\K to reset the start of the matched string; for example, (foo)\Kbar
matches bar preceded by foo, but only sets bar as the matched string.
(?| introduces a group where the capturing parentheses in each alternative
start from the same number; for example, (?|(abc)|(xyz)) sets capturing
parentheses number 1 in both cases.
\h, \H, \v, \V match horizontal and vertical whitespace, respectively.
Release 7.1 24-Apr-07
---------------------
There is only one new feature in this release: a linebreak setting of
PCRE_NEWLINE_ANYCRLF. It is a cut-down version of PCRE_NEWLINE_ANY, which
recognizes only CRLF, CR, and LF as linebreaks.
A few bugs are fixed (see ChangeLog for details), but the major change is a
complete re-implementation of the build system. This now has full Autotools
support and so is now "standard" in some sense. It should help with compiling
PCRE in a wide variety of environments.
NOTE: when building shared libraries for Windows, three dlls are now built,
called libpcre, libpcreposix, and libpcrecpp. Previously, everything was
included in a single dll.
Another important change is that the dftables auxiliary program is no longer
compiled and run at "make" time by default. Instead, a default set of character
tables (assuming ASCII coding) is used. If you want to use dftables to generate
the character tables as previously, add --enable-rebuild-chartables to the
"configure" command. You must do this if you are compiling PCRE to run on a
system that uses EBCDIC code.
There is a discussion about character tables in the README file. The default is
not to use dftables so that that there is no problem when cross-compiling.
Release 7.0 19-Dec-06
---------------------
This release has a new major number because there have been some internal
upheavals to facilitate the addition of new optimizations and other facilities,
and to make subsequent maintenance and extension easier. Compilation is likely
to be a bit slower, but there should be no major effect on runtime performance.
Previously compiled patterns are NOT upwards compatible with this release. If
you have saved compiled patterns from a previous release, you will have to
re-compile them. Important changes that are visible to users are:
1. The Unicode property tables have been updated to Unicode 5.0.0, which adds
some more scripts.
2. The option PCRE_NEWLINE_ANY causes PCRE to recognize any Unicode newline
sequence as a newline.
3. The \R escape matches a single Unicode newline sequence as a single unit.
4. New features that will appear in Perl 5.10 are now in PCRE. These include
alternative Perl syntax for named parentheses, and Perl syntax for
recursion.
5. The C++ wrapper interface has been extended by the addition of a
QuoteMeta function and the ability to allow copy construction and
assignment.
For a complete list of changes, see the ChangeLog file.
Release 6.7 04-Jul-06
---------------------
The main additions to this release are the ability to use the same name for
multiple sets of parentheses, and support for CRLF line endings in both the
library and pcregrep (and in pcretest for testing).
Thanks to Ian Taylor, the stack usage for many kinds of pattern has been
significantly reduced for certain subject strings.
Release 6.5 01-Feb-06
---------------------
Important changes in this release:
1. A number of new features have been added to pcregrep.
2. The Unicode property tables have been updated to Unicode 4.1.0, and the
supported properties have been extended with script names such as "Arabic",
and the derived properties "Any" and "L&". This has necessitated a change to
the interal format of compiled patterns. Any saved compiled patterns that
use \p or \P must be recompiled.
3. The specification of recursion in patterns has been changed so that all
recursive subpatterns are automatically treated as atomic groups. Thus, for
example, (?R) is treated as if it were (?>(?R)). This is necessary because
otherwise there are situations where recursion does not work.
See the ChangeLog for a complete list of changes, which include a number of bug
fixes and tidies.
Release 6.0 07-Jun-05
---------------------
The release number has been increased to 6.0 because of the addition of several
major new pieces of functionality.
A new function, pcre_dfa_exec(), which implements pattern matching using a DFA
algorithm, has been added. This has a number of advantages for certain cases,
though it does run more slowly, and lacks the ability to capture substrings. On
the other hand, it does find all matches, not just the first, and it works
better for partial matching. The pcrematching man page discusses the
differences.
The pcretest program has been enhanced so that it can make use of the new
pcre_dfa_exec() matching function and the extra features it provides.
The distribution now includes a C++ wrapper library. This is built
automatically if a C++ compiler is found. The pcrecpp man page discusses this
interface.
The code itself has been re-organized into many more files, one for each
function, so it no longer requires everything to be linked in when static
linkage is used. As a consequence, some internal functions have had to have
their names exposed. These functions all have names starting with _pcre_. They
are undocumented, and are not intended for use by outside callers.
The pcregrep program has been enhanced with new functionality such as
multiline-matching and options for output more matching context. See the
ChangeLog for a complete list of changes to the library and the utility
programs.
Release 5.0 13-Sep-04
---------------------
The licence under which PCRE is released has been changed to the more
conventional "BSD" licence.
In the code, some bugs have been fixed, and there are also some major changes
in this release (which is why I've increased the number to 5.0). Some changes
are internal rearrangements, and some provide a number of new facilities. The
new features are:
1. There's an "automatic callout" feature that inserts callouts before every
item in the regex, and there's a new callout field that gives the position
in the pattern - useful for debugging and tracing.
2. The extra_data structure can now be used to pass in a set of character
tables at exec time. This is useful if compiled regex are saved and re-used
at a later time when the tables may not be at the same address. If the
default internal tables are used, the pointer saved with the compiled
pattern is now set to NULL, which means that you don't need to do anything
special unless you are using custom tables.
3. It is possible, with some restrictions on the content of the regex, to
request "partial" matching. A special return code is given if all of the
subject string matched part of the regex. This could be useful for testing
an input field as it is being typed.
4. There is now some optional support for Unicode character properties, which
means that the patterns items such as \p{Lu} and \X can now be used. Only
the general category properties are supported. If PCRE is compiled with this
support, an additional 90K data structure is include, which increases the
size of the library dramatically.
5. There is support for saving compiled patterns and re-using them later.
6. There is support for running regular expressions that were compiled on a
different host with the opposite endianness.
7. The pcretest program has been extended to accommodate the new features.
The main internal rearrangement is that sequences of literal characters are no
longer handled as strings. Instead, each character is handled on its own. This
makes some UTF-8 handling easier, and makes the support of partial matching
possible. Compiled patterns containing long literal strings will be larger as a
result of this change; I hope that performance will not be much affected.
Release 4.5 01-Dec-03
---------------------
Again mainly a bug-fix and tidying release, with only a couple of new features:
1. It's possible now to compile PCRE so that it does not use recursive
function calls when matching. Instead it gets memory from the heap. This slows
things down, but may be necessary on systems with limited stacks.
2. UTF-8 string checking has been tightened to reject overlong sequences and to
check that a starting offset points to the start of a character. Failure of the
latter returns a new error code: PCRE_ERROR_BADUTF8_OFFSET.
3. PCRE can now be compiled for systems that use EBCDIC code.
Release 4.4 21-Aug-03
---------------------
This is mainly a bug-fix and tidying release. The only new feature is that PCRE
checks UTF-8 strings for validity by default. There is an option to suppress
this, just in case anybody wants that teeny extra bit of performance.
Releases 4.1 - 4.3
------------------
Sorry, I forgot about updating the NEWS file for these releases. Please take a
look at ChangeLog.
Release 4.0 17-Feb-03
---------------------
There have been a lot of changes for the 4.0 release, adding additional
functionality and mending bugs. Below is a list of the highlights of the new
functionality. For full details of these features, please consult the
documentation. For a complete list of changes, see the ChangeLog file.
1. Support for Perl's \Q...\E escapes.
2. "Possessive quantifiers" ?+, *+, ++, and {,}+ which come from Sun's Java
package. They provide some syntactic sugar for simple cases of "atomic
grouping".
3. Support for the \G assertion. It is true when the current matching position
is at the start point of the match.
4. A new feature that provides some of the functionality that Perl provides
with (?{...}). The facility is termed a "callout". The way it is done in PCRE
is for the caller to provide an optional function, by setting pcre_callout to
its entry point. To get the function called, the regex must include (?C) at
appropriate points.
5. Support for recursive calls to individual subpatterns. This makes it really
easy to get totally confused.
6. Support for named subpatterns. The Python syntax (?P<name>...) is used to
name a group.
7. Several extensions to UTF-8 support; it is now fairly complete. There is an
option for pcregrep to make it operate in UTF-8 mode.
8. The single man page has been split into a number of separate man pages.
These also give rise to individual HTML pages which are put in a separate
directory. There is an index.html page that lists them all. Some hyperlinking
between the pages has been installed.
Release 3.5 15-Aug-01
---------------------
1. The configuring system has been upgraded to use later versions of autoconf
and libtool. By default it builds both a shared and a static library if the OS
supports it. You can use --disable-shared or --disable-static on the configure
command if you want only one of them.
2. The pcretest utility is now installed along with pcregrep because it is
useful for users (to test regexs) and by doing this, it automatically gets
relinked by libtool. The documentation has been turned into a man page, so
there are now .1, .txt, and .html versions in /doc.
3. Upgrades to pcregrep:
(i) Added long-form option names like gnu grep.
(ii) Added --help to list all options with an explanatory phrase.
(iii) Added -r, --recursive to recurse into sub-directories.
(iv) Added -f, --file to read patterns from a file.
4. Added --enable-newline-is-cr and --enable-newline-is-lf to the configure
script, to force use of CR or LF instead of \n in the source. On non-Unix
systems, the value can be set in config.h.
5. The limit of 200 on non-capturing parentheses is a _nesting_ limit, not an
absolute limit. Changed the text of the error message to make this clear, and
likewise updated the man page.
6. The limit of 99 on the number of capturing subpatterns has been removed.
The new limit is 65535, which I hope will not be a "real" limit.
Release 3.3 01-Aug-00
---------------------
There is some support for UTF-8 character strings. This is incomplete and
experimental. The documentation describes what is and what is not implemented.
Otherwise, this is just a bug-fixing release.
Release 3.0 01-Feb-00
---------------------
1. A "configure" script is now used to configure PCRE for Unix systems. It
builds a Makefile, a config.h file, and the pcre-config script.
2. PCRE is built as a shared library by default.
3. There is support for POSIX classes such as [:alpha:].
5. There is an experimental recursion feature.
----------------------------------------------------------------------------
IMPORTANT FOR THOSE UPGRADING FROM VERSIONS BEFORE 2.00
Please note that there has been a change in the API such that a larger
ovector is required at matching time, to provide some additional workspace.
The new man page has details. This change was necessary in order to support
some of the new functionality in Perl 5.005.
IMPORTANT FOR THOSE UPGRADING FROM VERSION 2.00
Another (I hope this is the last!) change has been made to the API for the
pcre_compile() function. An additional argument has been added to make it
possible to pass over a pointer to character tables built in the current
locale by pcre_maketables(). To use the default tables, this new argument
should be passed as NULL.
IMPORTANT FOR THOSE UPGRADING FROM VERSION 2.05
Yet another (and again I hope this really is the last) change has been made
to the API for the pcre_exec() function. An additional argument has been
added to make it possible to start the match other than at the start of the
subject string. This is important if there are lookbehinds. The new man
page has the details, but you just want to convert existing programs, all
you need to do is to stick in a new fifth argument to pcre_exec(), with a
value of zero. For example, change
pcre_exec(pattern, extra, subject, length, options, ovec, ovecsize)
to
pcre_exec(pattern, extra, subject, length, 0, options, ovec, ovecsize)
****

View file

@ -1,7 +0,0 @@
Compiling PCRE on non-Unix systems
----------------------------------
This has been renamed to better reflect its contents. Please see the file
NON-AUTOTOOLS-BUILD for details of how to build PCRE without using autotools.
####

File diff suppressed because it is too large Load diff

View file

@ -1,459 +0,0 @@
#include <php_compat.h>
#ifdef PHP_WIN32
# include <config.w32.h>
#else
# include <php_config.h>
#endif
#undef PACKAGE_NAME
#undef PACKAGE_VERSION
#undef PACKAGE_TARNAME
#undef PACKAGE_STRING
#define SUPPORT_UCP
#define SUPPORT_UTF8
#if defined(__GNUC__) && __GNUC__ >= 4
# ifdef __cplusplus
# define PCRE_EXP_DECL extern "C" __attribute__ ((visibility("default")))
# else
# define PCRE_EXP_DECL extern __attribute__ ((visibility("default")))
# endif
# define PCRE_EXP_DEFN __attribute__ ((visibility("default")))
# define PCRE_EXP_DATA_DEFN __attribute__ ((visibility("default")))
#endif
/* Exclude these below definitions when building within PHP */
#ifndef ZEND_API
/* config.h. Generated from config.h.in by configure. */
/* config.h.in. Generated from configure.ac by autoheader. */
/* PCRE is written in Standard C, but there are a few non-standard things it
can cope with, allowing it to run on SunOS4 and other "close to standard"
systems.
In environments that support the facilities, config.h.in is converted by
"configure", or config-cmake.h.in is converted by CMake, into config.h. If you
are going to build PCRE "by hand" without using "configure" or CMake, you
should copy the distributed config.h.generic to config.h, and then edit the
macro definitions to be the way you need them. You must then add
-DHAVE_CONFIG_H to all of your compile commands, so that config.h is included
at the start of every source.
Alternatively, you can avoid editing by using -D on the compiler command line
to set the macro values. In this case, you do not have to set -DHAVE_CONFIG_H.
PCRE uses memmove() if HAVE_MEMMOVE is set to 1; otherwise it uses bcopy() if
HAVE_BCOPY is set to 1. If your system has neither bcopy() nor memmove(), set
them both to 0; an emulation function will be used. */
/* By default, the \R escape sequence matches any Unicode line ending
character or sequence of characters. If BSR_ANYCRLF is defined (to any
value), this is changed so that backslash-R matches only CR, LF, or CRLF.
The build-time default can be overridden by the user of PCRE at runtime. */
#undef BSR_ANYCRLF
/* If you are compiling for a system that uses EBCDIC instead of ASCII
character codes, define this macro to any value. You must also edit the
NEWLINE macro below to set a suitable EBCDIC newline, commonly 21 (0x15).
On systems that can use "configure" or CMake to set EBCDIC, NEWLINE is
automatically adjusted. When EBCDIC is set, PCRE assumes that all input
strings are in EBCDIC. If you do not define this macro, PCRE will assume
input strings are ASCII or UTF-8/16/32 Unicode. It is not possible to build
a version of PCRE that supports both EBCDIC and UTF-8/16/32. */
#undef EBCDIC
/* In an EBCDIC environment, define this macro to any value to arrange for the
NL character to be 0x25 instead of the default 0x15. NL plays the role that
LF does in an ASCII/Unicode environment. The value must also be set in the
NEWLINE macro below. On systems that can use "configure" or CMake to set
EBCDIC_NL25, the adjustment of NEWLINE is automatic. */
#undef EBCDIC_NL25
/* Define to 1 if you have the `bcopy' function. */
#ifndef HAVE_BCOPY
#define HAVE_BCOPY 1
#endif
/* Define to 1 if you have the <bits/type_traits.h> header file. */
/* #undef HAVE_BITS_TYPE_TRAITS_H */
/* Define to 1 if you have the <bzlib.h> header file. */
#ifndef HAVE_BZLIB_H
#define HAVE_BZLIB_H 1
#endif
/* Define to 1 if you have the <dirent.h> header file. */
#ifndef HAVE_DIRENT_H
#define HAVE_DIRENT_H 1
#endif
/* Define to 1 if you have the <dlfcn.h> header file. */
#ifndef HAVE_DLFCN_H
#define HAVE_DLFCN_H 1
#endif
/* Define to 1 if you have the <editline/readline.h> header file. */
/*#undef HAVE_EDITLINE_READLINE_H*/
/* Define to 1 if you have the <edit/readline/readline.h> header file. */
/* #undef HAVE_EDIT_READLINE_READLINE_H */
/* Define to 1 if you have the <inttypes.h> header file. */
#ifndef HAVE_INTTYPES_H
#define HAVE_INTTYPES_H 1
#endif
/* Define to 1 if you have the <limits.h> header file. */
#ifndef HAVE_LIMITS_H
#define HAVE_LIMITS_H 1
#endif
/* Define to 1 if the system has the type `long long'. */
#ifndef HAVE_LONG_LONG
#define HAVE_LONG_LONG 1
#endif
/* Define to 1 if you have the `memmove' function. */
#ifndef HAVE_MEMMOVE
#define HAVE_MEMMOVE 1
#endif
/* Define to 1 if you have the <memory.h> header file. */
#ifndef HAVE_MEMORY_H
#define HAVE_MEMORY_H 1
#endif
/* Define if you have POSIX threads libraries and header files. */
#undef HAVE_PTHREAD
/* Have PTHREAD_PRIO_INHERIT. */
#undef HAVE_PTHREAD_PRIO_INHERIT
/* Define to 1 if you have the <readline/history.h> header file. */
#ifndef HAVE_READLINE_HISTORY_H
#define HAVE_READLINE_HISTORY_H 1
#endif
/* Define to 1 if you have the <readline/readline.h> header file. */
#ifndef HAVE_READLINE_READLINE_H
#define HAVE_READLINE_READLINE_H 1
#endif
/* Define to 1 if you have the <stdint.h> header file. */
#ifndef HAVE_STDINT_H
#define HAVE_STDINT_H 1
#endif
/* Define to 1 if you have the <stdlib.h> header file. */
#ifndef HAVE_STDLIB_H
#define HAVE_STDLIB_H 1
#endif
/* Define to 1 if you have the `strerror' function. */
#ifndef HAVE_STRERROR
#define HAVE_STRERROR 1
#endif
/* Define to 1 if you have the <string> header file. */
#ifndef HAVE_STRING
#define HAVE_STRING 1
#endif
/* Define to 1 if you have the <strings.h> header file. */
#ifndef HAVE_STRINGS_H
#define HAVE_STRINGS_H 1
#endif
/* Define to 1 if you have the <string.h> header file. */
#ifndef HAVE_STRING_H
#define HAVE_STRING_H 1
#endif
/* Define to 1 if you have `strtoimax'. */
/* #undef HAVE_STRTOIMAX */
/* Define to 1 if you have `strtoll'. */
/* #undef HAVE_STRTOLL */
/* Define to 1 if you have `strtoq'. */
#ifndef HAVE_STRTOQ
#define HAVE_STRTOQ 1
#endif
/* Define to 1 if you have the <sys/stat.h> header file. */
#ifndef HAVE_SYS_STAT_H
#define HAVE_SYS_STAT_H 1
#endif
/* Define to 1 if you have the <sys/types.h> header file. */
#ifndef HAVE_SYS_TYPES_H
#define HAVE_SYS_TYPES_H 1
#endif
/* Define to 1 if you have the <type_traits.h> header file. */
/* #undef HAVE_TYPE_TRAITS_H */
/* Define to 1 if you have the <unistd.h> header file. */
#ifndef HAVE_UNISTD_H
#define HAVE_UNISTD_H 1
#endif
/* Define to 1 if the system has the type `unsigned long long'. */
#ifndef HAVE_UNSIGNED_LONG_LONG
#define HAVE_UNSIGNED_LONG_LONG 1
#endif
/* Define to 1 or 0, depending whether the compiler supports simple visibility
declarations. */
/* #undef HAVE_VISIBILITY */
/* Define to 1 if you have the <windows.h> header file. */
/* #undef HAVE_WINDOWS_H */
/* Define to 1 if you have the <zlib.h> header file. */
#ifndef HAVE_ZLIB_H
#define HAVE_ZLIB_H 1
#endif
/* Define to 1 if you have `_strtoi64'. */
/* #undef HAVE__STRTOI64 */
/* Exclude these above definitions when building within PHP */
#endif
/* The value of LINK_SIZE determines the number of bytes used to store links
as offsets within the compiled regex. The default is 2, which allows for
compiled patterns up to 64K long. This covers the vast majority of cases.
However, PCRE can also be compiled to use 3 or 4 bytes instead. This allows
for longer patterns in extreme cases. On systems that support it,
"configure" can be used to override this default. */
#ifndef LINK_SIZE
#define LINK_SIZE 2
#endif
/* Define to the sub-directory where libtool stores uninstalled libraries. */
/* This is ignored unless you are using libtool. */
#ifndef LT_OBJDIR
#define LT_OBJDIR ".libs/"
#endif
/* The value of MATCH_LIMIT determines the default number of times the
internal match() function can be called during a single execution of
pcre_exec(). There is a runtime interface for setting a different limit.
The limit exists in order to catch runaway regular expressions that take
for ever to determine that they do not match. The default is set very large
so that it does not accidentally catch legitimate cases. On systems that
support it, "configure" can be used to override this default default. */
#ifndef MATCH_LIMIT
#define MATCH_LIMIT 10000000
#endif
/* The above limit applies to all calls of match(), whether or not they
increase the recursion depth. In some environments it is desirable to limit
the depth of recursive calls of match() more strictly, in order to restrict
the maximum amount of stack (or heap, if NO_RECURSE is defined) that is
used. The value of MATCH_LIMIT_RECURSION applies only to recursive calls of
match(). To have any useful effect, it must be less than the value of
MATCH_LIMIT. The default is to use the same value as MATCH_LIMIT. There is
a runtime method for setting a different limit. On systems that support it,
"configure" can be used to override the default. */
#ifndef MATCH_LIMIT_RECURSION
#define MATCH_LIMIT_RECURSION MATCH_LIMIT
#endif
/* This limit is parameterized just in case anybody ever wants to change it.
Care must be taken if it is increased, because it guards against integer
overflow caused by enormously large patterns. */
#ifndef MAX_NAME_COUNT
#define MAX_NAME_COUNT 10000
#endif
/* This limit is parameterized just in case anybody ever wants to change it.
Care must be taken if it is increased, because it guards against integer
overflow caused by enormously large patterns. */
#ifndef MAX_NAME_SIZE
#define MAX_NAME_SIZE 32
#endif
/* The value of NEWLINE determines the default newline character sequence.
PCRE client programs can override this by selecting other values at run
time. In ASCII environments, the value can be 10 (LF), 13 (CR), or 3338
(CRLF); in EBCDIC environments the value can be 21 or 37 (LF), 13 (CR), or
3349 or 3365 (CRLF) because there are two alternative codepoints (0x15 and
0x25) that are used as the NL line terminator that is equivalent to ASCII
LF. In both ASCII and EBCDIC environments the value can also be -1 (ANY),
or -2 (ANYCRLF). */
#ifndef NEWLINE
#define NEWLINE 10
#endif
/* Define to 1 if your C compiler doesn't accept -c and -o together. */
/* #undef NO_MINUS_C_MINUS_O */
/* PCRE uses recursive function calls to handle backtracking while matching.
This can sometimes be a problem on systems that have stacks of limited
size. Define NO_RECURSE to any value to get a version that doesn't use
recursion in the match() function; instead it creates its own stack by
steam using pcre_recurse_malloc() to obtain memory from the heap. For more
detail, see the comments and other stuff just above the match() function.
*/
/* #undef NO_RECURSE */
#define PARENS_NEST_LIMIT 250
/* Name of package */
#define PACKAGE "pcre"
/* Define to the address where bug reports for this package should be sent. */
#define PACKAGE_BUGREPORT ""
/* Define to the full name of this package. */
#define PACKAGE_NAME "PCRE"
/* Define to the full name and version of this package. */
#define PACKAGE_STRING "PCRE 8.41"
/* Define to the one symbol short name of this package. */
#define PACKAGE_TARNAME "pcre"
/* Define to the home page for this package. */
#define PACKAGE_URL ""
/* Define to the version of this package. */
#define PACKAGE_VERSION "8.41"
/* to make a symbol visible */
/* #undef PCRECPP_EXP_DECL */
/* to make a symbol visible */
/* #undef PCRECPP_EXP_DEFN */
/* The value of PARENS_NEST_LIMIT specifies the maximum depth of nested
parentheses (of any kind) in a pattern. This limits the amount of system
stack that is used while compiling a pattern. */
#ifndef PARENS_NEST_LIMIT
#define PARENS_NEST_LIMIT 250
#endif
/* The value of PCREGREP_BUFSIZE determines the size of buffer used by
pcregrep to hold parts of the file it is searching. This is also the
minimum value. The actual amount of memory used by pcregrep is three times
this number, because it allows for the buffering of "before" and "after"
lines. */
/* #undef PCREGREP_BUFSIZE */
/* to make a symbol visible */
/* #undef PCREPOSIX_EXP_DECL */
/* to make a symbol visible */
/* #undef PCREPOSIX_EXP_DEFN */
/* to make a symbol visible */
/* #undef PCRE_EXP_DATA_DEFN */
/* to make a symbol visible */
/* #undef PCRE_EXP_DECL */
/* If you are compiling for a system other than a Unix-like system or
Win32, and it needs some magic to be inserted before the definition
of a function that is exported by the library, define this macro to
contain the relevant magic. If you do not define this macro, a suitable
__declspec value is used for Windows systems; in other environments
"extern" is used for a C compiler and "extern C" for a C++ compiler.
This macro apears at the start of every exported function that is part
of the external API. It does not appear on functions that are "external"
in the C sense, but which are internal to the library. */
/* #undef PCRE_EXP_DEFN */
/* Define to any value if linking statically (TODO: make nice with Libtool) */
/* #undef PCRE_STATIC */
/* When calling PCRE via the POSIX interface, additional working storage is
required for holding the pointers to capturing substrings because PCRE
requires three integers per substring, whereas the POSIX interface provides
only two. If the number of expected substrings is small, the wrapper
function uses space on the stack, because this is faster than using
malloc() for each call. The threshold above which the stack is no longer
used is defined by POSIX_MALLOC_THRESHOLD. */
#ifndef POSIX_MALLOC_THRESHOLD
#define POSIX_MALLOC_THRESHOLD 10
#endif
/* Define to necessary symbol if this constant uses a non-standard name on
your system. */
/* #undef PTHREAD_CREATE_JOINABLE */
/* Define to 1 if you have the ANSI C header files. */
#ifndef STDC_HEADERS
#define STDC_HEADERS 1
#endif
/* Define to allow pcretest and pcregrep to be linked with gcov, so that they
are able to generate code coverage reports. */
#undef SUPPORT_GCOV
/* Define to any value to enable support for Just-In-Time compiling. */
#if HAVE_PCRE_JIT_SUPPORT
#define SUPPORT_JIT
#endif
/* Define to any value to allow pcregrep to be linked with libbz2, so that it
is able to handle .bz2 files. */
/* #undef SUPPORT_LIBBZ2 */
/* Define to any value to allow pcretest to be linked with libedit. */
#undef SUPPORT_LIBEDIT
/* Define to any value to allow pcretest to be linked with libreadline. */
/* #undef SUPPORT_LIBREADLINE */
/* Define to any value to allow pcregrep to be linked with libz, so that it is
able to handle .gz files. */
/* #undef SUPPORT_LIBZ */
/* Define to any value to enable the 16 bit PCRE library. */
/* #undef SUPPORT_PCRE16 */
/* Define to any value to enable the 32 bit PCRE library. */
/* #undef SUPPORT_PCRE32 */
/* Define to any value to enable the 8 bit PCRE library. */
/* #undef SUPPORT_PCRE8 */
/* Define to any value to enable JIT support in pcregrep. */
/* #undef SUPPORT_PCREGREP_JIT */
/* Define to enable support for Unicode properties */
/* #undef SUPPORT_UCP */
/* Define to any value to enable support for the UTF-8/16/32 Unicode encoding.
This will work even in an EBCDIC environment, but it is incompatible with
the EBCDIC macro. That is, PCRE can support *either* EBCDIC code *or*
ASCII/UTF-8/16/32, but not both at once. */
/* #undef SUPPORT_UTF8 */
/* Valgrind support to find invalid memory reads. */
#if HAVE_PCRE_VALGRIND_SUPPORT
#define SUPPORT_VALGRIND 1
#endif
/* Version number of package */
#ifndef VERSION
#define VERSION "8.41"
#endif
/* Define to empty if `const' does not conform to ANSI C. */
/* #undef const */
/* Define to the type of a signed integer type of width exactly 64 bits if
such a type exists and the standard includes do not define it. */
/* #undef int64_t */
/* Define to `unsigned int' if <sys/types.h> does not define. */
/* #undef size_t */

File diff suppressed because it is too large Load diff

View file

@ -1,677 +0,0 @@
/*************************************************
* Perl-Compatible Regular Expressions *
*************************************************/
/* This is the public header file for the PCRE library, to be #included by
applications that call the PCRE functions.
Copyright (c) 1997-2014 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the University of Cambridge nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
-----------------------------------------------------------------------------
*/
#ifndef _PCRE_H
#define _PCRE_H
/* The current PCRE version information. */
#define PCRE_MAJOR 8
#define PCRE_MINOR 41
#define PCRE_PRERELEASE
#define PCRE_DATE 2017-07-05
/* When an application links to a PCRE DLL in Windows, the symbols that are
imported have to be identified as such. When building PCRE, the appropriate
export setting is defined in pcre_internal.h, which includes this file. So we
don't change existing definitions of PCRE_EXP_DECL and PCRECPP_EXP_DECL. */
#if defined(_WIN32) && !defined(PCRE_STATIC)
# ifndef PCRE_EXP_DECL
# define PCRE_EXP_DECL extern __declspec(dllimport)
# endif
# ifdef __cplusplus
# ifndef PCRECPP_EXP_DECL
# define PCRECPP_EXP_DECL extern __declspec(dllimport)
# endif
# ifndef PCRECPP_EXP_DEFN
# define PCRECPP_EXP_DEFN __declspec(dllimport)
# endif
# endif
#endif
/* By default, we use the standard "extern" declarations. */
#ifndef PCRE_EXP_DECL
# ifdef __cplusplus
# define PCRE_EXP_DECL extern "C"
# else
# define PCRE_EXP_DECL extern
# endif
#endif
#ifdef __cplusplus
# ifndef PCRECPP_EXP_DECL
# define PCRECPP_EXP_DECL extern
# endif
# ifndef PCRECPP_EXP_DEFN
# define PCRECPP_EXP_DEFN
# endif
#endif
/* Have to include stdlib.h in order to ensure that size_t is defined;
it is needed here for malloc. */
#include <stdlib.h>
/* Allow for C++ users */
#ifdef __cplusplus
extern "C" {
#endif
/* Public options. Some are compile-time only, some are run-time only, and some
are both. Most of the compile-time options are saved with the compiled regex so
that they can be inspected during studying (and therefore JIT compiling). Note
that pcre_study() has its own set of options. Originally, all the options
defined here used distinct bits. However, almost all the bits in a 32-bit word
are now used, so in order to conserve them, option bits that were previously
only recognized at matching time (i.e. by pcre_exec() or pcre_dfa_exec()) may
also be used for compile-time options that affect only compiling and are not
relevant for studying or JIT compiling.
Some options for pcre_compile() change its behaviour but do not affect the
behaviour of the execution functions. Other options are passed through to the
execution functions and affect their behaviour, with or without affecting the
behaviour of pcre_compile().
Options that can be passed to pcre_compile() are tagged Cx below, with these
variants:
C1 Affects compile only
C2 Does not affect compile; affects exec, dfa_exec
C3 Affects compile, exec, dfa_exec
C4 Affects compile, exec, dfa_exec, study
C5 Affects compile, exec, study
Options that can be set for pcre_exec() and/or pcre_dfa_exec() are flagged with
E and D, respectively. They take precedence over C3, C4, and C5 settings passed
from pcre_compile(). Those that are compatible with JIT execution are flagged
with J. */
#define PCRE_CASELESS 0x00000001 /* C1 */
#define PCRE_MULTILINE 0x00000002 /* C1 */
#define PCRE_DOTALL 0x00000004 /* C1 */
#define PCRE_EXTENDED 0x00000008 /* C1 */
#define PCRE_ANCHORED 0x00000010 /* C4 E D */
#define PCRE_DOLLAR_ENDONLY 0x00000020 /* C2 */
#define PCRE_EXTRA 0x00000040 /* C1 */
#define PCRE_NOTBOL 0x00000080 /* E D J */
#define PCRE_NOTEOL 0x00000100 /* E D J */
#define PCRE_UNGREEDY 0x00000200 /* C1 */
#define PCRE_NOTEMPTY 0x00000400 /* E D J */
#define PCRE_UTF8 0x00000800 /* C4 ) */
#define PCRE_UTF16 0x00000800 /* C4 ) Synonyms */
#define PCRE_UTF32 0x00000800 /* C4 ) */
#define PCRE_NO_AUTO_CAPTURE 0x00001000 /* C1 */
#define PCRE_NO_UTF8_CHECK 0x00002000 /* C1 E D J ) */
#define PCRE_NO_UTF16_CHECK 0x00002000 /* C1 E D J ) Synonyms */
#define PCRE_NO_UTF32_CHECK 0x00002000 /* C1 E D J ) */
#define PCRE_AUTO_CALLOUT 0x00004000 /* C1 */
#define PCRE_PARTIAL_SOFT 0x00008000 /* E D J ) Synonyms */
#define PCRE_PARTIAL 0x00008000 /* E D J ) */
/* This pair use the same bit. */
#define PCRE_NEVER_UTF 0x00010000 /* C1 ) Overlaid */
#define PCRE_DFA_SHORTEST 0x00010000 /* D ) Overlaid */
/* This pair use the same bit. */
#define PCRE_NO_AUTO_POSSESS 0x00020000 /* C1 ) Overlaid */
#define PCRE_DFA_RESTART 0x00020000 /* D ) Overlaid */
#define PCRE_FIRSTLINE 0x00040000 /* C3 */
#define PCRE_DUPNAMES 0x00080000 /* C1 */
#define PCRE_NEWLINE_CR 0x00100000 /* C3 E D */
#define PCRE_NEWLINE_LF 0x00200000 /* C3 E D */
#define PCRE_NEWLINE_CRLF 0x00300000 /* C3 E D */
#define PCRE_NEWLINE_ANY 0x00400000 /* C3 E D */
#define PCRE_NEWLINE_ANYCRLF 0x00500000 /* C3 E D */
#define PCRE_BSR_ANYCRLF 0x00800000 /* C3 E D */
#define PCRE_BSR_UNICODE 0x01000000 /* C3 E D */
#define PCRE_JAVASCRIPT_COMPAT 0x02000000 /* C5 */
#define PCRE_NO_START_OPTIMIZE 0x04000000 /* C2 E D ) Synonyms */
#define PCRE_NO_START_OPTIMISE 0x04000000 /* C2 E D ) */
#define PCRE_PARTIAL_HARD 0x08000000 /* E D J */
#define PCRE_NOTEMPTY_ATSTART 0x10000000 /* E D J */
#define PCRE_UCP 0x20000000 /* C3 */
/* Exec-time and get/set-time error codes */
#define PCRE_ERROR_NOMATCH (-1)
#define PCRE_ERROR_NULL (-2)
#define PCRE_ERROR_BADOPTION (-3)
#define PCRE_ERROR_BADMAGIC (-4)
#define PCRE_ERROR_UNKNOWN_OPCODE (-5)
#define PCRE_ERROR_UNKNOWN_NODE (-5) /* For backward compatibility */
#define PCRE_ERROR_NOMEMORY (-6)
#define PCRE_ERROR_NOSUBSTRING (-7)
#define PCRE_ERROR_MATCHLIMIT (-8)
#define PCRE_ERROR_CALLOUT (-9) /* Never used by PCRE itself */
#define PCRE_ERROR_BADUTF8 (-10) /* Same for 8/16/32 */
#define PCRE_ERROR_BADUTF16 (-10) /* Same for 8/16/32 */
#define PCRE_ERROR_BADUTF32 (-10) /* Same for 8/16/32 */
#define PCRE_ERROR_BADUTF8_OFFSET (-11) /* Same for 8/16 */
#define PCRE_ERROR_BADUTF16_OFFSET (-11) /* Same for 8/16 */
#define PCRE_ERROR_PARTIAL (-12)
#define PCRE_ERROR_BADPARTIAL (-13)
#define PCRE_ERROR_INTERNAL (-14)
#define PCRE_ERROR_BADCOUNT (-15)
#define PCRE_ERROR_DFA_UITEM (-16)
#define PCRE_ERROR_DFA_UCOND (-17)
#define PCRE_ERROR_DFA_UMLIMIT (-18)
#define PCRE_ERROR_DFA_WSSIZE (-19)
#define PCRE_ERROR_DFA_RECURSE (-20)
#define PCRE_ERROR_RECURSIONLIMIT (-21)
#define PCRE_ERROR_NULLWSLIMIT (-22) /* No longer actually used */
#define PCRE_ERROR_BADNEWLINE (-23)
#define PCRE_ERROR_BADOFFSET (-24)
#define PCRE_ERROR_SHORTUTF8 (-25)
#define PCRE_ERROR_SHORTUTF16 (-25) /* Same for 8/16 */
#define PCRE_ERROR_RECURSELOOP (-26)
#define PCRE_ERROR_JIT_STACKLIMIT (-27)
#define PCRE_ERROR_BADMODE (-28)
#define PCRE_ERROR_BADENDIANNESS (-29)
#define PCRE_ERROR_DFA_BADRESTART (-30)
#define PCRE_ERROR_JIT_BADOPTION (-31)
#define PCRE_ERROR_BADLENGTH (-32)
#define PCRE_ERROR_UNSET (-33)
/* Specific error codes for UTF-8 validity checks */
#define PCRE_UTF8_ERR0 0
#define PCRE_UTF8_ERR1 1
#define PCRE_UTF8_ERR2 2
#define PCRE_UTF8_ERR3 3
#define PCRE_UTF8_ERR4 4
#define PCRE_UTF8_ERR5 5
#define PCRE_UTF8_ERR6 6
#define PCRE_UTF8_ERR7 7
#define PCRE_UTF8_ERR8 8
#define PCRE_UTF8_ERR9 9
#define PCRE_UTF8_ERR10 10
#define PCRE_UTF8_ERR11 11
#define PCRE_UTF8_ERR12 12
#define PCRE_UTF8_ERR13 13
#define PCRE_UTF8_ERR14 14
#define PCRE_UTF8_ERR15 15
#define PCRE_UTF8_ERR16 16
#define PCRE_UTF8_ERR17 17
#define PCRE_UTF8_ERR18 18
#define PCRE_UTF8_ERR19 19
#define PCRE_UTF8_ERR20 20
#define PCRE_UTF8_ERR21 21
#define PCRE_UTF8_ERR22 22 /* Unused (was non-character) */
/* Specific error codes for UTF-16 validity checks */
#define PCRE_UTF16_ERR0 0
#define PCRE_UTF16_ERR1 1
#define PCRE_UTF16_ERR2 2
#define PCRE_UTF16_ERR3 3
#define PCRE_UTF16_ERR4 4 /* Unused (was non-character) */
/* Specific error codes for UTF-32 validity checks */
#define PCRE_UTF32_ERR0 0
#define PCRE_UTF32_ERR1 1
#define PCRE_UTF32_ERR2 2 /* Unused (was non-character) */
#define PCRE_UTF32_ERR3 3
/* Request types for pcre_fullinfo() */
#define PCRE_INFO_OPTIONS 0
#define PCRE_INFO_SIZE 1
#define PCRE_INFO_CAPTURECOUNT 2
#define PCRE_INFO_BACKREFMAX 3
#define PCRE_INFO_FIRSTBYTE 4
#define PCRE_INFO_FIRSTCHAR 4 /* For backwards compatibility */
#define PCRE_INFO_FIRSTTABLE 5
#define PCRE_INFO_LASTLITERAL 6
#define PCRE_INFO_NAMEENTRYSIZE 7
#define PCRE_INFO_NAMECOUNT 8
#define PCRE_INFO_NAMETABLE 9
#define PCRE_INFO_STUDYSIZE 10
#define PCRE_INFO_DEFAULT_TABLES 11
#define PCRE_INFO_OKPARTIAL 12
#define PCRE_INFO_JCHANGED 13
#define PCRE_INFO_HASCRORLF 14
#define PCRE_INFO_MINLENGTH 15
#define PCRE_INFO_JIT 16
#define PCRE_INFO_JITSIZE 17
#define PCRE_INFO_MAXLOOKBEHIND 18
#define PCRE_INFO_FIRSTCHARACTER 19
#define PCRE_INFO_FIRSTCHARACTERFLAGS 20
#define PCRE_INFO_REQUIREDCHAR 21
#define PCRE_INFO_REQUIREDCHARFLAGS 22
#define PCRE_INFO_MATCHLIMIT 23
#define PCRE_INFO_RECURSIONLIMIT 24
#define PCRE_INFO_MATCH_EMPTY 25
/* Request types for pcre_config(). Do not re-arrange, in order to remain
compatible. */
#define PCRE_CONFIG_UTF8 0
#define PCRE_CONFIG_NEWLINE 1
#define PCRE_CONFIG_LINK_SIZE 2
#define PCRE_CONFIG_POSIX_MALLOC_THRESHOLD 3
#define PCRE_CONFIG_MATCH_LIMIT 4
#define PCRE_CONFIG_STACKRECURSE 5
#define PCRE_CONFIG_UNICODE_PROPERTIES 6
#define PCRE_CONFIG_MATCH_LIMIT_RECURSION 7
#define PCRE_CONFIG_BSR 8
#define PCRE_CONFIG_JIT 9
#define PCRE_CONFIG_UTF16 10
#define PCRE_CONFIG_JITTARGET 11
#define PCRE_CONFIG_UTF32 12
#define PCRE_CONFIG_PARENS_LIMIT 13
/* Request types for pcre_study(). Do not re-arrange, in order to remain
compatible. */
#define PCRE_STUDY_JIT_COMPILE 0x0001
#define PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE 0x0002
#define PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE 0x0004
#define PCRE_STUDY_EXTRA_NEEDED 0x0008
/* Bit flags for the pcre[16|32]_extra structure. Do not re-arrange or redefine
these bits, just add new ones on the end, in order to remain compatible. */
#define PCRE_EXTRA_STUDY_DATA 0x0001
#define PCRE_EXTRA_MATCH_LIMIT 0x0002
#define PCRE_EXTRA_CALLOUT_DATA 0x0004
#define PCRE_EXTRA_TABLES 0x0008
#define PCRE_EXTRA_MATCH_LIMIT_RECURSION 0x0010
#define PCRE_EXTRA_MARK 0x0020
#define PCRE_EXTRA_EXECUTABLE_JIT 0x0040
/* Types */
struct real_pcre; /* declaration; the definition is private */
typedef struct real_pcre pcre;
struct real_pcre16; /* declaration; the definition is private */
typedef struct real_pcre16 pcre16;
struct real_pcre32; /* declaration; the definition is private */
typedef struct real_pcre32 pcre32;
struct real_pcre_jit_stack; /* declaration; the definition is private */
typedef struct real_pcre_jit_stack pcre_jit_stack;
struct real_pcre16_jit_stack; /* declaration; the definition is private */
typedef struct real_pcre16_jit_stack pcre16_jit_stack;
struct real_pcre32_jit_stack; /* declaration; the definition is private */
typedef struct real_pcre32_jit_stack pcre32_jit_stack;
/* If PCRE is compiled with 16 bit character support, PCRE_UCHAR16 must contain
a 16 bit wide signed data type. Otherwise it can be a dummy data type since
pcre16 functions are not implemented. There is a check for this in pcre_internal.h. */
#ifndef PCRE_UCHAR16
#define PCRE_UCHAR16 unsigned short
#endif
#ifndef PCRE_SPTR16
#define PCRE_SPTR16 const PCRE_UCHAR16 *
#endif
/* If PCRE is compiled with 32 bit character support, PCRE_UCHAR32 must contain
a 32 bit wide signed data type. Otherwise it can be a dummy data type since
pcre32 functions are not implemented. There is a check for this in pcre_internal.h. */
#ifndef PCRE_UCHAR32
#define PCRE_UCHAR32 unsigned int
#endif
#ifndef PCRE_SPTR32
#define PCRE_SPTR32 const PCRE_UCHAR32 *
#endif
/* When PCRE is compiled as a C++ library, the subject pointer type can be
replaced with a custom type. For conventional use, the public interface is a
const char *. */
#ifndef PCRE_SPTR
#define PCRE_SPTR const char *
#endif
/* The structure for passing additional data to pcre_exec(). This is defined in
such as way as to be extensible. Always add new fields at the end, in order to
remain compatible. */
typedef struct pcre_extra {
unsigned long int flags; /* Bits for which fields are set */
void *study_data; /* Opaque data from pcre_study() */
unsigned long int match_limit; /* Maximum number of calls to match() */
void *callout_data; /* Data passed back in callouts */
const unsigned char *tables; /* Pointer to character tables */
unsigned long int match_limit_recursion; /* Max recursive calls to match() */
unsigned char **mark; /* For passing back a mark pointer */
void *executable_jit; /* Contains a pointer to a compiled jit code */
} pcre_extra;
/* Same structure as above, but with 16 bit char pointers. */
typedef struct pcre16_extra {
unsigned long int flags; /* Bits for which fields are set */
void *study_data; /* Opaque data from pcre_study() */
unsigned long int match_limit; /* Maximum number of calls to match() */
void *callout_data; /* Data passed back in callouts */
const unsigned char *tables; /* Pointer to character tables */
unsigned long int match_limit_recursion; /* Max recursive calls to match() */
PCRE_UCHAR16 **mark; /* For passing back a mark pointer */
void *executable_jit; /* Contains a pointer to a compiled jit code */
} pcre16_extra;
/* Same structure as above, but with 32 bit char pointers. */
typedef struct pcre32_extra {
unsigned long int flags; /* Bits for which fields are set */
void *study_data; /* Opaque data from pcre_study() */
unsigned long int match_limit; /* Maximum number of calls to match() */
void *callout_data; /* Data passed back in callouts */
const unsigned char *tables; /* Pointer to character tables */
unsigned long int match_limit_recursion; /* Max recursive calls to match() */
PCRE_UCHAR32 **mark; /* For passing back a mark pointer */
void *executable_jit; /* Contains a pointer to a compiled jit code */
} pcre32_extra;
/* The structure for passing out data via the pcre_callout_function. We use a
structure so that new fields can be added on the end in future versions,
without changing the API of the function, thereby allowing old clients to work
without modification. */
typedef struct pcre_callout_block {
int version; /* Identifies version of block */
/* ------------------------ Version 0 ------------------------------- */
int callout_number; /* Number compiled into pattern */
int *offset_vector; /* The offset vector */
PCRE_SPTR subject; /* The subject being matched */
int subject_length; /* The length of the subject */
int start_match; /* Offset to start of this match attempt */
int current_position; /* Where we currently are in the subject */
int capture_top; /* Max current capture */
int capture_last; /* Most recently closed capture */
void *callout_data; /* Data passed in with the call */
/* ------------------- Added for Version 1 -------------------------- */
int pattern_position; /* Offset to next item in the pattern */
int next_item_length; /* Length of next item in the pattern */
/* ------------------- Added for Version 2 -------------------------- */
const unsigned char *mark; /* Pointer to current mark or NULL */
/* ------------------------------------------------------------------ */
} pcre_callout_block;
/* Same structure as above, but with 16 bit char pointers. */
typedef struct pcre16_callout_block {
int version; /* Identifies version of block */
/* ------------------------ Version 0 ------------------------------- */
int callout_number; /* Number compiled into pattern */
int *offset_vector; /* The offset vector */
PCRE_SPTR16 subject; /* The subject being matched */
int subject_length; /* The length of the subject */
int start_match; /* Offset to start of this match attempt */
int current_position; /* Where we currently are in the subject */
int capture_top; /* Max current capture */
int capture_last; /* Most recently closed capture */
void *callout_data; /* Data passed in with the call */
/* ------------------- Added for Version 1 -------------------------- */
int pattern_position; /* Offset to next item in the pattern */
int next_item_length; /* Length of next item in the pattern */
/* ------------------- Added for Version 2 -------------------------- */
const PCRE_UCHAR16 *mark; /* Pointer to current mark or NULL */
/* ------------------------------------------------------------------ */
} pcre16_callout_block;
/* Same structure as above, but with 32 bit char pointers. */
typedef struct pcre32_callout_block {
int version; /* Identifies version of block */
/* ------------------------ Version 0 ------------------------------- */
int callout_number; /* Number compiled into pattern */
int *offset_vector; /* The offset vector */
PCRE_SPTR32 subject; /* The subject being matched */
int subject_length; /* The length of the subject */
int start_match; /* Offset to start of this match attempt */
int current_position; /* Where we currently are in the subject */
int capture_top; /* Max current capture */
int capture_last; /* Most recently closed capture */
void *callout_data; /* Data passed in with the call */
/* ------------------- Added for Version 1 -------------------------- */
int pattern_position; /* Offset to next item in the pattern */
int next_item_length; /* Length of next item in the pattern */
/* ------------------- Added for Version 2 -------------------------- */
const PCRE_UCHAR32 *mark; /* Pointer to current mark or NULL */
/* ------------------------------------------------------------------ */
} pcre32_callout_block;
/* Indirection for store get and free functions. These can be set to
alternative malloc/free functions if required. Special ones are used in the
non-recursive case for "frames". There is also an optional callout function
that is triggered by the (?) regex item. For Virtual Pascal, these definitions
have to take another form. */
#ifndef VPCOMPAT
PCRE_EXP_DECL void *(*pcre_malloc)(size_t);
PCRE_EXP_DECL void (*pcre_free)(void *);
PCRE_EXP_DECL void *(*pcre_stack_malloc)(size_t);
PCRE_EXP_DECL void (*pcre_stack_free)(void *);
PCRE_EXP_DECL int (*pcre_callout)(pcre_callout_block *);
PCRE_EXP_DECL int (*pcre_stack_guard)(void);
PCRE_EXP_DECL void *(*pcre16_malloc)(size_t);
PCRE_EXP_DECL void (*pcre16_free)(void *);
PCRE_EXP_DECL void *(*pcre16_stack_malloc)(size_t);
PCRE_EXP_DECL void (*pcre16_stack_free)(void *);
PCRE_EXP_DECL int (*pcre16_callout)(pcre16_callout_block *);
PCRE_EXP_DECL int (*pcre16_stack_guard)(void);
PCRE_EXP_DECL void *(*pcre32_malloc)(size_t);
PCRE_EXP_DECL void (*pcre32_free)(void *);
PCRE_EXP_DECL void *(*pcre32_stack_malloc)(size_t);
PCRE_EXP_DECL void (*pcre32_stack_free)(void *);
PCRE_EXP_DECL int (*pcre32_callout)(pcre32_callout_block *);
PCRE_EXP_DECL int (*pcre32_stack_guard)(void);
#else /* VPCOMPAT */
PCRE_EXP_DECL void *pcre_malloc(size_t);
PCRE_EXP_DECL void pcre_free(void *);
PCRE_EXP_DECL void *pcre_stack_malloc(size_t);
PCRE_EXP_DECL void pcre_stack_free(void *);
PCRE_EXP_DECL int pcre_callout(pcre_callout_block *);
PCRE_EXP_DECL int pcre_stack_guard(void);
PCRE_EXP_DECL void *pcre16_malloc(size_t);
PCRE_EXP_DECL void pcre16_free(void *);
PCRE_EXP_DECL void *pcre16_stack_malloc(size_t);
PCRE_EXP_DECL void pcre16_stack_free(void *);
PCRE_EXP_DECL int pcre16_callout(pcre16_callout_block *);
PCRE_EXP_DECL int pcre16_stack_guard(void);
PCRE_EXP_DECL void *pcre32_malloc(size_t);
PCRE_EXP_DECL void pcre32_free(void *);
PCRE_EXP_DECL void *pcre32_stack_malloc(size_t);
PCRE_EXP_DECL void pcre32_stack_free(void *);
PCRE_EXP_DECL int pcre32_callout(pcre32_callout_block *);
PCRE_EXP_DECL int pcre32_stack_guard(void);
#endif /* VPCOMPAT */
/* User defined callback which provides a stack just before the match starts. */
typedef pcre_jit_stack *(*pcre_jit_callback)(void *);
typedef pcre16_jit_stack *(*pcre16_jit_callback)(void *);
typedef pcre32_jit_stack *(*pcre32_jit_callback)(void *);
/* Exported PCRE functions */
PCRE_EXP_DECL pcre *pcre_compile(const char *, int, const char **, int *,
const unsigned char *);
PCRE_EXP_DECL pcre16 *pcre16_compile(PCRE_SPTR16, int, const char **, int *,
const unsigned char *);
PCRE_EXP_DECL pcre32 *pcre32_compile(PCRE_SPTR32, int, const char **, int *,
const unsigned char *);
PCRE_EXP_DECL pcre *pcre_compile2(const char *, int, int *, const char **,
int *, const unsigned char *);
PCRE_EXP_DECL pcre16 *pcre16_compile2(PCRE_SPTR16, int, int *, const char **,
int *, const unsigned char *);
PCRE_EXP_DECL pcre32 *pcre32_compile2(PCRE_SPTR32, int, int *, const char **,
int *, const unsigned char *);
PCRE_EXP_DECL int pcre_config(int, void *);
PCRE_EXP_DECL int pcre16_config(int, void *);
PCRE_EXP_DECL int pcre32_config(int, void *);
PCRE_EXP_DECL int pcre_copy_named_substring(const pcre *, const char *,
int *, int, const char *, char *, int);
PCRE_EXP_DECL int pcre16_copy_named_substring(const pcre16 *, PCRE_SPTR16,
int *, int, PCRE_SPTR16, PCRE_UCHAR16 *, int);
PCRE_EXP_DECL int pcre32_copy_named_substring(const pcre32 *, PCRE_SPTR32,
int *, int, PCRE_SPTR32, PCRE_UCHAR32 *, int);
PCRE_EXP_DECL int pcre_copy_substring(const char *, int *, int, int,
char *, int);
PCRE_EXP_DECL int pcre16_copy_substring(PCRE_SPTR16, int *, int, int,
PCRE_UCHAR16 *, int);
PCRE_EXP_DECL int pcre32_copy_substring(PCRE_SPTR32, int *, int, int,
PCRE_UCHAR32 *, int);
PCRE_EXP_DECL int pcre_dfa_exec(const pcre *, const pcre_extra *,
const char *, int, int, int, int *, int , int *, int);
PCRE_EXP_DECL int pcre16_dfa_exec(const pcre16 *, const pcre16_extra *,
PCRE_SPTR16, int, int, int, int *, int , int *, int);
PCRE_EXP_DECL int pcre32_dfa_exec(const pcre32 *, const pcre32_extra *,
PCRE_SPTR32, int, int, int, int *, int , int *, int);
PCRE_EXP_DECL int pcre_exec(const pcre *, const pcre_extra *, PCRE_SPTR,
int, int, int, int *, int);
PCRE_EXP_DECL int pcre16_exec(const pcre16 *, const pcre16_extra *,
PCRE_SPTR16, int, int, int, int *, int);
PCRE_EXP_DECL int pcre32_exec(const pcre32 *, const pcre32_extra *,
PCRE_SPTR32, int, int, int, int *, int);
PCRE_EXP_DECL int pcre_jit_exec(const pcre *, const pcre_extra *,
PCRE_SPTR, int, int, int, int *, int,
pcre_jit_stack *);
PCRE_EXP_DECL int pcre16_jit_exec(const pcre16 *, const pcre16_extra *,
PCRE_SPTR16, int, int, int, int *, int,
pcre16_jit_stack *);
PCRE_EXP_DECL int pcre32_jit_exec(const pcre32 *, const pcre32_extra *,
PCRE_SPTR32, int, int, int, int *, int,
pcre32_jit_stack *);
PCRE_EXP_DECL void pcre_free_substring(const char *);
PCRE_EXP_DECL void pcre16_free_substring(PCRE_SPTR16);
PCRE_EXP_DECL void pcre32_free_substring(PCRE_SPTR32);
PCRE_EXP_DECL void pcre_free_substring_list(const char **);
PCRE_EXP_DECL void pcre16_free_substring_list(PCRE_SPTR16 *);
PCRE_EXP_DECL void pcre32_free_substring_list(PCRE_SPTR32 *);
PCRE_EXP_DECL int pcre_fullinfo(const pcre *, const pcre_extra *, int,
void *);
PCRE_EXP_DECL int pcre16_fullinfo(const pcre16 *, const pcre16_extra *, int,
void *);
PCRE_EXP_DECL int pcre32_fullinfo(const pcre32 *, const pcre32_extra *, int,
void *);
PCRE_EXP_DECL int pcre_get_named_substring(const pcre *, const char *,
int *, int, const char *, const char **);
PCRE_EXP_DECL int pcre16_get_named_substring(const pcre16 *, PCRE_SPTR16,
int *, int, PCRE_SPTR16, PCRE_SPTR16 *);
PCRE_EXP_DECL int pcre32_get_named_substring(const pcre32 *, PCRE_SPTR32,
int *, int, PCRE_SPTR32, PCRE_SPTR32 *);
PCRE_EXP_DECL int pcre_get_stringnumber(const pcre *, const char *);
PCRE_EXP_DECL int pcre16_get_stringnumber(const pcre16 *, PCRE_SPTR16);
PCRE_EXP_DECL int pcre32_get_stringnumber(const pcre32 *, PCRE_SPTR32);
PCRE_EXP_DECL int pcre_get_stringtable_entries(const pcre *, const char *,
char **, char **);
PCRE_EXP_DECL int pcre16_get_stringtable_entries(const pcre16 *, PCRE_SPTR16,
PCRE_UCHAR16 **, PCRE_UCHAR16 **);
PCRE_EXP_DECL int pcre32_get_stringtable_entries(const pcre32 *, PCRE_SPTR32,
PCRE_UCHAR32 **, PCRE_UCHAR32 **);
PCRE_EXP_DECL int pcre_get_substring(const char *, int *, int, int,
const char **);
PCRE_EXP_DECL int pcre16_get_substring(PCRE_SPTR16, int *, int, int,
PCRE_SPTR16 *);
PCRE_EXP_DECL int pcre32_get_substring(PCRE_SPTR32, int *, int, int,
PCRE_SPTR32 *);
PCRE_EXP_DECL int pcre_get_substring_list(const char *, int *, int,
const char ***);
PCRE_EXP_DECL int pcre16_get_substring_list(PCRE_SPTR16, int *, int,
PCRE_SPTR16 **);
PCRE_EXP_DECL int pcre32_get_substring_list(PCRE_SPTR32, int *, int,
PCRE_SPTR32 **);
PCRE_EXP_DECL const unsigned char *pcre_maketables(void);
PCRE_EXP_DECL const unsigned char *pcre16_maketables(void);
PCRE_EXP_DECL const unsigned char *pcre32_maketables(void);
PCRE_EXP_DECL int pcre_refcount(pcre *, int);
PCRE_EXP_DECL int pcre16_refcount(pcre16 *, int);
PCRE_EXP_DECL int pcre32_refcount(pcre32 *, int);
PCRE_EXP_DECL pcre_extra *pcre_study(const pcre *, int, const char **);
PCRE_EXP_DECL pcre16_extra *pcre16_study(const pcre16 *, int, const char **);
PCRE_EXP_DECL pcre32_extra *pcre32_study(const pcre32 *, int, const char **);
PCRE_EXP_DECL void pcre_free_study(pcre_extra *);
PCRE_EXP_DECL void pcre16_free_study(pcre16_extra *);
PCRE_EXP_DECL void pcre32_free_study(pcre32_extra *);
PCRE_EXP_DECL const char *pcre_version(void);
PCRE_EXP_DECL const char *pcre16_version(void);
PCRE_EXP_DECL const char *pcre32_version(void);
/* Utility functions for byte order swaps. */
PCRE_EXP_DECL int pcre_pattern_to_host_byte_order(pcre *, pcre_extra *,
const unsigned char *);
PCRE_EXP_DECL int pcre16_pattern_to_host_byte_order(pcre16 *, pcre16_extra *,
const unsigned char *);
PCRE_EXP_DECL int pcre32_pattern_to_host_byte_order(pcre32 *, pcre32_extra *,
const unsigned char *);
PCRE_EXP_DECL int pcre16_utf16_to_host_byte_order(PCRE_UCHAR16 *,
PCRE_SPTR16, int, int *, int);
PCRE_EXP_DECL int pcre32_utf32_to_host_byte_order(PCRE_UCHAR32 *,
PCRE_SPTR32, int, int *, int);
/* JIT compiler related functions. */
PCRE_EXP_DECL pcre_jit_stack *pcre_jit_stack_alloc(int, int);
PCRE_EXP_DECL pcre16_jit_stack *pcre16_jit_stack_alloc(int, int);
PCRE_EXP_DECL pcre32_jit_stack *pcre32_jit_stack_alloc(int, int);
PCRE_EXP_DECL void pcre_jit_stack_free(pcre_jit_stack *);
PCRE_EXP_DECL void pcre16_jit_stack_free(pcre16_jit_stack *);
PCRE_EXP_DECL void pcre32_jit_stack_free(pcre32_jit_stack *);
PCRE_EXP_DECL void pcre_assign_jit_stack(pcre_extra *,
pcre_jit_callback, void *);
PCRE_EXP_DECL void pcre16_assign_jit_stack(pcre16_extra *,
pcre16_jit_callback, void *);
PCRE_EXP_DECL void pcre32_assign_jit_stack(pcre32_extra *,
pcre32_jit_callback, void *);
PCRE_EXP_DECL void pcre_jit_free_unused_memory(void);
PCRE_EXP_DECL void pcre16_jit_free_unused_memory(void);
PCRE_EXP_DECL void pcre32_jit_free_unused_memory(void);
#ifdef __cplusplus
} /* extern "C" */
#endif
#endif /* End of pcre.h */

File diff suppressed because it is too large Load diff

View file

@ -1,190 +0,0 @@
/*************************************************
* Perl-Compatible Regular Expressions *
*************************************************/
/* PCRE is a library of functions to support regular expressions whose syntax
and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel
Copyright (c) 1997-2012 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the University of Cambridge nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
-----------------------------------------------------------------------------
*/
/* This module contains the external function pcre_config(). */
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
/* Keep the original link size. */
static int real_link_size = LINK_SIZE;
#include "pcre_internal.h"
/*************************************************
* Return info about what features are configured *
*************************************************/
/* This function has an extensible interface so that additional items can be
added compatibly.
Arguments:
what what information is required
where where to put the information
Returns: 0 if data returned, negative on error
*/
#if defined COMPILE_PCRE8
PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
pcre_config(int what, void *where)
#elif defined COMPILE_PCRE16
PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
pcre16_config(int what, void *where)
#elif defined COMPILE_PCRE32
PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
pcre32_config(int what, void *where)
#endif
{
switch (what)
{
case PCRE_CONFIG_UTF8:
#if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
*((int *)where) = 0;
return PCRE_ERROR_BADOPTION;
#else
#if defined SUPPORT_UTF
*((int *)where) = 1;
#else
*((int *)where) = 0;
#endif
break;
#endif
case PCRE_CONFIG_UTF16:
#if defined COMPILE_PCRE8 || defined COMPILE_PCRE32
*((int *)where) = 0;
return PCRE_ERROR_BADOPTION;
#else
#if defined SUPPORT_UTF
*((int *)where) = 1;
#else
*((int *)where) = 0;
#endif
break;
#endif
case PCRE_CONFIG_UTF32:
#if defined COMPILE_PCRE8 || defined COMPILE_PCRE16
*((int *)where) = 0;
return PCRE_ERROR_BADOPTION;
#else
#if defined SUPPORT_UTF
*((int *)where) = 1;
#else
*((int *)where) = 0;
#endif
break;
#endif
case PCRE_CONFIG_UNICODE_PROPERTIES:
#ifdef SUPPORT_UCP
*((int *)where) = 1;
#else
*((int *)where) = 0;
#endif
break;
case PCRE_CONFIG_JIT:
#ifdef SUPPORT_JIT
*((int *)where) = 1;
#else
*((int *)where) = 0;
#endif
break;
case PCRE_CONFIG_JITTARGET:
#ifdef SUPPORT_JIT
*((const char **)where) = PRIV(jit_get_target)();
#else
*((const char **)where) = NULL;
#endif
break;
case PCRE_CONFIG_NEWLINE:
*((int *)where) = NEWLINE;
break;
case PCRE_CONFIG_BSR:
#ifdef BSR_ANYCRLF
*((int *)where) = 1;
#else
*((int *)where) = 0;
#endif
break;
case PCRE_CONFIG_LINK_SIZE:
*((int *)where) = real_link_size;
break;
case PCRE_CONFIG_POSIX_MALLOC_THRESHOLD:
*((int *)where) = POSIX_MALLOC_THRESHOLD;
break;
case PCRE_CONFIG_PARENS_LIMIT:
*((unsigned long int *)where) = PARENS_NEST_LIMIT;
break;
case PCRE_CONFIG_MATCH_LIMIT:
*((unsigned long int *)where) = MATCH_LIMIT;
break;
case PCRE_CONFIG_MATCH_LIMIT_RECURSION:
*((unsigned long int *)where) = MATCH_LIMIT_RECURSION;
break;
case PCRE_CONFIG_STACKRECURSE:
#ifdef NO_RECURSE
*((int *)where) = 0;
#else
*((int *)where) = 1;
#endif
break;
default: return PCRE_ERROR_BADOPTION;
}
return 0;
}
/* End of pcre_config.c */

File diff suppressed because it is too large Load diff

View file

@ -1,245 +0,0 @@
/*************************************************
* Perl-Compatible Regular Expressions *
*************************************************/
/* PCRE is a library of functions to support regular expressions whose syntax
and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel
Copyright (c) 1997-2013 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the University of Cambridge nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
-----------------------------------------------------------------------------
*/
/* This module contains the external function pcre_fullinfo(), which returns
information about a compiled pattern. */
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
#include "pcre_internal.h"
/*************************************************
* Return info about compiled pattern *
*************************************************/
/* This is a newer "info" function which has an extensible interface so
that additional items can be added compatibly.
Arguments:
argument_re points to compiled code
extra_data points extra data, or NULL
what what information is required
where where to put the information
Returns: 0 if data returned, negative on error
*/
#if defined COMPILE_PCRE8
PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
pcre_fullinfo(const pcre *argument_re, const pcre_extra *extra_data,
int what, void *where)
#elif defined COMPILE_PCRE16
PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
pcre16_fullinfo(const pcre16 *argument_re, const pcre16_extra *extra_data,
int what, void *where)
#elif defined COMPILE_PCRE32
PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
pcre32_fullinfo(const pcre32 *argument_re, const pcre32_extra *extra_data,
int what, void *where)
#endif
{
const REAL_PCRE *re = (const REAL_PCRE *)argument_re;
const pcre_study_data *study = NULL;
if (re == NULL || where == NULL) return PCRE_ERROR_NULL;
if (extra_data != NULL && (extra_data->flags & PCRE_EXTRA_STUDY_DATA) != 0)
study = (const pcre_study_data *)extra_data->study_data;
/* Check that the first field in the block is the magic number. If it is not,
return with PCRE_ERROR_BADMAGIC. However, if the magic number is equal to
REVERSED_MAGIC_NUMBER we return with PCRE_ERROR_BADENDIANNESS, which
means that the pattern is likely compiled with different endianness. */
if (re->magic_number != MAGIC_NUMBER)
return re->magic_number == REVERSED_MAGIC_NUMBER?
PCRE_ERROR_BADENDIANNESS:PCRE_ERROR_BADMAGIC;
/* Check that this pattern was compiled in the correct bit mode */
if ((re->flags & PCRE_MODE) == 0) return PCRE_ERROR_BADMODE;
switch (what)
{
case PCRE_INFO_OPTIONS:
*((unsigned long int *)where) = re->options & PUBLIC_COMPILE_OPTIONS;
break;
case PCRE_INFO_SIZE:
*((size_t *)where) = re->size;
break;
case PCRE_INFO_STUDYSIZE:
*((size_t *)where) = (study == NULL)? 0 : study->size;
break;
case PCRE_INFO_JITSIZE:
#ifdef SUPPORT_JIT
*((size_t *)where) =
(extra_data != NULL &&
(extra_data->flags & PCRE_EXTRA_EXECUTABLE_JIT) != 0 &&
extra_data->executable_jit != NULL)?
PRIV(jit_get_size)(extra_data->executable_jit) : 0;
#else
*((size_t *)where) = 0;
#endif
break;
case PCRE_INFO_CAPTURECOUNT:
*((int *)where) = re->top_bracket;
break;
case PCRE_INFO_BACKREFMAX:
*((int *)where) = re->top_backref;
break;
case PCRE_INFO_FIRSTBYTE:
*((int *)where) =
((re->flags & PCRE_FIRSTSET) != 0)? (int)re->first_char :
((re->flags & PCRE_STARTLINE) != 0)? -1 : -2;
break;
case PCRE_INFO_FIRSTCHARACTER:
*((pcre_uint32 *)where) =
(re->flags & PCRE_FIRSTSET) != 0 ? re->first_char : 0;
break;
case PCRE_INFO_FIRSTCHARACTERFLAGS:
*((int *)where) =
((re->flags & PCRE_FIRSTSET) != 0) ? 1 :
((re->flags & PCRE_STARTLINE) != 0) ? 2 : 0;
break;
/* Make sure we pass back the pointer to the bit vector in the external
block, not the internal copy (with flipped integer fields). */
case PCRE_INFO_FIRSTTABLE:
*((const pcre_uint8 **)where) =
(study != NULL && (study->flags & PCRE_STUDY_MAPPED) != 0)?
((const pcre_study_data *)extra_data->study_data)->start_bits : NULL;
break;
case PCRE_INFO_MINLENGTH:
*((int *)where) =
(study != NULL && (study->flags & PCRE_STUDY_MINLEN) != 0)?
(int)(study->minlength) : -1;
break;
case PCRE_INFO_JIT:
*((int *)where) = extra_data != NULL &&
(extra_data->flags & PCRE_EXTRA_EXECUTABLE_JIT) != 0 &&
extra_data->executable_jit != NULL;
break;
case PCRE_INFO_LASTLITERAL:
*((int *)where) =
((re->flags & PCRE_REQCHSET) != 0)? (int)re->req_char : -1;
break;
case PCRE_INFO_REQUIREDCHAR:
*((pcre_uint32 *)where) =
((re->flags & PCRE_REQCHSET) != 0) ? re->req_char : 0;
break;
case PCRE_INFO_REQUIREDCHARFLAGS:
*((int *)where) =
((re->flags & PCRE_REQCHSET) != 0);
break;
case PCRE_INFO_NAMEENTRYSIZE:
*((int *)where) = re->name_entry_size;
break;
case PCRE_INFO_NAMECOUNT:
*((int *)where) = re->name_count;
break;
case PCRE_INFO_NAMETABLE:
*((const pcre_uchar **)where) = (const pcre_uchar *)re + re->name_table_offset;
break;
case PCRE_INFO_DEFAULT_TABLES:
*((const pcre_uint8 **)where) = (const pcre_uint8 *)(PRIV(default_tables));
break;
/* From release 8.00 this will always return TRUE because NOPARTIAL is
no longer ever set (the restrictions have been removed). */
case PCRE_INFO_OKPARTIAL:
*((int *)where) = (re->flags & PCRE_NOPARTIAL) == 0;
break;
case PCRE_INFO_JCHANGED:
*((int *)where) = (re->flags & PCRE_JCHANGED) != 0;
break;
case PCRE_INFO_HASCRORLF:
*((int *)where) = (re->flags & PCRE_HASCRORLF) != 0;
break;
case PCRE_INFO_MAXLOOKBEHIND:
*((int *)where) = re->max_lookbehind;
break;
case PCRE_INFO_MATCHLIMIT:
if ((re->flags & PCRE_MLSET) == 0) return PCRE_ERROR_UNSET;
*((pcre_uint32 *)where) = re->limit_match;
break;
case PCRE_INFO_RECURSIONLIMIT:
if ((re->flags & PCRE_RLSET) == 0) return PCRE_ERROR_UNSET;
*((pcre_uint32 *)where) = re->limit_recursion;
break;
case PCRE_INFO_MATCH_EMPTY:
*((int *)where) = (re->flags & PCRE_MATCH_EMPTY) != 0;
break;
default: return PCRE_ERROR_BADOPTION;
}
return 0;
}
/* End of pcre_fullinfo.c */

View file

@ -1,669 +0,0 @@
/*************************************************
* Perl-Compatible Regular Expressions *
*************************************************/
/* PCRE is a library of functions to support regular expressions whose syntax
and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel
Copyright (c) 1997-2012 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the University of Cambridge nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
-----------------------------------------------------------------------------
*/
/* This module contains some convenience functions for extracting substrings
from the subject string after a regex match has succeeded. The original idea
for these functions came from Scott Wimer. */
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
#include "pcre_internal.h"
/*************************************************
* Find number for named string *
*************************************************/
/* This function is used by the get_first_set() function below, as well
as being generally available. It assumes that names are unique.
Arguments:
code the compiled regex
stringname the name whose number is required
Returns: the number of the named parentheses, or a negative number
(PCRE_ERROR_NOSUBSTRING) if not found
*/
#if defined COMPILE_PCRE8
PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
pcre_get_stringnumber(const pcre *code, const char *stringname)
#elif defined COMPILE_PCRE16
PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
pcre16_get_stringnumber(const pcre16 *code, PCRE_SPTR16 stringname)
#elif defined COMPILE_PCRE32
PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
pcre32_get_stringnumber(const pcre32 *code, PCRE_SPTR32 stringname)
#endif
{
int rc;
int entrysize;
int top, bot;
pcre_uchar *nametable;
#ifdef COMPILE_PCRE8
if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMECOUNT, &top)) != 0)
return rc;
if (top <= 0) return PCRE_ERROR_NOSUBSTRING;
if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMEENTRYSIZE, &entrysize)) != 0)
return rc;
if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMETABLE, &nametable)) != 0)
return rc;
#endif
#ifdef COMPILE_PCRE16
if ((rc = pcre16_fullinfo(code, NULL, PCRE_INFO_NAMECOUNT, &top)) != 0)
return rc;
if (top <= 0) return PCRE_ERROR_NOSUBSTRING;
if ((rc = pcre16_fullinfo(code, NULL, PCRE_INFO_NAMEENTRYSIZE, &entrysize)) != 0)
return rc;
if ((rc = pcre16_fullinfo(code, NULL, PCRE_INFO_NAMETABLE, &nametable)) != 0)
return rc;
#endif
#ifdef COMPILE_PCRE32
if ((rc = pcre32_fullinfo(code, NULL, PCRE_INFO_NAMECOUNT, &top)) != 0)
return rc;
if (top <= 0) return PCRE_ERROR_NOSUBSTRING;
if ((rc = pcre32_fullinfo(code, NULL, PCRE_INFO_NAMEENTRYSIZE, &entrysize)) != 0)
return rc;
if ((rc = pcre32_fullinfo(code, NULL, PCRE_INFO_NAMETABLE, &nametable)) != 0)
return rc;
#endif
bot = 0;
while (top > bot)
{
int mid = (top + bot) / 2;
pcre_uchar *entry = nametable + entrysize*mid;
int c = STRCMP_UC_UC((pcre_uchar *)stringname,
(pcre_uchar *)(entry + IMM2_SIZE));
if (c == 0) return GET2(entry, 0);
if (c > 0) bot = mid + 1; else top = mid;
}
return PCRE_ERROR_NOSUBSTRING;
}
/*************************************************
* Find (multiple) entries for named string *
*************************************************/
/* This is used by the get_first_set() function below, as well as being
generally available. It is used when duplicated names are permitted.
Arguments:
code the compiled regex
stringname the name whose entries required
firstptr where to put the pointer to the first entry
lastptr where to put the pointer to the last entry
Returns: the length of each entry, or a negative number
(PCRE_ERROR_NOSUBSTRING) if not found
*/
#if defined COMPILE_PCRE8
PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
pcre_get_stringtable_entries(const pcre *code, const char *stringname,
char **firstptr, char **lastptr)
#elif defined COMPILE_PCRE16
PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
pcre16_get_stringtable_entries(const pcre16 *code, PCRE_SPTR16 stringname,
PCRE_UCHAR16 **firstptr, PCRE_UCHAR16 **lastptr)
#elif defined COMPILE_PCRE32
PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
pcre32_get_stringtable_entries(const pcre32 *code, PCRE_SPTR32 stringname,
PCRE_UCHAR32 **firstptr, PCRE_UCHAR32 **lastptr)
#endif
{
int rc;
int entrysize;
int top, bot;
pcre_uchar *nametable, *lastentry;
#ifdef COMPILE_PCRE8
if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMECOUNT, &top)) != 0)
return rc;
if (top <= 0) return PCRE_ERROR_NOSUBSTRING;
if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMEENTRYSIZE, &entrysize)) != 0)
return rc;
if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMETABLE, &nametable)) != 0)
return rc;
#endif
#ifdef COMPILE_PCRE16
if ((rc = pcre16_fullinfo(code, NULL, PCRE_INFO_NAMECOUNT, &top)) != 0)
return rc;
if (top <= 0) return PCRE_ERROR_NOSUBSTRING;
if ((rc = pcre16_fullinfo(code, NULL, PCRE_INFO_NAMEENTRYSIZE, &entrysize)) != 0)
return rc;
if ((rc = pcre16_fullinfo(code, NULL, PCRE_INFO_NAMETABLE, &nametable)) != 0)
return rc;
#endif
#ifdef COMPILE_PCRE32
if ((rc = pcre32_fullinfo(code, NULL, PCRE_INFO_NAMECOUNT, &top)) != 0)
return rc;
if (top <= 0) return PCRE_ERROR_NOSUBSTRING;
if ((rc = pcre32_fullinfo(code, NULL, PCRE_INFO_NAMEENTRYSIZE, &entrysize)) != 0)
return rc;
if ((rc = pcre32_fullinfo(code, NULL, PCRE_INFO_NAMETABLE, &nametable)) != 0)
return rc;
#endif
lastentry = nametable + entrysize * (top - 1);
bot = 0;
while (top > bot)
{
int mid = (top + bot) / 2;
pcre_uchar *entry = nametable + entrysize*mid;
int c = STRCMP_UC_UC((pcre_uchar *)stringname,
(pcre_uchar *)(entry + IMM2_SIZE));
if (c == 0)
{
pcre_uchar *first = entry;
pcre_uchar *last = entry;
while (first > nametable)
{
if (STRCMP_UC_UC((pcre_uchar *)stringname,
(pcre_uchar *)(first - entrysize + IMM2_SIZE)) != 0) break;
first -= entrysize;
}
while (last < lastentry)
{
if (STRCMP_UC_UC((pcre_uchar *)stringname,
(pcre_uchar *)(last + entrysize + IMM2_SIZE)) != 0) break;
last += entrysize;
}
#if defined COMPILE_PCRE8
*firstptr = (char *)first;
*lastptr = (char *)last;
#elif defined COMPILE_PCRE16
*firstptr = (PCRE_UCHAR16 *)first;
*lastptr = (PCRE_UCHAR16 *)last;
#elif defined COMPILE_PCRE32
*firstptr = (PCRE_UCHAR32 *)first;
*lastptr = (PCRE_UCHAR32 *)last;
#endif
return entrysize;
}
if (c > 0) bot = mid + 1; else top = mid;
}
return PCRE_ERROR_NOSUBSTRING;
}
/*************************************************
* Find first set of multiple named strings *
*************************************************/
/* This function allows for duplicate names in the table of named substrings.
It returns the number of the first one that was set in a pattern match.
Arguments:
code the compiled regex
stringname the name of the capturing substring
ovector the vector of matched substrings
stringcount number of captured substrings
Returns: the number of the first that is set,
or the number of the last one if none are set,
or a negative number on error
*/
#if defined COMPILE_PCRE8
static int
get_first_set(const pcre *code, const char *stringname, int *ovector,
int stringcount)
#elif defined COMPILE_PCRE16
static int
get_first_set(const pcre16 *code, PCRE_SPTR16 stringname, int *ovector,
int stringcount)
#elif defined COMPILE_PCRE32
static int
get_first_set(const pcre32 *code, PCRE_SPTR32 stringname, int *ovector,
int stringcount)
#endif
{
const REAL_PCRE *re = (const REAL_PCRE *)code;
int entrysize;
pcre_uchar *entry;
#if defined COMPILE_PCRE8
char *first, *last;
#elif defined COMPILE_PCRE16
PCRE_UCHAR16 *first, *last;
#elif defined COMPILE_PCRE32
PCRE_UCHAR32 *first, *last;
#endif
#if defined COMPILE_PCRE8
if ((re->options & PCRE_DUPNAMES) == 0 && (re->flags & PCRE_JCHANGED) == 0)
return pcre_get_stringnumber(code, stringname);
entrysize = pcre_get_stringtable_entries(code, stringname, &first, &last);
#elif defined COMPILE_PCRE16
if ((re->options & PCRE_DUPNAMES) == 0 && (re->flags & PCRE_JCHANGED) == 0)
return pcre16_get_stringnumber(code, stringname);
entrysize = pcre16_get_stringtable_entries(code, stringname, &first, &last);
#elif defined COMPILE_PCRE32
if ((re->options & PCRE_DUPNAMES) == 0 && (re->flags & PCRE_JCHANGED) == 0)
return pcre32_get_stringnumber(code, stringname);
entrysize = pcre32_get_stringtable_entries(code, stringname, &first, &last);
#endif
if (entrysize <= 0) return entrysize;
for (entry = (pcre_uchar *)first; entry <= (pcre_uchar *)last; entry += entrysize)
{
int n = GET2(entry, 0);
if (n < stringcount && ovector[n*2] >= 0) return n;
}
return GET2(entry, 0);
}
/*************************************************
* Copy captured string to given buffer *
*************************************************/
/* This function copies a single captured substring into a given buffer.
Note that we use memcpy() rather than strncpy() in case there are binary zeros
in the string.
Arguments:
subject the subject string that was matched
ovector pointer to the offsets table
stringcount the number of substrings that were captured
(i.e. the yield of the pcre_exec call, unless
that was zero, in which case it should be 1/3
of the offset table size)
stringnumber the number of the required substring
buffer where to put the substring
size the size of the buffer
Returns: if successful:
the length of the copied string, not including the zero
that is put on the end; can be zero
if not successful:
PCRE_ERROR_NOMEMORY (-6) buffer too small
PCRE_ERROR_NOSUBSTRING (-7) no such captured substring
*/
#if defined COMPILE_PCRE8
PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
pcre_copy_substring(const char *subject, int *ovector, int stringcount,
int stringnumber, char *buffer, int size)
#elif defined COMPILE_PCRE16
PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
pcre16_copy_substring(PCRE_SPTR16 subject, int *ovector, int stringcount,
int stringnumber, PCRE_UCHAR16 *buffer, int size)
#elif defined COMPILE_PCRE32
PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
pcre32_copy_substring(PCRE_SPTR32 subject, int *ovector, int stringcount,
int stringnumber, PCRE_UCHAR32 *buffer, int size)
#endif
{
int yield;
if (stringnumber < 0 || stringnumber >= stringcount)
return PCRE_ERROR_NOSUBSTRING;
stringnumber *= 2;
yield = ovector[stringnumber+1] - ovector[stringnumber];
if (size < yield + 1) return PCRE_ERROR_NOMEMORY;
memcpy(buffer, subject + ovector[stringnumber], IN_UCHARS(yield));
buffer[yield] = 0;
return yield;
}
/*************************************************
* Copy named captured string to given buffer *
*************************************************/
/* This function copies a single captured substring into a given buffer,
identifying it by name. If the regex permits duplicate names, the first
substring that is set is chosen.
Arguments:
code the compiled regex
subject the subject string that was matched
ovector pointer to the offsets table
stringcount the number of substrings that were captured
(i.e. the yield of the pcre_exec call, unless
that was zero, in which case it should be 1/3
of the offset table size)
stringname the name of the required substring
buffer where to put the substring
size the size of the buffer
Returns: if successful:
the length of the copied string, not including the zero
that is put on the end; can be zero
if not successful:
PCRE_ERROR_NOMEMORY (-6) buffer too small
PCRE_ERROR_NOSUBSTRING (-7) no such captured substring
*/
#if defined COMPILE_PCRE8
PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
pcre_copy_named_substring(const pcre *code, const char *subject,
int *ovector, int stringcount, const char *stringname,
char *buffer, int size)
#elif defined COMPILE_PCRE16
PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
pcre16_copy_named_substring(const pcre16 *code, PCRE_SPTR16 subject,
int *ovector, int stringcount, PCRE_SPTR16 stringname,
PCRE_UCHAR16 *buffer, int size)
#elif defined COMPILE_PCRE32
PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
pcre32_copy_named_substring(const pcre32 *code, PCRE_SPTR32 subject,
int *ovector, int stringcount, PCRE_SPTR32 stringname,
PCRE_UCHAR32 *buffer, int size)
#endif
{
int n = get_first_set(code, stringname, ovector, stringcount);
if (n <= 0) return n;
#if defined COMPILE_PCRE8
return pcre_copy_substring(subject, ovector, stringcount, n, buffer, size);
#elif defined COMPILE_PCRE16
return pcre16_copy_substring(subject, ovector, stringcount, n, buffer, size);
#elif defined COMPILE_PCRE32
return pcre32_copy_substring(subject, ovector, stringcount, n, buffer, size);
#endif
}
/*************************************************
* Copy all captured strings to new store *
*************************************************/
/* This function gets one chunk of store and builds a list of pointers and all
of the captured substrings in it. A NULL pointer is put on the end of the list.
Arguments:
subject the subject string that was matched
ovector pointer to the offsets table
stringcount the number of substrings that were captured
(i.e. the yield of the pcre_exec call, unless
that was zero, in which case it should be 1/3
of the offset table size)
listptr set to point to the list of pointers
Returns: if successful: 0
if not successful:
PCRE_ERROR_NOMEMORY (-6) failed to get store
*/
#if defined COMPILE_PCRE8
PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
pcre_get_substring_list(const char *subject, int *ovector, int stringcount,
const char ***listptr)
#elif defined COMPILE_PCRE16
PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
pcre16_get_substring_list(PCRE_SPTR16 subject, int *ovector, int stringcount,
PCRE_SPTR16 **listptr)
#elif defined COMPILE_PCRE32
PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
pcre32_get_substring_list(PCRE_SPTR32 subject, int *ovector, int stringcount,
PCRE_SPTR32 **listptr)
#endif
{
int i;
int size = sizeof(pcre_uchar *);
int double_count = stringcount * 2;
pcre_uchar **stringlist;
pcre_uchar *p;
for (i = 0; i < double_count; i += 2)
{
size += sizeof(pcre_uchar *) + IN_UCHARS(1);
if (ovector[i+1] > ovector[i]) size += IN_UCHARS(ovector[i+1] - ovector[i]);
}
stringlist = (pcre_uchar **)(PUBL(malloc))(size);
if (stringlist == NULL) return PCRE_ERROR_NOMEMORY;
#if defined COMPILE_PCRE8
*listptr = (const char **)stringlist;
#elif defined COMPILE_PCRE16
*listptr = (PCRE_SPTR16 *)stringlist;
#elif defined COMPILE_PCRE32
*listptr = (PCRE_SPTR32 *)stringlist;
#endif
p = (pcre_uchar *)(stringlist + stringcount + 1);
for (i = 0; i < double_count; i += 2)
{
int len = (ovector[i+1] > ovector[i])? (ovector[i+1] - ovector[i]) : 0;
memcpy(p, subject + ovector[i], IN_UCHARS(len));
*stringlist++ = p;
p += len;
*p++ = 0;
}
*stringlist = NULL;
return 0;
}
/*************************************************
* Free store obtained by get_substring_list *
*************************************************/
/* This function exists for the benefit of people calling PCRE from non-C
programs that can call its functions, but not free() or (PUBL(free))()
directly.
Argument: the result of a previous pcre_get_substring_list()
Returns: nothing
*/
#if defined COMPILE_PCRE8
PCRE_EXP_DEFN void PCRE_CALL_CONVENTION
pcre_free_substring_list(const char **pointer)
#elif defined COMPILE_PCRE16
PCRE_EXP_DEFN void PCRE_CALL_CONVENTION
pcre16_free_substring_list(PCRE_SPTR16 *pointer)
#elif defined COMPILE_PCRE32
PCRE_EXP_DEFN void PCRE_CALL_CONVENTION
pcre32_free_substring_list(PCRE_SPTR32 *pointer)
#endif
{
(PUBL(free))((void *)pointer);
}
/*************************************************
* Copy captured string to new store *
*************************************************/
/* This function copies a single captured substring into a piece of new
store
Arguments:
subject the subject string that was matched
ovector pointer to the offsets table
stringcount the number of substrings that were captured
(i.e. the yield of the pcre_exec call, unless
that was zero, in which case it should be 1/3
of the offset table size)
stringnumber the number of the required substring
stringptr where to put a pointer to the substring
Returns: if successful:
the length of the string, not including the zero that
is put on the end; can be zero
if not successful:
PCRE_ERROR_NOMEMORY (-6) failed to get store
PCRE_ERROR_NOSUBSTRING (-7) substring not present
*/
#if defined COMPILE_PCRE8
PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
pcre_get_substring(const char *subject, int *ovector, int stringcount,
int stringnumber, const char **stringptr)
#elif defined COMPILE_PCRE16
PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
pcre16_get_substring(PCRE_SPTR16 subject, int *ovector, int stringcount,
int stringnumber, PCRE_SPTR16 *stringptr)
#elif defined COMPILE_PCRE32
PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
pcre32_get_substring(PCRE_SPTR32 subject, int *ovector, int stringcount,
int stringnumber, PCRE_SPTR32 *stringptr)
#endif
{
int yield;
pcre_uchar *substring;
if (stringnumber < 0 || stringnumber >= stringcount)
return PCRE_ERROR_NOSUBSTRING;
stringnumber *= 2;
yield = ovector[stringnumber+1] - ovector[stringnumber];
substring = (pcre_uchar *)(PUBL(malloc))(IN_UCHARS(yield + 1));
if (substring == NULL) return PCRE_ERROR_NOMEMORY;
memcpy(substring, subject + ovector[stringnumber], IN_UCHARS(yield));
substring[yield] = 0;
#if defined COMPILE_PCRE8
*stringptr = (const char *)substring;
#elif defined COMPILE_PCRE16
*stringptr = (PCRE_SPTR16)substring;
#elif defined COMPILE_PCRE32
*stringptr = (PCRE_SPTR32)substring;
#endif
return yield;
}
/*************************************************
* Copy named captured string to new store *
*************************************************/
/* This function copies a single captured substring, identified by name, into
new store. If the regex permits duplicate names, the first substring that is
set is chosen.
Arguments:
code the compiled regex
subject the subject string that was matched
ovector pointer to the offsets table
stringcount the number of substrings that were captured
(i.e. the yield of the pcre_exec call, unless
that was zero, in which case it should be 1/3
of the offset table size)
stringname the name of the required substring
stringptr where to put the pointer
Returns: if successful:
the length of the copied string, not including the zero
that is put on the end; can be zero
if not successful:
PCRE_ERROR_NOMEMORY (-6) couldn't get memory
PCRE_ERROR_NOSUBSTRING (-7) no such captured substring
*/
#if defined COMPILE_PCRE8
PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
pcre_get_named_substring(const pcre *code, const char *subject,
int *ovector, int stringcount, const char *stringname,
const char **stringptr)
#elif defined COMPILE_PCRE16
PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
pcre16_get_named_substring(const pcre16 *code, PCRE_SPTR16 subject,
int *ovector, int stringcount, PCRE_SPTR16 stringname,
PCRE_SPTR16 *stringptr)
#elif defined COMPILE_PCRE32
PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
pcre32_get_named_substring(const pcre32 *code, PCRE_SPTR32 subject,
int *ovector, int stringcount, PCRE_SPTR32 stringname,
PCRE_SPTR32 *stringptr)
#endif
{
int n = get_first_set(code, stringname, ovector, stringcount);
if (n <= 0) return n;
#if defined COMPILE_PCRE8
return pcre_get_substring(subject, ovector, stringcount, n, stringptr);
#elif defined COMPILE_PCRE16
return pcre16_get_substring(subject, ovector, stringcount, n, stringptr);
#elif defined COMPILE_PCRE32
return pcre32_get_substring(subject, ovector, stringcount, n, stringptr);
#endif
}
/*************************************************
* Free store obtained by get_substring *
*************************************************/
/* This function exists for the benefit of people calling PCRE from non-C
programs that can call its functions, but not free() or (PUBL(free))()
directly.
Argument: the result of a previous pcre_get_substring()
Returns: nothing
*/
#if defined COMPILE_PCRE8
PCRE_EXP_DEFN void PCRE_CALL_CONVENTION
pcre_free_substring(const char *pointer)
#elif defined COMPILE_PCRE16
PCRE_EXP_DEFN void PCRE_CALL_CONVENTION
pcre16_free_substring(PCRE_SPTR16 pointer)
#elif defined COMPILE_PCRE32
PCRE_EXP_DEFN void PCRE_CALL_CONVENTION
pcre32_free_substring(PCRE_SPTR32 pointer)
#endif
{
(PUBL(free))((void *)pointer);
}
/* End of pcre_get.c */

View file

@ -1,86 +0,0 @@
/*************************************************
* Perl-Compatible Regular Expressions *
*************************************************/
/* PCRE is a library of functions to support regular expressions whose syntax
and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel
Copyright (c) 1997-2014 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the University of Cambridge nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
-----------------------------------------------------------------------------
*/
/* This module contains global variables that are exported by the PCRE library.
PCRE is thread-clean and doesn't use any global variables in the normal sense.
However, it calls memory allocation and freeing functions via the four
indirections below, and it can optionally do callouts, using the fifth
indirection. These values can be changed by the caller, but are shared between
all threads.
For MS Visual Studio and Symbian OS, there are problems in initializing these
variables to non-local functions. In these cases, therefore, an indirection via
a local function is used.
Also, when compiling for Virtual Pascal, things are done differently, and
global variables are not used. */
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
#include "pcre_internal.h"
#if defined _MSC_VER || defined __SYMBIAN32__
static void* LocalPcreMalloc(size_t aSize)
{
return malloc(aSize);
}
static void LocalPcreFree(void* aPtr)
{
free(aPtr);
}
PCRE_EXP_DATA_DEFN void *(*PUBL(malloc))(size_t) = LocalPcreMalloc;
PCRE_EXP_DATA_DEFN void (*PUBL(free))(void *) = LocalPcreFree;
PCRE_EXP_DATA_DEFN void *(*PUBL(stack_malloc))(size_t) = LocalPcreMalloc;
PCRE_EXP_DATA_DEFN void (*PUBL(stack_free))(void *) = LocalPcreFree;
PCRE_EXP_DATA_DEFN int (*PUBL(callout))(PUBL(callout_block) *) = NULL;
PCRE_EXP_DATA_DEFN int (*PUBL(stack_guard))(void) = NULL;
#elif !defined VPCOMPAT
PCRE_EXP_DATA_DEFN void *(*PUBL(malloc))(size_t) = malloc;
PCRE_EXP_DATA_DEFN void (*PUBL(free))(void *) = free;
PCRE_EXP_DATA_DEFN void *(*PUBL(stack_malloc))(size_t) = malloc;
PCRE_EXP_DATA_DEFN void (*PUBL(stack_free))(void *) = free;
PCRE_EXP_DATA_DEFN int (*PUBL(callout))(PUBL(callout_block) *) = NULL;
PCRE_EXP_DATA_DEFN int (*PUBL(stack_guard))(void) = NULL;
#endif
/* End of pcre_globals.c */

View file

@ -1,92 +0,0 @@
/*************************************************
* Perl-Compatible Regular Expressions *
*************************************************/
/* PCRE is a library of functions to support regular expressions whose syntax
and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel
Copyright (c) 1997-2012 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the University of Cambridge nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
-----------------------------------------------------------------------------
*/
/* This module contains the external function pcre_refcount(), which is an
auxiliary function that can be used to maintain a reference count in a compiled
pattern data block. This might be helpful in applications where the block is
shared by different users. */
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
#include "pcre_internal.h"
/*************************************************
* Maintain reference count *
*************************************************/
/* The reference count is a 16-bit field, initialized to zero. It is not
possible to transfer a non-zero count from one host to a different host that
has a different byte order - though I can't see why anyone in their right mind
would ever want to do that!
Arguments:
argument_re points to compiled code
adjust value to add to the count
Returns: the (possibly updated) count value (a non-negative number), or
a negative error number
*/
#if defined COMPILE_PCRE8
PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
pcre_refcount(pcre *argument_re, int adjust)
#elif defined COMPILE_PCRE16
PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
pcre16_refcount(pcre16 *argument_re, int adjust)
#elif defined COMPILE_PCRE32
PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
pcre32_refcount(pcre32 *argument_re, int adjust)
#endif
{
REAL_PCRE *re = (REAL_PCRE *)argument_re;
if (re == NULL) return PCRE_ERROR_NULL;
if (re->magic_number != MAGIC_NUMBER) return PCRE_ERROR_BADMAGIC;
if ((re->flags & PCRE_MODE) == 0) return PCRE_ERROR_BADMODE;
re->ref_count = (-adjust > re->ref_count)? 0 :
(adjust + re->ref_count > 65535)? 65535 :
re->ref_count + adjust;
return re->ref_count;
}
/* End of pcre_refcount.c */

File diff suppressed because it is too large Load diff

View file

@ -1,98 +0,0 @@
/*************************************************
* Perl-Compatible Regular Expressions *
*************************************************/
/* PCRE is a library of functions to support regular expressions whose syntax
and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel
Copyright (c) 1997-2012 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the University of Cambridge nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
-----------------------------------------------------------------------------
*/
/* This module contains the external function pcre_version(), which returns a
string that identifies the PCRE version that is in use. */
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
#include "pcre_internal.h"
/*************************************************
* Return version string *
*************************************************/
/* These macros are the standard way of turning unquoted text into C strings.
They allow macros like PCRE_MAJOR to be defined without quotes, which is
convenient for user programs that want to test its value. */
#define STRING(a) # a
#define XSTRING(s) STRING(s)
/* A problem turned up with PCRE_PRERELEASE, which is defined empty for
production releases. Originally, it was used naively in this code:
return XSTRING(PCRE_MAJOR)
"." XSTRING(PCRE_MINOR)
XSTRING(PCRE_PRERELEASE)
" " XSTRING(PCRE_DATE);
However, when PCRE_PRERELEASE is empty, this leads to an attempted expansion of
STRING(). The C standard states: "If (before argument substitution) any
argument consists of no preprocessing tokens, the behavior is undefined." It
turns out the gcc treats this case as a single empty string - which is what we
really want - but Visual C grumbles about the lack of an argument for the
macro. Unfortunately, both are within their rights. To cope with both ways of
handling this, I had resort to some messy hackery that does a test at run time.
I could find no way of detecting that a macro is defined as an empty string at
pre-processor time. This hack uses a standard trick for avoiding calling
the STRING macro with an empty argument when doing the test. */
#if defined COMPILE_PCRE8
PCRE_EXP_DEFN const char * PCRE_CALL_CONVENTION
pcre_version(void)
#elif defined COMPILE_PCRE16
PCRE_EXP_DEFN const char * PCRE_CALL_CONVENTION
pcre16_version(void)
#elif defined COMPILE_PCRE32
PCRE_EXP_DEFN const char * PCRE_CALL_CONVENTION
pcre32_version(void)
#endif
{
return (XSTRING(Z PCRE_PRERELEASE)[1] == 0)?
XSTRING(PCRE_MAJOR.PCRE_MINOR PCRE_DATE) :
XSTRING(PCRE_MAJOR.PCRE_MINOR) XSTRING(PCRE_PRERELEASE PCRE_DATE);
}
/* End of pcre_version.c */

View file

@ -1,406 +0,0 @@
/*************************************************
* PCRE DEMONSTRATION PROGRAM *
*************************************************/
/* This is a demonstration program to illustrate the most straightforward ways
of calling the PCRE regular expression library from a C program. See the
pcresample documentation for a short discussion ("man pcresample" if you have
the PCRE man pages installed).
In Unix-like environments, if PCRE is installed in your standard system
libraries, you should be able to compile this program using this command:
gcc -Wall pcredemo.c -lpcre -o pcredemo
If PCRE is not installed in a standard place, it is likely to be installed with
support for the pkg-config mechanism. If you have pkg-config, you can compile
this program using this command:
gcc -Wall pcredemo.c `pkg-config --cflags --libs libpcre` -o pcredemo
If you do not have pkg-config, you may have to use this:
gcc -Wall pcredemo.c -I/usr/local/include -L/usr/local/lib \
-R/usr/local/lib -lpcre -o pcredemo
Replace "/usr/local/include" and "/usr/local/lib" with wherever the include and
library files for PCRE are installed on your system. Only some operating
systems (e.g. Solaris) use the -R option.
Building under Windows:
If you want to statically link this program against a non-dll .a file, you must
define PCRE_STATIC before including pcre.h, otherwise the pcre_malloc() and
pcre_free() exported functions will be declared __declspec(dllimport), with
unwanted results. So in this environment, uncomment the following line. */
/* #define PCRE_STATIC */
#include <stdio.h>
#include <string.h>
#include <pcre.h>
#define OVECCOUNT 30 /* should be a multiple of 3 */
int main(int argc, char **argv)
{
pcre *re;
const char *error;
char *pattern;
char *subject;
unsigned char *name_table;
unsigned int option_bits;
int erroffset;
int find_all;
int crlf_is_newline;
int namecount;
int name_entry_size;
int ovector[OVECCOUNT];
int subject_length;
int rc, i;
int utf8;
/**************************************************************************
* First, sort out the command line. There is only one possible option at *
* the moment, "-g" to request repeated matching to find all occurrences, *
* like Perl's /g option. We set the variable find_all to a non-zero value *
* if the -g option is present. Apart from that, there must be exactly two *
* arguments. *
**************************************************************************/
find_all = 0;
for (i = 1; i < argc; i++)
{
if (strcmp(argv[i], "-g") == 0) find_all = 1;
else break;
}
/* After the options, we require exactly two arguments, which are the pattern,
and the subject string. */
if (argc - i != 2)
{
printf("Two arguments required: a regex and a subject string\n");
return 1;
}
pattern = argv[i];
subject = argv[i+1];
subject_length = (int)strlen(subject);
/*************************************************************************
* Now we are going to compile the regular expression pattern, and handle *
* and errors that are detected. *
*************************************************************************/
re = pcre_compile(
pattern, /* the pattern */
0, /* default options */
&error, /* for error message */
&erroffset, /* for error offset */
NULL); /* use default character tables */
/* Compilation failed: print the error message and exit */
if (re == NULL)
{
printf("PCRE compilation failed at offset %d: %s\n", erroffset, error);
return 1;
}
/*************************************************************************
* If the compilation succeeded, we call PCRE again, in order to do a *
* pattern match against the subject string. This does just ONE match. If *
* further matching is needed, it will be done below. *
*************************************************************************/
rc = pcre_exec(
re, /* the compiled pattern */
NULL, /* no extra data - we didn't study the pattern */
subject, /* the subject string */
subject_length, /* the length of the subject */
0, /* start at offset 0 in the subject */
0, /* default options */
ovector, /* output vector for substring information */
OVECCOUNT); /* number of elements in the output vector */
/* Matching failed: handle error cases */
if (rc < 0)
{
switch(rc)
{
case PCRE_ERROR_NOMATCH: printf("No match\n"); break;
/*
Handle other special cases if you like
*/
default: printf("Matching error %d\n", rc); break;
}
pcre_free(re); /* Release memory used for the compiled pattern */
return 1;
}
/* Match succeded */
printf("\nMatch succeeded at offset %d\n", ovector[0]);
/*************************************************************************
* We have found the first match within the subject string. If the output *
* vector wasn't big enough, say so. Then output any substrings that were *
* captured. *
*************************************************************************/
/* The output vector wasn't big enough */
if (rc == 0)
{
rc = OVECCOUNT/3;
printf("ovector only has room for %d captured substrings\n", rc - 1);
}
/* Show substrings stored in the output vector by number. Obviously, in a real
application you might want to do things other than print them. */
for (i = 0; i < rc; i++)
{
char *substring_start = subject + ovector[2*i];
int substring_length = ovector[2*i+1] - ovector[2*i];
printf("%2d: %.*s\n", i, substring_length, substring_start);
}
/**************************************************************************
* That concludes the basic part of this demonstration program. We have *
* compiled a pattern, and performed a single match. The code that follows *
* shows first how to access named substrings, and then how to code for *
* repeated matches on the same subject. *
**************************************************************************/
/* See if there are any named substrings, and if so, show them by name. First
we have to extract the count of named parentheses from the pattern. */
(void)pcre_fullinfo(
re, /* the compiled pattern */
NULL, /* no extra data - we didn't study the pattern */
PCRE_INFO_NAMECOUNT, /* number of named substrings */
&namecount); /* where to put the answer */
if (namecount <= 0) printf("No named substrings\n"); else
{
unsigned char *tabptr;
printf("Named substrings\n");
/* Before we can access the substrings, we must extract the table for
translating names to numbers, and the size of each entry in the table. */
(void)pcre_fullinfo(
re, /* the compiled pattern */
NULL, /* no extra data - we didn't study the pattern */
PCRE_INFO_NAMETABLE, /* address of the table */
&name_table); /* where to put the answer */
(void)pcre_fullinfo(
re, /* the compiled pattern */
NULL, /* no extra data - we didn't study the pattern */
PCRE_INFO_NAMEENTRYSIZE, /* size of each entry in the table */
&name_entry_size); /* where to put the answer */
/* Now we can scan the table and, for each entry, print the number, the name,
and the substring itself. */
tabptr = name_table;
for (i = 0; i < namecount; i++)
{
int n = (tabptr[0] << 8) | tabptr[1];
printf("(%d) %*s: %.*s\n", n, name_entry_size - 3, tabptr + 2,
ovector[2*n+1] - ovector[2*n], subject + ovector[2*n]);
tabptr += name_entry_size;
}
}
/*************************************************************************
* If the "-g" option was given on the command line, we want to continue *
* to search for additional matches in the subject string, in a similar *
* way to the /g option in Perl. This turns out to be trickier than you *
* might think because of the possibility of matching an empty string. *
* What happens is as follows: *
* *
* If the previous match was NOT for an empty string, we can just start *
* the next match at the end of the previous one. *
* *
* If the previous match WAS for an empty string, we can't do that, as it *
* would lead to an infinite loop. Instead, a special call of pcre_exec() *
* is made with the PCRE_NOTEMPTY_ATSTART and PCRE_ANCHORED flags set. *
* The first of these tells PCRE that an empty string at the start of the *
* subject is not a valid match; other possibilities must be tried. The *
* second flag restricts PCRE to one match attempt at the initial string *
* position. If this match succeeds, an alternative to the empty string *
* match has been found, and we can print it and proceed round the loop, *
* advancing by the length of whatever was found. If this match does not *
* succeed, we still stay in the loop, advancing by just one character. *
* In UTF-8 mode, which can be set by (*UTF8) in the pattern, this may be *
* more than one byte. *
* *
* However, there is a complication concerned with newlines. When the *
* newline convention is such that CRLF is a valid newline, we must *
* advance by two characters rather than one. The newline convention can *
* be set in the regex by (*CR), etc.; if not, we must find the default. *
*************************************************************************/
if (!find_all) /* Check for -g */
{
pcre_free(re); /* Release the memory used for the compiled pattern */
return 0; /* Finish unless -g was given */
}
/* Before running the loop, check for UTF-8 and whether CRLF is a valid newline
sequence. First, find the options with which the regex was compiled; extract
the UTF-8 state, and mask off all but the newline options. */
(void)pcre_fullinfo(re, NULL, PCRE_INFO_OPTIONS, &option_bits);
utf8 = option_bits & PCRE_UTF8;
option_bits &= PCRE_NEWLINE_CR|PCRE_NEWLINE_LF|PCRE_NEWLINE_CRLF|
PCRE_NEWLINE_ANY|PCRE_NEWLINE_ANYCRLF;
/* If no newline options were set, find the default newline convention from the
build configuration. */
if (option_bits == 0)
{
int d;
(void)pcre_config(PCRE_CONFIG_NEWLINE, &d);
/* Note that these values are always the ASCII ones, even in
EBCDIC environments. CR = 13, NL = 10. */
option_bits = (d == 13)? PCRE_NEWLINE_CR :
(d == 10)? PCRE_NEWLINE_LF :
(d == (13<<8 | 10))? PCRE_NEWLINE_CRLF :
(d == -2)? PCRE_NEWLINE_ANYCRLF :
(d == -1)? PCRE_NEWLINE_ANY : 0;
}
/* See if CRLF is a valid newline sequence. */
crlf_is_newline =
option_bits == PCRE_NEWLINE_ANY ||
option_bits == PCRE_NEWLINE_CRLF ||
option_bits == PCRE_NEWLINE_ANYCRLF;
/* Loop for second and subsequent matches */
for (;;)
{
int options = 0; /* Normally no options */
int start_offset = ovector[1]; /* Start at end of previous match */
/* If the previous match was for an empty string, we are finished if we are
at the end of the subject. Otherwise, arrange to run another match at the
same point to see if a non-empty match can be found. */
if (ovector[0] == ovector[1])
{
if (ovector[0] == subject_length) break;
options = PCRE_NOTEMPTY_ATSTART | PCRE_ANCHORED;
}
/* Run the next matching operation */
rc = pcre_exec(
re, /* the compiled pattern */
NULL, /* no extra data - we didn't study the pattern */
subject, /* the subject string */
subject_length, /* the length of the subject */
start_offset, /* starting offset in the subject */
options, /* options */
ovector, /* output vector for substring information */
OVECCOUNT); /* number of elements in the output vector */
/* This time, a result of NOMATCH isn't an error. If the value in "options"
is zero, it just means we have found all possible matches, so the loop ends.
Otherwise, it means we have failed to find a non-empty-string match at a
point where there was a previous empty-string match. In this case, we do what
Perl does: advance the matching position by one character, and continue. We
do this by setting the "end of previous match" offset, because that is picked
up at the top of the loop as the point at which to start again.
There are two complications: (a) When CRLF is a valid newline sequence, and
the current position is just before it, advance by an extra byte. (b)
Otherwise we must ensure that we skip an entire UTF-8 character if we are in
UTF-8 mode. */
if (rc == PCRE_ERROR_NOMATCH)
{
if (options == 0) break; /* All matches found */
ovector[1] = start_offset + 1; /* Advance one byte */
if (crlf_is_newline && /* If CRLF is newline & */
start_offset < subject_length - 1 && /* we are at CRLF, */
subject[start_offset] == '\r' &&
subject[start_offset + 1] == '\n')
ovector[1] += 1; /* Advance by one more. */
else if (utf8) /* Otherwise, ensure we */
{ /* advance a whole UTF-8 */
while (ovector[1] < subject_length) /* character. */
{
if ((subject[ovector[1]] & 0xc0) != 0x80) break;
ovector[1] += 1;
}
}
continue; /* Go round the loop again */
}
/* Other matching errors are not recoverable. */
if (rc < 0)
{
printf("Matching error %d\n", rc);
pcre_free(re); /* Release memory used for the compiled pattern */
return 1;
}
/* Match succeded */
printf("\nMatch succeeded again at offset %d\n", ovector[0]);
/* The match succeeded, but the output vector wasn't big enough. */
if (rc == 0)
{
rc = OVECCOUNT/3;
printf("ovector only has room for %d captured substrings\n", rc - 1);
}
/* As before, show substrings stored in the output vector by number, and then
also any named substrings. */
for (i = 0; i < rc; i++)
{
char *substring_start = subject + ovector[2*i];
int substring_length = ovector[2*i+1] - ovector[2*i];
printf("%2d: %.*s\n", i, substring_length, substring_start);
}
if (namecount <= 0) printf("No named substrings\n"); else
{
unsigned char *tabptr = name_table;
printf("Named substrings\n");
for (i = 0; i < namecount; i++)
{
int n = (tabptr[0] << 8) | tabptr[1];
printf("(%d) %*s: %.*s\n", n, name_entry_size - 3, tabptr + 2,
ovector[2*n+1] - ovector[2*n], subject + ovector[2*n]);
tabptr += name_entry_size;
}
}
} /* End of loop to find second and subsequent matches */
printf("\n");
pcre_free(re); /* Release memory used for the compiled pattern */
return 0;
}
/* End of pcredemo.c */

View file

@ -1,432 +0,0 @@
/*************************************************
* Perl-Compatible Regular Expressions *
*************************************************/
/* PCRE is a library of functions to support regular expressions whose syntax
and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel
Copyright (c) 1997-2017 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the University of Cambridge nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
-----------------------------------------------------------------------------
*/
/* This module is a wrapper that provides a POSIX API to the underlying PCRE
functions. */
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
/* Ensure that the PCREPOSIX_EXP_xxx macros are set appropriately for
compiling these functions. This must come before including pcreposix.h, where
they are set for an application (using these functions) if they have not
previously been set. */
#if defined(_WIN32) && !defined(PCRE_STATIC)
# define PCREPOSIX_EXP_DECL extern __declspec(dllexport)
# define PCREPOSIX_EXP_DEFN __declspec(dllexport)
#endif
/* We include pcre.h before pcre_internal.h so that the PCRE library functions
are declared as "import" for Windows by defining PCRE_EXP_DECL as "import".
This is needed even though pcre_internal.h itself includes pcre.h, because it
does so after it has set PCRE_EXP_DECL to "export" if it is not already set. */
#include "pcre.h"
#include "pcre_internal.h"
#include "pcreposix.h"
/* Table to translate PCRE compile time error codes into POSIX error codes. */
static const int eint[] = {
0, /* no error */
REG_EESCAPE, /* \ at end of pattern */
REG_EESCAPE, /* \c at end of pattern */
REG_EESCAPE, /* unrecognized character follows \ */
REG_BADBR, /* numbers out of order in {} quantifier */
/* 5 */
REG_BADBR, /* number too big in {} quantifier */
REG_EBRACK, /* missing terminating ] for character class */
REG_ECTYPE, /* invalid escape sequence in character class */
REG_ERANGE, /* range out of order in character class */
REG_BADRPT, /* nothing to repeat */
/* 10 */
REG_BADRPT, /* operand of unlimited repeat could match the empty string */
REG_ASSERT, /* internal error: unexpected repeat */
REG_BADPAT, /* unrecognized character after (? */
REG_BADPAT, /* POSIX named classes are supported only within a class */
REG_EPAREN, /* missing ) */
/* 15 */
REG_ESUBREG, /* reference to non-existent subpattern */
REG_INVARG, /* erroffset passed as NULL */
REG_INVARG, /* unknown option bit(s) set */
REG_EPAREN, /* missing ) after comment */
REG_ESIZE, /* parentheses nested too deeply */
/* 20 */
REG_ESIZE, /* regular expression too large */
REG_ESPACE, /* failed to get memory */
REG_EPAREN, /* unmatched parentheses */
REG_ASSERT, /* internal error: code overflow */
REG_BADPAT, /* unrecognized character after (?< */
/* 25 */
REG_BADPAT, /* lookbehind assertion is not fixed length */
REG_BADPAT, /* malformed number or name after (?( */
REG_BADPAT, /* conditional group contains more than two branches */
REG_BADPAT, /* assertion expected after (?( */
REG_BADPAT, /* (?R or (?[+-]digits must be followed by ) */
/* 30 */
REG_ECTYPE, /* unknown POSIX class name */
REG_BADPAT, /* POSIX collating elements are not supported */
REG_INVARG, /* this version of PCRE is not compiled with PCRE_UTF8 support */
REG_BADPAT, /* spare error */
REG_BADPAT, /* character value in \x{} or \o{} is too large */
/* 35 */
REG_BADPAT, /* invalid condition (?(0) */
REG_BADPAT, /* \C not allowed in lookbehind assertion */
REG_EESCAPE, /* PCRE does not support \L, \l, \N, \U, or \u */
REG_BADPAT, /* number after (?C is > 255 */
REG_BADPAT, /* closing ) for (?C expected */
/* 40 */
REG_BADPAT, /* recursive call could loop indefinitely */
REG_BADPAT, /* unrecognized character after (?P */
REG_BADPAT, /* syntax error in subpattern name (missing terminator) */
REG_BADPAT, /* two named subpatterns have the same name */
REG_BADPAT, /* invalid UTF-8 string */
/* 45 */
REG_BADPAT, /* support for \P, \p, and \X has not been compiled */
REG_BADPAT, /* malformed \P or \p sequence */
REG_BADPAT, /* unknown property name after \P or \p */
REG_BADPAT, /* subpattern name is too long (maximum 32 characters) */
REG_BADPAT, /* too many named subpatterns (maximum 10,000) */
/* 50 */
REG_BADPAT, /* repeated subpattern is too long */
REG_BADPAT, /* octal value is greater than \377 (not in UTF-8 mode) */
REG_BADPAT, /* internal error: overran compiling workspace */
REG_BADPAT, /* internal error: previously-checked referenced subpattern not found */
REG_BADPAT, /* DEFINE group contains more than one branch */
/* 55 */
REG_BADPAT, /* repeating a DEFINE group is not allowed */
REG_INVARG, /* inconsistent NEWLINE options */
REG_BADPAT, /* \g is not followed followed by an (optionally braced) non-zero number */
REG_BADPAT, /* a numbered reference must not be zero */
REG_BADPAT, /* an argument is not allowed for (*ACCEPT), (*FAIL), or (*COMMIT) */
/* 60 */
REG_BADPAT, /* (*VERB) not recognized */
REG_BADPAT, /* number is too big */
REG_BADPAT, /* subpattern name expected */
REG_BADPAT, /* digit expected after (?+ */
REG_BADPAT, /* ] is an invalid data character in JavaScript compatibility mode */
/* 65 */
REG_BADPAT, /* different names for subpatterns of the same number are not allowed */
REG_BADPAT, /* (*MARK) must have an argument */
REG_INVARG, /* this version of PCRE is not compiled with PCRE_UCP support */
REG_BADPAT, /* \c must be followed by an ASCII character */
REG_BADPAT, /* \k is not followed by a braced, angle-bracketed, or quoted name */
/* 70 */
REG_BADPAT, /* internal error: unknown opcode in find_fixedlength() */
REG_BADPAT, /* \N is not supported in a class */
REG_BADPAT, /* too many forward references */
REG_BADPAT, /* disallowed UTF-8/16/32 code point (>= 0xd800 && <= 0xdfff) */
REG_BADPAT, /* invalid UTF-16 string (should not occur) */
/* 75 */
REG_BADPAT, /* overlong MARK name */
REG_BADPAT, /* character value in \u.... sequence is too large */
REG_BADPAT, /* invalid UTF-32 string (should not occur) */
REG_BADPAT, /* setting UTF is disabled by the application */
REG_BADPAT, /* non-hex character in \\x{} (closing brace missing?) */
/* 80 */
REG_BADPAT, /* non-octal character in \o{} (closing brace missing?) */
REG_BADPAT, /* missing opening brace after \o */
REG_BADPAT, /* parentheses too deeply nested */
REG_BADPAT, /* invalid range in character class */
REG_BADPAT, /* group name must start with a non-digit */
/* 85 */
REG_BADPAT, /* parentheses too deeply nested (stack check) */
REG_BADPAT, /* missing digits in \x{} or \o{} */
REG_BADPAT /* pattern too complicated */
};
/* Table of texts corresponding to POSIX error codes */
static const char *const pstring[] = {
"", /* Dummy for value 0 */
"internal error", /* REG_ASSERT */
"invalid repeat counts in {}", /* BADBR */
"pattern error", /* BADPAT */
"? * + invalid", /* BADRPT */
"unbalanced {}", /* EBRACE */
"unbalanced []", /* EBRACK */
"collation error - not relevant", /* ECOLLATE */
"bad class", /* ECTYPE */
"bad escape sequence", /* EESCAPE */
"empty expression", /* EMPTY */
"unbalanced ()", /* EPAREN */
"bad range inside []", /* ERANGE */
"expression too big", /* ESIZE */
"failed to get memory", /* ESPACE */
"bad back reference", /* ESUBREG */
"bad argument", /* INVARG */
"match failed" /* NOMATCH */
};
/*************************************************
* Translate error code to string *
*************************************************/
PCREPOSIX_EXP_DEFN size_t PCRE_CALL_CONVENTION
regerror(int errcode, const regex_t *preg, char *errbuf, size_t errbuf_size)
{
const char *message, *addmessage;
size_t length, addlength;
message = (errcode >= (int)(sizeof(pstring)/sizeof(char *)))?
"unknown error code" : pstring[errcode];
length = strlen(message) + 1;
addmessage = " at offset ";
addlength = (preg != NULL && (int)preg->re_erroffset != -1)?
strlen(addmessage) + 6 : 0;
if (errbuf_size > 0)
{
if (addlength > 0 && errbuf_size >= length + addlength)
sprintf(errbuf, "%s%s%-6d", message, addmessage, (int)preg->re_erroffset);
else
{
strncpy(errbuf, message, errbuf_size - 1);
errbuf[errbuf_size-1] = 0;
}
}
return length + addlength;
}
/*************************************************
* Free store held by a regex *
*************************************************/
PCREPOSIX_EXP_DEFN void PCRE_CALL_CONVENTION
regfree(regex_t *preg)
{
(PUBL(free))(preg->re_pcre);
}
/*************************************************
* Compile a regular expression *
*************************************************/
/*
Arguments:
preg points to a structure for recording the compiled expression
pattern the pattern to compile
cflags compilation flags
Returns: 0 on success
various non-zero codes on failure
*/
PCREPOSIX_EXP_DEFN int PCRE_CALL_CONVENTION
regcomp(regex_t *preg, const char *pattern, int cflags)
{
const char *errorptr;
int erroffset;
int errorcode;
int options = 0;
int re_nsub = 0;
if ((cflags & REG_ICASE) != 0) options |= PCRE_CASELESS;
if ((cflags & REG_NEWLINE) != 0) options |= PCRE_MULTILINE;
if ((cflags & REG_DOTALL) != 0) options |= PCRE_DOTALL;
if ((cflags & REG_NOSUB) != 0) options |= PCRE_NO_AUTO_CAPTURE;
if ((cflags & REG_UTF8) != 0) options |= PCRE_UTF8;
if ((cflags & REG_UCP) != 0) options |= PCRE_UCP;
if ((cflags & REG_UNGREEDY) != 0) options |= PCRE_UNGREEDY;
preg->re_pcre = pcre_compile2(pattern, options, &errorcode, &errorptr,
&erroffset, NULL);
preg->re_erroffset = erroffset;
/* Safety: if the error code is too big for the translation vector (which
should not happen, but we all make mistakes), return REG_BADPAT. */
if (preg->re_pcre == NULL)
{
return (errorcode < (int)(sizeof(eint)/sizeof(const int)))?
eint[errorcode] : REG_BADPAT;
}
(void)pcre_fullinfo((const pcre *)preg->re_pcre, NULL, PCRE_INFO_CAPTURECOUNT,
&re_nsub);
preg->re_nsub = (size_t)re_nsub;
return 0;
}
/*************************************************
* Match a regular expression *
*************************************************/
/* Unfortunately, PCRE requires 3 ints of working space for each captured
substring, so we have to get and release working store instead of just using
the POSIX structures as was done in earlier releases when PCRE needed only 2
ints. However, if the number of possible capturing brackets is small, use a
block of store on the stack, to reduce the use of malloc/free. The threshold is
in a macro that can be changed at configure time.
If REG_NOSUB was specified at compile time, the PCRE_NO_AUTO_CAPTURE flag will
be set. When this is the case, the nmatch and pmatch arguments are ignored, and
the only result is yes/no/error. */
PCREPOSIX_EXP_DEFN int PCRE_CALL_CONVENTION
regexec(const regex_t *preg, const char *string, size_t nmatch,
regmatch_t pmatch[], int eflags)
{
int rc, so, eo;
int options = 0;
int *ovector = NULL;
int small_ovector[POSIX_MALLOC_THRESHOLD * 3];
BOOL allocated_ovector = FALSE;
BOOL nosub =
(REAL_PCRE_OPTIONS((const pcre *)preg->re_pcre) & PCRE_NO_AUTO_CAPTURE) != 0;
if ((eflags & REG_NOTBOL) != 0) options |= PCRE_NOTBOL;
if ((eflags & REG_NOTEOL) != 0) options |= PCRE_NOTEOL;
if ((eflags & REG_NOTEMPTY) != 0) options |= PCRE_NOTEMPTY;
((regex_t *)preg)->re_erroffset = (size_t)(-1); /* Only has meaning after compile */
/* When no string data is being returned, or no vector has been passed in which
to put it, ensure that nmatch is zero. Otherwise, ensure the vector for holding
the return data is large enough. */
if (nosub || pmatch == NULL) nmatch = 0;
else if (nmatch > 0)
{
if (nmatch <= POSIX_MALLOC_THRESHOLD)
{
ovector = &(small_ovector[0]);
}
else
{
if (nmatch > INT_MAX/(sizeof(int) * 3)) return REG_ESPACE;
ovector = (int *)malloc(sizeof(int) * nmatch * 3);
if (ovector == NULL) return REG_ESPACE;
allocated_ovector = TRUE;
}
}
/* REG_STARTEND is a BSD extension, to allow for non-NUL-terminated strings.
The man page from OS X says "REG_STARTEND affects only the location of the
string, not how it is matched". That is why the "so" value is used to bump the
start location rather than being passed as a PCRE "starting offset". */
if ((eflags & REG_STARTEND) != 0)
{
if (pmatch == NULL) return REG_INVARG;
so = pmatch[0].rm_so;
eo = pmatch[0].rm_eo;
}
else
{
so = 0;
eo = (int)strlen(string);
}
rc = pcre_exec((const pcre *)preg->re_pcre, NULL, string + so, (eo - so),
0, options, ovector, (int)(nmatch * 3));
if (rc == 0) rc = (int)nmatch; /* All captured slots were filled in */
/* Successful match */
if (rc >= 0)
{
size_t i;
if (!nosub)
{
for (i = 0; i < (size_t)rc; i++)
{
pmatch[i].rm_so = ovector[i*2] + so;
pmatch[i].rm_eo = ovector[i*2+1] + so;
}
if (allocated_ovector) free(ovector);
for (; i < nmatch; i++) pmatch[i].rm_so = pmatch[i].rm_eo = -1;
}
return 0;
}
/* Unsuccessful match */
if (allocated_ovector) free(ovector);
switch(rc)
{
/* ========================================================================== */
/* These cases are never obeyed. This is a fudge that causes a compile-time
error if the vector eint, which is indexed by compile-time error number, is
not the correct length. It seems to be the only way to do such a check at
compile time, as the sizeof() operator does not work in the C preprocessor.
As all the PCRE_ERROR_xxx values are negative, we can use 0 and 1. */
case 0:
case (sizeof(eint)/sizeof(int) == ERRCOUNT):
return REG_ASSERT;
/* ========================================================================== */
case PCRE_ERROR_NOMATCH: return REG_NOMATCH;
case PCRE_ERROR_NULL: return REG_INVARG;
case PCRE_ERROR_BADOPTION: return REG_INVARG;
case PCRE_ERROR_BADMAGIC: return REG_INVARG;
case PCRE_ERROR_UNKNOWN_NODE: return REG_ASSERT;
case PCRE_ERROR_NOMEMORY: return REG_ESPACE;
case PCRE_ERROR_MATCHLIMIT: return REG_ESPACE;
case PCRE_ERROR_BADUTF8: return REG_INVARG;
case PCRE_ERROR_BADUTF8_OFFSET: return REG_INVARG;
case PCRE_ERROR_BADMODE: return REG_INVARG;
default: return REG_ASSERT;
}
}
/* End of pcreposix.c */

View file

@ -1,146 +0,0 @@
/*************************************************
* Perl-Compatible Regular Expressions *
*************************************************/
#ifndef _PCREPOSIX_H
#define _PCREPOSIX_H
/* This is the header for the POSIX wrapper interface to the PCRE Perl-
Compatible Regular Expression library. It defines the things POSIX says should
be there. I hope.
Copyright (c) 1997-2012 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the University of Cambridge nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
-----------------------------------------------------------------------------
*/
/* Have to include stdlib.h in order to ensure that size_t is defined. */
#include <stdlib.h>
/* Allow for C++ users */
#ifdef __cplusplus
extern "C" {
#endif
/* Options, mostly defined by POSIX, but with some extras. */
#define REG_ICASE 0x0001 /* Maps to PCRE_CASELESS */
#define REG_NEWLINE 0x0002 /* Maps to PCRE_MULTILINE */
#define REG_NOTBOL 0x0004 /* Maps to PCRE_NOTBOL */
#define REG_NOTEOL 0x0008 /* Maps to PCRE_NOTEOL */
#define REG_DOTALL 0x0010 /* NOT defined by POSIX; maps to PCRE_DOTALL */
#define REG_NOSUB 0x0020 /* Maps to PCRE_NO_AUTO_CAPTURE */
#define REG_UTF8 0x0040 /* NOT defined by POSIX; maps to PCRE_UTF8 */
#define REG_STARTEND 0x0080 /* BSD feature: pass subject string by so,eo */
#define REG_NOTEMPTY 0x0100 /* NOT defined by POSIX; maps to PCRE_NOTEMPTY */
#define REG_UNGREEDY 0x0200 /* NOT defined by POSIX; maps to PCRE_UNGREEDY */
#define REG_UCP 0x0400 /* NOT defined by POSIX; maps to PCRE_UCP */
/* This is not used by PCRE, but by defining it we make it easier
to slot PCRE into existing programs that make POSIX calls. */
#define REG_EXTENDED 0
/* Error values. Not all these are relevant or used by the wrapper. */
enum {
REG_ASSERT = 1, /* internal error ? */
REG_BADBR, /* invalid repeat counts in {} */
REG_BADPAT, /* pattern error */
REG_BADRPT, /* ? * + invalid */
REG_EBRACE, /* unbalanced {} */
REG_EBRACK, /* unbalanced [] */
REG_ECOLLATE, /* collation error - not relevant */
REG_ECTYPE, /* bad class */
REG_EESCAPE, /* bad escape sequence */
REG_EMPTY, /* empty expression */
REG_EPAREN, /* unbalanced () */
REG_ERANGE, /* bad range inside [] */
REG_ESIZE, /* expression too big */
REG_ESPACE, /* failed to get memory */
REG_ESUBREG, /* bad back reference */
REG_INVARG, /* bad argument */
REG_NOMATCH /* match failed */
};
/* The structure representing a compiled regular expression. */
typedef struct {
void *re_pcre;
size_t re_nsub;
size_t re_erroffset;
} regex_t;
/* The structure in which a captured offset is returned. */
typedef int regoff_t;
typedef struct {
regoff_t rm_so;
regoff_t rm_eo;
} regmatch_t;
/* When an application links to a PCRE DLL in Windows, the symbols that are
imported have to be identified as such. When building PCRE, the appropriate
export settings are needed, and are set in pcreposix.c before including this
file. */
#if defined(_WIN32) && !defined(PCRE_STATIC) && !defined(PCREPOSIX_EXP_DECL)
# define PCREPOSIX_EXP_DECL extern __declspec(dllimport)
# define PCREPOSIX_EXP_DEFN __declspec(dllimport)
#endif
/* By default, we use the standard "extern" declarations. */
#ifndef PCREPOSIX_EXP_DECL
# ifdef __cplusplus
# define PCREPOSIX_EXP_DECL extern "C"
# define PCREPOSIX_EXP_DEFN extern "C"
# else
# define PCREPOSIX_EXP_DECL extern
# define PCREPOSIX_EXP_DEFN extern
# endif
#endif
/* The functions */
PCREPOSIX_EXP_DECL int regcomp(regex_t *, const char *, int);
PCREPOSIX_EXP_DECL int regexec(const regex_t *, const char *, size_t,
regmatch_t *, int);
PCREPOSIX_EXP_DECL size_t regerror(int, const regex_t *, char *, size_t);
PCREPOSIX_EXP_DECL void regfree(regex_t *);
#ifdef __cplusplus
} /* extern "C" */
#endif
#endif /* End of pcreposix.h */

Binary file not shown.

View file

@ -1,3 +0,0 @@
testdata/grepinputv
testdata/grepinputx

View file

@ -1,611 +0,0 @@
This is a file of miscellaneous text that is used as test data for checking
that the pcregrep command is working correctly. The file must be more than 24K
long so that it needs more than a single read() call to process it. New
features should be added at the end, because some of the tests involve the
output of line numbers, and we don't want these to change.
PATTERN at the start of a line.
In the middle of a line, PATTERN appears.
This pattern is in lower case.
Here follows a whole lot of stuff that makes the file over 24K long.
-------------------------------------------------------------------------------
The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the
lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox
jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick
brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog.
The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the
lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox
jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick
brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog.
The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the
lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox
jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick
brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog.
The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the
lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox
jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick
brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog.
The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the
lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox
jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick
brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog.
The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the
lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox
jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick
brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog.
The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the
lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox
jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick
brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog.
The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the
lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox
jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick
brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog.
The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the
lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox
jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick
brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog.
The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the
lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox
jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick
brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog.
The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the
lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox
jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick
brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog.
The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the
lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox
jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick
brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog.
The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the
lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox
jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick
brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog.
The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the
lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox
jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick
brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog.
The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the
lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox
jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick
brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog.
The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the
lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox
jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick
brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog.
The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the
lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox
jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick
brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog.
The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the
lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox
jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick
brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog.
The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the
lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox
jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick
brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog.
The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the
lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox
jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick
brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog.
The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the
lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox
jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick
brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog.
The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the
lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox
jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick
brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog.
The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the
lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox
jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick
brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog.
The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the
lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox
jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick
brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog.
The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the
lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox
jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick
brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog.
The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the
lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox
jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick
brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog.
The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the
lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox
jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick
brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog.
The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the
lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox
jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick
brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog.
The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the
lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox
jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick
brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog.
The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the
lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox
jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick
brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog.
The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the
lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox
jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick
brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog.
The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the
lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox
jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick
brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog.
The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the
lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox
jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick
brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog.
The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the
lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox
jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick
brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog.
The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the
lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox
jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick
brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog.
The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the
lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox
jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick
brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog.
The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the
lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox
jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick
brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog.
The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the
lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox
jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick
brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog.
The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the
lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox
jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick
brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog.
The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the
lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox
jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick
brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog.
The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the
lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox
jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick
brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog.
The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the
lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox
jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick
brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog.
The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the
lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox
jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick
brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog.
The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the
lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox
jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick
brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog.
The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the
lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox
jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick
brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog.
The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the
lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox
jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick
brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog.
The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the
lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox
jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick
brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog.
The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the
lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox
jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick
brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog.
The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the
lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox
jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick
brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog.
The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the
lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox
jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick
brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog.
The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the
lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox
jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick
brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog.
The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the
lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox
jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick
brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog.
The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the
lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox
jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick
brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog.
The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the
lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox
jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick
brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog.
The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the
lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox
jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick
brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog.
The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the
lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox
jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick
brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog.
The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the
lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox
jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick
brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog.
The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the
lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox
jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick
brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog.
The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the
lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox
jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick
brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog.
The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the
lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox
jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick
brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog.
The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the
lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox
jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick
brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog.
The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the
lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox
jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick
brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog.
The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the
lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox
jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick
brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog.
The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the
lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox
jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick
brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog.
The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the
lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox
jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick
brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog.
The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the
lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox
jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick
brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog.
The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the
lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox
jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick
brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog.
The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the
lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox
jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick
brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog.
The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the
lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox
jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick
brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog.
The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the
lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox
jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick
brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog.
The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the
lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox
jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick
brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog.
The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the
lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox
jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick
brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog.
The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the
lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox
jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick
brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog.
The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the
lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox
jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick
brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog.
The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the
lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox
jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick
brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog.
The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the
lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox
jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick
brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog.
The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the
lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox
jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick
brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog.
The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the
lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox
jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick
brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog.
The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the
lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox
jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick
brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog.
The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the
lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox
jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick
brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog.
The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the
lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox
jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick
brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog.
The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the
lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox
jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick
brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog.
The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the
lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox
jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick
brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog.
The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the
lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox
jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick
brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog.
The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the
lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox
jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick
brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog.
The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the
lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox
jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick
brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog.
The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the
lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox
jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick
brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog.
The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the
lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox
jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick
brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog.
The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the
lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox
jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick
brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog.
The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the
lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox
jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick
brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog.
The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the
lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox
jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick
brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog.
The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the
lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox
jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick
brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog.
The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the
lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox
jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick
brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog.
The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the
lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox
jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick
brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog.
The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the
lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox
jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick
brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog.
The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the
lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox
jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick
brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog.
The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the
lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox
jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick
brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog.
The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the
lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox
jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick
brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog.
The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the
lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox
jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick
brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog.
The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the
lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox
jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick
brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog.
The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the
lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox
jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick
brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog.
The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the
lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox
jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick
brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog.
The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the
lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox
jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick
brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog.
The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the
lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox
jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick
brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog.
The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the
lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox
jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick
brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog.
The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the
lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox
jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick
brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog.
The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the
lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox
jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick
brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog.
The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the
lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox
jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick
brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog.
The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the
lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox
jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick
brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog.
The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the
lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox
jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick
brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog.
The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the
lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox
jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick
brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog.
The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the
lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox
jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick
brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog.
The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the
lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox
jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick
brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog.
The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the
lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox
jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick
brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog.
-------------------------------------------------------------------------------
aaaaa0
aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
bbbbbb
cccccccccccccccccccccccccccccccccccccccccc
aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
eeeee
aaaaa2
ffffffffff
This is a line before the binary zero.
This line contains a binary zero here >< for testing.
This is a line after the binary zero.
ABOVE the elephant
ABOVE
ABOVE theatre
AB.VE
AB.VE the turtle
010203040506
PUT NEW DATA ABOVE THIS LINE.
=============================
Check up on PATTERN near the end.
This is the last line of this file.

View file

@ -1,15 +0,0 @@
triple: t1_txt s1_tag s_txt p_tag p_txt o_tag o_txt
triple: t2_txt s1_tag s_txt p_tag p_txt o_tag
Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum.
triple: t3_txt s2_tag s_txt p_tag p_txt o_tag o_txt
triple: t4_txt s1_tag s_txt p_tag p_txt o_tag o_txt
triple: t5_txt s1_tag s_txt p_tag p_txt o_tag
o_txt
triple: t6_txt s2_tag s_txt p_tag p_txt o_tag o_txt
triple: t7_txt s1_tag s_txt p_tag p_txt o_tag o_txt

View file

@ -1,11 +0,0 @@
X one
X two X three X four X five
X six
X seven…X eightX nineX ten
Before 111
Before 222Before 333…Match
After 111
After 222After 333
And so on and so on
And so on and so on

View file

@ -1,4 +0,0 @@
The quick brown
fox jumps
over the lazy dog.
This time it jumps and jumps and jumps.

View file

@ -1,43 +0,0 @@
This is a second file of input for the pcregrep tests.
Here is the pattern again.
Pattern
That time it was on a line by itself.
To pat or not to pat, that is the question.
complete pair
of lines
That was a complete pair
of lines all by themselves.
complete pair
of lines
And there they were again, to check line numbers.
one
two
three
four
five
six
seven
eight
nine
ten
eleven
twelve
thirteen
fourteen
fifteen
sixteen
seventeen
eighteen
nineteen
twenty
This line contains pattern not on a line by itself.
This is the last line of this file.

View file

@ -1,7 +0,0 @@
This is a file of patterns for testing the -f option. Don't include any blank
lines because they will match everything! This is no longer true, so have one.
pattern
line by itself
End of the list of patterns.

View file

@ -1,757 +0,0 @@
---------------------------- Test 1 ------------------------------
PATTERN at the start of a line.
In the middle of a line, PATTERN appears.
Check up on PATTERN near the end.
RC=0
---------------------------- Test 2 ------------------------------
PATTERN at the start of a line.
RC=0
---------------------------- Test 3 ------------------------------
7:PATTERN at the start of a line.
8:In the middle of a line, PATTERN appears.
10:This pattern is in lower case.
610:Check up on PATTERN near the end.
RC=0
---------------------------- Test 4 ------------------------------
4
RC=0
---------------------------- Test 5 ------------------------------
./testdata/grepinput:7:PATTERN at the start of a line.
./testdata/grepinput:8:In the middle of a line, PATTERN appears.
./testdata/grepinput:10:This pattern is in lower case.
./testdata/grepinput:610:Check up on PATTERN near the end.
./testdata/grepinputx:3:Here is the pattern again.
./testdata/grepinputx:5:Pattern
./testdata/grepinputx:42:This line contains pattern not on a line by itself.
RC=0
---------------------------- Test 6 ------------------------------
7:PATTERN at the start of a line.
8:In the middle of a line, PATTERN appears.
10:This pattern is in lower case.
610:Check up on PATTERN near the end.
3:Here is the pattern again.
5:Pattern
42:This line contains pattern not on a line by itself.
RC=0
---------------------------- Test 7 ------------------------------
./testdata/grepinput
./testdata/grepinputx
RC=0
---------------------------- Test 8 ------------------------------
./testdata/grepinput
RC=0
---------------------------- Test 9 ------------------------------
RC=0
---------------------------- Test 10 -----------------------------
RC=1
---------------------------- Test 11 -----------------------------
1:This is a second file of input for the pcregrep tests.
2:
4:
5:Pattern
6:That time it was on a line by itself.
7:
8:To pat or not to pat, that is the question.
9:
10:complete pair
11:of lines
12:
13:That was a complete pair
14:of lines all by themselves.
15:
16:complete pair
17:of lines
18:
19:And there they were again, to check line numbers.
20:
21:one
22:two
23:three
24:four
25:five
26:six
27:seven
28:eight
29:nine
30:ten
31:eleven
32:twelve
33:thirteen
34:fourteen
35:fifteen
36:sixteen
37:seventeen
38:eighteen
39:nineteen
40:twenty
41:
43:This is the last line of this file.
RC=0
---------------------------- Test 12 -----------------------------
Pattern
RC=0
---------------------------- Test 13 -----------------------------
Here is the pattern again.
That time it was on a line by itself.
seventeen
This line contains pattern not on a line by itself.
RC=0
---------------------------- Test 14 -----------------------------
./testdata/grepinputx:To pat or not to pat, that is the question.
RC=0
---------------------------- Test 15 -----------------------------
pcregrep: Error in command-line regex at offset 4: nothing to repeat
RC=2
---------------------------- Test 16 -----------------------------
pcregrep: Failed to open ./testdata/nonexistfile: No such file or directory
RC=2
---------------------------- Test 17 -----------------------------
features should be added at the end, because some of the tests involve the
output of line numbers, and we don't want these to change.
RC=0
---------------------------- Test 18 -----------------------------
4:features should be added at the end, because some of the tests involve the
output of line numbers, and we don't want these to change.
583:brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog.
-------------------------------------------------------------------------------
RC=0
---------------------------- Test 19 -----------------------------
Pattern
RC=0
---------------------------- Test 20 -----------------------------
10:complete pair
of lines
16:complete pair
of lines
RC=0
---------------------------- Test 21 -----------------------------
24:four
25-five
26-six
27-seven
--
34:fourteen
35-fifteen
36-sixteen
37-seventeen
RC=0
---------------------------- Test 22 -----------------------------
21-one
22-two
23-three
24:four
--
31-eleven
32-twelve
33-thirteen
34:fourteen
RC=0
---------------------------- Test 23 -----------------------------
one
two
three
four
five
six
seven
--
eleven
twelve
thirteen
fourteen
fifteen
sixteen
seventeen
RC=0
---------------------------- Test 24 -----------------------------
four
five
six
seven
eight
nine
ten
eleven
twelve
thirteen
fourteen
fifteen
sixteen
seventeen
eighteen
nineteen
twenty
This line contains pattern not on a line by itself.
This is the last line of this file.
RC=0
---------------------------- Test 25 -----------------------------
15-
16-complete pair
17-of lines
18-
19-And there they were again, to check line numbers.
20-
21-one
22-two
23-three
24:four
25-five
26-six
27-seven
28-eight
29-nine
30-ten
31-eleven
32-twelve
33-thirteen
34:fourteen
RC=0
---------------------------- Test 26 -----------------------------
complete pair
of lines
And there they were again, to check line numbers.
one
two
three
four
five
six
seven
eight
nine
ten
eleven
twelve
thirteen
fourteen
fifteen
sixteen
seventeen
eighteen
nineteen
twenty
This line contains pattern not on a line by itself.
This is the last line of this file.
RC=0
---------------------------- Test 27 -----------------------------
four
five
six
seven
eight
nine
ten
eleven
twelve
thirteen
fourteen
fifteen
sixteen
seventeen
eighteen
nineteen
twenty
This line contains pattern not on a line by itself.
This is the last line of this file.
RC=0
---------------------------- Test 28 -----------------------------
14-of lines all by themselves.
15-
16-complete pair
17-of lines
18-
19-And there they were again, to check line numbers.
20-
21-one
22-two
23-three
24:four
25-five
26-six
27-seven
28-eight
29-nine
30-ten
31-eleven
32-twelve
33-thirteen
34:fourteen
RC=0
---------------------------- Test 29 -----------------------------
of lines all by themselves.
complete pair
of lines
And there they were again, to check line numbers.
one
two
three
four
five
six
seven
eight
nine
ten
eleven
twelve
thirteen
fourteen
fifteen
sixteen
seventeen
eighteen
nineteen
twenty
This line contains pattern not on a line by itself.
This is the last line of this file.
RC=0
---------------------------- Test 30 -----------------------------
./testdata/grepinput-4-features should be added at the end, because some of the tests involve the
./testdata/grepinput-5-output of line numbers, and we don't want these to change.
./testdata/grepinput-6-
./testdata/grepinput:7:PATTERN at the start of a line.
./testdata/grepinput:8:In the middle of a line, PATTERN appears.
./testdata/grepinput-9-
./testdata/grepinput:10:This pattern is in lower case.
--
./testdata/grepinput-607-PUT NEW DATA ABOVE THIS LINE.
./testdata/grepinput-608-=============================
./testdata/grepinput-609-
./testdata/grepinput:610:Check up on PATTERN near the end.
--
./testdata/grepinputx-1-This is a second file of input for the pcregrep tests.
./testdata/grepinputx-2-
./testdata/grepinputx:3:Here is the pattern again.
./testdata/grepinputx-4-
./testdata/grepinputx:5:Pattern
--
./testdata/grepinputx-39-nineteen
./testdata/grepinputx-40-twenty
./testdata/grepinputx-41-
./testdata/grepinputx:42:This line contains pattern not on a line by itself.
RC=0
---------------------------- Test 31 -----------------------------
./testdata/grepinput:7:PATTERN at the start of a line.
./testdata/grepinput:8:In the middle of a line, PATTERN appears.
./testdata/grepinput-9-
./testdata/grepinput:10:This pattern is in lower case.
./testdata/grepinput-11-
./testdata/grepinput-12-Here follows a whole lot of stuff that makes the file over 24K long.
./testdata/grepinput-13-
--
./testdata/grepinput:610:Check up on PATTERN near the end.
./testdata/grepinput-611-This is the last line of this file.
--
./testdata/grepinputx:3:Here is the pattern again.
./testdata/grepinputx-4-
./testdata/grepinputx:5:Pattern
./testdata/grepinputx-6-That time it was on a line by itself.
./testdata/grepinputx-7-
./testdata/grepinputx-8-To pat or not to pat, that is the question.
--
./testdata/grepinputx:42:This line contains pattern not on a line by itself.
./testdata/grepinputx-43-This is the last line of this file.
RC=0
---------------------------- Test 32 -----------------------------
./testdata/grepinputx
RC=0
---------------------------- Test 33 -----------------------------
pcregrep: Failed to open ./testdata/grepnonexist: No such file or directory
RC=2
---------------------------- Test 34 -----------------------------
RC=2
---------------------------- Test 35 -----------------------------
./testdata/grepinput8
./testdata/grepinputx
RC=0
---------------------------- Test 36 -----------------------------
./testdata/grepinput3
./testdata/grepinputx
RC=0
---------------------------- Test 37 -----------------------------
aaaaa0
aaaaa2
010203040506
RC=0
======== STDERR ========
pcregrep: pcre_exec() gave error -8 while matching this text:
aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
pcregrep: pcre_exec() gave error -8 while matching this text:
aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
pcregrep: Error -8, -21 or -27 means that a resource limit was exceeded.
pcregrep: Check your regex for nested unlimited loops.
---------------------------- Test 38 ------------------------------
This line contains a binary zero here >< for testing.
RC=0
---------------------------- Test 39 ------------------------------
This is a line before the binary zero.
This line contains a binary zero here >< for testing.
RC=0
---------------------------- Test 40 ------------------------------
This line contains a binary zero here >< for testing.
This is a line after the binary zero.
RC=0
---------------------------- Test 41 ------------------------------
before the binary zero
after the binary zero
RC=0
---------------------------- Test 42 ------------------------------
./testdata/grepinput:595:before the binary zero
./testdata/grepinput:597:after the binary zero
RC=0
---------------------------- Test 43 ------------------------------
595:before
595:zero
596:zero
597:after
597:zero
RC=0
---------------------------- Test 44 ------------------------------
595:before
595:zero
596:zero
597:zero
RC=0
---------------------------- Test 45 ------------------------------
10:pattern
595:binary
596:binary
597:binary
RC=0
---------------------------- Test 46 ------------------------------
pcregrep: Error in 2nd command-line regex at offset 9: missing )
RC=2
---------------------------- Test 47 ------------------------------
AB.VE
RC=0
---------------------------- Test 48 ------------------------------
ABOVE the elephant
AB.VE
AB.VE the turtle
RC=0
---------------------------- Test 49 ------------------------------
ABOVE the elephant
AB.VE
AB.VE the turtle
PUT NEW DATA ABOVE THIS LINE.
RC=0
---------------------------- Test 50 ------------------------------
RC=1
---------------------------- Test 51 ------------------------------
over the lazy dog.
This time it jumps and jumps and jumps.
RC=0
---------------------------- Test 52 ------------------------------
fox jumps
This time it jumps and jumps and jumps.
RC=0
---------------------------- Test 53 ------------------------------
36972,6
36990,4
37024,4
37066,5
37083,4
RC=0
---------------------------- Test 54 ------------------------------
595:15,6
595:33,4
596:28,4
597:15,5
597:32,4
RC=0
---------------------------- Test 55 -----------------------------
Here is the pattern again.
That time it was on a line by itself.
This line contains pattern not on a line by itself.
RC=0
---------------------------- Test 56 -----------------------------
./testdata/grepinput:456
./testdata/grepinput3:0
./testdata/grepinput8:0
./testdata/grepinputv:1
./testdata/grepinputx:0
RC=0
---------------------------- Test 57 -----------------------------
./testdata/grepinput:456
./testdata/grepinputv:1
RC=0
---------------------------- Test 58 -----------------------------
PATTERN at the start of a line.
In the middle of a line, PATTERN appears.
Check up on PATTERN near the end.
RC=0
---------------------------- Test 59 -----------------------------
PATTERN at the start of a line.
In the middle of a line, PATTERN appears.
Check up on PATTERN near the end.
RC=0
---------------------------- Test 60 -----------------------------
PATTERN at the start of a line.
In the middle of a line, PATTERN appears.
Check up on PATTERN near the end.
RC=0
---------------------------- Test 61 -----------------------------
PATTERN at the start of a line.
In the middle of a line, PATTERN appears.
Check up on PATTERN near the end.
RC=0
---------------------------- Test 62 -----------------------------
pcregrep: pcre_exec() gave error -8 while matching text that starts:
This is a file of miscellaneous text that is used as test data for checking
that the pcregrep command is working correctly. The file must be more than 24K
long so that it needs more than a single read
pcregrep: Error -8, -21 or -27 means that a resource limit was exceeded.
pcregrep: Check your regex for nested unlimited loops.
RC=1
---------------------------- Test 63 -----------------------------
pcregrep: pcre_exec() gave error -21 while matching text that starts:
This is a file of miscellaneous text that is used as test data for checking
that the pcregrep command is working correctly. The file must be more than 24K
long so that it needs more than a single read
pcregrep: Error -8, -21 or -27 means that a resource limit was exceeded.
pcregrep: Check your regex for nested unlimited loops.
RC=1
---------------------------- Test 64 ------------------------------
appears
RC=0
---------------------------- Test 65 ------------------------------
pear
RC=0
---------------------------- Test 66 ------------------------------
RC=0
---------------------------- Test 67 ------------------------------
RC=0
---------------------------- Test 68 ------------------------------
pear
RC=0
---------------------------- Test 69 -----------------------------
1:This is a second file of input for the pcregrep tests.
2:
4:
5:Pattern
6:That time it was on a line by itself.
7:
8:To pat or not to pat, that is the question.
9:
10:complete pair
11:of lines
12:
13:That was a complete pair
14:of lines all by themselves.
15:
16:complete pair
17:of lines
18:
19:And there they were again, to check line numbers.
20:
21:one
22:two
23:three
24:four
25:five
26:six
27:seven
28:eight
29:nine
30:ten
31:eleven
32:twelve
33:thirteen
34:fourteen
35:fifteen
36:sixteen
37:seventeen
38:eighteen
39:nineteen
40:twenty
41:
43:This is the last line of this file.
RC=0
---------------------------- Test 70 -----------------------------
triple: t1_txt s1_tag s_txt p_tag p_txt o_tag o_txt
triple: t3_txt s2_tag s_txt p_tag p_txt o_tag o_txt
triple: t4_txt s1_tag s_txt p_tag p_txt o_tag o_txt
triple: t6_txt s2_tag s_txt p_tag p_txt o_tag o_txt
RC=0
---------------------------- Test 71 -----------------------------
01
RC=0
---------------------------- Test 72 -----------------------------
010203040506
RC=0
---------------------------- Test 73 -----------------------------
01
RC=0
---------------------------- Test 74 -----------------------------
01
02
RC=0
---------------------------- Test 75 -----------------------------
010203040506
RC=0
---------------------------- Test 76 -----------------------------
01
02
RC=0
---------------------------- Test 77 -----------------------------
01
03
RC=0
---------------------------- Test 78 -----------------------------
010203040506
RC=0
---------------------------- Test 79 -----------------------------
01
03
RC=0
---------------------------- Test 80 -----------------------------
01
RC=0
---------------------------- Test 81 -----------------------------
010203040506
RC=0
---------------------------- Test 82 -----------------------------
01
RC=0
---------------------------- Test 83 -----------------------------
pcregrep: line 4 of file ./testdata/grepinput3 is too long for the internal buffer
pcregrep: check the --buffer-size option
RC=2
---------------------------- Test 84 -----------------------------
testdata/grepinputv:fox jumps
testdata/grepinputx:complete pair
testdata/grepinputx:That was a complete pair
testdata/grepinputx:complete pair
testdata/grepinput3:triple: t7_txt s1_tag s_txt p_tag p_txt o_tag o_txt
RC=0
---------------------------- Test 85 -----------------------------
./testdata/grepinput3:Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum.
RC=0
---------------------------- Test 86 -----------------------------
Binary file ./testdata/grepbinary matches
RC=0
---------------------------- Test 87 -----------------------------
RC=1
---------------------------- Test 88 -----------------------------
Binary file ./testdata/grepbinary matches
RC=0
---------------------------- Test 89 -----------------------------
RC=1
---------------------------- Test 90 -----------------------------
RC=1
---------------------------- Test 91 -----------------------------
The quick brown fx jumps over the lazy dog.
RC=0
---------------------------- Test 92 -----------------------------
The quick brown fx jumps over the lazy dog.
RC=0
---------------------------- Test 93 -----------------------------
The quick brown fx jumps over the lazy dog.
RC=0
---------------------------- Test 94 -----------------------------
./testdata/grepinput8
./testdata/grepinputx
RC=0
---------------------------- Test 95 -----------------------------
testdata/grepinputx:complete pair
testdata/grepinputx:That was a complete pair
testdata/grepinputx:complete pair
RC=0
---------------------------- Test 96 -----------------------------
./testdata/grepinput3
./testdata/grepinput8
./testdata/grepinputx
RC=0
---------------------------- Test 97 -----------------------------
./testdata/grepinput3
./testdata/grepinputx
RC=0
---------------------------- Test 98 -----------------------------
./testdata/grepinputx
RC=0
---------------------------- Test 99 -----------------------------
./testdata/grepinput3
./testdata/grepinputx
RC=0
---------------------------- Test 100 ------------------------------
./testdata/grepinput:zerothe.
./testdata/grepinput:zeroa
./testdata/grepinput:zerothe.
RC=0
---------------------------- Test 101 ------------------------------
./testdata/grepinput:.|zero|the|.
./testdata/grepinput:zero|a
./testdata/grepinput:.|zero|the|.
RC=0
---------------------------- Test 102 -----------------------------
2:
5:
7:
9:
12:
14:
RC=0
---------------------------- Test 103 -----------------------------
RC=0
---------------------------- Test 104 -----------------------------
2:
5:
7:
9:
12:
14:
RC=0
---------------------------- Test 105 -----------------------------
triple: t1_txt s1_tag s_txt p_tag p_txt o_tag o_txt

triple: t2_txt s1_tag s_txt p_tag p_txt o_tag
Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum.

triple: t3_txt s2_tag s_txt p_tag p_txt o_tag o_txt

triple: t4_txt s1_tag s_txt p_tag p_txt o_tag o_txt

triple: t5_txt s1_tag s_txt p_tag p_txt o_tag
o_txt

triple: t6_txt s2_tag s_txt p_tag p_txt o_tag o_txt

triple: t7_txt s1_tag s_txt p_tag p_txt o_tag o_txt
RC=0
---------------------------- Test 106 -----------------------------
a
RC=0
---------------------------- Test 107 -----------------------------
1:0,1
2:0,1
2:1,1
2:2,1
2:3,1
2:4,1
RC=0
---------------------------- Test 108 ------------------------------
RC=0
---------------------------- Test 109 -----------------------------
RC=0

View file

@ -1,12 +0,0 @@
---------------------------- Test U1 ------------------------------
1:X one
2:X two 3:X three 4:X four 5:X five
6:X six
7:X seven…8:X eight9:X nine10:X ten
RC=0
---------------------------- Test U2 ------------------------------
12-Before 111
13-Before 22214-Before 333…15:Match
16-After 111
17-After 22218-After 333
RC=0

View file

@ -1,16 +0,0 @@
---------------------------- Test N1 ------------------------------
1:abc 2:def ---------------------------- Test N2 ------------------------------
1:abc def
2:ghi
jkl---------------------------- Test N3 ------------------------------
2:def 3:
ghi
jkl---------------------------- Test N4 ------------------------------
2:ghi
jkl---------------------------- Test N5 ------------------------------
1:abc 2:def
3:ghi
4:jkl---------------------------- Test N6 ------------------------------
1:abc 2:def
3:ghi
4:jkl

View file

@ -1,2 +0,0 @@
xxx
jkl

Binary file not shown.

Some files were not shown because too many files have changed in this diff Show more