diff --git a/sapi/fuzzer/Makefile.frag b/sapi/fuzzer/Makefile.frag index 2f15ad92f89..8a456ca6c4f 100644 --- a/sapi/fuzzer/Makefile.frag +++ b/sapi/fuzzer/Makefile.frag @@ -28,3 +28,6 @@ $(SAPI_FUZZER_PATH)/php-fuzz-exif: $(PHP_GLOBAL_OBJS) $(PHP_SAPI_OBJS) $(PHP_FUZ $(SAPI_FUZZER_PATH)/php-fuzz-mbstring: $(PHP_GLOBAL_OBJS) $(PHP_SAPI_OBJS) $(PHP_FUZZER_MBSTRING_OBJS) $(FUZZER_BUILD) $(PHP_FUZZER_MBSTRING_OBJS) -o $@ + +$(SAPI_FUZZER_PATH)/php-fuzz-mbregex: $(PHP_GLOBAL_OBJS) $(PHP_SAPI_OBJS) $(PHP_FUZZER_MBREGEX_OBJS) + $(FUZZER_BUILD) $(PHP_FUZZER_MBREGEX_OBJS) -o $@ diff --git a/sapi/fuzzer/README.md b/sapi/fuzzer/README.md index 63de20aa1ca..ee2d0ee4e02 100644 --- a/sapi/fuzzer/README.md +++ b/sapi/fuzzer/README.md @@ -28,7 +28,8 @@ When running `make` it creates these binaries in `sapi/fuzzer/`: * `php-fuzz-unserializehash`: Fuzzing unserialize() for HashContext objects * `php-fuzz-json`: Fuzzing JSON parser (requires --enable-json) * `php-fuzz-exif`: Fuzzing `exif_read_data()` function (requires --enable-exif) -* `php-fuzz-mbstring`: Fuzzing `mb_ereg[i]()` (requires --enable-mbstring) +* `php-fuzz-mbstring`: Fuzzing `mb_convert_encoding()` (requires `--enable-mbstring`) +* `php-fuzz-mbregex`: Fuzzing `mb_ereg[i]()` (requires `--enable-mbstring` and `--enable-mbregex`) * `php-fuzz-execute`: Fuzzing the executor * `php-fuzz-function-jit`: Fuzzing the function JIT (requires --enable-opcache) * `php-fuzz-tracing-jit`: Fuzzing the tracing JIT (requires --enable-opcache) @@ -72,7 +73,14 @@ sapi/cli/php sapi/fuzzer/generate_execute_corpus.php ./execute-corpus Zend/tests sapi/fuzzer/php-fuzzer-function-jit ./execute-corpus ``` -For the mbstring fuzzer, you may want to build the libonig dependency with instrumentation. At this time, libonig is not clean under ubsan, so only the fuzzer and address sanitizers may be used. 
+For the mbstring fuzzer, a dictionary of encodings should be generated first: + +```sh +sapi/cli/php sapi/fuzzer/generate_mbstring_dict.php +sapi/fuzzer/php-fuzz-mbstring -dict=$PWD/sapi/fuzzer/dict/mbstring ./my-mbstring-corpus +``` + +For the mbregex fuzzer, you may want to build the libonig dependency with instrumentation. At this time, libonig is not clean under ubsan, so only the fuzzer and address sanitizers may be used. ```sh git clone https://github.com/kkos/oniguruma.git diff --git a/sapi/fuzzer/config.m4 b/sapi/fuzzer/config.m4 index 72953a8295d..aac06d18277 100644 --- a/sapi/fuzzer/config.m4 +++ b/sapi/fuzzer/config.m4 @@ -65,6 +65,9 @@ if test "$PHP_FUZZER" != "no"; then fi if test -n "$enable_mbstring" && test "$enable_mbstring" != "no"; then PHP_FUZZER_TARGET([mbstring], PHP_FUZZER_MBSTRING_OBJS) + if test -n "$enable_mbregex" && test "$enable_mbregex" != "no"; then + PHP_FUZZER_TARGET([mbregex], PHP_FUZZER_MBREGEX_OBJS) + fi fi PHP_SUBST(PHP_FUZZER_BINARIES) diff --git a/sapi/fuzzer/fuzzer-mbregex.c b/sapi/fuzzer/fuzzer-mbregex.c new file mode 100644 index 00000000000..970a7b5baee --- /dev/null +++ b/sapi/fuzzer/fuzzer-mbregex.c @@ -0,0 +1,78 @@ +/* + +----------------------------------------------------------------------+ + | Copyright (c) The PHP Group | + +----------------------------------------------------------------------+ + | This source file is subject to version 3.01 of the PHP license, | + | that is bundled with this package in the file LICENSE, and is | + | available through the world-wide-web at the following url: | + | https://www.php.net/license/3_01.txt | + | If you did not receive a copy of the PHP license and are unable to | + | obtain it through the world-wide-web, please send a note to | + | license@php.net so we can mail you a copy immediately. 
| + +----------------------------------------------------------------------+ + | Authors: Stanislav Malyshev | + +----------------------------------------------------------------------+ + */ + + +#include "fuzzer.h" + +#include "Zend/zend.h" +#include "main/php_config.h" +#include "main/php_main.h" +#include "oniguruma.h" + +#include +#include +#include + +#include "fuzzer-sapi.h" + +int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) { +#ifdef HAVE_MBREGEX + char *args[2]; + char *data = malloc(Size+1); + memcpy(data, Data, Size); + data[Size] = '\0'; + + if (fuzzer_request_startup() == FAILURE) { + return 0; + } + + fuzzer_setup_dummy_frame(); + + args[0] = data; + args[1] = "test123"; + fuzzer_call_php_func("mb_ereg", 2, args); + + args[0] = data; + args[1] = "test123"; + fuzzer_call_php_func("mb_eregi", 2, args); + + args[0] = data; + args[1] = data; + fuzzer_call_php_func("mb_ereg", 2, args); + + args[0] = data; + args[1] = data; + fuzzer_call_php_func("mb_eregi", 2, args); + + fuzzer_request_shutdown(); + + free(data); +#else + fprintf(stderr, "\n\nERROR:\nPHP built without mbstring, recompile with --enable-mbstring to use this fuzzer\n"); + exit(1); +#endif + return 0; +} + +int LLVMFuzzerInitialize(int *argc, char ***argv) { + fuzzer_init_php(NULL); + + /* The default parse depth limit allows stack overflows under asan. 
*/
+	onig_set_parse_depth_limit(512);
+
+	/* fuzzer_shutdown_php(); */
+	return 0;
+}
diff --git a/sapi/fuzzer/fuzzer-mbstring.c b/sapi/fuzzer/fuzzer-mbstring.c
index 970a7b5baee..9294e71dd7a 100644
--- a/sapi/fuzzer/fuzzer-mbstring.c
+++ b/sapi/fuzzer/fuzzer-mbstring.c
@@ -15,64 +15,64 @@
  */
 
 
+#include "zend.h"
 #include "fuzzer.h"
-
-#include "Zend/zend.h"
-#include "main/php_config.h"
-#include "main/php_main.h"
-#include "oniguruma.h"
-
-#include <stdio.h>
-#include <stdint.h>
-#include <stdlib.h>
-
 #include "fuzzer-sapi.h"
+#include "ext/mbstring/mbstring.h"
 
+/*
+ * libFuzzer entry point for the mbstring encoding converter.
+ *
+ * The input is read as "<to-encoding>,<from-encoding>,<payload>", split at
+ * the first two commas. Inputs with fewer than two commas, or whose names
+ * mbfl_name2encoding() does not resolve, are skipped. Otherwise the payload
+ * is passed to php_mb_convert_encoding_ex() between fuzzer_request_startup()
+ * and fuzzer_request_shutdown(), and the result is freed. Always returns 0.
+ */
 int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) {
-#ifdef HAVE_MBREGEX
-	char *args[2];
-	char *data = malloc(Size+1);
-	memcpy(data, Data, Size);
-	data[Size] = '\0';
-
-	if (fuzzer_request_startup() == FAILURE) {
+	const uint8_t *Comma1 = memchr(Data, ',', Size);
+	if (!Comma1) {
 		return 0;
 	}
 
-	fuzzer_setup_dummy_frame();
+	size_t ToEncodingNameLen = Comma1 - Data;
+	char *ToEncodingName = estrndup((char *) Data, ToEncodingNameLen);
+	Data = Comma1 + 1;
+	Size -= ToEncodingNameLen + 1;
 
-	args[0] = data;
-	args[1] = "test123";
-	fuzzer_call_php_func("mb_ereg", 2, args);
+	const uint8_t *Comma2 = memchr(Data, ',', Size);
+	if (!Comma2) {
+		efree(ToEncodingName);
+		return 0;
+	}
 
-	args[0] = data;
-	args[1] = "test123";
-	fuzzer_call_php_func("mb_eregi", 2, args);
+	size_t FromEncodingNameLen = Comma2 - Data;
+	char *FromEncodingName = estrndup((char *) Data, FromEncodingNameLen);
+	Data = Comma2 + 1;
+	Size -= FromEncodingNameLen + 1;
 
-	args[0] = data;
-	args[1] = data;
-	fuzzer_call_php_func("mb_ereg", 2, args);
+	const mbfl_encoding *ToEncoding = mbfl_name2encoding(ToEncodingName);
+	const mbfl_encoding *FromEncoding = mbfl_name2encoding(FromEncodingName);
 
-	args[0] = data;
-	args[1] = data;
-	fuzzer_call_php_func("mb_eregi", 2, args);
+	if (!ToEncoding || !FromEncoding || fuzzer_request_startup() == FAILURE) {
+		efree(ToEncodingName);
+		efree(FromEncodingName);
+		return 0;
+	}
+
+	char *Result = php_mb_convert_encoding_ex((char *) Data, Size, ToEncoding, FromEncoding, NULL);
+	/* NOTE(review): presumably NULL when the conversion fails - confirm; only free a real buffer. */
+	if (Result) {
+		efree(Result);
+	}
+	efree(ToEncodingName);
+	efree(FromEncodingName);
 
 	fuzzer_request_shutdown();
-
-	free(data);
-#else
-	fprintf(stderr, "\n\nERROR:\nPHP built without mbstring, recompile with --enable-mbstring to use this fuzzer\n");
-	exit(1);
-#endif
 	return 0;
 }
 
 int LLVMFuzzerInitialize(int *argc, char ***argv) {
 	fuzzer_init_php(NULL);
-
-	/* The default parse depth limit allows stack overflows under asan. */
-	onig_set_parse_depth_limit(512);
-
-	/* fuzzer_shutdown_php(); */
 	return 0;
 }
diff --git a/sapi/fuzzer/generate_all.php b/sapi/fuzzer/generate_all.php
index 7a81bc94244..5794bdacc6b 100644
--- a/sapi/fuzzer/generate_all.php
+++ b/sapi/fuzzer/generate_all.php
@@ -1,5 +1,6 @@