Add fuzzer for mb_convert_encoding

This uses the php-fuzz-mbstring name, moving the existing fuzzer
to php-fuzz-mbregex.
This commit is contained in:
Nikita Popov 2022-05-08 18:51:41 +02:00
parent be9adc4fbc
commit 1584352e19
6 changed files with 124 additions and 43 deletions

View file

@ -28,3 +28,6 @@ $(SAPI_FUZZER_PATH)/php-fuzz-exif: $(PHP_GLOBAL_OBJS) $(PHP_SAPI_OBJS) $(PHP_FUZ
$(SAPI_FUZZER_PATH)/php-fuzz-mbstring: $(PHP_GLOBAL_OBJS) $(PHP_SAPI_OBJS) $(PHP_FUZZER_MBSTRING_OBJS)
$(FUZZER_BUILD) $(PHP_FUZZER_MBSTRING_OBJS) -o $@
$(SAPI_FUZZER_PATH)/php-fuzz-mbregex: $(PHP_GLOBAL_OBJS) $(PHP_SAPI_OBJS) $(PHP_FUZZER_MBREGEX_OBJS)
$(FUZZER_BUILD) $(PHP_FUZZER_MBREGEX_OBJS) -o $@

View file

@ -28,7 +28,8 @@ When running `make` it creates these binaries in `sapi/fuzzer/`:
* `php-fuzz-unserializehash`: Fuzzing unserialize() for HashContext objects
* `php-fuzz-json`: Fuzzing JSON parser (requires --enable-json)
* `php-fuzz-exif`: Fuzzing `exif_read_data()` function (requires --enable-exif)
* `php-fuzz-mbstring`: Fuzzing `mb_ereg[i]()` (requires --enable-mbstring)
* `php-fuzz-mbstring`: Fuzzing `mb_convert_encoding()` (requires `--enable-mbstring`)
* `php-fuzz-mbregex`: Fuzzing `mb_ereg[i]()` (requires --enable-mbstring with mbregex support enabled)
* `php-fuzz-execute`: Fuzzing the executor
* `php-fuzz-function-jit`: Fuzzing the function JIT (requires --enable-opcache)
* `php-fuzz-tracing-jit`: Fuzzing the tracing JIT (requires --enable-opcache)
@ -72,7 +73,14 @@ sapi/cli/php sapi/fuzzer/generate_execute_corpus.php ./execute-corpus Zend/tests
sapi/fuzzer/php-fuzz-function-jit ./execute-corpus
```
For the mbstring fuzzer, you may want to build the libonig dependency with instrumentation. At this time, libonig is not clean under ubsan, so only the fuzzer and address sanitizers may be used.
For the mbstring fuzzer, a dictionary of encodings should be generated first:
```sh
sapi/cli/php sapi/fuzzer/generate_mbstring_dict.php
sapi/fuzzer/php-fuzz-mbstring -dict=$PWD/sapi/fuzzer/dict/mbstring ./my-mbstring-corpus
```
For the mbregex fuzzer, you may want to build the libonig dependency with instrumentation. At this time, libonig is not clean under ubsan, so only the fuzzer and address sanitizers may be used.
```sh
git clone https://github.com/kkos/oniguruma.git

View file

@ -65,6 +65,9 @@ if test "$PHP_FUZZER" != "no"; then
fi
if test -n "$enable_mbstring" && test "$enable_mbstring" != "no"; then
PHP_FUZZER_TARGET([mbstring], PHP_FUZZER_MBSTRING_OBJS)
if test -n "$enable_mbregex" && test "$enable_mbregex" != "no"; then
PHP_FUZZER_TARGET([mbregex], PHP_FUZZER_MBREGEX_OBJS)
fi
fi
PHP_SUBST(PHP_FUZZER_BINARIES)

View file

@ -0,0 +1,78 @@
/*
+----------------------------------------------------------------------+
| Copyright (c) The PHP Group |
+----------------------------------------------------------------------+
| This source file is subject to version 3.01 of the PHP license, |
| that is bundled with this package in the file LICENSE, and is |
| available through the world-wide-web at the following url: |
| https://www.php.net/license/3_01.txt |
| If you did not receive a copy of the PHP license and are unable to |
| obtain it through the world-wide-web, please send a note to |
| license@php.net so we can mail you a copy immediately. |
+----------------------------------------------------------------------+
| Authors: Stanislav Malyshev <stas@php.net> |
+----------------------------------------------------------------------+
*/
#include "fuzzer.h"
#include "Zend/zend.h"
#include "main/php_config.h"
#include "main/php_main.h"
#include "oniguruma.h"
#include <stdio.h>
#include <stdint.h>
#include <stdlib.h>
#include "fuzzer-sapi.h"
/*
 * libFuzzer entry point: feeds one fuzz input to mb_ereg() and mb_eregi().
 *
 * The input bytes are copied into a NUL-terminated heap buffer, which is then
 * passed through fuzzer_call_php_func() four times: as the first argument of
 * mb_ereg() and mb_eregi() with the fixed second argument "test123", and then
 * as both arguments of each function.
 *
 * Data: raw fuzz input (not NUL-terminated). Size: its length in bytes.
 *
 * Always returns 0. The input is skipped when the buffer cannot be allocated
 * or when fuzzer_request_startup() reports FAILURE. When HAVE_MBREGEX is not
 * defined, an error is printed to stderr and the process exits with status 1.
 */
int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) {
#ifdef HAVE_MBREGEX
	char *args[2];
	char *data = malloc(Size + 1);

	if (data == NULL) {
		/* Out of memory: skip this input rather than write through NULL. */
		return 0;
	}
	if (Size != 0) {
		/* memcpy() with a null source is undefined even for zero bytes. */
		memcpy(data, Data, Size);
	}
	data[Size] = '\0';

	if (fuzzer_request_startup() == FAILURE) {
		/* Free the copy before bailing out so a failed startup does not leak it. */
		free(data);
		return 0;
	}

	fuzzer_setup_dummy_frame();

	/* Fuzz input as first argument, fixed second argument. */
	args[0] = data;
	args[1] = "test123";
	fuzzer_call_php_func("mb_ereg", 2, args);

	args[0] = data;
	args[1] = "test123";
	fuzzer_call_php_func("mb_eregi", 2, args);

	/* Fuzz input as both arguments. */
	args[0] = data;
	args[1] = data;
	fuzzer_call_php_func("mb_ereg", 2, args);

	args[0] = data;
	args[1] = data;
	fuzzer_call_php_func("mb_eregi", 2, args);

	fuzzer_request_shutdown();

	free(data);
#else
	fprintf(stderr, "\n\nERROR:\nPHP built without mbstring, recompile with --enable-mbstring to use this fuzzer\n");
	exit(1);
#endif
	return 0;
}
/*
 * One-time libFuzzer setup hook for the mbregex fuzzer.
 *
 * Calls fuzzer_init_php(NULL) and then sets oniguruma's parse depth limit to
 * 512. Neither argc nor argv is read. Always returns 0.
 */
int LLVMFuzzerInitialize(int *argc, char ***argv) {
	/* The default parse depth limit allows stack overflows under asan. */
	enum { FUZZ_PARSE_DEPTH_LIMIT = 512 };

	(void) argc;
	(void) argv;

	fuzzer_init_php(NULL);
	onig_set_parse_depth_limit(FUZZ_PARSE_DEPTH_LIMIT);

	/* No matching fuzzer_shutdown_php() call is made here. */
	return 0;
}

View file

@ -15,64 +15,52 @@
*/
#include "zend.h"
#include "fuzzer.h"
#include "Zend/zend.h"
#include "main/php_config.h"
#include "main/php_main.h"
#include "oniguruma.h"
#include <stdio.h>
#include <stdint.h>
#include <stdlib.h>
#include "fuzzer-sapi.h"
#include "ext/mbstring/mbstring.h"
/*
 * libFuzzer entry point of the mbstring fuzzer.
 *
 * NOTE(review): this span looks like a rendered diff in which statements of
 * the previous mb_ereg()/mb_eregi() body (args[], data, fuzzer_call_php_func)
 * and statements of the new encoding-conversion body are interleaved without
 * +/- markers. Do not read it as one compilable function body -- confirm
 * against the actual file.
 *
 * The conversion statements treat the input as three comma-separated parts:
 * the bytes before the first ',' are used as the target encoding name, the
 * bytes up to the next ',' as the source encoding name, and the remaining
 * bytes are handed to php_mb_convert_encoding_ex(). Inputs with fewer than
 * two commas return 0 early.
 */
int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) {
#ifdef HAVE_MBREGEX
char *args[2];
char *data = malloc(Size+1);
memcpy(data, Data, Size);
data[Size] = '\0';
if (fuzzer_request_startup() == FAILURE) {
/* Locate the first ','; everything before it becomes ToEncodingName. */
const uint8_t *Comma1 = memchr(Data, ',', Size);
if (!Comma1) {
return 0;
}
fuzzer_setup_dummy_frame();
size_t ToEncodingNameLen = Comma1 - Data;
char *ToEncodingName = estrndup((char *) Data, ToEncodingNameLen);
/* Advance past the name and its ',' separator. */
Data = Comma1 + 1;
Size -= ToEncodingNameLen + 1;
args[0] = data;
args[1] = "test123";
fuzzer_call_php_func("mb_ereg", 2, args);
/* Locate the next ','; everything before it becomes FromEncodingName. */
const uint8_t *Comma2 = memchr(Data, ',', Size);
if (!Comma2) {
efree(ToEncodingName);
return 0;
}
args[0] = data;
args[1] = "test123";
fuzzer_call_php_func("mb_eregi", 2, args);
size_t FromEncodingNameLen = Comma2 - Data;
char *FromEncodingName = estrndup((char *) Data, FromEncodingNameLen);
/* Advance past the second name; Data/Size now describe the remaining bytes. */
Data = Comma2 + 1;
Size -= FromEncodingNameLen + 1;
args[0] = data;
args[1] = data;
fuzzer_call_php_func("mb_ereg", 2, args);
/* Look up both encoding names; the check below bails out on a null result. */
const mbfl_encoding *ToEncoding = mbfl_name2encoding(ToEncodingName);
const mbfl_encoding *FromEncoding = mbfl_name2encoding(FromEncodingName);
args[0] = data;
args[1] = data;
fuzzer_call_php_func("mb_eregi", 2, args);
/* Skip the input if either lookup failed or the request cannot be started. */
if (!ToEncoding || !FromEncoding || fuzzer_request_startup() == FAILURE) {
efree(ToEncodingName);
efree(FromEncodingName);
return 0;
}
/* Hand the remaining bytes and both looked-up encodings to the converter. */
/* NOTE(review): Result is freed unconditionally -- confirm that
 * php_mb_convert_encoding_ex() cannot return NULL here. */
char *Result = php_mb_convert_encoding_ex((char *) Data, Size, ToEncoding, FromEncoding, NULL);
efree(Result);
efree(ToEncodingName);
efree(FromEncodingName);
fuzzer_request_shutdown();
free(data);
#else
fprintf(stderr, "\n\nERROR:\nPHP built without mbstring, recompile with --enable-mbstring to use this fuzzer\n");
exit(1);
#endif
return 0;
}
/*
 * One-time libFuzzer setup hook of the mbstring fuzzer: calls
 * fuzzer_init_php(NULL) and returns 0. argc and argv are not read.
 *
 * NOTE(review): this function sits inside a rendered diff without +/-
 * markers, so the oniguruma parse-depth lines below may be lines the commit
 * removed -- confirm against the actual file.
 */
int LLVMFuzzerInitialize(int *argc, char ***argv) {
fuzzer_init_php(NULL);
/* The default parse depth limit allows stack overflows under asan. */
onig_set_parse_depth_limit(512);
/* fuzzer_shutdown_php(); */
return 0;
}

View file

@ -1,5 +1,6 @@
<?php
require __DIR__ . '/generate_unserialize_dict.php';
require __DIR__ . '/generate_mbstring_dict.php';
require __DIR__ . '/generate_unserializehash_corpus.php';
require __DIR__ . '/generate_parser_corpus.php';