Port Oniguruma patches to prevent integer overflow (#14027)

* Port an Oniguruma patch: Integer overflow in forward_search_range()

db64ef3189

Co-Authored-By: K.Kosako <kkos@users.noreply.github.com>

* Port an Oniguruma patch: Integer overflow in backward_search_range() and onig_search_gpos()

bfc36d3d81

Co-Authored-By: K.Kosako <kkos@users.noreply.github.com>

* Port an Oniguruma patch: Integer overflow in onig_search_gpos()

778a43dd56

It differs from the Oniguruma patch in that it doesn't use `onigenc_get_prev_char_head()`,
because Oniguruma has changed this function's signature and that change has not yet been ported
to Onigmo. This patch respects the current Onigmo implementation.

Co-Authored-By: K.Kosako <kkos@users.noreply.github.com>

* Add casts to prevent warnings

* Correct casts to use OnigDistance

---------

Co-authored-by: K.Kosako <kkos@users.noreply.github.com>
This commit is contained in:
Hiroya Fujinami 2025-07-30 20:56:13 +09:00 committed by GitHub
parent cbe5241c77
commit 66bcb69054
No known key found for this signature in database
GPG key ID: B5690EEEBB952194

View file

@ -4959,14 +4959,14 @@ forward_search_range(regex_t* reg, const UChar* str, const UChar* end, UChar* s,
} }
p = s; p = s;
if (reg->dmin > 0) { if (reg->dmin != 0) {
if ((OnigDistance)(end - p) <= reg->dmin) return 0; /* fail */
if (ONIGENC_IS_SINGLEBYTE(reg->enc)) { if (ONIGENC_IS_SINGLEBYTE(reg->enc)) {
p += reg->dmin; p += reg->dmin;
} }
else { else {
UChar *q = p + reg->dmin; UChar *q = p + reg->dmin;
if (q >= end) return 0; /* fail */
while (p < q) p += enclen(reg->enc, p, end); while (p < q) p += enclen(reg->enc, p, end);
} }
} }
@ -5003,7 +5003,7 @@ forward_search_range(regex_t* reg, const UChar* str, const UChar* end, UChar* s,
} }
if (p && p < range) { if (p && p < range) {
if (p - reg->dmin < s) { if ((OnigDistance)(p - s) < reg->dmin) {
retry_gate: retry_gate:
pprev = p; pprev = p;
p += enclen(reg->enc, p, end); p += enclen(reg->enc, p, end);
@ -5047,10 +5047,11 @@ forward_search_range(regex_t* reg, const UChar* str, const UChar* end, UChar* s,
*low_prev = onigenc_get_prev_char_head(reg->enc, *low_prev = onigenc_get_prev_char_head(reg->enc,
(pprev ? pprev : str), p, end); (pprev ? pprev : str), p, end);
} }
*high = p;
} }
else { else {
if (reg->dmax != ONIG_INFINITE_DISTANCE) { if (reg->dmax != ONIG_INFINITE_DISTANCE) {
if (p < str + reg->dmax) { if ((OnigDistance)(p - str) < reg->dmax) {
*low = (UChar* )str; *low = (UChar* )str;
if (low_prev) if (low_prev)
*low_prev = onigenc_get_prev_char_head(reg->enc, str, *low, end); *low_prev = onigenc_get_prev_char_head(reg->enc, str, *low, end);
@ -5071,9 +5072,12 @@ forward_search_range(regex_t* reg, const UChar* str, const UChar* end, UChar* s,
} }
} }
} }
}
/* no needs to adjust *high, *high is used as range check only */ /* no needs to adjust *high, *high is used as range check only */
if ((OnigDistance)(p - str) < reg->dmin)
*high = (UChar* )str;
else
*high = p - reg->dmin; *high = p - reg->dmin;
}
#ifdef ONIG_DEBUG_SEARCH #ifdef ONIG_DEBUG_SEARCH
fprintf(stderr, fprintf(stderr,
@ -5100,7 +5104,6 @@ backward_search_range(regex_t* reg, const UChar* str, const UChar* end,
return 0; return 0;
} }
range += reg->dmin;
p = s; p = s;
retry: retry:
@ -5178,10 +5181,22 @@ backward_search_range(regex_t* reg, const UChar* str, const UChar* end,
} }
} }
/* no needs to adjust *high, *high is used as range check only */
if (reg->dmax != ONIG_INFINITE_DISTANCE) { if (reg->dmax != ONIG_INFINITE_DISTANCE) {
if ((OnigDistance)(p - str) < reg->dmax)
*low = (UChar* )str;
else
*low = p - reg->dmax; *low = p - reg->dmax;
if (reg->dmin != 0) {
if ((OnigDistance)(p - str) < reg->dmin)
*high = (UChar* )str;
else
*high = p - reg->dmin; *high = p - reg->dmin;
}
else {
*high = p;
}
*high = onigenc_get_right_adjust_char_head(reg->enc, adjrange, *high, end); *high = onigenc_get_right_adjust_char_head(reg->enc, adjrange, *high, end);
} }
@ -5340,16 +5355,19 @@ onig_search_gpos(regex_t* reg, const UChar* str, const UChar* end,
min_semi_end = max_semi_end = (UChar* )end; min_semi_end = max_semi_end = (UChar* )end;
end_buf: end_buf:
if ((OnigDistance )(max_semi_end - str) < reg->anchor_dmin) if ((OnigDistance)(max_semi_end - str) < reg->anchor_dmin)
goto mismatch_no_msa; goto mismatch_no_msa;
if (range > start) { if (range > start) {
if ((OnigDistance )(min_semi_end - start) > reg->anchor_dmax) { if ((OnigDistance)(min_semi_end - start) > reg->anchor_dmax) {
start = min_semi_end - reg->anchor_dmax; start = min_semi_end - reg->anchor_dmax;
if (start < end) if (start < end)
start = onigenc_get_right_adjust_char_head(reg->enc, str, start, end); start = onigenc_get_right_adjust_char_head(reg->enc, str, start, end);
} }
if ((OnigDistance )(max_semi_end - (range - 1)) < reg->anchor_dmin) { if ((OnigDistance)(max_semi_end - (range - 1)) < reg->anchor_dmin) {
if ((OnigDistance)(max_semi_end - str + 1) < reg->anchor_dmin)
goto mismatch_no_msa;
else
range = max_semi_end - reg->anchor_dmin + 1; range = max_semi_end - reg->anchor_dmin + 1;
} }
@ -5358,13 +5376,17 @@ onig_search_gpos(regex_t* reg, const UChar* str, const UChar* end,
Backward search is used. */ Backward search is used. */
} }
else { else {
if ((OnigDistance )(min_semi_end - range) > reg->anchor_dmax) { if ((OnigDistance)(min_semi_end - range) > reg->anchor_dmax) {
range = min_semi_end - reg->anchor_dmax; range = min_semi_end - reg->anchor_dmax;
} }
if ((OnigDistance )(max_semi_end - start) < reg->anchor_dmin) { if ((OnigDistance)(max_semi_end - start) < reg->anchor_dmin) {
if ((OnigDistance)(max_semi_end - str) < reg->anchor_dmin)
goto mismatch_no_msa;
else {
start = max_semi_end - reg->anchor_dmin; start = max_semi_end - reg->anchor_dmin;
start = ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg->enc, str, start, end); start = ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg->enc, str, start, end);
} }
}
if (range > start) goto mismatch_no_msa; if (range > start) goto mismatch_no_msa;
} }
} }
@ -5442,15 +5464,19 @@ onig_search_gpos(regex_t* reg, const UChar* str, const UChar* end,
if (reg->optimize != ONIG_OPTIMIZE_NONE) { if (reg->optimize != ONIG_OPTIMIZE_NONE) {
UChar *sch_range, *low, *high, *low_prev; UChar *sch_range, *low, *high, *low_prev;
sch_range = (UChar* )range;
if (reg->dmax != 0) { if (reg->dmax != 0) {
if (reg->dmax == ONIG_INFINITE_DISTANCE) if (reg->dmax == ONIG_INFINITE_DISTANCE)
sch_range = (UChar* )end; sch_range = (UChar* )end;
else { else {
sch_range += reg->dmax; if ((OnigDistance)(end - range) < reg->dmax)
if (sch_range > end) sch_range = (UChar* )end; sch_range = (UChar* )end;
else {
sch_range = (UChar* )range + reg->dmax;
} }
} }
}
else
sch_range = (UChar* )range;
if ((end - start) < reg->threshold_len) if ((end - start) < reg->threshold_len)
goto mismatch; goto mismatch;
@ -5507,18 +5533,27 @@ onig_search_gpos(regex_t* reg, const UChar* str, const UChar* end,
else { /* backward search */ else { /* backward search */
if (reg->optimize != ONIG_OPTIMIZE_NONE) { if (reg->optimize != ONIG_OPTIMIZE_NONE) {
UChar *low, *high, *adjrange, *sch_start; UChar *low, *high, *adjrange, *sch_start;
const UChar *min_range;
if (range < end) if (range < end)
adjrange = ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg->enc, str, range, end); adjrange = ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg->enc, str, range, end);
else else
adjrange = (UChar* )end; adjrange = (UChar* )end;
if ((OnigDistance)(end - range) > reg->dmin)
min_range = range + reg->dmin;
else
min_range = end;
if (reg->dmax != ONIG_INFINITE_DISTANCE && if (reg->dmax != ONIG_INFINITE_DISTANCE &&
(end - range) >= reg->threshold_len) { end - range >= reg->threshold_len) {
do { do {
if ((OnigDistance)(end - s) > reg->dmax)
sch_start = s + reg->dmax; sch_start = s + reg->dmax;
if (sch_start > end) sch_start = (UChar* )end; else
if (backward_search_range(reg, str, end, sch_start, range, adjrange, sch_start = (UChar* )end;
if (backward_search_range(reg, str, end, sch_start, min_range, adjrange,
&low, &high) <= 0) &low, &high) <= 0)
goto mismatch; goto mismatch;
@ -5534,21 +5569,24 @@ onig_search_gpos(regex_t* reg, const UChar* str, const UChar* end,
goto mismatch; goto mismatch;
} }
else { /* check only. */ else { /* check only. */
if ((end - range) < reg->threshold_len) goto mismatch; if (end - range < reg->threshold_len) goto mismatch;
sch_start = s;
if (reg->dmax != 0) { if (reg->dmax != 0) {
if (reg->dmax == ONIG_INFINITE_DISTANCE) if (reg->dmax == ONIG_INFINITE_DISTANCE)
sch_start = (UChar* )end; sch_start = (UChar* )end;
else { else {
sch_start += reg->dmax; if ((OnigDistance)(end - s) > reg->dmax) {
if (sch_start > end) sch_start = (UChar* )end; sch_start = s + reg->dmax;
else
sch_start = ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg->enc, sch_start = ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg->enc,
start, sch_start, end); start, sch_start, end);
} else
sch_start = (UChar* )end;
} }
} }
if (backward_search_range(reg, str, end, sch_start, range, adjrange, else
sch_start = (UChar* )s;
if (backward_search_range(reg, str, end, sch_start, min_range, adjrange,
&low, &high) <= 0) goto mismatch; &low, &high) <= 0) goto mismatch;
} }
} }