merge revision(s) 3a04ea2d03: [Backport #20305]

[Bug #20305] Fix matching against an incomplete character

	When matching against an incomplete character, some `enclen` calls are
expected not to exceed the limit, while others are expected to return the
required length, after which the result is checked against the limit.
This commit is contained in:
Takashi Kokubun 2024-05-29 15:11:56 -07:00
parent 6e46a363a8
commit 72a45ac7a3
2 changed files with 26 additions and 13 deletions

View file

@ -1943,6 +1943,19 @@ static int string_cmp_ic(OnigEncoding enc, int case_fold_flag,
# define ABSENT_END_POS end
#endif /* USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE */
int onigenc_mbclen_approximate(const OnigUChar* p,const OnigUChar* e, const struct OnigEncodingTypeST* enc);
/*
 * Byte length of the character starting at p, with e as the end of input.
 *
 * When the encoding's min_enc_len and max_enc_len are equal, the answer is
 * that fixed width if p is before e, and 0 otherwise.  For every other
 * encoding the computation is delegated to onigenc_mbclen_approximate().
 */
static inline int
enclen_approx(OnigEncoding enc, const OnigUChar* p, const OnigUChar* e)
{
  /* Variable-width encoding: let the encoding-aware helper decide. */
  if (enc->max_enc_len != enc->min_enc_len) {
    return onigenc_mbclen_approximate(p, e, enc);
  }

  /* Fixed-width encoding with no input left at p. */
  if (p >= e) {
    return 0;
  }

  return enc->min_enc_len;
}
#ifdef USE_CAPTURE_HISTORY
static int
@ -2923,7 +2936,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
int mb_len;
DATA_ENSURE(1);
mb_len = enclen(encode, s, end);
mb_len = enclen_approx(encode, s, end);
DATA_ENSURE(mb_len);
ss = s;
s += mb_len;
@ -3028,7 +3041,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
CASE(OP_ANYCHAR) MOP_IN(OP_ANYCHAR);
DATA_ENSURE(1);
n = enclen(encode, s, end);
n = enclen_approx(encode, s, end);
DATA_ENSURE(n);
if (ONIGENC_IS_MBC_NEWLINE_EX(encode, s, str, end, option, 0)) goto fail;
s += n;
@ -3037,7 +3050,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
CASE(OP_ANYCHAR_ML) MOP_IN(OP_ANYCHAR_ML);
DATA_ENSURE(1);
n = enclen(encode, s, end);
n = enclen_approx(encode, s, end);
DATA_ENSURE(n);
s += n;
MOP_OUT;
@ -3047,7 +3060,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
while (DATA_ENSURE_CHECK1) {
CHECK_MATCH_CACHE;
STACK_PUSH_ALT(p, s, sprev, pkeep);
n = enclen(encode, s, end);
n = enclen_approx(encode, s, end);
DATA_ENSURE(n);
if (ONIGENC_IS_MBC_NEWLINE_EX(encode, s, str, end, option, 0)) goto fail;
sprev = s;
@ -3060,7 +3073,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
while (DATA_ENSURE_CHECK1) {
CHECK_MATCH_CACHE;
STACK_PUSH_ALT(p, s, sprev, pkeep);
n = enclen(encode, s, end);
n = enclen_approx(encode, s, end);
if (n > 1) {
DATA_ENSURE(n);
sprev = s;
@ -3086,7 +3099,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
msa->num_fails++;
#endif
}
n = enclen(encode, s, end);
n = enclen_approx(encode, s, end);
DATA_ENSURE(n);
if (ONIGENC_IS_MBC_NEWLINE_EX(encode, s, str, end, option, 0)) goto fail;
sprev = s;
@ -3108,7 +3121,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
msa->num_fails++;
#endif
}
n = enclen(encode, s, end);
n = enclen_approx(encode, s, end);
if (n > 1) {
DATA_ENSURE(n);
sprev = s;
@ -3131,7 +3144,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
if (scv) goto fail;
STACK_PUSH_ALT_WITH_STATE_CHECK(p, s, sprev, mem, pkeep);
n = enclen(encode, s, end);
n = enclen_approx(encode, s, end);
DATA_ENSURE(n);
if (ONIGENC_IS_MBC_NEWLINE_EX(encode, s, str, end, option, 0)) goto fail;
sprev = s;
@ -3149,7 +3162,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
if (scv) goto fail;
STACK_PUSH_ALT_WITH_STATE_CHECK(p, s, sprev, mem, pkeep);
n = enclen(encode, s, end);
n = enclen_approx(encode, s, end);
if (n > 1) {
DATA_ENSURE(n);
sprev = s;
@ -3491,7 +3504,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
DATA_ENSURE(n);
sprev = s;
STRING_CMP(pstart, s, n);
while (sprev + (len = enclen(encode, sprev, end)) < s)
while (sprev + (len = enclen_approx(encode, sprev, end)) < s)
sprev += len;
MOP_OUT;
@ -3522,7 +3535,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
DATA_ENSURE(n);
sprev = s;
STRING_CMP_IC(case_fold_flag, pstart, &s, (int)n, end);
while (sprev + (len = enclen(encode, sprev, end)) < s)
while (sprev + (len = enclen_approx(encode, sprev, end)) < s)
sprev += len;
MOP_OUT;
@ -3557,7 +3570,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
STRING_CMP_VALUE(pstart, swork, n, is_fail);
if (is_fail) continue;
s = swork;
while (sprev + (len = enclen(encode, sprev, end)) < s)
while (sprev + (len = enclen_approx(encode, sprev, end)) < s)
sprev += len;
p += (SIZE_MEMNUM * (tlen - i - 1));