/*
 * Patch overview: git unified diff touching two files under
 * ext/mbstring/libmbfl/filters/ -- mbfilter_cp1252.c and
 * unicode_table_cp1252.h.
 *
 * NOTE(review): the hunk headers and hunk bodies below share physical lines,
 * so the original line breaks appear to have been lost; the newlines probably
 * have to be restored before this can be applied -- confirm against the
 * upstream commit.
 *
 * unicode_table_cp1252.h
 *   Entries 1, 13, 15, 16 and 29 of cp1252_ucs_table[] (bytes 0x81, 0x8D,
 *   0x8F, 0x90 and 0x9D) change from 0xfffd to 0x0000. The code changes in
 *   mbfilter_cp1252.c test for a zero entry to detect "no mapping"
 *   (presumably these are the byte values CP1252 leaves unassigned -- verify
 *   against the code page definition).
 *
 * mbfl_filt_conv_wchar_cp1252 (wchar to cp1252)
 *   - The table scan for c >= 0x100 becomes a for loop with a loop-scoped
 *     index, and the extra `c != 0xfffe` test is dropped. A zero table entry
 *     cannot match because the scan only runs when c >= 0x100.
 *   - The fallback that unwrapped values tagged with MBFL_WCSPLANE_8859_1 is
 *     removed.
 *   - The pass-through branch narrows from `c >= 0 && c < 0x100` to
 *     `c <= 0x7F || c >= 0xA0` (still reached only when c < 0x100), so
 *     0x80..0x9F are no longer copied to the output unchanged. A negative c
 *     satisfies `c <= 0x7F` and leaves s negative, which selects the else
 *     branch of `if (s >= 0)`; that branch's body lies outside the hunk.
 *
 * mbfl_filt_conv_cp1252_wchar (cp1252 to wchar)
 *   - For bytes 0x80..0x9F whose table entry is zero, the value passed on is
 *     now `c | MBFL_WCSGROUP_THROUGH` instead of the table entry.
 *
 * mbfl_filt_ident_cp1252
 *   - Before: flag was set to 0 for bytes 0x80..0x9F and to 1 for every other
 *     byte. After: flag is set to 1 only for bytes 0x80..0x9F whose table
 *     entry is zero, and is left untouched for all other input.
 */
diff --git a/ext/mbstring/libmbfl/filters/mbfilter_cp1252.c b/ext/mbstring/libmbfl/filters/mbfilter_cp1252.c index 1f6717d2ac6..af5a879e166 100644 --- a/ext/mbstring/libmbfl/filters/mbfilter_cp1252.c +++ b/ext/mbstring/libmbfl/filters/mbfilter_cp1252.c @@ -74,32 +74,22 @@ const struct mbfl_convert_vtbl vtbl_wchar_cp1252 = { #define CK(statement) do { if ((statement) < 0) return (-1); } while (0) -/* - * wchar => cp1252 - */ int mbfl_filt_conv_wchar_cp1252(int c, mbfl_convert_filter *filter) { - int s=-1, n; + int s = -1; if (c >= 0x100) { - /* look it up from the cp1252 table */ - s = -1; - n = 31; - while (n >= 0) { - if (c == cp1252_ucs_table[n] && c != 0xfffe) { + /* Look it up from the CP1252 table */ + for (int n = 31; n >= 0; n--) { + if (c == cp1252_ucs_table[n]) { s = 0x80 + n; break; } - n--; } - if (s <= 0 && (c & ~MBFL_WCSPLANE_MASK) == MBFL_WCSPLANE_8859_1) - { - s = c & MBFL_WCSPLANE_MASK; - } - } - else if (c >= 0 && c < 0x100) { + } else if (c <= 0x7F || c >= 0xA0) { s = c; } + if (s >= 0) { CK((*filter->output_function)(s, filter->data)); } else { @@ -108,15 +98,15 @@ int mbfl_filt_conv_wchar_cp1252(int c, mbfl_convert_filter *filter) return c; } -/* - * cp1252 => wchar - */ int mbfl_filt_conv_cp1252_wchar(int c, mbfl_convert_filter *filter) { int s; - if (c >= 0x80 && c < 0xa0) { + if (c >= 0x80 && c < 0xA0) { s = cp1252_ucs_table[c - 0x80]; + if (!s) { + s = c | MBFL_WCSGROUP_THROUGH; + } } else { s = c; } @@ -126,17 +116,10 @@ int mbfl_filt_conv_cp1252_wchar(int c, mbfl_convert_filter *filter) return c; } -/* We only distinguish the MS extensions to ISO-8859-1. - * Actually, this is pretty much a NO-OP, since the identification - * system doesn't allow us to discriminate between a positive match, - * a possible match and a definite non-match. - * The problem here is that cp1252 looks like SJIS for certain chars. 
- * */ static int mbfl_filt_ident_cp1252(int c, mbfl_identify_filter *filter) { - if (c >= 0x80 && c < 0xa0) - filter->flag = 0; - else - filter->flag = 1; /* not it */ + if (c >= 0x80 && c < 0xA0 && !cp1252_ucs_table[c - 0x80]) { + filter->flag = 1; + } return c; } diff --git a/ext/mbstring/libmbfl/filters/unicode_table_cp1252.h b/ext/mbstring/libmbfl/filters/unicode_table_cp1252.h index 92d20893cd6..9eb813885b3 100644 --- a/ext/mbstring/libmbfl/filters/unicode_table_cp1252.h +++ b/ext/mbstring/libmbfl/filters/unicode_table_cp1252.h @@ -32,9 +32,9 @@ * as it only covers this range, while the rest cover 0xa0 onwards */ static const unsigned short cp1252_ucs_table[] = { - 0x20ac,0xfffd,0x201a,0x0192,0x201e,0x2026,0x2020,0x2021, - 0x02c6,0x2030,0x0160,0x2039,0x0152,0xfffd,0x017d,0xfffd, - 0xfffd,0x2018,0x2019,0x201c,0x201d,0x2022,0x2013,0x2014, - 0x02dc,0x2122,0x0161,0x203a,0x0153,0xfffd,0x017e,0x0178 + 0x20ac,0x0000,0x201a,0x0192,0x201e,0x2026,0x2020,0x2021, + 0x02c6,0x2030,0x0160,0x2039,0x0152,0x0000,0x017d,0x0000, + 0x0000,0x2018,0x2019,0x201c,0x201d,0x2022,0x2013,0x2014, + 0x02dc,0x2122,0x0161,0x203a,0x0153,0x0000,0x017e,0x0178 }; #endif /* UNICODE_TABLE_CP1252_H */