From 7c2587b1f6bc86a74b80cb8e80a98a99eba8266e Mon Sep 17 00:00:00 2001 From: Alex Dowad Date: Sat, 23 Apr 2022 20:56:52 +0200 Subject: [PATCH] Implement fast text conversion interface for UUENCODE --- .../libmbfl/filters/mbfilter_uuencode.c | 163 +++++++++++++++++- ext/mbstring/tests/uuencode_encoding.phpt | 41 +++++ 2 files changed, 195 insertions(+), 9 deletions(-) create mode 100644 ext/mbstring/tests/uuencode_encoding.phpt diff --git a/ext/mbstring/libmbfl/filters/mbfilter_uuencode.c b/ext/mbstring/libmbfl/filters/mbfilter_uuencode.c index 9571131aad5..e2fef079c59 100644 --- a/ext/mbstring/libmbfl/filters/mbfilter_uuencode.c +++ b/ext/mbstring/libmbfl/filters/mbfilter_uuencode.c @@ -30,6 +30,9 @@ #include "mbfilter.h" #include "mbfilter_uuencode.h" +static size_t mb_uuencode_to_wchar(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state); +static void mb_wchar_to_uuencode(uint32_t *in, size_t len, mb_convert_buf *buf, bool end); + const mbfl_encoding mbfl_encoding_uuencode = { mbfl_no_encoding_uuencode, "UUENCODE", @@ -39,8 +42,8 @@ const mbfl_encoding mbfl_encoding_uuencode = { MBFL_ENCTYPE_SBCS, NULL, NULL, - NULL, - NULL + mb_uuencode_to_wchar, + mb_wchar_to_uuencode }; const struct mbfl_convert_vtbl vtbl_uuencode_8bit = { @@ -55,15 +58,21 @@ const struct mbfl_convert_vtbl vtbl_uuencode_8bit = { #define CK(statement) do { if ((statement) < 0) return (-1); } while (0) -/* uuencode => any */ -#define UUDEC(c) (char)(((c)-' ')&077) -static const char * uuenc_begin_text = "begin "; -enum { uudec_state_ground=0, uudec_state_inbegin, +#define UUDEC(c) (char)(((c)-' ') & 077) +static const char *uuenc_begin_text = "begin "; +enum { + uudec_state_ground=0, + uudec_state_inbegin, uudec_state_until_newline, - uudec_state_size, uudec_state_a, uudec_state_b, uudec_state_c, uudec_state_d, - uudec_state_skip_newline}; + uudec_state_size, + uudec_state_a, + uudec_state_b, + uudec_state_c, + uudec_state_d, + uudec_state_skip_newline +}; -int mbfl_filt_conv_uudec(int c, mbfl_convert_filter * filter) +int mbfl_filt_conv_uudec(int c, mbfl_convert_filter *filter) { int n; @@ -135,6 +144,8 @@ int mbfl_filt_conv_uudec(int c, mbfl_convert_filter * filter) CK((*filter->output_function)( (B << 4) | (C >> 2), filter->data)); if (n-- > 0) CK((*filter->output_function)( (C << 6) | D, filter->data)); + if (n < 0) + n = 0; filter->cache = n << 24; if (n == 0) @@ -149,3 +160,137 @@ int mbfl_filt_conv_uudec(int c, mbfl_convert_filter * filter) } return 0; } + +/* Using mbstring to decode UUEncoded text is already deprecated + * However, to facilitate the move to the new, faster internal conversion interface, + * We will temporarily implement it for UUEncode */ + +static size_t mb_uuencode_to_wchar(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state) +{ + unsigned char *p = *in, *e = p + *in_len; + uint32_t *out = buf, *limit = buf + bufsize; + + unsigned int _state = *state & 0xFF; + unsigned int size = *state >> 8; + + while (p < e && (limit - out) >= 3) { + unsigned char c = *p++; + + switch (_state) { + case uudec_state_ground: + if (c == 'b') { + if ((e - p) >= 5 && memcmp(p, uuenc_begin_text+1, 5) == 0) { + p += 5; + while (p < e && *p++ != '\n'); /* Consume everything up to newline */ + _state = uudec_state_size; + } + /* We didn't find "begin " */ + } + break; + + case uudec_state_size: + size = UUDEC(c); + _state = uudec_state_a; + break; + + case uudec_state_a: + if ((e - p) < 4) { + p = e; + break; + } + + unsigned int a = UUDEC(c); + unsigned int b = UUDEC(*p++); + unsigned int c = UUDEC(*p++); + unsigned int d = UUDEC(*p++); + + if (size > 0) { + *out++ = ((a << 2) | (b >> 4)) & 0xFF; + size--; + } + if (size > 0) { + *out++ = ((b << 4) | (c >> 2)) & 0xFF; + size--; + } + if (size > 0) { + *out++ = ((c << 6) | d) & 0xFF; + size--; + } + + _state = size ? uudec_state_a : uudec_state_skip_newline; + break; + + case uudec_state_skip_newline: + _state = uudec_state_size; + break; + } + } + + *state = (size << 8) | _state; + *in_len = e - p; + *in = p; + return out - buf; +} + +static unsigned char uuencode_six_bits(unsigned int bits) +{ + if (bits == 0) { + return '`'; + } else { + return bits + 32; + } +} + +static void mb_wchar_to_uuencode(uint32_t *in, size_t len, mb_convert_buf *buf, bool end) +{ + unsigned char *out, *limit; + MB_CONVERT_BUF_LOAD(buf, out, limit); + /* Every 3 bytes of input gets encoded as 4 bytes of output + * Additionally, we have a 'length' byte and a newline for each line of output + * (Maximum 45 input bytes can be encoded on a single output line) + * Make space for two more bytes in case we start close to where a line must end */ + MB_CONVERT_BUF_ENSURE(buf, out, limit, ((len + 2) * 4 / 3) + (((len + 44) / 45) * 2) + (buf->state ? 0 : sizeof("begin 0644 filename\n")) + 2); + + unsigned int bytes_encoded = buf->state >> 1; + + if (!buf->state) { + for (char *s = "begin 0644 filename\n"; *s; s++) { + out = mb_convert_buf_add(out, *s); + } + out = mb_convert_buf_add(out, MIN(len, 45) + 32); + buf->state |= 1; + } + + while (len--) { + uint32_t w = *in++; + uint32_t w2 = 0, w3 = 0; + + if (len) { + w2 = *in++; + len--; + } + if (len) { + w3 = *in++; + len--; + } + + out = mb_convert_buf_add4(out, uuencode_six_bits((w >> 2) & 0x3F), uuencode_six_bits(((w & 0x3) << 4) + ((w2 >> 4) & 0xF)), uuencode_six_bits(((w2 & 0xF) << 2) + ((w3 >> 6) & 0x3)), uuencode_six_bits(w3 & 0x3F)); + + bytes_encoded += 3; + + if (bytes_encoded >= 45) { + out = mb_convert_buf_add(out, '\n'); + if (len) { + out = mb_convert_buf_add(out, MIN(len, 45) + 32); + } + bytes_encoded = 0; + } + } + + if (bytes_encoded) { + out = mb_convert_buf_add(out, '\n'); + } + + buf->state = (bytes_encoded << 1) | (buf->state & 1); + MB_CONVERT_BUF_STORE(buf, out, limit); +} diff --git a/ext/mbstring/tests/uuencode_encoding.phpt b/ext/mbstring/tests/uuencode_encoding.phpt new file mode 100644 index 00000000000..70dedc60e90 --- /dev/null +++ b/ext/mbstring/tests/uuencode_encoding.phpt @@ -0,0 +1,41 @@ +--TEST-- +Temporary test of mbstring's UUEncode 'encoding' +--EXTENSIONS-- +mbstring +--FILE-- + +--EXPECTF-- +Deprecated: mb_convert_encoding(): Handling Uuencode via mbstring is deprecated; use convert_uuencode/convert_uudecode instead in %s + +Deprecated: mb_convert_encoding(): Handling Uuencode via mbstring is deprecated; use convert_uuencode/convert_uudecode instead in %s + +Deprecated: mb_convert_encoding(): Handling Uuencode via mbstring is deprecated; use convert_uuencode/convert_uudecode instead in %s + +Deprecated: mb_convert_encoding(): Handling Uuencode via mbstring is deprecated; use convert_uuencode/convert_uudecode instead in %s + +Deprecated: mb_convert_encoding(): Handling Uuencode via mbstring is deprecated; use convert_uuencode/convert_uudecode instead in %s +Done!