Implement fast text conversion interface for UUENCODE

This commit is contained in:
Alex Dowad 2022-04-23 20:56:52 +02:00
parent 06a15e6395
commit 7c2587b1f6
2 changed files with 195 additions and 9 deletions

View file

@ -30,6 +30,9 @@
#include "mbfilter.h"
#include "mbfilter_uuencode.h"
static size_t mb_uuencode_to_wchar(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state);
static void mb_wchar_to_uuencode(uint32_t *in, size_t len, mb_convert_buf *buf, bool end);
const mbfl_encoding mbfl_encoding_uuencode = {
mbfl_no_encoding_uuencode,
"UUENCODE",
@ -39,8 +42,8 @@ const mbfl_encoding mbfl_encoding_uuencode = {
MBFL_ENCTYPE_SBCS,
NULL,
NULL,
NULL,
NULL
mb_uuencode_to_wchar,
mb_wchar_to_uuencode
};
const struct mbfl_convert_vtbl vtbl_uuencode_8bit = {
@ -55,15 +58,21 @@ const struct mbfl_convert_vtbl vtbl_uuencode_8bit = {
#define CK(statement) do { if ((statement) < 0) return (-1); } while (0)
/* uuencode => any */
#define UUDEC(c) (char)(((c)-' ')&077)
static const char * uuenc_begin_text = "begin ";
enum { uudec_state_ground=0, uudec_state_inbegin,
#define UUDEC(c) (char)(((c)-' ') & 077)
static const char *uuenc_begin_text = "begin ";
enum {
uudec_state_ground=0,
uudec_state_inbegin,
uudec_state_until_newline,
uudec_state_size, uudec_state_a, uudec_state_b, uudec_state_c, uudec_state_d,
uudec_state_skip_newline};
uudec_state_size,
uudec_state_a,
uudec_state_b,
uudec_state_c,
uudec_state_d,
uudec_state_skip_newline
};
int mbfl_filt_conv_uudec(int c, mbfl_convert_filter * filter)
int mbfl_filt_conv_uudec(int c, mbfl_convert_filter *filter)
{
int n;
@ -135,6 +144,8 @@ int mbfl_filt_conv_uudec(int c, mbfl_convert_filter * filter)
CK((*filter->output_function)( (B << 4) | (C >> 2), filter->data));
if (n-- > 0)
CK((*filter->output_function)( (C << 6) | D, filter->data));
if (n < 0)
n = 0;
filter->cache = n << 24;
if (n == 0)
@ -149,3 +160,137 @@ int mbfl_filt_conv_uudec(int c, mbfl_convert_filter * filter)
}
return 0;
}
/* Using mbstring to decode UUEncoded text is already deprecated
* However, to facilitate the move to the new, faster internal conversion interface,
* We will temporarily implement it for UUEncode */
static size_t mb_uuencode_to_wchar(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state)
{
unsigned char *p = *in, *e = p + *in_len;
uint32_t *out = buf, *limit = buf + bufsize;
unsigned int _state = *state & 0xFF;
unsigned int size = *state >> 8;
while (p < e && (limit - out) >= 3) {
unsigned char c = *p++;
switch (_state) {
case uudec_state_ground:
if (c == 'b') {
if ((e - p) >= 5 && memcmp(p, uuenc_begin_text+1, 5) == 0) {
p += 5;
while (p < e && *p++ != '\n'); /* Consume everything up to newline */
_state = uudec_state_size;
}
/* We didn't find "begin " */
}
break;
case uudec_state_size:
size = UUDEC(c);
_state = uudec_state_a;
break;
case uudec_state_a:
if ((e - p) < 4) {
p = e;
break;
}
unsigned int a = UUDEC(c);
unsigned int b = UUDEC(*p++);
unsigned int c = UUDEC(*p++);
unsigned int d = UUDEC(*p++);
if (size > 0) {
*out++ = ((a << 2) | (b >> 4)) & 0xFF;
size--;
}
if (size > 0) {
*out++ = ((b << 4) | (c >> 2)) & 0xFF;
size--;
}
if (size > 0) {
*out++ = ((c << 6) | d) & 0xFF;
size--;
}
_state = size ? uudec_state_a : uudec_state_skip_newline;
break;
case uudec_state_skip_newline:
_state = uudec_state_size;
break;
}
}
*state = (size << 8) | _state;
*in_len = e - p;
*in = p;
return out - buf;
}
static unsigned char uuencode_six_bits(unsigned int bits)
{
if (bits == 0) {
return '`';
} else {
return bits + 32;
}
}
static void mb_wchar_to_uuencode(uint32_t *in, size_t len, mb_convert_buf *buf, bool end)
{
unsigned char *out, *limit;
MB_CONVERT_BUF_LOAD(buf, out, limit);
/* Every 3 bytes of input gets encoded as 4 bytes of output
* Additionally, we have a 'length' byte and a newline for each line of output
* (Maximum 45 input bytes can be encoded on a single output line)
* Make space for two more bytes in case we start close to where a line must end */
MB_CONVERT_BUF_ENSURE(buf, out, limit, ((len + 2) * 4 / 3) + (((len + 44) / 45) * 2) + (buf->state ? 0 : sizeof("begin 0644 filename\n")) + 2);
unsigned int bytes_encoded = buf->state >> 1;
if (!buf->state) {
for (char *s = "begin 0644 filename\n"; *s; s++) {
out = mb_convert_buf_add(out, *s);
}
out = mb_convert_buf_add(out, MIN(len, 45) + 32);
buf->state |= 1;
}
while (len--) {
uint32_t w = *in++;
uint32_t w2 = 0, w3 = 0;
if (len) {
w2 = *in++;
len--;
}
if (len) {
w3 = *in++;
len--;
}
out = mb_convert_buf_add4(out, uuencode_six_bits((w >> 2) & 0x3F), uuencode_six_bits(((w & 0x3) << 4) + ((w2 >> 4) & 0xF)), uuencode_six_bits(((w2 & 0xF) << 2) + ((w3 >> 6) & 0x3)), uuencode_six_bits(w3 & 0x3F));
bytes_encoded += 3;
if (bytes_encoded >= 45) {
out = mb_convert_buf_add(out, '\n');
if (len) {
out = mb_convert_buf_add(out, MIN(len, 45) + 32);
}
bytes_encoded = 0;
}
}
if (bytes_encoded) {
out = mb_convert_buf_add(out, '\n');
}
buf->state = (bytes_encoded << 1) | (buf->state & 1);
MB_CONVERT_BUF_STORE(buf, out, limit);
}

View file

@ -0,0 +1,41 @@
--TEST--
Temporary test of mbstring's UUEncode 'encoding'
--EXTENSIONS--
mbstring
--FILE--
<?php
/* Using mbstring to convert strings from UUEncode has already been deprecated
* So this test should be removed when the UUEncode 'encoding' is */
function testConversion($uuencode, $raw) {
$converted = mb_convert_encoding($uuencode, '8bit', 'UUENCODE');
if ($converted !== $raw)
die('Expected "' . $uuencode . '" to convert to ' . bin2hex($raw) . '; actually got ' . bin2hex($converted));
$converted = mb_convert_encoding($raw, 'UUENCODE', '8bit');
if ($converted !== $uuencode)
die('Expected ' . bin2hex($raw) . ' to convert to "' . $uuencode . '"; actually got "' . $converted . '"');
}
testConversion('', '');
/* mbstring's uuencode requires the user to strip off the terminating "`\nend\n" */
testConversion("begin 0644 filename\n#0V%T\n", 'Cat');
testConversion("begin 0644 filename\n::'1T<#HO+W=W=RYW:6MI<&5D:6\$N;W)G#0H`\n", "http://www.wikipedia.org\r\n");
testConversion("begin 0644 filename\n#`0(#\n", "\x01\x02\x03");
testConversion("begin 0644 filename\n$`0(#\"@``\n", "\x01\x02\x03\n");
echo "Done!\n";
?>
--EXPECTF--
Deprecated: mb_convert_encoding(): Handling Uuencode via mbstring is deprecated; use convert_uuencode/convert_uudecode instead in %s
Deprecated: mb_convert_encoding(): Handling Uuencode via mbstring is deprecated; use convert_uuencode/convert_uudecode instead in %s
Deprecated: mb_convert_encoding(): Handling Uuencode via mbstring is deprecated; use convert_uuencode/convert_uudecode instead in %s
Deprecated: mb_convert_encoding(): Handling Uuencode via mbstring is deprecated; use convert_uuencode/convert_uudecode instead in %s
Deprecated: mb_convert_encoding(): Handling Uuencode via mbstring is deprecated; use convert_uuencode/convert_uudecode instead in %s
Done!