mirror of
https://github.com/php/php-src.git
synced 2025-08-16 14:08:47 +02:00
Implement fast text conversion interface for UUENCODE
This commit is contained in:
parent
06a15e6395
commit
7c2587b1f6
2 changed files with 195 additions and 9 deletions
|
@ -30,6 +30,9 @@
|
|||
#include "mbfilter.h"
|
||||
#include "mbfilter_uuencode.h"
|
||||
|
||||
static size_t mb_uuencode_to_wchar(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state);
|
||||
static void mb_wchar_to_uuencode(uint32_t *in, size_t len, mb_convert_buf *buf, bool end);
|
||||
|
||||
const mbfl_encoding mbfl_encoding_uuencode = {
|
||||
mbfl_no_encoding_uuencode,
|
||||
"UUENCODE",
|
||||
|
@ -39,8 +42,8 @@ const mbfl_encoding mbfl_encoding_uuencode = {
|
|||
MBFL_ENCTYPE_SBCS,
|
||||
NULL,
|
||||
NULL,
|
||||
NULL,
|
||||
NULL
|
||||
mb_uuencode_to_wchar,
|
||||
mb_wchar_to_uuencode
|
||||
};
|
||||
|
||||
const struct mbfl_convert_vtbl vtbl_uuencode_8bit = {
|
||||
|
@ -55,15 +58,21 @@ const struct mbfl_convert_vtbl vtbl_uuencode_8bit = {
|
|||
|
||||
#define CK(statement) do { if ((statement) < 0) return (-1); } while (0)
|
||||
|
||||
/* uuencode => any */
|
||||
#define UUDEC(c) (char)(((c)-' ')&077)
|
||||
static const char * uuenc_begin_text = "begin ";
|
||||
enum { uudec_state_ground=0, uudec_state_inbegin,
|
||||
#define UUDEC(c) (char)(((c)-' ') & 077)
|
||||
static const char *uuenc_begin_text = "begin ";
|
||||
enum {
|
||||
uudec_state_ground=0,
|
||||
uudec_state_inbegin,
|
||||
uudec_state_until_newline,
|
||||
uudec_state_size, uudec_state_a, uudec_state_b, uudec_state_c, uudec_state_d,
|
||||
uudec_state_skip_newline};
|
||||
uudec_state_size,
|
||||
uudec_state_a,
|
||||
uudec_state_b,
|
||||
uudec_state_c,
|
||||
uudec_state_d,
|
||||
uudec_state_skip_newline
|
||||
};
|
||||
|
||||
int mbfl_filt_conv_uudec(int c, mbfl_convert_filter * filter)
|
||||
int mbfl_filt_conv_uudec(int c, mbfl_convert_filter *filter)
|
||||
{
|
||||
int n;
|
||||
|
||||
|
@ -135,6 +144,8 @@ int mbfl_filt_conv_uudec(int c, mbfl_convert_filter * filter)
|
|||
CK((*filter->output_function)( (B << 4) | (C >> 2), filter->data));
|
||||
if (n-- > 0)
|
||||
CK((*filter->output_function)( (C << 6) | D, filter->data));
|
||||
if (n < 0)
|
||||
n = 0;
|
||||
filter->cache = n << 24;
|
||||
|
||||
if (n == 0)
|
||||
|
@ -149,3 +160,137 @@ int mbfl_filt_conv_uudec(int c, mbfl_convert_filter * filter)
|
|||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Using mbstring to decode UUEncoded text is already deprecated
|
||||
* However, to facilitate the move to the new, faster internal conversion interface,
|
||||
* We will temporarily implement it for UUEncode */
|
||||
|
||||
static size_t mb_uuencode_to_wchar(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state)
|
||||
{
|
||||
unsigned char *p = *in, *e = p + *in_len;
|
||||
uint32_t *out = buf, *limit = buf + bufsize;
|
||||
|
||||
unsigned int _state = *state & 0xFF;
|
||||
unsigned int size = *state >> 8;
|
||||
|
||||
while (p < e && (limit - out) >= 3) {
|
||||
unsigned char c = *p++;
|
||||
|
||||
switch (_state) {
|
||||
case uudec_state_ground:
|
||||
if (c == 'b') {
|
||||
if ((e - p) >= 5 && memcmp(p, uuenc_begin_text+1, 5) == 0) {
|
||||
p += 5;
|
||||
while (p < e && *p++ != '\n'); /* Consume everything up to newline */
|
||||
_state = uudec_state_size;
|
||||
}
|
||||
/* We didn't find "begin " */
|
||||
}
|
||||
break;
|
||||
|
||||
case uudec_state_size:
|
||||
size = UUDEC(c);
|
||||
_state = uudec_state_a;
|
||||
break;
|
||||
|
||||
case uudec_state_a:
|
||||
if ((e - p) < 4) {
|
||||
p = e;
|
||||
break;
|
||||
}
|
||||
|
||||
unsigned int a = UUDEC(c);
|
||||
unsigned int b = UUDEC(*p++);
|
||||
unsigned int c = UUDEC(*p++);
|
||||
unsigned int d = UUDEC(*p++);
|
||||
|
||||
if (size > 0) {
|
||||
*out++ = ((a << 2) | (b >> 4)) & 0xFF;
|
||||
size--;
|
||||
}
|
||||
if (size > 0) {
|
||||
*out++ = ((b << 4) | (c >> 2)) & 0xFF;
|
||||
size--;
|
||||
}
|
||||
if (size > 0) {
|
||||
*out++ = ((c << 6) | d) & 0xFF;
|
||||
size--;
|
||||
}
|
||||
|
||||
_state = size ? uudec_state_a : uudec_state_skip_newline;
|
||||
break;
|
||||
|
||||
case uudec_state_skip_newline:
|
||||
_state = uudec_state_size;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
*state = (size << 8) | _state;
|
||||
*in_len = e - p;
|
||||
*in = p;
|
||||
return out - buf;
|
||||
}
|
||||
|
||||
static unsigned char uuencode_six_bits(unsigned int bits)
|
||||
{
|
||||
if (bits == 0) {
|
||||
return '`';
|
||||
} else {
|
||||
return bits + 32;
|
||||
}
|
||||
}
|
||||
|
||||
static void mb_wchar_to_uuencode(uint32_t *in, size_t len, mb_convert_buf *buf, bool end)
|
||||
{
|
||||
unsigned char *out, *limit;
|
||||
MB_CONVERT_BUF_LOAD(buf, out, limit);
|
||||
/* Every 3 bytes of input gets encoded as 4 bytes of output
|
||||
* Additionally, we have a 'length' byte and a newline for each line of output
|
||||
* (Maximum 45 input bytes can be encoded on a single output line)
|
||||
* Make space for two more bytes in case we start close to where a line must end */
|
||||
MB_CONVERT_BUF_ENSURE(buf, out, limit, ((len + 2) * 4 / 3) + (((len + 44) / 45) * 2) + (buf->state ? 0 : sizeof("begin 0644 filename\n")) + 2);
|
||||
|
||||
unsigned int bytes_encoded = buf->state >> 1;
|
||||
|
||||
if (!buf->state) {
|
||||
for (char *s = "begin 0644 filename\n"; *s; s++) {
|
||||
out = mb_convert_buf_add(out, *s);
|
||||
}
|
||||
out = mb_convert_buf_add(out, MIN(len, 45) + 32);
|
||||
buf->state |= 1;
|
||||
}
|
||||
|
||||
while (len--) {
|
||||
uint32_t w = *in++;
|
||||
uint32_t w2 = 0, w3 = 0;
|
||||
|
||||
if (len) {
|
||||
w2 = *in++;
|
||||
len--;
|
||||
}
|
||||
if (len) {
|
||||
w3 = *in++;
|
||||
len--;
|
||||
}
|
||||
|
||||
out = mb_convert_buf_add4(out, uuencode_six_bits((w >> 2) & 0x3F), uuencode_six_bits(((w & 0x3) << 4) + ((w2 >> 4) & 0xF)), uuencode_six_bits(((w2 & 0xF) << 2) + ((w3 >> 6) & 0x3)), uuencode_six_bits(w3 & 0x3F));
|
||||
|
||||
bytes_encoded += 3;
|
||||
|
||||
if (bytes_encoded >= 45) {
|
||||
out = mb_convert_buf_add(out, '\n');
|
||||
if (len) {
|
||||
out = mb_convert_buf_add(out, MIN(len, 45) + 32);
|
||||
}
|
||||
bytes_encoded = 0;
|
||||
}
|
||||
}
|
||||
|
||||
if (bytes_encoded) {
|
||||
out = mb_convert_buf_add(out, '\n');
|
||||
}
|
||||
|
||||
buf->state = (bytes_encoded << 1) | (buf->state & 1);
|
||||
MB_CONVERT_BUF_STORE(buf, out, limit);
|
||||
}
|
||||
|
|
41
ext/mbstring/tests/uuencode_encoding.phpt
Normal file
41
ext/mbstring/tests/uuencode_encoding.phpt
Normal file
|
@ -0,0 +1,41 @@
|
|||
--TEST--
|
||||
Temporary test of mbstring's UUEncode 'encoding'
|
||||
--EXTENSIONS--
|
||||
mbstring
|
||||
--FILE--
|
||||
<?php
|
||||
|
||||
/* Using mbstring to convert strings from UUEncode has already been deprecated
|
||||
* So this test should be removed when the UUEncode 'encoding' is */
|
||||
|
||||
function testConversion($uuencode, $raw) {
|
||||
$converted = mb_convert_encoding($uuencode, '8bit', 'UUENCODE');
|
||||
if ($converted !== $raw)
|
||||
die('Expected "' . $uuencode . '" to convert to ' . bin2hex($raw) . '; actually got ' . bin2hex($converted));
|
||||
$converted = mb_convert_encoding($raw, 'UUENCODE', '8bit');
|
||||
if ($converted !== $uuencode)
|
||||
die('Expected ' . bin2hex($raw) . ' to convert to "' . $uuencode . '"; actually got "' . $converted . '"');
|
||||
}
|
||||
|
||||
testConversion('', '');
|
||||
|
||||
/* mbstring's uuencode requires the user to strip off the terminating "`\nend\n" */
|
||||
|
||||
testConversion("begin 0644 filename\n#0V%T\n", 'Cat');
|
||||
testConversion("begin 0644 filename\n::'1T<#HO+W=W=RYW:6MI<&5D:6\$N;W)G#0H`\n", "http://www.wikipedia.org\r\n");
|
||||
testConversion("begin 0644 filename\n#`0(#\n", "\x01\x02\x03");
|
||||
testConversion("begin 0644 filename\n$`0(#\"@``\n", "\x01\x02\x03\n");
|
||||
|
||||
echo "Done!\n";
|
||||
?>
|
||||
--EXPECTF--
|
||||
Deprecated: mb_convert_encoding(): Handling Uuencode via mbstring is deprecated; use convert_uuencode/convert_uudecode instead in %s
|
||||
|
||||
Deprecated: mb_convert_encoding(): Handling Uuencode via mbstring is deprecated; use convert_uuencode/convert_uudecode instead in %s
|
||||
|
||||
Deprecated: mb_convert_encoding(): Handling Uuencode via mbstring is deprecated; use convert_uuencode/convert_uudecode instead in %s
|
||||
|
||||
Deprecated: mb_convert_encoding(): Handling Uuencode via mbstring is deprecated; use convert_uuencode/convert_uudecode instead in %s
|
||||
|
||||
Deprecated: mb_convert_encoding(): Handling Uuencode via mbstring is deprecated; use convert_uuencode/convert_uudecode instead in %s
|
||||
Done!
|
Loading…
Add table
Add a link
Reference in a new issue