mirror of
https://github.com/php/php-src.git
synced 2025-08-17 14:38:49 +02:00
Implement fast text conversion interface for UUENCODE
This commit is contained in:
parent
06a15e6395
commit
7c2587b1f6
2 changed files with 195 additions and 9 deletions
|
@ -30,6 +30,9 @@
|
||||||
#include "mbfilter.h"
|
#include "mbfilter.h"
|
||||||
#include "mbfilter_uuencode.h"
|
#include "mbfilter_uuencode.h"
|
||||||
|
|
||||||
|
static size_t mb_uuencode_to_wchar(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state);
|
||||||
|
static void mb_wchar_to_uuencode(uint32_t *in, size_t len, mb_convert_buf *buf, bool end);
|
||||||
|
|
||||||
const mbfl_encoding mbfl_encoding_uuencode = {
|
const mbfl_encoding mbfl_encoding_uuencode = {
|
||||||
mbfl_no_encoding_uuencode,
|
mbfl_no_encoding_uuencode,
|
||||||
"UUENCODE",
|
"UUENCODE",
|
||||||
|
@ -39,8 +42,8 @@ const mbfl_encoding mbfl_encoding_uuencode = {
|
||||||
MBFL_ENCTYPE_SBCS,
|
MBFL_ENCTYPE_SBCS,
|
||||||
NULL,
|
NULL,
|
||||||
NULL,
|
NULL,
|
||||||
NULL,
|
mb_uuencode_to_wchar,
|
||||||
NULL
|
mb_wchar_to_uuencode
|
||||||
};
|
};
|
||||||
|
|
||||||
const struct mbfl_convert_vtbl vtbl_uuencode_8bit = {
|
const struct mbfl_convert_vtbl vtbl_uuencode_8bit = {
|
||||||
|
@ -55,13 +58,19 @@ const struct mbfl_convert_vtbl vtbl_uuencode_8bit = {
|
||||||
|
|
||||||
#define CK(statement) do { if ((statement) < 0) return (-1); } while (0)
|
#define CK(statement) do { if ((statement) < 0) return (-1); } while (0)
|
||||||
|
|
||||||
/* uuencode => any */
|
|
||||||
#define UUDEC(c) (char)(((c)-' ') & 077)
|
#define UUDEC(c) (char)(((c)-' ') & 077)
|
||||||
static const char *uuenc_begin_text = "begin ";
|
static const char *uuenc_begin_text = "begin ";
|
||||||
enum { uudec_state_ground=0, uudec_state_inbegin,
|
enum {
|
||||||
|
uudec_state_ground=0,
|
||||||
|
uudec_state_inbegin,
|
||||||
uudec_state_until_newline,
|
uudec_state_until_newline,
|
||||||
uudec_state_size, uudec_state_a, uudec_state_b, uudec_state_c, uudec_state_d,
|
uudec_state_size,
|
||||||
uudec_state_skip_newline};
|
uudec_state_a,
|
||||||
|
uudec_state_b,
|
||||||
|
uudec_state_c,
|
||||||
|
uudec_state_d,
|
||||||
|
uudec_state_skip_newline
|
||||||
|
};
|
||||||
|
|
||||||
int mbfl_filt_conv_uudec(int c, mbfl_convert_filter *filter)
|
int mbfl_filt_conv_uudec(int c, mbfl_convert_filter *filter)
|
||||||
{
|
{
|
||||||
|
@ -135,6 +144,8 @@ int mbfl_filt_conv_uudec(int c, mbfl_convert_filter * filter)
|
||||||
CK((*filter->output_function)( (B << 4) | (C >> 2), filter->data));
|
CK((*filter->output_function)( (B << 4) | (C >> 2), filter->data));
|
||||||
if (n-- > 0)
|
if (n-- > 0)
|
||||||
CK((*filter->output_function)( (C << 6) | D, filter->data));
|
CK((*filter->output_function)( (C << 6) | D, filter->data));
|
||||||
|
if (n < 0)
|
||||||
|
n = 0;
|
||||||
filter->cache = n << 24;
|
filter->cache = n << 24;
|
||||||
|
|
||||||
if (n == 0)
|
if (n == 0)
|
||||||
|
@ -149,3 +160,137 @@ int mbfl_filt_conv_uudec(int c, mbfl_convert_filter * filter)
|
||||||
}
|
}
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Using mbstring to decode UUEncoded text is already deprecated.
|
||||||
|
* However, to facilitate the move to the new, faster internal conversion interface,
|
||||||
|
* we will temporarily implement it for UUEncode. */
|
||||||
|
|
||||||
|
static size_t mb_uuencode_to_wchar(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state)
|
||||||
|
{
|
||||||
|
unsigned char *p = *in, *e = p + *in_len;
|
||||||
|
uint32_t *out = buf, *limit = buf + bufsize;
|
||||||
|
|
||||||
|
unsigned int _state = *state & 0xFF;
|
||||||
|
unsigned int size = *state >> 8;
|
||||||
|
|
||||||
|
while (p < e && (limit - out) >= 3) {
|
||||||
|
unsigned char c = *p++;
|
||||||
|
|
||||||
|
switch (_state) {
|
||||||
|
case uudec_state_ground:
|
||||||
|
if (c == 'b') {
|
||||||
|
if ((e - p) >= 5 && memcmp(p, uuenc_begin_text+1, 5) == 0) {
|
||||||
|
p += 5;
|
||||||
|
while (p < e && *p++ != '\n'); /* Consume everything up to newline */
|
||||||
|
_state = uudec_state_size;
|
||||||
|
}
|
||||||
|
/* We didn't find "begin " */
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
|
||||||
|
case uudec_state_size:
|
||||||
|
size = UUDEC(c);
|
||||||
|
_state = uudec_state_a;
|
||||||
|
break;
|
||||||
|
|
||||||
|
case uudec_state_a:
|
||||||
|
if ((e - p) < 4) {
|
||||||
|
p = e;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
unsigned int a = UUDEC(c);
|
||||||
|
unsigned int b = UUDEC(*p++);
|
||||||
|
unsigned int c = UUDEC(*p++);
|
||||||
|
unsigned int d = UUDEC(*p++);
|
||||||
|
|
||||||
|
if (size > 0) {
|
||||||
|
*out++ = ((a << 2) | (b >> 4)) & 0xFF;
|
||||||
|
size--;
|
||||||
|
}
|
||||||
|
if (size > 0) {
|
||||||
|
*out++ = ((b << 4) | (c >> 2)) & 0xFF;
|
||||||
|
size--;
|
||||||
|
}
|
||||||
|
if (size > 0) {
|
||||||
|
*out++ = ((c << 6) | d) & 0xFF;
|
||||||
|
size--;
|
||||||
|
}
|
||||||
|
|
||||||
|
_state = size ? uudec_state_a : uudec_state_skip_newline;
|
||||||
|
break;
|
||||||
|
|
||||||
|
case uudec_state_skip_newline:
|
||||||
|
_state = uudec_state_size;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
*state = (size << 8) | _state;
|
||||||
|
*in_len = e - p;
|
||||||
|
*in = p;
|
||||||
|
return out - buf;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Translate one 6-bit value into its UUEncode output character.
 * Zero is written as a backtick; every other value is offset by 32. */
static unsigned char uuencode_six_bits(unsigned int bits)
{
	return (bits != 0) ? (unsigned char)(bits + 32) : '`';
}
|
||||||
|
|
||||||
|
/* Fast-path encoder: input values (one byte each) => UUEncoded text appended to buf.
 *
 * in, len  Input values and how many there are.
 * buf      Output buffer. buf->state is carried between calls: bit 0 is set once
 *          the "begin" header has been written, and the bits above it hold the
 *          running count for the current output line.
 * end      Not referenced by this function.
 *
 * NOTE(review): each length character is computed from the input still left in
 * THIS call (MIN(len, 45)), and the line counter grows by 3 per group even when
 * the last group held fewer than 3 values. If callers split one string across
 * several calls, the emitted line lengths may not match the data -- confirm
 * against the caller. */
static void mb_wchar_to_uuencode(uint32_t *in, size_t len, mb_convert_buf *buf, bool end)
{
	static const char header[] = "begin 0644 filename\n";

	unsigned char *out, *limit;
	MB_CONVERT_BUF_LOAD(buf, out, limit);

	/* Space needed:
	 *  - 4 output bytes per 3 input bytes
	 *  - a length byte plus a newline per output line (at most 45 input bytes each)
	 *  - the header, if it has not been written yet
	 *  - 2 spare bytes in case we start close to where a line must end */
	MB_CONVERT_BUF_ENSURE(buf, out, limit, ((len + 2) * 4 / 3) + (((len + 44) / 45) * 2) + (buf->state ? 0 : sizeof(header)) + 2);

	unsigned int line_fill = buf->state >> 1;

	if (!buf->state) {
		/* First call: write the header and the first line's length character */
		for (size_t i = 0; header[i] != '\0'; i++) {
			out = mb_convert_buf_add(out, header[i]);
		}
		out = mb_convert_buf_add(out, MIN(len, 45) + 32);
		buf->state |= 1;
	}

	while (len > 0) {
		/* Gather up to three input values; missing ones stay zero */
		uint32_t first = *in++;
		uint32_t second = 0;
		uint32_t third = 0;
		len--;

		if (len > 0) {
			second = *in++;
			len--;
		}
		if (len > 0) {
			third = *in++;
			len--;
		}

		/* Repack 3 x 8 bits as 4 x 6 bits */
		unsigned char q0 = uuencode_six_bits((first >> 2) & 0x3F);
		unsigned char q1 = uuencode_six_bits(((first & 0x3) << 4) + ((second >> 4) & 0xF));
		unsigned char q2 = uuencode_six_bits(((second & 0xF) << 2) + ((third >> 6) & 0x3));
		unsigned char q3 = uuencode_six_bits(third & 0x3F);
		out = mb_convert_buf_add4(out, q0, q1, q2, q3);

		line_fill += 3;

		if (line_fill >= 45) {
			/* Line is full: terminate it and, if input remains, open the next one */
			out = mb_convert_buf_add(out, '\n');
			if (len > 0) {
				out = mb_convert_buf_add(out, MIN(len, 45) + 32);
			}
			line_fill = 0;
		}
	}

	if (line_fill) {
		/* A partially filled line is terminated before returning */
		out = mb_convert_buf_add(out, '\n');
	}

	buf->state = (line_fill << 1) | (buf->state & 1);
	MB_CONVERT_BUF_STORE(buf, out, limit);
}
|
||||||
|
|
41
ext/mbstring/tests/uuencode_encoding.phpt
Normal file
41
ext/mbstring/tests/uuencode_encoding.phpt
Normal file
|
@ -0,0 +1,41 @@
|
||||||
|
--TEST--
|
||||||
|
Temporary test of mbstring's UUEncode 'encoding'
|
||||||
|
--EXTENSIONS--
|
||||||
|
mbstring
|
||||||
|
--FILE--
|
||||||
|
<?php
|
||||||
|
|
||||||
|
/* Using mbstring to convert strings from UUEncode has already been deprecated,
|
||||||
|
* so this test should be removed when the UUEncode 'encoding' is removed. */
|
||||||
|
|
||||||
|
/* Check one UUEncode <=> raw pair in both directions.
 * Aborts the script with a diagnostic on the first mismatch. */
function testConversion($uuencode, $raw) {
	// Decode: UUEncoded text must yield exactly the raw bytes
	$decoded = mb_convert_encoding($uuencode, '8bit', 'UUENCODE');
	if ($decoded !== $raw) {
		die('Expected "' . $uuencode . '" to convert to ' . bin2hex($raw) . '; actually got ' . bin2hex($decoded));
	}

	// Encode: raw bytes must yield exactly the UUEncoded text
	$encoded = mb_convert_encoding($raw, 'UUENCODE', '8bit');
	if ($encoded !== $uuencode) {
		die('Expected ' . bin2hex($raw) . ' to convert to "' . $uuencode . '"; actually got "' . $encoded . '"');
	}
}
|
||||||
|
|
||||||
|
testConversion('', '');
|
||||||
|
|
||||||
|
/* mbstring's uuencode requires the user to strip off the terminating "`\nend\n" */
|
||||||
|
|
||||||
|
testConversion("begin 0644 filename\n#0V%T\n", 'Cat');
|
||||||
|
testConversion("begin 0644 filename\n::'1T<#HO+W=W=RYW:6MI<&5D:6\$N;W)G#0H`\n", "http://www.wikipedia.org\r\n");
|
||||||
|
testConversion("begin 0644 filename\n#`0(#\n", "\x01\x02\x03");
|
||||||
|
testConversion("begin 0644 filename\n$`0(#\"@``\n", "\x01\x02\x03\n");
|
||||||
|
|
||||||
|
echo "Done!\n";
|
||||||
|
?>
|
||||||
|
--EXPECTF--
|
||||||
|
Deprecated: mb_convert_encoding(): Handling Uuencode via mbstring is deprecated; use convert_uuencode/convert_uudecode instead in %s
|
||||||
|
|
||||||
|
Deprecated: mb_convert_encoding(): Handling Uuencode via mbstring is deprecated; use convert_uuencode/convert_uudecode instead in %s
|
||||||
|
|
||||||
|
Deprecated: mb_convert_encoding(): Handling Uuencode via mbstring is deprecated; use convert_uuencode/convert_uudecode instead in %s
|
||||||
|
|
||||||
|
Deprecated: mb_convert_encoding(): Handling Uuencode via mbstring is deprecated; use convert_uuencode/convert_uudecode instead in %s
|
||||||
|
|
||||||
|
Deprecated: mb_convert_encoding(): Handling Uuencode via mbstring is deprecated; use convert_uuencode/convert_uudecode instead in %s
|
||||||
|
Done!
|
Loading…
Add table
Add a link
Reference in a new issue