Implement mb_encode_mimeheader using fast text conversion filters

The behavior of the new mb_encode_mimeheader implementation closely follows the old implementation, except for three points:

• The old implementation was missing a call to the mbfl_convert_filter flush function, so it would sometimes truncate the input string just before its end.

• The old implementation would drop zero bytes when QPrint-encoding. For example, if you tried to QPrint-encode the UTF-32BE string "\x00\x00\x12\x34", the result would be "=12=34", which does not decode to a valid UTF-32BE string. This is now fixed.

• In some rare corner cases, the new implementation will choose to Base64-encode or QPrint-encode the input string, where the old implementation would have just added newlines to it. Specifically, this can happen when there is a non-space ASCII character, followed by a large number of ASCII spaces, followed by a non-ASCII character.

The new implementation is around 2.5-8x faster than the old one, depending on the text encoding and transfer encoding used. Performance gains are greater with Base64 transfer encoding than with QPrint transfer encoding; this is not because QPrint-encoding bytes is slow, but because QPrint-encoded output is much bigger than Base64-encoded output and takes more lines, so we have to go through the process of finding the right place to break a line many more times.
2025-08-16 05:58:45 +02:00 · 2023-02-17 13:51:02 +02:00 · 2023-02-17 13:51:02 +02:00 · 0ce755be26
commit 0ce755be26
parent 6ebb506637
8 changed files with 649 additions and 464 deletions
--- a/ext/mbstring/libmbfl/filters/mbfilter_base64.c
+++ b/ext/mbstring/libmbfl/filters/mbfilter_base64.c
@ -99,15 +99,13 @@ int mbfl_filt_conv_base64enc(int c, mbfl_convert_filter *filter)
 		filter->cache |= (c & 0xff) << 8;
 	} else {
 		filter->status &= ~0xff;
-		if ((filter->status & MBFL_BASE64_STS_MIME_HEADER) == 0) {
-			n = (filter->status & 0xff00) >> 8;
-			if (n > 72) {
-				CK((*filter->output_function)(0x0d, filter->data));		/* CR */
-				CK((*filter->output_function)(0x0a, filter->data));		/* LF */
-				filter->status &= ~0xff00;
-			}
-			filter->status += 0x400;
+		n = (filter->status & 0xff00) >> 8;
+		if (n > 72) {
+			CK((*filter->output_function)(0x0d, filter->data));		/* CR */
+			CK((*filter->output_function)(0x0a, filter->data));		/* LF */
+			filter->status &= ~0xff00;
 		}
+		filter->status += 0x400;
 		n = filter->cache | (c & 0xff);
 		CK((*filter->output_function)(mbfl_base64_table[(n >> 18) & 0x3f], filter->data));
 		CK((*filter->output_function)(mbfl_base64_table[(n >> 12) & 0x3f], filter->data));
@ -129,11 +127,9 @@ int mbfl_filt_conv_base64enc_flush(mbfl_convert_filter *filter)
 	filter->cache = 0;
 	/* flush fragments */
 	if (status >= 1) {
-		if ((filter->status & MBFL_BASE64_STS_MIME_HEADER) == 0) {
-			if (len > 72){
-				CK((*filter->output_function)(0x0d, filter->data));		/* CR */
-				CK((*filter->output_function)(0x0a, filter->data));		/* LF */
-			}
+		if (len > 72){
+			CK((*filter->output_function)(0x0d, filter->data));		/* CR */
+			CK((*filter->output_function)(0x0a, filter->data));		/* LF */
 		}
 		CK((*filter->output_function)(mbfl_base64_table[(cache >> 18) & 0x3f], filter->data));
 		CK((*filter->output_function)(mbfl_base64_table[(cache >> 12) & 0x3f], filter->data));
--- a/ext/mbstring/libmbfl/filters/mbfilter_qprint.c
+++ b/ext/mbstring/libmbfl/filters/mbfilter_qprint.c
@ -29,7 +29,6 @@

 #include "mbfilter.h"
 #include "mbfilter_qprint.h"
-#include "unicode_prop.h"

 static size_t mb_qprint_to_wchar(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state);
 static void mb_wchar_to_qprint(uint32_t *in, size_t len, mb_convert_buf *buf, bool end);
@ -96,28 +95,25 @@ int mbfl_filt_conv_qprintenc(int c, mbfl_convert_filter *filter)
 			break;
 		}

-		if ((filter->status & MBFL_QPRINT_STS_MIME_HEADER) == 0) {
-			if (s == 0x0a || (s == 0x0d && c != 0x0a)) {	/* line feed */
-				CK((*filter->output_function)(0x0d, filter->data));		/* CR */
-				CK((*filter->output_function)(0x0a, filter->data));		/* LF */
-				filter->status &= ~0xff00;
-				break;
-			} else if (s == 0x0d) {
-				break;
-			}
+		if (s == '\n' || (s == '\r' && c != '\n')) {	/* line feed */
+			CK((*filter->output_function)('\r', filter->data));
+			CK((*filter->output_function)('\n', filter->data));
+			filter->status &= ~0xff00;
+			break;
+		} else if (s == 0x0d) {
+			break;
 		}

-		if ((filter->status & MBFL_QPRINT_STS_MIME_HEADER) == 0  && n >= 72) {	/* soft line feed */
-			CK((*filter->output_function)(0x3d, filter->data));		/* '=' */
-			CK((*filter->output_function)(0x0d, filter->data));		/* CR */
-			CK((*filter->output_function)(0x0a, filter->data));		/* LF */
+		if (n >= 72) {	/* soft line feed */
+			CK((*filter->output_function)('=', filter->data));
+			CK((*filter->output_function)('\r', filter->data));
+			CK((*filter->output_function)('\n', filter->data));
 			filter->status &= ~0xff00;
 		}

-		if (s <= 0 || s >= 0x80 || s == 0x3d		/* not ASCII or '=' */
-		   || ((filter->status & MBFL_QPRINT_STS_MIME_HEADER) && mime_char_needs_qencode[s])) {
+		if (s <= 0 || s >= 0x80 || s == '=') { /* not ASCII or '=' */
 			/* hex-octet */
-			CK((*filter->output_function)(0x3d, filter->data));		/* '=' */
+			CK((*filter->output_function)('=', filter->data));
 			n = (s >> 4) & 0xf;
 			if (n < 10) {
 				n += 48;		/* '0' */
@ -132,14 +128,10 @@ int mbfl_filt_conv_qprintenc(int c, mbfl_convert_filter *filter)
 				n += 55;
 			}
 			CK((*filter->output_function)(n, filter->data));
-			if ((filter->status & MBFL_QPRINT_STS_MIME_HEADER) == 0) {
-				filter->status += 0x300;
-			}
+			filter->status += 0x300;
 		} else {
 			CK((*filter->output_function)(s, filter->data));
-			if ((filter->status & MBFL_QPRINT_STS_MIME_HEADER) == 0) {
-				filter->status += 0x100;
-			}
+			filter->status += 0x100;
 		}
 		break;
 	}
--- a/ext/mbstring/libmbfl/mbfl/mbfilter.c
+++ b/ext/mbstring/libmbfl/mbfl/mbfilter.c
@ -523,312 +523,3 @@ mbfl_strcut(

 	return result;
 }
-
-
-/*
- *  MIME header encode
- */
-struct mime_header_encoder_data {
-	mbfl_convert_filter *conv1_filter;
-	mbfl_convert_filter *block_filter;
-	mbfl_convert_filter *conv2_filter;
-	mbfl_convert_filter *conv2_filter_backup;
-	mbfl_convert_filter *encod_filter;
-	mbfl_convert_filter *encod_filter_backup;
-	mbfl_memory_device outdev;
-	mbfl_memory_device tmpdev;
-	int status1;
-	int status2;
-	size_t prevpos;
-	size_t linehead;
-	size_t firstindent;
-	int encnamelen;
-	int lwsplen;
-	char encname[128];
-	char lwsp[16];
-};
-
-static int
-mime_header_encoder_block_collector(int c, void *data)
-{
-	size_t n;
-	struct mime_header_encoder_data *pe = (struct mime_header_encoder_data *)data;
-
-	switch (pe->status2) {
-	case 1:	/* encoded word */
-		pe->prevpos = pe->outdev.pos;
-		mbfl_convert_filter_copy(pe->conv2_filter, pe->conv2_filter_backup);
-		mbfl_convert_filter_copy(pe->encod_filter, pe->encod_filter_backup);
-		(*pe->conv2_filter->filter_function)(c, pe->conv2_filter);
-		(*pe->conv2_filter->filter_flush)(pe->conv2_filter);
-		(*pe->encod_filter->filter_flush)(pe->encod_filter);
-		n = pe->outdev.pos - pe->linehead + pe->firstindent;
-		pe->outdev.pos = pe->prevpos;
-		mbfl_convert_filter_copy(pe->conv2_filter_backup, pe->conv2_filter);
-		mbfl_convert_filter_copy(pe->encod_filter_backup, pe->encod_filter);
-		if (n >= 74) {
-			(*pe->conv2_filter->filter_flush)(pe->conv2_filter);
-			(*pe->encod_filter->filter_flush)(pe->encod_filter);
-			mbfl_memory_device_strncat(&pe->outdev, "\x3f\x3d", 2);	/* ?= */
-			mbfl_memory_device_strncat(&pe->outdev, pe->lwsp, pe->lwsplen);
-			pe->linehead = pe->outdev.pos;
-			pe->firstindent = 0;
-			mbfl_memory_device_strncat(&pe->outdev, pe->encname, pe->encnamelen);
-			c = (*pe->conv2_filter->filter_function)(c, pe->conv2_filter);
-		} else {
-			c = (*pe->conv2_filter->filter_function)(c, pe->conv2_filter);
-		}
-		break;
-
-	default:
-		mbfl_memory_device_strncat(&pe->outdev, pe->encname, pe->encnamelen);
-		c = (*pe->conv2_filter->filter_function)(c, pe->conv2_filter);
-		pe->status2 = 1;
-		break;
-	}
-
-	return 0;
-}
-
-static int
-mime_header_encoder_collector(int c, void *data)
-{
-	static int qp_table[256] = {
-		1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x00 */
-		1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x00 */
-		1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x20 */
-		0, 0, 0, 0, 0, 0, 0 ,0, 0, 0, 0, 0, 0, 1, 0, 1, /* 0x10 */
-		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x40 */
-		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, /* 0x50 */
-		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x60 */
-		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, /* 0x70 */
-		1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x80 */
-		1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x90 */
-		1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0xA0 */
-		1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0xB0 */
-		1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0xC0 */
-		1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0xD0 */
-		1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0xE0 */
-		1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1  /* 0xF0 */
-	};
-
-	size_t n;
-	struct mime_header_encoder_data *pe = (struct mime_header_encoder_data *)data;
-
-	switch (pe->status1) {
-	case 11:	/* encoded word */
-		(*pe->block_filter->filter_function)(c, pe->block_filter);
-		break;
-
-	default:	/* ASCII */
-		if (c <= 0x00ff && !qp_table[(c & 0xff)]) { /* ordinary characters */
-			mbfl_memory_device_output(c, &pe->tmpdev);
-			pe->status1 = 1;
-		} else if (pe->status1 == 0 && c == 0x20) {	/* repeat SPACE */
-			mbfl_memory_device_output(c, &pe->tmpdev);
-		} else {
-			if (pe->tmpdev.pos < 74 && c == 0x20) {
-				n = pe->outdev.pos - pe->linehead + pe->tmpdev.pos + pe->firstindent;
-				if (n > 74) {
-					mbfl_memory_device_strncat(&pe->outdev, pe->lwsp, pe->lwsplen);		/* LWSP */
-					pe->linehead = pe->outdev.pos;
-					pe->firstindent = 0;
-				} else if (pe->outdev.pos > 0) {
-					mbfl_memory_device_output(0x20, &pe->outdev);
-				}
-				mbfl_memory_device_devcat(&pe->outdev, &pe->tmpdev);
-				mbfl_memory_device_reset(&pe->tmpdev);
-				pe->status1 = 0;
-			} else {
-				n = pe->outdev.pos - pe->linehead + pe->encnamelen + pe->firstindent;
-				if (n > 60)  {
-					mbfl_memory_device_strncat(&pe->outdev, pe->lwsp, pe->lwsplen);		/* LWSP */
-					pe->linehead = pe->outdev.pos;
-					pe->firstindent = 0;
-				} else if (pe->outdev.pos > 0)  {
-					mbfl_memory_device_output(0x20, &pe->outdev);
-				}
-				mbfl_convert_filter_devcat(pe->block_filter, &pe->tmpdev);
-				mbfl_memory_device_reset(&pe->tmpdev);
-				(*pe->block_filter->filter_function)(c, pe->block_filter);
-				pe->status1 = 11;
-			}
-		}
-		break;
-	}
-
-	return 0;
-}
-
-mbfl_string *
-mime_header_encoder_result(struct mime_header_encoder_data *pe, mbfl_string *result)
-{
-	if (pe->status1 >= 10) {
-		(*pe->conv2_filter->filter_flush)(pe->conv2_filter);
-		(*pe->encod_filter->filter_flush)(pe->encod_filter);
-		mbfl_memory_device_strncat(&pe->outdev, "\x3f\x3d", 2);		/* ?= */
-	} else if (pe->tmpdev.pos > 0) {
-		if (pe->outdev.pos > 0) {
-			if ((pe->outdev.pos - pe->linehead + pe->tmpdev.pos + pe->firstindent) > 74) {
-				mbfl_memory_device_strncat(&pe->outdev, pe->lwsp, pe->lwsplen);
-			} else {
-				mbfl_memory_device_output(0x20, &pe->outdev);
-			}
-		}
-		mbfl_memory_device_devcat(&pe->outdev, &pe->tmpdev);
-	}
-	mbfl_memory_device_reset(&pe->tmpdev);
-	pe->prevpos = 0;
-	pe->linehead = 0;
-	pe->status1 = 0;
-	pe->status2 = 0;
-
-	return mbfl_memory_device_result(&pe->outdev, result);
-}
-
-struct mime_header_encoder_data*
-mime_header_encoder_new(
-    const mbfl_encoding *incode,
-    const mbfl_encoding *outcode,
-    const mbfl_encoding *transenc)
-{
-	size_t n;
-	const char *s;
-	struct mime_header_encoder_data *pe;
-
-	/* get output encoding and check MIME charset name */
-	if (outcode->mime_name == NULL || outcode->mime_name[0] == '\0') {
-		return NULL;
-	}
-
-	pe = emalloc(sizeof(struct mime_header_encoder_data));
-	mbfl_memory_device_init(&pe->outdev, 0, 0);
-	mbfl_memory_device_init(&pe->tmpdev, 0, 0);
-	pe->prevpos = 0;
-	pe->linehead = 0;
-	pe->firstindent = 0;
-	pe->status1 = 0;
-	pe->status2 = 0;
-
-	/* make the encoding description string  exp. "=?ISO-2022-JP?B?" */
-	n = 0;
-	pe->encname[n++] = 0x3d;
-	pe->encname[n++] = 0x3f;
-	s = outcode->mime_name;
-	while (*s) {
-		pe->encname[n++] = *s++;
-	}
-	pe->encname[n++] = 0x3f;
-	if (transenc->no_encoding == mbfl_no_encoding_qprint) {
-		pe->encname[n++] = 0x51;
-	} else {
-		pe->encname[n++] = 0x42;
-		transenc = &mbfl_encoding_base64;
-	}
-	pe->encname[n++] = 0x3f;
-	pe->encname[n] = '\0';
-	pe->encnamelen = n;
-
-	n = 0;
-	pe->lwsp[n++] = 0x0d;
-	pe->lwsp[n++] = 0x0a;
-	pe->lwsp[n++] = 0x20;
-	pe->lwsp[n] = '\0';
-	pe->lwsplen = n;
-
-	/* transfer encode filter */
-	pe->encod_filter = mbfl_convert_filter_new(outcode, transenc, mbfl_memory_device_output, 0, &(pe->outdev));
-	pe->encod_filter_backup = mbfl_convert_filter_new(outcode, transenc, mbfl_memory_device_output, 0, &(pe->outdev));
-
-	/* Output code filter */
-	pe->conv2_filter = mbfl_convert_filter_new(&mbfl_encoding_wchar, outcode, mbfl_filter_output_pipe, 0, pe->encod_filter);
-	pe->conv2_filter_backup = mbfl_convert_filter_new(&mbfl_encoding_wchar, outcode, mbfl_filter_output_pipe, 0, pe->encod_filter);
-
-	/* encoded block filter */
-	pe->block_filter = mbfl_convert_filter_new(&mbfl_encoding_wchar, &mbfl_encoding_wchar, mime_header_encoder_block_collector, 0, pe);
-
-	/* Input code filter */
-	pe->conv1_filter = mbfl_convert_filter_new(incode, &mbfl_encoding_wchar, mime_header_encoder_collector, 0, pe);
-
-	if (pe->encod_filter == NULL ||
-	    pe->encod_filter_backup == NULL ||
-	    pe->conv2_filter == NULL ||
-	    pe->conv2_filter_backup == NULL ||
-	    pe->conv1_filter == NULL) {
-		mime_header_encoder_delete(pe);
-		return NULL;
-	}
-
-	if (transenc->no_encoding == mbfl_no_encoding_qprint) {
-		pe->encod_filter->status |= MBFL_QPRINT_STS_MIME_HEADER;
-		pe->encod_filter_backup->status |= MBFL_QPRINT_STS_MIME_HEADER;
-	} else {
-		pe->encod_filter->status |= MBFL_BASE64_STS_MIME_HEADER;
-		pe->encod_filter_backup->status |= MBFL_BASE64_STS_MIME_HEADER;
-	}
-
-	return pe;
-}
-
-void
-mime_header_encoder_delete(struct mime_header_encoder_data *pe)
-{
-	if (pe) {
-		mbfl_convert_filter_delete(pe->conv1_filter);
-		mbfl_convert_filter_delete(pe->block_filter);
-		mbfl_convert_filter_delete(pe->conv2_filter);
-		mbfl_convert_filter_delete(pe->conv2_filter_backup);
-		mbfl_convert_filter_delete(pe->encod_filter);
-		mbfl_convert_filter_delete(pe->encod_filter_backup);
-		mbfl_memory_device_clear(&pe->outdev);
-		mbfl_memory_device_clear(&pe->tmpdev);
-		efree((void*)pe);
-	}
-}
-
-mbfl_string *
-mbfl_mime_header_encode(
-    mbfl_string *string,
-    mbfl_string *result,
-    const mbfl_encoding *outcode,
-    const mbfl_encoding *encoding,
-    const char *linefeed,
-    int indent)
-{
-	size_t n;
-	unsigned char *p;
-	struct mime_header_encoder_data *pe;
-
-	mbfl_string_init(result);
-	result->encoding = &mbfl_encoding_ascii;
-
-	pe = mime_header_encoder_new(string->encoding, outcode, encoding);
-	if (pe == NULL) {
-		return NULL;
-	}
-
-	if (linefeed != NULL) {
-		n = 0;
-		while (*linefeed && n < 8) {
-			pe->lwsp[n++] = *linefeed++;
-		}
-		pe->lwsp[n++] = 0x20;
-		pe->lwsp[n] = '\0';
-		pe->lwsplen = n;
-	}
-	if (indent > 0 && indent < 74) {
-		pe->firstindent = indent;
-	}
-
-	n = string->len;
-	p = string->val;
-	while (n > 0) {
-		(*pe->conv1_filter->filter_function)(*p++, pe->conv1_filter);
-		n--;
-	}
-
-	result = mime_header_encoder_result(pe, result);
-	mime_header_encoder_delete(pe);
-
-	return result;
-}
--- a/ext/mbstring/libmbfl/mbfl/mbfilter.h
+++ b/ext/mbstring/libmbfl/mbfl/mbfilter.h
@ -168,29 +168,4 @@ static inline int mbfl_is_error(size_t len) {
 MBFLAPI extern mbfl_string *
 mbfl_strcut(mbfl_string *string, mbfl_string *result, size_t from, size_t length);

-/*
- * MIME header encode
- */
-struct mime_header_encoder_data;	/* forward declaration */
-
-MBFLAPI extern struct mime_header_encoder_data *
-mime_header_encoder_new(
-    const mbfl_encoding *incode,
-    const mbfl_encoding *outcode,
-    const mbfl_encoding *encoding);
-
-MBFLAPI extern void
-mime_header_encoder_delete(struct mime_header_encoder_data *pe);
-
-MBFLAPI extern mbfl_string *
-mime_header_encoder_result(struct mime_header_encoder_data *pe, mbfl_string *result);
-
-MBFLAPI extern mbfl_string *
-mbfl_mime_header_encode(
-    mbfl_string *string, mbfl_string *result,
-    const mbfl_encoding *outcode,
-    const mbfl_encoding *encoding,
-    const char *linefeed,
-    int indent);
-
 #endif	/* MBFL_MBFILTER_H */
--- a/ext/mbstring/libmbfl/mbfl/mbfl_consts.h
+++ b/ext/mbstring/libmbfl/mbfl/mbfl_consts.h
@ -44,9 +44,6 @@
 /* Marker for an erroneous input byte (or sequence of bytes) */
 #define MBFL_BAD_INPUT (-1)

-#define MBFL_QPRINT_STS_MIME_HEADER 0x1000000
-#define MBFL_BASE64_STS_MIME_HEADER 0x1000000
-
 #define MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE 0
 #define MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR 1
 #define MBFL_OUTPUTFILTER_ILLEGAL_MODE_LONG 2
--- a/ext/mbstring/libmbfl/mbfl/mbfl_encoding.h
+++ b/ext/mbstring/libmbfl/mbfl/mbfl_encoding.h
@ -162,17 +162,17 @@ static inline void mb_convert_buf_init(mb_convert_buf *buf, size_t initsize, uin
 #define MB_CONVERT_BUF_ENSURE(buf, out, limit, needed) \
 	ZEND_ASSERT(out <= limit); \
 	if ((limit - out) < (needed)) { \
-		size_t oldsize = limit - (unsigned char*)ZSTR_VAL(buf->str); \
+		size_t oldsize = limit - (unsigned char*)ZSTR_VAL((buf)->str); \
 		size_t newsize = oldsize + MAX(oldsize >> 1, needed); \
-		zend_string *newstr = erealloc(buf->str, _ZSTR_STRUCT_SIZE(newsize)); \
-		out = (unsigned char*)ZSTR_VAL(newstr) + (out - (unsigned char*)ZSTR_VAL(buf->str)); \
+		zend_string *newstr = erealloc((buf)->str, _ZSTR_STRUCT_SIZE(newsize)); \
+		out = (unsigned char*)ZSTR_VAL(newstr) + (out - (unsigned char*)ZSTR_VAL((buf)->str)); \
 		limit = (unsigned char*)ZSTR_VAL(newstr) + newsize; \
-		buf->str = newstr; \
+		(buf)->str = newstr; \
 	}

-#define MB_CONVERT_BUF_STORE(buf, _out, _limit) buf->out = _out; buf->limit = _limit
+#define MB_CONVERT_BUF_STORE(buf, _out, _limit) (buf)->out = _out; (buf)->limit = _limit

-#define MB_CONVERT_BUF_LOAD(buf, _out, _limit) _out = buf->out; _limit = buf->limit
+#define MB_CONVERT_BUF_LOAD(buf, _out, _limit) _out = (buf)->out; _limit = (buf)->limit

 #define MB_CONVERT_ERROR(buf, out, limit, bad_cp, conv_fn) \
 	MB_CONVERT_BUF_STORE(buf, out, limit); \
@ -209,6 +209,22 @@ static inline unsigned char* mb_convert_buf_add4(unsigned char *out, char c1, ch
 	return out;
 }

+static inline unsigned char* mb_convert_buf_appends(unsigned char *out, const char *s) /* Append the NUL-terminated string `s` at `out`; returns the position just past the last byte written */
+{
+	while (*s) { /* stops at the terminating NUL, which is not copied */
+		*out++ = *s++; /* no bounds check here; presumably the caller has already reserved enough space — confirm at call sites */
+	}
+	return out;
+}
+
+static inline unsigned char* mb_convert_buf_appendn(unsigned char *out, const char *s, size_t n) /* Append exactly `n` bytes from `s` at `out`; returns the position just past the last byte written */
+{
+	while (n--) { /* counts bytes rather than looking for a NUL, so zero bytes in `s` are copied too */
+		*out++ = *s++; /* no bounds check here; presumably the caller has already reserved `n` bytes — confirm at call sites */
+	}
+	return out;
+}
+
 static inline zend_string* mb_convert_buf_result_raw(mb_convert_buf *buf)
 {
 	ZEND_ASSERT(buf->out <= buf->limit);
@ -246,6 +262,24 @@ static inline zend_string* mb_convert_buf_result(mb_convert_buf *buf, const mbfl
 	return ret;
 }

+/* Frees `buf->str` (leaving `buf->out` and `buf->limit` untouched). Used if we initialize an
+ * `mb_convert_buf` but then discover we don't actually want to return a `zend_string` */
+static inline void mb_convert_buf_free(mb_convert_buf *buf)
+{
+	efree(buf->str);
+}
+
+static inline size_t mb_convert_buf_len(mb_convert_buf *buf) /* Number of bytes between the start of the buffer's string data and the current write position `buf->out` */
+{
+	return buf->out - (unsigned char*)ZSTR_VAL(buf->str);
+}
+
+static inline void mb_convert_buf_reset(mb_convert_buf *buf, size_t len) /* Move the write position to `len` bytes past the start of the buffer's string data */
+{
+	buf->out = (unsigned char*)ZSTR_VAL(buf->str) + len;
+	ZEND_ASSERT(buf->out <= buf->limit); /* the new write position must not lie beyond `buf->limit` */
+}
+
 MBFLAPI extern const mbfl_encoding *mbfl_name2encoding(const char *name);
 MBFLAPI extern const mbfl_encoding *mbfl_no2encoding(enum mbfl_no_encoding no_encoding);
 MBFLAPI extern const mbfl_encoding **mbfl_get_supported_encodings(void);
--- a/ext/mbstring/mbstring.c
+++ b/ext/mbstring/mbstring.c
@ -46,6 +46,7 @@
 #include "libmbfl/filters/mbfilter_utf16.h"
 #include "libmbfl/filters/mbfilter_singlebyte.h"
 #include "libmbfl/filters/translit_kana_jisx0201_jisx0208.h"
+#include "libmbfl/filters/unicode_prop.h"

 #include "php_variables.h"
 #include "php_globals.h"
@ -91,6 +92,8 @@ static bool mb_check_str_encoding(zend_string *str, const mbfl_encoding *encodin

 static const mbfl_encoding* mb_guess_encoding(unsigned char *in, size_t in_len, const mbfl_encoding **elist, unsigned int elist_size, bool strict);

+static zend_string* mb_mime_header_encode(zend_string *input, const mbfl_encoding *incode, const mbfl_encoding *outcode, bool base64, char *linefeed, size_t linefeed_len, zend_long indent);
+
 /* See mbfilter_cp5022x.c */
 uint32_t mb_convert_kana_codepoint(uint32_t c, uint32_t next, bool *consumed, uint32_t *second, int mode);
 /* }}} */
@ -3201,66 +3204,6 @@ PHP_FUNCTION(mb_encoding_aliases)
 }
 /* }}} */

-/* {{{ Converts the string to MIME "encoded-word" in the format of =?charset?(B|Q)?encoded_string?= */
-PHP_FUNCTION(mb_encode_mimeheader)
-{
-	const mbfl_encoding *charset, *transenc;
-	mbfl_string  string, result, *ret;
-	zend_string *charset_name = NULL;
-	char *trans_enc_name = NULL, *string_val;
-	size_t trans_enc_name_len;
-	char *linefeed = "\r\n";
-	size_t linefeed_len;
-	zend_long indent = 0;
-
-	string.encoding = MBSTRG(current_internal_encoding);
-
-	ZEND_PARSE_PARAMETERS_START(1, 5)
-		Z_PARAM_STRING(string_val, string.len)
-		Z_PARAM_OPTIONAL
-		Z_PARAM_STR(charset_name)
-		Z_PARAM_STRING(trans_enc_name, trans_enc_name_len)
-		Z_PARAM_STRING(linefeed, linefeed_len)
-		Z_PARAM_LONG(indent)
-	ZEND_PARSE_PARAMETERS_END();
-
-	string.val = (unsigned char*)string_val;
-	charset = &mbfl_encoding_pass;
-	transenc = &mbfl_encoding_base64;
-
-	if (charset_name != NULL) {
-		charset = php_mb_get_encoding(charset_name, 2);
-		if (!charset) {
-			RETURN_THROWS();
-		} else if (charset->mime_name == NULL || charset->mime_name[0] == '\0') {
-			zend_argument_value_error(2, "\"%s\" cannot be used for MIME header encoding", ZSTR_VAL(charset_name));
-			RETURN_THROWS();
-		}
-	} else {
-		const mbfl_language *lang = mbfl_no2language(MBSTRG(language));
-		if (lang != NULL) {
-			charset = mbfl_no2encoding(lang->mail_charset);
-			transenc = mbfl_no2encoding(lang->mail_header_encoding);
-		}
-	}
-
-	if (trans_enc_name != NULL) {
-		if (*trans_enc_name == 'B' || *trans_enc_name == 'b') {
-			transenc = &mbfl_encoding_base64;
-		} else if (*trans_enc_name == 'Q' || *trans_enc_name == 'q') {
-			transenc = &mbfl_encoding_qprint;
-		}
-	}
-
-	mbfl_string_init(&result);
-	ret = mbfl_mime_header_encode(&string, &result, charset, transenc, linefeed, indent);
-	ZEND_ASSERT(ret != NULL);
-	// TODO: avoid reallocation ???
-	RETVAL_STRINGL((char *)ret->val, ret->len);	/* the string is already strdup()'ed */
-	efree(ret->val);
-}
-/* }}} */
-
 static zend_string* jp_kana_convert(zend_string *input, const mbfl_encoding *encoding, unsigned int mode)
 {
 	/* Each wchar may potentially expand to 2 when we perform kana conversion...
@ -4156,8 +4099,7 @@ PHP_FUNCTION(mb_send_mail)
 	size_t to_len;
 	char *message;
 	size_t message_len;
-	char *subject;
-	size_t subject_len;
+	zend_string *subject;
 	zend_string *extra_cmd = NULL;
 	HashTable *headers_ht = NULL;
 	zend_string *str_headers = NULL;
@ -4169,9 +4111,7 @@ PHP_FUNCTION(mb_send_mail)
 		int cnt_trans_enc:1;
 	} suppressed_hdrs = { 0, 0 };

-	char *subject_buf = NULL, *p;
-	mbfl_string orig_str, conv_str;
-	mbfl_string *pstr;	/* pointer to mbfl string for return value */
+	char *p;
 	enum mbfl_no_encoding;
 	const mbfl_encoding *tran_cs,	/* transfer text charset */
 						*head_enc,	/* header transfer encoding */
@ -4181,10 +4121,6 @@ PHP_FUNCTION(mb_send_mail)
 	HashTable ht_headers;
 	zval *s;

-	/* initialize */
-	mbfl_string_init(&orig_str);
-	mbfl_string_init(&conv_str);
-
 	/* character-set, transfer-encoding */
 	tran_cs = &mbfl_encoding_utf8;
 	head_enc = &mbfl_encoding_base64;
@ -4198,7 +4134,7 @@ PHP_FUNCTION(mb_send_mail)

 	ZEND_PARSE_PARAMETERS_START(3, 5)
 		Z_PARAM_PATH(to, to_len)
-		Z_PARAM_PATH(subject, subject_len)
+		Z_PARAM_PATH_STR(subject)
 		Z_PARAM_PATH(message, message_len)
 		Z_PARAM_OPTIONAL
 		Z_PARAM_ARRAY_HT_OR_STR(headers_ht, str_headers)
@ -4310,22 +4246,17 @@ PHP_FUNCTION(mb_send_mail)
 	}

 	/* Subject: */
-	orig_str.val = (unsigned char *)subject;
-	orig_str.len = subject_len;
-	orig_str.encoding = MBSTRG(current_internal_encoding);
-	if (orig_str.encoding->no_encoding == mbfl_no_encoding_invalid || orig_str.encoding->no_encoding == mbfl_no_encoding_pass) {
-		orig_str.encoding = mb_guess_encoding((unsigned char*)subject, subject_len, MBSTRG(current_detect_order_list), MBSTRG(current_detect_order_list_size), MBSTRG(strict_detection));
+	const mbfl_encoding *enc = MBSTRG(current_internal_encoding);
+	if (enc == &mbfl_encoding_pass) {
+		enc = mb_guess_encoding((unsigned char*)ZSTR_VAL(subject), ZSTR_LEN(subject), MBSTRG(current_detect_order_list), MBSTRG(current_detect_order_list_size), MBSTRG(strict_detection));
 	}
 	const char *line_sep = PG(mail_mixed_lf_and_crlf) ? "\n" : CRLF;
 	size_t line_sep_len = strlen(line_sep);
-	pstr = mbfl_mime_header_encode(&orig_str, &conv_str, tran_cs, head_enc, line_sep, strlen("Subject: [PHP-jp nnnnnnnn]") + line_sep_len);
-	if (pstr != NULL) {
-		subject_buf = subject = (char *)pstr->val;
-	}
+
+	subject = mb_mime_header_encode(subject, enc, tran_cs, head_enc == &mbfl_encoding_base64, (char*)line_sep, line_sep_len, strlen("Subject: [PHP-jp nnnnnnnn]") + line_sep_len);

 	/* message body */
 	const mbfl_encoding *msg_enc = MBSTRG(current_internal_encoding);
-
 	if (msg_enc == &mbfl_encoding_pass) {
 		msg_enc = mb_guess_encoding((unsigned char*)message, message_len, MBSTRG(current_detect_order_list), MBSTRG(current_detect_order_list_size), MBSTRG(strict_detection));
 	}
@ -4401,18 +4332,15 @@ PHP_FUNCTION(mb_send_mail)
 		extra_cmd = php_escape_shell_cmd(ZSTR_VAL(extra_cmd));
 	}

-	RETVAL_BOOL(!err && php_mail(to_r, subject, message, ZSTR_VAL(str_headers), extra_cmd ? ZSTR_VAL(extra_cmd) : NULL));
+	RETVAL_BOOL(!err && php_mail(to_r, ZSTR_VAL(subject), message, ZSTR_VAL(str_headers), extra_cmd ? ZSTR_VAL(extra_cmd) : NULL));

 	if (extra_cmd) {
 		zend_string_release_ex(extra_cmd, 0);
 	}
-
 	if (to_r != to) {
 		efree(to_r);
 	}
-	if (subject_buf) {
-		efree((void *)subject_buf);
-	}
+	zend_string_release(subject);
 	zend_string_free(conv);
 	zend_hash_destroy(&ht_headers);
 	if (str_headers) {
@ -5634,6 +5562,418 @@ static void php_mb_gpc_set_input_encoding(const zend_encoding *encoding) /* {{{
 }
 /* }}} */

+/* Base64 alphabet: the array index is a 6-bit value, the element is the character
+ * which represents it. The string literal's implicit NUL terminator provides the
+ * final zero entry, so the table has 65 elements in total. */
+static const unsigned char base64_table[] =
+	"ABCDEFGHIJKLMNOPQRSTUVWXYZ"
+	"abcdefghijklmnopqrstuvwxyz"
+	"0123456789+/";
+
+/* Report how many bytes the current contents of 'tmpbuf' would occupy after being
+ * transfer-encoded: as Base64 when 'base64' is true, as QPrint otherwise.
+ * Nothing is written; 'tmpbuf' is only read. */
+static size_t transfer_encoded_size(mb_convert_buf *tmpbuf, bool base64)
+{
+	if (base64) {
+		/* Each group of up to 3 raw bytes turns into 4 Base64 characters */
+		size_t groups = (mb_convert_buf_len(tmpbuf) + 2) / 3;
+		return groups * 4;
+	}
+
+	/* QPrint: a byte is either copied as-is (1 char) or written as "=XX" (3 chars) */
+	size_t total = 0;
+	const unsigned char *end = tmpbuf->out;
+	for (const unsigned char *cur = (unsigned char*)ZSTR_VAL(tmpbuf->str); cur < end; cur++) {
+		unsigned char byte = *cur;
+		if (byte > 0x7F || byte == '=' || mime_char_needs_qencode[byte]) {
+			total += 3;
+		} else {
+			total += 1;
+		}
+	}
+	return total;
+}
+
+/* Transfer-encode every byte currently held in 'tmpbuf' (Base64 when 'base64' is
+ * true, QPrint otherwise) and append the encoded characters to 'outbuf'.
+ * 'outbuf' is grown up front by the worst-case encoded size, and 'tmpbuf' is reset
+ * to position 0 before returning. */
+static void transfer_encode_mime_bytes(mb_convert_buf *tmpbuf, mb_convert_buf *outbuf, bool base64)
+{
+	unsigned char *dst, *dst_limit;
+	MB_CONVERT_BUF_LOAD(outbuf, dst, dst_limit);
+	unsigned char *src = (unsigned char*)ZSTR_VAL(tmpbuf->str);
+	unsigned char *src_end = tmpbuf->out;
+
+	if (!base64) {
+		/* QPrint: worst case is 3 output characters ("=XX") for every input byte */
+		const char *hex_digits = "0123456789ABCDEF";
+		MB_CONVERT_BUF_ENSURE(outbuf, dst, dst_limit, (src_end - src) * 3);
+		for (; src < src_end; src++) {
+			unsigned char byte = *src;
+			bool escape = byte > 0x7F || byte == '=' || mime_char_needs_qencode[byte];
+			if (escape) {
+				dst = mb_convert_buf_add3(dst, '=', hex_digits[(byte >> 4) & 0xF], hex_digits[byte & 0xF]);
+			} else {
+				dst = mb_convert_buf_add(dst, byte);
+			}
+		}
+	} else {
+		/* Base64: 4 output characters for each (possibly partial) group of 3 bytes */
+		MB_CONVERT_BUF_ENSURE(outbuf, dst, dst_limit, ((src_end - src) + 2) / 3 * 4);
+		for (; (src_end - src) >= 3; src += 3) {
+			uint32_t triple = (src[0] << 16) | (src[1] << 8) | src[2];
+			dst = mb_convert_buf_add4(dst,
+				base64_table[(triple >> 18) & 0x3F],
+				base64_table[(triple >> 12) & 0x3F],
+				base64_table[(triple >> 6) & 0x3F],
+				base64_table[triple & 0x3F]);
+		}
+
+		/* 0, 1 or 2 bytes are left over; pad the final group with '=' */
+		size_t leftover = src_end - src;
+		if (leftover == 1) {
+			uint32_t bits = src[0];
+			dst = mb_convert_buf_add4(dst, base64_table[(bits >> 2) & 0x3F], base64_table[(bits & 0x3) << 4], '=', '=');
+		} else if (leftover == 2) {
+			uint32_t bits = (src[0] << 8) | src[1];
+			dst = mb_convert_buf_add4(dst, base64_table[(bits >> 10) & 0x3F], base64_table[(bits >> 4) & 0x3F], base64_table[(bits & 0xF) << 2], '=');
+		}
+	}
+
+	mb_convert_buf_reset(tmpbuf, 0);
+	MB_CONVERT_BUF_STORE(outbuf, dst, dst_limit);
+}
+
+/* Encode 'input' (text in encoding 'incode') for use as a MIME header value.
+ *
+ * - Empty input yields the empty string.
+ * - Input made up only of ASCII 0x21-0x7E other than '=', '?' and '_', optionally
+ *   preceded by spaces, is returned unchanged (only its refcount is incremented).
+ * - Otherwise, leading space-delimited ASCII 'words' are copied as plain ASCII, and
+ *   from the first word which cannot be copied that way, everything up to the end of
+ *   the input is converted to 'outcode' and emitted as one or more
+ *   "=?<mime name>?B?...?=" / "=?<mime name>?Q?...?=" encoded words, separated by
+ *   'linefeed' followed by a single space.
+ *
+ * input        string to encode
+ * incode       text encoding of 'input'
+ * outcode      text encoding to convert to before transfer-encoding; its 'mime_name'
+ *              is passed to strlen(), so it must not be NULL
+ * base64       true for Base64 transfer encoding, false for QPrint
+ * linefeed     line separator; only the first 'linefeed_len' bytes (at most 8) are
+ *              used, and it is cut short at the first NUL byte
+ * indent       number of columns already used on the first output line; values
+ *              outside 0..73 are treated as 0
+ *
+ * Returns a zend_string which the caller must release. */
+static zend_string* mb_mime_header_encode(zend_string *input, const mbfl_encoding *incode, const mbfl_encoding *outcode, bool base64, char *linefeed, size_t linefeed_len, zend_long indent)
+{
+	unsigned char *in = (unsigned char*)ZSTR_VAL(input);
+	size_t in_len = ZSTR_LEN(input);
+
+	if (!in_len) {
+		return zend_empty_string;
+	}
+
+	if (indent < 0 || indent >= 74) {
+		indent = 0;
+	}
+
+	if (linefeed_len > 8) {
+		linefeed_len = 8;
+	}
+	/* Maintain legacy behavior as regards embedded NUL (zero) bytes in linefeed string */
+	for (size_t i = 0; i < linefeed_len; i++) {
+		if (linefeed[i] == '\0') {
+			linefeed_len = i;
+			break;
+		}
+	}
+
+	unsigned int state = 0;
+	/* wchar_buf should be big enough that when it is full, we definitely have enough
+	 * wchars to fill an entire line of output */
+	uint32_t wchar_buf[80];
+	uint32_t *p, *e;
+	/* What part of wchar_buf is filled with still-unprocessed data which should not
+	 * be overwritten? */
+	unsigned int offset = 0;
+	size_t line_start = 0;
+	/* Minimum number of free slots which must remain in wchar_buf before we try to
+	 * decode more input into it while still in 'ASCII mode'
+	 * NOTE(review): 5 is believed to match upstream's minimum wchar buffer size for
+	 * the to_wchar decoders -- confirm against the mbstring headers */
+	enum { MIN_FREE_WCHARS = 5 };
+
+	/* If the entire input string is ASCII with no spaces (except possibly leading
+	 * spaces), just pass it through unchanged */
+	bool checking_leading_spaces = true;
+	while (in_len) {
+		size_t out_len = incode->to_wchar(&in, &in_len, wchar_buf, 80, &state);
+		p = wchar_buf;
+		e = wchar_buf + out_len;
+
+		while (p < e) {
+			uint32_t w = *p++;
+			if (checking_leading_spaces) {
+				if (w == ' ') {
+					continue;
+				} else {
+					checking_leading_spaces = false;
+				}
+			}
+			if (w < 0x21 || w > 0x7E || w == '=' || w == '?' || w == '_') {
+				/* We cannot simply pass input string through unchanged; start again */
+				in = (unsigned char*)ZSTR_VAL(input);
+				in_len = ZSTR_LEN(input);
+				goto no_passthrough;
+			}
+		}
+	}
+
+	return zend_string_copy(input); /* This just increments refcount */
+
+no_passthrough: ;
+
+	mb_convert_buf buf;
+	mb_convert_buf_init(&buf, in_len, '?', MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR);
+
+	/* Encode some prefix of the input string as plain ASCII if possible
+	 * If we find it necessary to switch to Base64/QPrint encoding, we will
+	 * do so all the way to the end of the string */
+	while (in_len) {
+		/* Decode part of the input string, refill wchar_buf */
+		ZEND_ASSERT(offset < 80);
+		size_t out_len = incode->to_wchar(&in, &in_len, wchar_buf + offset, 80 - offset, &state);
+		ZEND_ASSERT(out_len <= 80 - offset);
+		p = wchar_buf;
+		e = wchar_buf + offset + out_len;
+		/* ASCII output is broken into space-delimited 'words'
+		 * If we find a non-ASCII character in the middle of a word, we will
+		 * transfer-encode the entire word */
+		uint32_t *word_start = p;
+
+		/* Don't consider adding line feed for spaces at the beginning of a word */
+		while (p < e && *p == ' ' && (p - word_start) <= 74) {
+			p++;
+		}
+
+		while (p < e) {
+			uint32_t w = *p++;
+
+			if (w < 0x20 || w > 0x7E || w == '?' || w == '=' || w == '_' || (w == ' ' && (p - word_start) > 74)) {
+				/* Non-ASCII character (or line too long); switch to Base64/QPrint encoding
+				 * If we are already too far along on a line to include Base64/QPrint encoded data
+				 * on the same line (without overrunning max line length), then add a line feed
+				 * right now
+				 * (This point is also reached by a jump from below, when an unfinished
+				 * ASCII word is too long to keep buffering; 'w' is not used past here) */
+feed_and_mime_encode:
+				if (mb_convert_buf_len(&buf) - line_start + indent + strlen(outcode->mime_name) > 55) {
+					MB_CONVERT_BUF_ENSURE(&buf, buf.out, buf.limit, (e - word_start) + linefeed_len + 1);
+					buf.out = mb_convert_buf_appendn(buf.out, linefeed, linefeed_len);
+					buf.out = mb_convert_buf_add(buf.out, ' ');
+					indent = 0;
+					line_start = mb_convert_buf_len(&buf);
+				} else if (mb_convert_buf_len(&buf) > 0) {
+					MB_CONVERT_BUF_ENSURE(&buf, buf.out, buf.limit, 1);
+					buf.out = mb_convert_buf_add(buf.out, ' ');
+				}
+				p = word_start; /* Back up to where MIME encoding of input chars should start */
+				goto mime_encoding_needed;
+			} else if (w == ' ') {
+				/* When we see a space, check whether we should insert a line break */
+				if (mb_convert_buf_len(&buf) - line_start + (p - word_start) + indent > 75) {
+					MB_CONVERT_BUF_ENSURE(&buf, buf.out, buf.limit, (e - word_start) + linefeed_len + 1);
+					buf.out = mb_convert_buf_appendn(buf.out, linefeed, linefeed_len);
+					buf.out = mb_convert_buf_add(buf.out, ' ');
+					indent = 0;
+					line_start = mb_convert_buf_len(&buf);
+				} else if (mb_convert_buf_len(&buf) > 0) {
+					MB_CONVERT_BUF_ENSURE(&buf, buf.out, buf.limit, (e - word_start) + 1);
+					buf.out = mb_convert_buf_add(buf.out, ' ');
+				}
+				/* Output one (space-delimited) word as plain ASCII */
+				while (word_start < p-1) {
+					buf.out = mb_convert_buf_add(buf.out, *word_start++ & 0xFF);
+				}
+				word_start++;
+				while (p < e && *p == ' ') {
+					p++;
+				}
+			}
+		}
+
+		if (in_len) {
+			/* Copy chars which are part of an incomplete 'word' to the beginning
+			 * of wchar_buf and reprocess them on the next iteration
+			 * But first make sure that the incomplete 'word' is not so long that
+			 * (almost) no room would be left in wchar_buf to decode more input;
+			 * if the word started at the very beginning of a full buffer, 'offset'
+			 * would become 80 and the next iteration could never make progress.
+			 * Such an over-long word is transfer-encoded instead */
+			if ((word_start - wchar_buf) < MIN_FREE_WCHARS) {
+				goto feed_and_mime_encode;
+			}
+			offset = e - word_start;
+			if (offset) {
+				memmove(wchar_buf, word_start, offset * sizeof(uint32_t));
+			}
+		} else {
+			/* We have reached the end of the input string while still in 'ASCII mode';
+			 * process any trailing ASCII chars which were not followed by a space */
+			if (word_start < e && mb_convert_buf_len(&buf) > 0) {
+				/* The whole input string was not just one big ASCII 'word' with no spaces
+				 * consider adding a line feed if necessary to prevent output lines from
+				 * being too long */
+				if (mb_convert_buf_len(&buf) - line_start + (p - word_start) + indent > 74) {
+					MB_CONVERT_BUF_ENSURE(&buf, buf.out, buf.limit, (e - word_start) + linefeed_len + 1);
+					buf.out = mb_convert_buf_appendn(buf.out, linefeed, linefeed_len);
+					buf.out = mb_convert_buf_add(buf.out, ' ');
+				} else {
+					MB_CONVERT_BUF_ENSURE(&buf, buf.out, buf.limit, (e - word_start) + 1);
+					buf.out = mb_convert_buf_add(buf.out, ' ');
+				}
+			}
+			while (word_start < e) {
+				buf.out = mb_convert_buf_add(buf.out, *word_start++ & 0xFF);
+			}
+		}
+	}
+
+	/* Ensure output string is marked as valid UTF-8 (ASCII strings are always 'valid UTF-8') */
+	return mb_convert_buf_result(&buf, &mbfl_encoding_utf8);
+
+mime_encoding_needed: ;
+
+	/* We will generate the output line by line, first converting wchars to bytes
+	 * in the requested output encoding, then transfer-encoding those bytes as
+	 * Base64 or QPrint
+	 * 'tmpbuf' will receive the bytes which need to be transfer-encoded before
+	 * sending them to 'buf' */
+	mb_convert_buf tmpbuf;
+	mb_convert_buf_init(&tmpbuf, in_len, '?', MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR);
+
+	/* Do we need to refill wchar_buf to make sure we don't run out of wchars
+	 * in the middle of a line? */
+	if (p == wchar_buf) {
+		goto start_new_line;
+	}
+	/* NOTE(review): when 'p' is only a few slots into a full buffer, the refill below
+	 * hands to_wchar a very small output buffer -- confirm every decoder tolerates that */
+	offset = e - p;
+	memmove(wchar_buf, p, offset * sizeof(uint32_t));
+
+	while(true) {
+refill_wchar_buf: ;
+		ZEND_ASSERT(offset < 80);
+		size_t out_len = incode->to_wchar(&in, &in_len, wchar_buf + offset, 80 - offset, &state);
+		ZEND_ASSERT(out_len <= 80 - offset);
+		p = wchar_buf;
+		e = wchar_buf + offset + out_len;
+
+start_new_line: ;
+		MB_CONVERT_BUF_ENSURE(&buf, buf.out, buf.limit, strlen(outcode->mime_name) + 5);
+		buf.out = mb_convert_buf_add2(buf.out, '=', '?');
+		buf.out = mb_convert_buf_appends(buf.out, outcode->mime_name);
+		buf.out = mb_convert_buf_add3(buf.out, '?', base64 ? 'B' : 'Q', '?');
+
+		/* How many wchars should we try converting to Base64/QPrint-encoded bytes?
+		 * We do something like a 'binary search' to find the greatest number which
+		 * can be included on this line without exceeding max line length */
+		unsigned int n = 12;
+		size_t space_available = 73 - indent - (mb_convert_buf_len(&buf) - line_start);
+
+		while (true) {
+			ZEND_ASSERT(p < e);
+
+			/* Remember where we were in process of generating output, so we can back
+			 * up if necessary */
+			size_t tmppos = mb_convert_buf_len(&tmpbuf);
+			unsigned int tmpstate = tmpbuf.state;
+
+			/* Try encoding 'n' wchars in output text encoding and sending output
+			 * bytes to 'tmpbuf'. Hopefully this is not too many to fit on the
+			 * current line. */
+			n = MIN(n, e - p);
+			outcode->from_wchar(p, n, &tmpbuf, false);
+
+			/* For some output text encodings, there may be a few ending bytes
+			 * which need to be emitted to output before we break a line.
+			 * Again, remember where we were so we can back up */
+			size_t tmppos2 = mb_convert_buf_len(&tmpbuf);
+			unsigned int tmpstate2 = tmpbuf.state;
+			outcode->from_wchar(NULL, 0, &tmpbuf, true);
+
+			if (transfer_encoded_size(&tmpbuf, base64) <= space_available || (n == 1 && tmppos == 0)) {
+				/* If we convert 'n' more wchars on the current line, it will not
+				 * overflow the maximum line length */
+				p += n;
+
+				if (p == e) {
+					/* We are done; we shouldn't reach here if there is more remaining
+					 * of the input string which needs to be processed */
+					ZEND_ASSERT(!in_len);
+					transfer_encode_mime_bytes(&tmpbuf, &buf, base64);
+					MB_CONVERT_BUF_ENSURE(&buf, buf.out, buf.limit, 2);
+					buf.out = mb_convert_buf_add2(buf.out, '?', '=');
+					mb_convert_buf_free(&tmpbuf);
+					return mb_convert_buf_result(&buf, &mbfl_encoding_utf8);
+				} else {
+					/* It's possible that more chars might fit on the current line,
+					 * so back up to where we were before emitting any ending bytes */
+					mb_convert_buf_reset(&tmpbuf, tmppos2);
+					tmpbuf.state = tmpstate2;
+				}
+			} else {
+				/* Converting 'n' more wchars on this line would be too much.
+				 * Back up to where we were before we tried that. */
+				mb_convert_buf_reset(&tmpbuf, tmppos);
+				tmpbuf.state = tmpstate;
+
+				if (n == 1) {
+					/* We have found the exact number of chars which will fit on the
+					 * current line. Finish up and move to a new line. */
+					outcode->from_wchar(NULL, 0, &tmpbuf, true);
+					transfer_encode_mime_bytes(&tmpbuf, &buf, base64);
+					tmpbuf.state = 0;
+
+					MB_CONVERT_BUF_ENSURE(&buf, buf.out, buf.limit, 3 + linefeed_len);
+					buf.out = mb_convert_buf_add2(buf.out, '?', '=');
+
+					indent = 0; /* Indent argument must only affect the first line */
+
+					if (in_len) {
+						/* We still have more of input string remaining to decode */
+						buf.out = mb_convert_buf_appendn(buf.out, linefeed, linefeed_len);
+						buf.out = mb_convert_buf_add(buf.out, ' ');
+						line_start = mb_convert_buf_len(&buf);
+						/* Copy remaining wchars to beginning of buffer so they will be
+						 * processed on the next iteration of the outer loop */
+						offset = e - p;
+						memmove(wchar_buf, p, offset * sizeof(uint32_t));
+						goto refill_wchar_buf;
+					} else if (p < e) {
+						/* Input string is finished, but we still have trailing wchars
+						 * remaining to be processed in wchar_buf */
+						buf.out = mb_convert_buf_appendn(buf.out, linefeed, linefeed_len);
+						buf.out = mb_convert_buf_add(buf.out, ' ');
+						line_start = mb_convert_buf_len(&buf);
+						goto start_new_line;
+					} else {
+						/* We are done! */
+						mb_convert_buf_free(&tmpbuf);
+						return mb_convert_buf_result(&buf, &mbfl_encoding_utf8);
+					}
+				} else {
+					/* Try a smaller number of wchars */
+					n = MAX(n >> 1, 1);
+				}
+			}
+		}
+	}
+}
+
+/* mb_encode_mimeheader(str [, charset [, transfer_encoding [, newline [, indent]]]])
+ *
+ * Encodes 'str' (taken to be in the current internal encoding) as a MIME header value.
+ *
+ * - charset: name of the output text encoding. Unknown names, and encodings with no
+ *   MIME name, throw. When omitted, the mail charset and the mail header transfer
+ *   encoding of the current mbstring language are used.
+ * - transfer_encoding: only the first byte is examined; 'B'/'b' selects Base64 and
+ *   'Q'/'q' selects QPrint. Anything else leaves the current choice (Base64 unless
+ *   the language default said otherwise) untouched.
+ * - newline: line separator, "\r\n" by default.
+ * - indent: number of columns already used on the first line, 0 by default. */
+PHP_FUNCTION(mb_encode_mimeheader)
+{
+	const mbfl_encoding *charset = &mbfl_encoding_pass;
+	zend_string *str, *charset_name = NULL, *transenc_name = NULL;
+	char *linefeed = "\r\n";
+	size_t linefeed_len = 2;
+	zend_long indent = 0;
+	bool base64 = true;
+
+	ZEND_PARSE_PARAMETERS_START(1, 5)
+		Z_PARAM_STR(str)
+		Z_PARAM_OPTIONAL
+		Z_PARAM_STR(charset_name)
+		Z_PARAM_STR(transenc_name)
+		Z_PARAM_STRING(linefeed, linefeed_len)
+		Z_PARAM_LONG(indent)
+	ZEND_PARSE_PARAMETERS_END();
+
+	if (charset_name != NULL) {
+		charset = php_mb_get_encoding(charset_name, 2);
+		if (!charset) {
+			RETURN_THROWS();
+		} else if (charset->mime_name == NULL || charset->mime_name[0] == '\0') {
+			/* The MIME name is written into every encoded word, so it is required */
+			zend_argument_value_error(2, "\"%s\" cannot be used for MIME header encoding", ZSTR_VAL(charset_name));
+			RETURN_THROWS();
+		}
+	} else {
+		/* No charset given: fall back on the defaults of the current language
+		 * NOTE(review): if no language is found, 'charset' stays as the 'pass'
+		 * encoding, whose MIME name is not validated here -- confirm this cannot happen */
+		const mbfl_language *lang = mbfl_no2language(MBSTRG(language));
+		if (lang != NULL) {
+			charset = mbfl_no2encoding(lang->mail_charset);
+			const mbfl_encoding *transenc = mbfl_no2encoding(lang->mail_header_encoding);
+			char t = transenc->name[0];
+			if (t == 'Q' || t == 'q') {
+				base64 = false;
+			}
+		}
+	}
+
+	if (transenc_name != NULL && ZSTR_LEN(transenc_name) > 0) {
+		char t = ZSTR_VAL(transenc_name)[0];
+		if (t == 'Q' || t == 'q') {
+			base64 = false;
+		} else if (t == 'B' || t == 'b') {
+			/* An explicit request for Base64 must win over a language default of
+			 * QPrint which may have been applied above */
+			base64 = true;
+		}
+	}
+
+	RETURN_STR(mb_mime_header_encode(str, MBSTRG(current_internal_encoding), charset, base64, linefeed, linefeed_len, indent));
+}
+
 static int8_t decode_base64(unsigned char c)
 {
 	if (c >= 'A' && c <= 'Z') {
--- a/ext/mbstring/tests/mb_encode_mimeheader_basic4.phpt
+++ b/ext/mbstring/tests/mb_encode_mimeheader_basic4.phpt
@ -0,0 +1,160 @@
+--TEST--
+Test mb_encode_mimeheader() function : test cases found by fuzzer
+--EXTENSIONS--
+mbstring
+--FILE--
+<?php
+
+mb_internal_encoding('UTF-8');
+
+var_dump(mb_encode_mimeheader("", "UTF-8", "Q"));
+
+// Regression test for QPrint-encoding of strings with embedded NUL (zero) bytes
+var_dump(mb_encode_mimeheader("abc\x00abc", "UTF-8", "Q"));
+
+// Regression test for input strings which end prematurely
+var_dump(mb_encode_mimeheader("\xE2", "UTF-8", "B"));
+
+// Handling of ? signs
+var_dump(mb_encode_mimeheader("?", "ASCII", "B"));
+var_dump(mb_encode_mimeheader("?", "ASCII", "Q"));
+
+// Handling of = signs
+var_dump(mb_encode_mimeheader("=", "ASCII", "B"));
+var_dump(mb_encode_mimeheader("=", "ASCII", "Q"));
+
+// Handling of underscores
+var_dump(mb_encode_mimeheader("_", "ASCII", "B"));
+var_dump(mb_encode_mimeheader("_", "ASCII", "Q"));
+
+// Handling of 0x7F (DEL)
+var_dump(mb_encode_mimeheader("\x7f", "ASCII", "B", ""));
+
+// Handling of leading spaces
+var_dump(mb_encode_mimeheader(" ", "ASCII", "B"));
+var_dump(mb_encode_mimeheader(" ", "ASCII", "Q"));
+var_dump(mb_encode_mimeheader("   ", "ASCII", "B"));
+var_dump(mb_encode_mimeheader("   ", "ASCII", "Q"));
+
+// Try multiple spaces after a word
+var_dump(mb_encode_mimeheader("ab  ab  ", "ASCII", "B"));
+var_dump(mb_encode_mimeheader("ab  ab  ", "ASCII", "Q"));
+
+// Trailing spaces
+var_dump(mb_encode_mimeheader("` ", "HZ", "B", ""));
+var_dump(mb_encode_mimeheader("S ", "ASCII", "Q", "", 73));
+
+// Regression test: extra spaces should not be added at beginning of ASCII string
+// when entire input is one ASCII 'word' and high indent value makes us consider
+// adding a line feed
+var_dump(mb_encode_mimeheader("S4", "ASCII", "B", "\n", 73));
+var_dump(mb_encode_mimeheader("S4", "ASCII", "Q", "\n", 73));
+
+// Regression test: converting UTF-8 to UCS-4 and then QPrint-encoding makes string
+// take a vastly larger number of bytes; make sure we don't overrun max line length
+var_dump(mb_encode_mimeheader("24\x0a", "UCS-4", "Q", "", 29));
+
+// Regression test: include space after ASCII word when switching to Base64 encoding
+var_dump(mb_encode_mimeheader("o\x20\x00", "ASCII", "B"));
+
+// Regression test for buffer overrun while performing Base64-encoding
+var_dump(mb_encode_mimeheader("\x00\x11\x01\x00\x00\x00\x00\x00\x00\x00", "UCS-4", "B"));
+
+// Regression test for incorrect calculation of when to stop generating output
+var_dump(mb_encode_mimeheader("\x01\x00\xcb\xcb\xcb\xcb\xcb\xcb=\xcb\xcb\xcb=?\x01\x00a\x00\x00\xcb\xcb\xcb=?\xcb\xcb\xcb\xcb\xcb\xcb\xcb\xcb\xcb?4?4\xcb\xcb\xcb\xcb\xcb=?\x01\x00\x00\x00\x01\x00\x00\x06\xcb\xcb\xcb\xcb\xcb\xcb\xcb\xcb\xcb=?\xcb\xcb\xcb\xcb\xcb\xcb\xcb\xcb\xcb?4\xcb\xcb\xcb\xcb\xcb?4", "UCS-2", "B", ""));
+
+// 'Line feed' string is truncated to no more than 8 bytes long
+$linefeed = "=aaaaaa=?";
+var_dump(mb_encode_mimeheader("?", "ASCII", "Q", "=aaaaaa=?", 52));
+var_dump($linefeed); // Make sure 'line feed' string was not modified
+
+// Regression test: must take ASCII characters already output at beginning of line
+// into account when calculating how many QPrint-encoded characters we can output
+// without overrunning max line length
+var_dump(mb_encode_mimeheader(",\x20o\x00\x01\x00\x00(", "JIS", "Q", "", 40));
+
+// Make sure we maintain legacy behavior when linefeed string contains NUL (zero) bytes
+// (We treat the linefeed string as being truncated at that point)
+// The reason is that in the original implementation, the linefeed string was a
+// null-terminated C string, so including NUL bytes would have the side effect of
+// causing only part of the linefeed string to be used
+var_dump(mb_encode_mimeheader("\xff", "ASCII", "Q", "\x00", 54));
+
+// Regression test: After we see a non-ASCII character and switch into Base64/QPrint encoding mode,
+// we may need to emit a linefeed before we start the next MIME encoded word
+// If so, properly record where the line start position is so we can correctly calculate
+// how much output can fit on the line
+var_dump(mb_encode_mimeheader("\xff~H~\xe0\xea\x00\x00\xff\xff\xff\xff\xff>\x00\x00\x00\x00", "HZ", "Q", "", 71));
+
+// ASCII strings with no spaces should pass through unchanged
+var_dump(mb_encode_mimeheader("yyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyBIG5", "BIG-5", "B"));
+
+// Regression test: After decoding part of a line as ASCII, before we switch into Base64/QPrint encoding mode,
+// refill our buffer of wchars so we don't hit the end of the buffer in the middle of a line
+var_dump(mb_encode_mimeheader("\x20\x20\x20\x202\x20\x20\x20sssssssssssssssssssssssssss\x20\x20\x20\x20W\x20\x20\x20\x20\x20\x20W\x20\x20\x20\x20\xb9S\x01\x00\xf0`\x00\x00\x20\x20\x20\x20mSCII\xee\x20\x20\x20\x20mSCII\xeeI\xee", "ArmSCII-8", "B", ""));
+
+// Regression test: Input string with a huge number of spaces
+var_dump(mb_encode_mimeheader("\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x00", "CP936", "Q", ""));
+
+// Regression test: Long string, all ASCII, but with spaces at the beginning
+var_dump(mb_encode_mimeheader("\x20\x201111111111111111111111111111111111111111111111111111111111111111111111111", "ASCII", "Q", ""));
+
+// Only a single character in input, but when we convert it to outcode and then
+// transfer-encode it, it takes too many bytes to fit on a single line
+// Legacy implementation would always include at least one wchar in each encoded word;
+// imitate the same behavior
+var_dump(mb_encode_mimeheader("\xe7\xad\xb5", "HZ", "Q", "", 44));
+
+// Regression test: Exploring corner cases of when legacy implementation would output plain ASCII
+// with no transfer encoding, and when it would transfer-encode
+var_dump(mb_encode_mimeheader("2\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20!3", "GB18030", "Q", ""));
+var_dump(mb_encode_mimeheader("\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20!3\x20", "GB18030", "Q", ""));
+
+// Change in behavior: The old implementation would output the following string as plain ASCII,
+// but the new one transfer-encodes it
+// In the general case, matching the old implementation's decision to transfer-encode or not
+// perfectly would require allocating potentially unbounded scratch memory (up to the size of
+// the input string), but we aim to only use a constant amount of temporarily allocated memory
+var_dump(mb_encode_mimeheader("2\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20!3", "GB18030", "Q", ""));
+
+echo "Done";
+?>
+--EXPECT--
+string(0) ""
+string(21) "=?UTF-8?Q?abc=00abc?="
+string(16) "=?UTF-8?B?Pw==?="
+string(19) "=?US-ASCII?B?Pw==?="
+string(18) "=?US-ASCII?Q?=3F?="
+string(19) "=?US-ASCII?B?PQ==?="
+string(18) "=?US-ASCII?Q?=3D?="
+string(19) "=?US-ASCII?B?Xw==?="
+string(18) "=?US-ASCII?Q?=5F?="
+string(19) "=?US-ASCII?B?fw==?="
+string(1) " "
+string(1) " "
+string(3) "   "
+string(3) "   "
+string(8) "ab  ab  "
+string(8) "ab  ab  "
+string(1) "`"
+string(1) "S"
+string(2) "S4"
+string(2) "S4"
+string(61) "=?UCS-4?Q?=00=00=00=32=00=00=00=34?= =?UCS-4?Q?=00=00=00=0A?="
+string(21) "o =?US-ASCII?B?AA==?="
+string(68) "=?UCS-4?B?AAAAAAAAABEAAAABAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA==?="
+string(271) "=?UCS-2?B?AAEAAAA/AD8APwA/AD8APwA9AD8APwA/AD0APwABAAAAYQAAAAAAPwA/AD8=?= =?UCS-2?B?AD0APwA/AD8APwA/AD8APwA/AD8APwA/ADQAPwA0AD8APwA/AD8APwA9AD8=?= =?UCS-2?B?AAEAAAAAAAAAAQAAAAAABgA/AD8APwA/AD8APwA/AD8APwA9AD8APwA/AD8=?= =?UCS-2?B?AD8APwA/AD8APwA/AD8ANAA/AD8APwA/AD8APwA0?="
+string(27) "=aaaaaa= =?US-ASCII?Q?=3F?="
+string(9) "=aaaaaa=?"
+string(55) ", =?ISO-2022-JP?Q?o=00=01=00=00?= =?ISO-2022-JP?Q?=28?="
+string(19) " =?US-ASCII?Q?=3F?="
+string(76) " =?HZ-GB-2312?Q?=3F=7E=7EH=7E=7E=3F=3F=00=00=3F=3F=3F=3F=3F=3E=00=00=00=00?="
+string(75) "yyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyBIG5"
+string(108) "    2   sssssssssssssssssssssssssss    W      W =?ArmSCII-8?B?ICAgP1MBAD9gAAAgICAgbVNDSUk/ICAgIG1TQ0lJP0k/?="
+string(294) "=?CP936?Q?=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20?= =?CP936?Q?=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20?= =?CP936?Q?=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20?= =?CP936?Q?=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=00?="
+string(75) "  1111111111111111111111111111111111111111111111111111111111111111111111111"
+string(33) "=?HZ-GB-2312?Q?=7E=7Bs=5B=7E=7D?="
+string(77) "2                                                                          !3"
+string(282) "=?GB18030?Q?=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20?= =?GB18030?Q?=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20?= =?GB18030?Q?=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20?= =?GB18030?Q?=20=20=20=20=20=20=20=20=20=20=20=20!=33=20?="
+string(296) "2 =?GB18030?Q?=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20?= =?GB18030?Q?=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20?= =?GB18030?Q?=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20?= =?GB18030?Q?=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20!=33?="
+Done