Improve url scanner speed (up to 40% for large chunks of data)

Also handle some corner cases better.  The scanner has been changed
to the format proposed in "RE2C - A More Versatile Scanner Generator"
by Bumbulis and Cowan.
This commit is contained in:
Sascha Schumann 2001-07-04 15:30:21 +00:00
parent 98b1d4bbf4
commit ef13ab5750
7 changed files with 542 additions and 341 deletions

View file

@ -150,9 +150,11 @@ typedef struct ps_serializer_struct {
#ifdef TRANS_SID
void session_adapt_uris(const char *, size_t, char **, size_t *);
void session_adapt_url(const char *, size_t, char **, size_t *);
void session_adapt_flush(int (*)(const char *, uint));
#else
#define session_adapt_uris(a,b,c,d)
#define session_adapt_url(a,b,c,d)
#define session_adapt_uris(a,b,c,d) do { } while(0)
#define session_adapt_url(a,b,c,d) do { } while(0)
#define session_adapt_flush(a) do { } while(0)
#endif
void php_set_session_var(char *name, size_t namelen, zval *state_val,HashTable *var_hash PSLS_DC);

View file

@ -1326,6 +1326,16 @@ void session_adapt_url(const char *url, size_t urllen, char **new, size_t *newle
if (PS(define_sid) && PS(nr_open_sessions) > 0)
*new = url_adapt_single_url(url, urllen, PS(session_name), PS(id), newlen);
}
/*
 * Fetch whatever url_adapt_flush() hands back and pass it to the supplied
 * write callback.  Nothing is written when url_adapt_flush() returns NULL.
 *
 * write: output callback, invoked as write(data, length).
 */
void session_adapt_flush(int (*write)(const char *, uint))
{
	/* Only assigned by url_adapt_flush() when it returns a buffer. */
	size_t pending_len;
	char *pending = url_adapt_flush(&pending_len);

	if (pending == NULL) {
		return;
	}

	/*
	 * NOTE(review): url_adapt_flush() clears its own pointer to this
	 * buffer before returning it, and the buffer is not released here --
	 * confirm who is expected to free it.
	 */
	write(pending, pending_len);
}
#endif
/* {{{ proto void session_unset(void)

View file

@ -254,10 +254,15 @@ PHPAPI void php_end_ob_buffer(zend_bool send_buffer, zend_bool just_flush)
/*
 * End all output buffers: php_end_ob_buffer() is called repeatedly until
 * the output nesting level is zero.
 *
 * send_buffer: forwarded to php_end_ob_buffer(); also gates the final
 *              session flush below.
 */
PHPAPI void php_end_ob_buffers(zend_bool send_buffer)
{
	OLS_FETCH();
	BLS_FETCH();

	while (OG(nesting_level)) {
		php_end_ob_buffer(send_buffer, 0);
	}

	if (!send_buffer || !BG(use_trans_sid)) {
		return;
	}

	/* Buffers were sent and use_trans_sid is set: let the session layer
	 * flush through the header-write callback. */
	session_adapt_flush(OG(php_header_write));
}
/* }}} */

View file

@ -1,5 +1,5 @@
/* Generated by re2c 0.5 on Wed Apr 4 19:29:46 2001 */
#line 1 "/usr/src/web/php/php4/ext/standard/url_scanner_ex.re"
/* Generated by re2c 0.5 on Wed Jul 4 17:05:51 2001 */
#line 1 "/home/sas/src/php4/ext/standard/url_scanner_ex.re"
/*
+----------------------------------------------------------------------+
| PHP version 4.0 |
@ -41,8 +41,6 @@
#include "php_smart_str.h"
/* {{{ PHP_INI_MH
*/
static PHP_INI_MH(OnUpdateTags)
{
url_adapt_state_ex_t *ctx;
@ -86,38 +84,100 @@ static PHP_INI_MH(OnUpdateTags)
return SUCCESS;
}
/* }}} */
PHP_INI_BEGIN()
STD_PHP_INI_ENTRY("url_rewriter.tags", "a=href,area=href,frame=src,form=fakeentry", PHP_INI_ALL, OnUpdateTags, url_adapt_state_ex, php_basic_globals, basic_globals)
PHP_INI_END()
/* {{{ append_modified_url
*/
#line 94
#define YYFILL(n) goto done
#define YYCTYPE unsigned char
#define YYCURSOR p
#define YYLIMIT q
#define YYMARKER r
static inline void append_modified_url(smart_str *url, smart_str *dest, smart_str *name, smart_str *val, const char *separator)
{
register const char *p, *q;
register const char *p, *q, *r;
const char *bash = NULL;
const char *sep = "?";
q = url->c + url->len;
for (p = url->c; p < q; p++) {
switch(*p) {
case ':':
smart_str_append(dest, url);
return;
case '?':
sep = separator;
break;
case '#':
bash = p;
break;
}
}
q = (p = url->c) + url->len;
scan:
{
YYCTYPE yych;
unsigned int yyaccept;
static unsigned char yybm[] = {
128, 128, 128, 128, 128, 128, 128, 128,
128, 128, 128, 128, 128, 128, 128, 128,
128, 128, 128, 128, 128, 128, 128, 128,
128, 128, 128, 128, 128, 128, 128, 128,
128, 128, 128, 0, 128, 128, 128, 128,
128, 128, 128, 128, 128, 128, 128, 128,
128, 128, 128, 128, 128, 128, 128, 128,
128, 128, 0, 128, 128, 128, 128, 0,
128, 128, 128, 128, 128, 128, 128, 128,
128, 128, 128, 128, 128, 128, 128, 128,
128, 128, 128, 128, 128, 128, 128, 128,
128, 128, 128, 128, 128, 128, 128, 128,
128, 128, 128, 128, 128, 128, 128, 128,
128, 128, 128, 128, 128, 128, 128, 128,
128, 128, 128, 128, 128, 128, 128, 128,
128, 128, 128, 128, 128, 128, 128, 128,
128, 128, 128, 128, 128, 128, 128, 128,
128, 128, 128, 128, 128, 128, 128, 128,
128, 128, 128, 128, 128, 128, 128, 128,
128, 128, 128, 128, 128, 128, 128, 128,
128, 128, 128, 128, 128, 128, 128, 128,
128, 128, 128, 128, 128, 128, 128, 128,
128, 128, 128, 128, 128, 128, 128, 128,
128, 128, 128, 128, 128, 128, 128, 128,
128, 128, 128, 128, 128, 128, 128, 128,
128, 128, 128, 128, 128, 128, 128, 128,
128, 128, 128, 128, 128, 128, 128, 128,
128, 128, 128, 128, 128, 128, 128, 128,
128, 128, 128, 128, 128, 128, 128, 128,
128, 128, 128, 128, 128, 128, 128, 128,
128, 128, 128, 128, 128, 128, 128, 128,
128, 128, 128, 128, 128, 128, 128, 128,
};
goto yy0;
yy1: ++YYCURSOR;
yy0:
if(YYLIMIT == YYCURSOR) YYFILL(1);
yych = *YYCURSOR;
if(yybm[0+yych] & 128) goto yy8;
if(yych <= '9') goto yy6;
if(yych >= ';') goto yy4;
yy2: yych = *++YYCURSOR;
yy3:
#line 112
{ smart_str_append(dest, url); return; }
yy4: yych = *++YYCURSOR;
yy5:
#line 113
{ sep = separator; goto done; }
yy6: yych = *++YYCURSOR;
yy7:
#line 114
{ bash = p; goto done; }
yy8: ++YYCURSOR;
if(YYLIMIT == YYCURSOR) YYFILL(1);
yych = *YYCURSOR;
yy9: if(yybm[0+yych] & 128) goto yy8;
yy10:
#line 115
{ goto scan; }
}
#line 116
done:
/* Don't modify URLs of the format "#mark" */
if (bash - url->c == 0) {
if (bash && bash - url->c == 0) {
smart_str_append(dest, url);
return;
}
@ -135,7 +195,12 @@ static inline void append_modified_url(smart_str *url, smart_str *dest, smart_st
if (bash)
smart_str_appendl(dest, bash, q - bash);
}
/* }}} */
#undef YYFILL
#undef YYCTYPE
#undef YYCURSOR
#undef YYLIMIT
#undef YYMARKER
static inline void tag_arg(url_adapt_state_ex_t *ctx, char quote PLS_DC)
{
@ -154,7 +219,7 @@ static inline void tag_arg(url_adapt_state_ex_t *ctx, char quote PLS_DC)
}
enum {
STATE_PLAIN,
STATE_PLAIN = 0,
STATE_TAG,
STATE_NEXT_ARG,
STATE_ARG,
@ -227,8 +292,6 @@ static inline void handle_val(STD_PARA, char quotes, char type)
#define scdebug(x)
#endif
/* {{{ mainloop
*/
static inline void mainloop(url_adapt_state_ex_t *ctx, const char *newdata, size_t newlen)
{
char *end, *q;
@ -242,38 +305,81 @@ static inline void mainloop(url_adapt_state_ex_t *ctx, const char *newdata, size
YYCURSOR = ctx->buf.c;
YYLIMIT = ctx->buf.c + ctx->buf.len;
#line 238
switch (STATE) {
case STATE_PLAIN: goto state_plain;
case STATE_TAG: goto state_tag;
case STATE_NEXT_ARG: goto state_next_arg;
case STATE_ARG: goto state_arg;
case STATE_BEFORE_VAL: goto state_before_val;
case STATE_VAL: goto state_val;
}
while(1) {
start = YYCURSOR;
scdebug(("state %d at %s\n", STATE, YYCURSOR));
switch(STATE) {
case STATE_PLAIN:
state_plain_begin:
STATE = STATE_PLAIN;
state_plain:
start = YYCURSOR;
{
YYCTYPE yych;
unsigned int yyaccept;
goto yy0;
yy1: ++YYCURSOR;
yy0:
static unsigned char yybm[] = {
128, 128, 128, 128, 128, 128, 128, 128,
128, 128, 128, 128, 128, 128, 128, 128,
128, 128, 128, 128, 128, 128, 128, 128,
128, 128, 128, 128, 128, 128, 128, 128,
128, 128, 128, 128, 128, 128, 128, 128,
128, 128, 128, 128, 128, 128, 128, 128,
128, 128, 128, 128, 128, 128, 128, 128,
128, 128, 128, 128, 0, 128, 128, 128,
128, 128, 128, 128, 128, 128, 128, 128,
128, 128, 128, 128, 128, 128, 128, 128,
128, 128, 128, 128, 128, 128, 128, 128,
128, 128, 128, 128, 128, 128, 128, 128,
128, 128, 128, 128, 128, 128, 128, 128,
128, 128, 128, 128, 128, 128, 128, 128,
128, 128, 128, 128, 128, 128, 128, 128,
128, 128, 128, 128, 128, 128, 128, 128,
128, 128, 128, 128, 128, 128, 128, 128,
128, 128, 128, 128, 128, 128, 128, 128,
128, 128, 128, 128, 128, 128, 128, 128,
128, 128, 128, 128, 128, 128, 128, 128,
128, 128, 128, 128, 128, 128, 128, 128,
128, 128, 128, 128, 128, 128, 128, 128,
128, 128, 128, 128, 128, 128, 128, 128,
128, 128, 128, 128, 128, 128, 128, 128,
128, 128, 128, 128, 128, 128, 128, 128,
128, 128, 128, 128, 128, 128, 128, 128,
128, 128, 128, 128, 128, 128, 128, 128,
128, 128, 128, 128, 128, 128, 128, 128,
128, 128, 128, 128, 128, 128, 128, 128,
128, 128, 128, 128, 128, 128, 128, 128,
128, 128, 128, 128, 128, 128, 128, 128,
128, 128, 128, 128, 128, 128, 128, 128,
};
goto yy11;
yy12: ++YYCURSOR;
yy11:
if(YYLIMIT == YYCURSOR) YYFILL(1);
yych = *YYCURSOR;
if(yych != '<') goto yy4;
yy2: yych = *++YYCURSOR;
yy3:
#line 247
{ passthru(STD_ARGS); STATE = STATE_TAG; continue; }
yy4: yych = *++YYCURSOR;
yy5:
#line 248
{ passthru(STD_ARGS); continue; }
if(yybm[0+yych] & 128) goto yy15;
yy13: yych = *++YYCURSOR;
yy14:
#line 264
{ passthru(STD_ARGS); STATE = STATE_TAG; goto state_tag; }
yy15: ++YYCURSOR;
if(YYLIMIT == YYCURSOR) YYFILL(1);
yych = *YYCURSOR;
yy16: if(yybm[0+yych] & 128) goto yy15;
yy17:
#line 265
{ passthru(STD_ARGS); goto state_plain; }
}
#line 249
#line 266
break;
case STATE_TAG:
state_tag:
start = YYCURSOR;
{
YYCTYPE yych;
unsigned int yyaccept;
@ -311,84 +417,127 @@ yy5:
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
};
goto yy6;
yy7: ++YYCURSOR;
yy6:
goto yy18;
yy19: ++YYCURSOR;
yy18:
if((YYLIMIT - YYCURSOR) < 2) YYFILL(2);
yych = *YYCURSOR;
if(yych <= '@') goto yy10;
if(yych <= 'Z') goto yy8;
if(yych <= '`') goto yy10;
if(yych >= '{') goto yy10;
yy8: yych = *++YYCURSOR;
goto yy13;
yy9:
#line 254
{ handle_tag(STD_ARGS); /* Sets STATE */; passthru(STD_ARGS); continue; }
yy10: yych = *++YYCURSOR;
yy11:
#line 255
{ passthru(STD_ARGS); STATE = STATE_PLAIN; continue; }
yy12: ++YYCURSOR;
if(yych <= '@') goto yy22;
if(yych <= 'Z') goto yy20;
if(yych <= '`') goto yy22;
if(yych >= '{') goto yy22;
yy20: yych = *++YYCURSOR;
goto yy25;
yy21:
#line 271
{ handle_tag(STD_ARGS); /* Sets STATE */; passthru(STD_ARGS); if (STATE == STATE_PLAIN) goto state_plain; else goto state_next_arg; }
yy22: yych = *++YYCURSOR;
yy23:
#line 272
{ passthru(STD_ARGS); goto state_plain_begin; }
yy24: ++YYCURSOR;
if(YYLIMIT == YYCURSOR) YYFILL(1);
yych = *YYCURSOR;
yy13: if(yybm[0+yych] & 128) goto yy12;
goto yy9;
yy25: if(yybm[0+yych] & 128) goto yy24;
goto yy21;
}
#line 256
#line 273
break;
case STATE_NEXT_ARG:
state_next_arg_begin:
STATE = STATE_NEXT_ARG;
state_next_arg:
start = YYCURSOR;
{
YYCTYPE yych;
unsigned int yyaccept;
goto yy14;
yy15: ++YYCURSOR;
yy14:
if(YYLIMIT == YYCURSOR) YYFILL(1);
static unsigned char yybm[] = {
0, 0, 0, 0, 0, 0, 0, 0,
0, 128, 128, 128, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
128, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
};
goto yy26;
yy27: ++YYCURSOR;
yy26:
if((YYLIMIT - YYCURSOR) < 2) YYFILL(2);
yych = *YYCURSOR;
if(yych <= '='){
if(yych <= '\n'){
if(yych <= '\t') goto yy22;
goto yy18;
if(yych <= '\v'){
if(yych <= '\b') goto yy34;
goto yy30;
} else {
if(yych == ' ') goto yy18;
goto yy22;
if(yych == ' ') goto yy30;
goto yy34;
}
} else {
if(yych <= 'Z'){
if(yych <= '>') goto yy16;
if(yych <= '@') goto yy22;
goto yy20;
if(yych <= '>') goto yy28;
if(yych <= '@') goto yy34;
goto yy32;
} else {
if(yych <= '`') goto yy22;
if(yych <= 'z') goto yy20;
goto yy22;
if(yych <= '`') goto yy34;
if(yych <= 'z') goto yy32;
goto yy34;
}
}
yy16: yych = *++YYCURSOR;
yy17:
#line 261
{ passthru(STD_ARGS); handle_form(STD_ARGS); STATE = STATE_PLAIN; continue; }
yy18: yych = *++YYCURSOR;
yy19:
#line 262
{ passthru(STD_ARGS); continue; }
yy20: yych = *++YYCURSOR;
yy21:
#line 263
{ YYCURSOR--; STATE = STATE_ARG; continue; }
yy22: yych = *++YYCURSOR;
yy23:
#line 264
{ passthru(STD_ARGS); STATE = STATE_PLAIN; continue; }
yy28: yych = *++YYCURSOR;
yy29:
#line 281
{ passthru(STD_ARGS); handle_form(STD_ARGS); goto state_plain_begin; }
yy30: yych = *++YYCURSOR;
goto yy37;
yy31:
#line 282
{ passthru(STD_ARGS); goto state_next_arg; }
yy32: yych = *++YYCURSOR;
yy33:
#line 283
{ --YYCURSOR; STATE = STATE_ARG; goto state_arg; }
yy34: yych = *++YYCURSOR;
yy35:
#line 284
{ passthru(STD_ARGS); goto state_plain_begin; }
yy36: ++YYCURSOR;
if(YYLIMIT == YYCURSOR) YYFILL(1);
yych = *YYCURSOR;
yy37: if(yybm[0+yych] & 128) goto yy36;
goto yy31;
}
#line 265
#line 285
break;
case STATE_ARG:
state_arg:
start = YYCURSOR;
{
YYCTYPE yych;
unsigned int yyaccept;
@ -426,34 +575,35 @@ yy23:
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
};
goto yy24;
yy25: ++YYCURSOR;
yy24:
goto yy38;
yy39: ++YYCURSOR;
yy38:
if((YYLIMIT - YYCURSOR) < 2) YYFILL(2);
yych = *YYCURSOR;
if(yych <= '@') goto yy28;
if(yych <= 'Z') goto yy26;
if(yych <= '`') goto yy28;
if(yych >= '{') goto yy28;
yy26: yych = *++YYCURSOR;
goto yy31;
yy27:
#line 270
{ passthru(STD_ARGS); handle_arg(STD_ARGS); STATE = STATE_BEFORE_VAL; continue; }
yy28: yych = *++YYCURSOR;
yy29:
#line 271
{ passthru(STD_ARGS); STATE = STATE_NEXT_ARG; continue; }
yy30: ++YYCURSOR;
if(yych <= '@') goto yy42;
if(yych <= 'Z') goto yy40;
if(yych <= '`') goto yy42;
if(yych >= '{') goto yy42;
yy40: yych = *++YYCURSOR;
goto yy45;
yy41:
#line 290
{ passthru(STD_ARGS); handle_arg(STD_ARGS); STATE = STATE_BEFORE_VAL; goto state_before_val; }
yy42: yych = *++YYCURSOR;
yy43:
#line 291
{ passthru(STD_ARGS); STATE = STATE_NEXT_ARG; goto state_next_arg; }
yy44: ++YYCURSOR;
if(YYLIMIT == YYCURSOR) YYFILL(1);
yych = *YYCURSOR;
yy31: if(yybm[0+yych] & 128) goto yy30;
goto yy27;
yy45: if(yybm[0+yych] & 128) goto yy44;
goto yy41;
}
#line 272
#line 292
case STATE_BEFORE_VAL:
state_before_val:
start = YYCURSOR;
{
YYCTYPE yych;
unsigned int yyaccept;
@ -491,184 +641,185 @@ yy31: if(yybm[0+yych] & 128) goto yy30;
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
};
goto yy32;
yy33: ++YYCURSOR;
yy32:
goto yy46;
yy47: ++YYCURSOR;
yy46:
if((YYLIMIT - YYCURSOR) < 2) YYFILL(2);
yych = *YYCURSOR;
if(yych == ' ') goto yy34;
if(yych == '=') goto yy36;
goto yy38;
yy34: yyaccept = 0;
if(yych == ' ') goto yy48;
if(yych == '=') goto yy50;
goto yy52;
yy48: yyaccept = 0;
yych = *(YYMARKER = ++YYCURSOR);
if(yych == ' ') goto yy41;
if(yych == '=') goto yy39;
yy35:
#line 277
{ YYCURSOR--; STATE = STATE_NEXT_ARG; continue; }
yy36: yych = *++YYCURSOR;
goto yy40;
yy37:
#line 276
{ passthru(STD_ARGS); STATE = STATE_VAL; continue; }
yy38: yych = *++YYCURSOR;
goto yy35;
yy39: ++YYCURSOR;
if(yych == ' ') goto yy55;
if(yych == '=') goto yy53;
yy49:
#line 298
{ --YYCURSOR; goto state_next_arg_begin; }
yy50: yych = *++YYCURSOR;
goto yy54;
yy51:
#line 297
{ passthru(STD_ARGS); STATE = STATE_VAL; goto state_val; }
yy52: yych = *++YYCURSOR;
goto yy49;
yy53: ++YYCURSOR;
if(YYLIMIT == YYCURSOR) YYFILL(1);
yych = *YYCURSOR;
yy40: if(yybm[0+yych] & 128) goto yy39;
goto yy37;
yy41: ++YYCURSOR;
yy54: if(yybm[0+yych] & 128) goto yy53;
goto yy51;
yy55: ++YYCURSOR;
if(YYLIMIT == YYCURSOR) YYFILL(1);
yych = *YYCURSOR;
yy42: if(yych == ' ') goto yy41;
if(yych == '=') goto yy39;
yy43: YYCURSOR = YYMARKER;
yy56: if(yych == ' ') goto yy55;
if(yych == '=') goto yy53;
yy57: YYCURSOR = YYMARKER;
switch(yyaccept){
case 0: goto yy35;
case 0: goto yy49;
}
}
#line 278
#line 299
break;
case STATE_VAL:
state_val:
start = YYCURSOR;
{
YYCTYPE yych;
unsigned int yyaccept;
static unsigned char yybm[] = {
240, 240, 240, 240, 240, 240, 240, 240,
240, 240, 192, 240, 240, 240, 240, 240,
240, 240, 240, 240, 240, 240, 240, 240,
240, 240, 240, 240, 240, 240, 240, 240,
192, 240, 64, 240, 240, 240, 240, 144,
240, 240, 240, 240, 240, 240, 240, 240,
240, 240, 240, 240, 240, 240, 240, 240,
240, 240, 240, 240, 240, 240, 0, 240,
240, 240, 240, 240, 240, 240, 240, 240,
240, 240, 240, 240, 240, 240, 240, 240,
240, 240, 240, 240, 240, 240, 240, 240,
240, 240, 240, 240, 240, 240, 240, 240,
240, 240, 240, 240, 240, 240, 240, 240,
240, 240, 240, 240, 240, 240, 240, 240,
240, 240, 240, 240, 240, 240, 240, 240,
240, 240, 240, 240, 240, 240, 240, 240,
240, 240, 240, 240, 240, 240, 240, 240,
240, 240, 240, 240, 240, 240, 240, 240,
240, 240, 240, 240, 240, 240, 240, 240,
240, 240, 240, 240, 240, 240, 240, 240,
240, 240, 240, 240, 240, 240, 240, 240,
240, 240, 240, 240, 240, 240, 240, 240,
240, 240, 240, 240, 240, 240, 240, 240,
240, 240, 240, 240, 240, 240, 240, 240,
240, 240, 240, 240, 240, 240, 240, 240,
240, 240, 240, 240, 240, 240, 240, 240,
240, 240, 240, 240, 240, 240, 240, 240,
240, 240, 240, 240, 240, 240, 240, 240,
240, 240, 240, 240, 240, 240, 240, 240,
240, 240, 240, 240, 240, 240, 240, 240,
240, 240, 240, 240, 240, 240, 240, 240,
240, 240, 240, 240, 240, 240, 240, 240,
224, 224, 224, 224, 224, 224, 224, 224,
224, 224, 192, 224, 224, 224, 224, 224,
224, 224, 224, 224, 224, 224, 224, 224,
224, 224, 224, 224, 224, 224, 224, 224,
192, 224, 64, 224, 224, 224, 224, 128,
224, 224, 224, 224, 224, 224, 224, 224,
224, 224, 224, 224, 224, 224, 224, 224,
224, 224, 224, 224, 224, 224, 0, 224,
224, 224, 224, 224, 224, 224, 224, 224,
224, 224, 224, 224, 224, 224, 224, 224,
224, 224, 224, 224, 224, 224, 224, 224,
224, 224, 224, 224, 224, 224, 224, 224,
224, 224, 224, 224, 224, 224, 224, 224,
224, 224, 224, 224, 224, 224, 224, 224,
224, 224, 224, 224, 224, 224, 224, 224,
224, 224, 224, 224, 224, 224, 224, 224,
224, 224, 224, 224, 224, 224, 224, 224,
224, 224, 224, 224, 224, 224, 224, 224,
224, 224, 224, 224, 224, 224, 224, 224,
224, 224, 224, 224, 224, 224, 224, 224,
224, 224, 224, 224, 224, 224, 224, 224,
224, 224, 224, 224, 224, 224, 224, 224,
224, 224, 224, 224, 224, 224, 224, 224,
224, 224, 224, 224, 224, 224, 224, 224,
224, 224, 224, 224, 224, 224, 224, 224,
224, 224, 224, 224, 224, 224, 224, 224,
224, 224, 224, 224, 224, 224, 224, 224,
224, 224, 224, 224, 224, 224, 224, 224,
224, 224, 224, 224, 224, 224, 224, 224,
224, 224, 224, 224, 224, 224, 224, 224,
224, 224, 224, 224, 224, 224, 224, 224,
224, 224, 224, 224, 224, 224, 224, 224,
};
goto yy44;
yy45: ++YYCURSOR;
yy44:
if((YYLIMIT - YYCURSOR) < 3) YYFILL(3);
goto yy58;
yy59: ++YYCURSOR;
yy58:
if((YYLIMIT - YYCURSOR) < 2) YYFILL(2);
yych = *YYCURSOR;
if(yych <= '!'){
if(yych <= '\n'){
if(yych <= '\t') goto yy50;
goto yy51;
if(yych <= '\t') goto yy63;
goto yy65;
} else {
if(yych == ' ') goto yy51;
goto yy50;
if(yych == ' ') goto yy65;
goto yy63;
}
} else {
if(yych <= '\''){
if(yych <= '"') goto yy46;
if(yych <= '&') goto yy50;
goto yy48;
if(yych <= '"') goto yy60;
if(yych <= '&') goto yy63;
goto yy62;
} else {
if(yych == '>') goto yy51;
goto yy50;
if(yych == '>') goto yy65;
goto yy63;
}
}
yy46: yyaccept = 0;
yy60: yyaccept = 0;
yych = *(YYMARKER = ++YYCURSOR);
if(yych != '>') goto yy63;
yy47:
#line 286
{ passthru(STD_ARGS); STATE = STATE_NEXT_ARG; continue; }
yy48: yyaccept = 1;
if(yych != '>') goto yy74;
yy61:
#line 308
{ passthru(STD_ARGS); goto state_next_arg_begin; }
yy62: yyaccept = 0;
yych = *(YYMARKER = ++YYCURSOR);
goto yy55;
yy49:
#line 285
{ handle_val(STD_ARGS, 0, '"'); STATE = STATE_NEXT_ARG; continue; }
yy50: yych = *++YYCURSOR;
goto yy53;
yy51: yych = *++YYCURSOR;
goto yy47;
yy52: ++YYCURSOR;
if(yych == '>') goto yy61;
goto yy69;
yy63: yych = *++YYCURSOR;
goto yy67;
yy64:
#line 307
{ handle_val(STD_ARGS, 0, '"'); goto state_next_arg_begin; }
yy65: yych = *++YYCURSOR;
goto yy61;
yy66: ++YYCURSOR;
if(YYLIMIT == YYCURSOR) YYFILL(1);
yych = *YYCURSOR;
yy53: if(yybm[0+yych] & 16) goto yy52;
goto yy49;
yy54: yyaccept = 1;
YYMARKER = ++YYCURSOR;
yy67: if(yybm[0+yych] & 32) goto yy66;
goto yy64;
yy68: ++YYCURSOR;
if(YYLIMIT == YYCURSOR) YYFILL(1);
yych = *YYCURSOR;
yy55: if(yybm[0+yych] & 32) goto yy54;
if(yych <= '&') goto yy58;
if(yych >= '(') goto yy49;
yy56: yych = *++YYCURSOR;
if(yybm[0+yych] & 16) goto yy52;
yy57:
#line 284
{ handle_val(STD_ARGS, 1, '\''); STATE = STATE_NEXT_ARG; continue; }
yy58: ++YYCURSOR;
if(YYLIMIT == YYCURSOR) YYFILL(1);
yych = *YYCURSOR;
yy59: if(yybm[0+yych] & 64) goto yy58;
if(yych <= '=') goto yy61;
yy60: YYCURSOR = YYMARKER;
yy69: if(yybm[0+yych] & 64) goto yy68;
if(yych <= '=') goto yy71;
yy70: YYCURSOR = YYMARKER;
switch(yyaccept){
case 1: goto yy49;
case 0: goto yy47;
case 0: goto yy61;
}
yy61: yych = *++YYCURSOR;
goto yy57;
yy62: ++YYCURSOR;
yy71: yych = *++YYCURSOR;
yy72:
#line 306
{ handle_val(STD_ARGS, 1, '\''); goto state_next_arg_begin; }
yy73: ++YYCURSOR;
if(YYLIMIT == YYCURSOR) YYFILL(1);
yych = *YYCURSOR;
yy63: if(yybm[0+yych] & 128) goto yy62;
if(yych >= '>') goto yy60;
yy64: yych = *++YYCURSOR;
yy65:
#line 283
{ handle_val(STD_ARGS, 1, '"'); STATE = STATE_NEXT_ARG; continue; }
yy74: if(yybm[0+yych] & 128) goto yy73;
if(yych >= '>') goto yy70;
yy75: yych = *++YYCURSOR;
yy76:
#line 305
{ handle_val(STD_ARGS, 1, '"'); goto state_next_arg_begin; }
}
#line 287
#line 309
break;
}
}
stop:
scdebug(("stopped in state %d at pos %d (%d:%c)\n", STATE, YYCURSOR - ctx->buf.c, *YYCURSOR, *YYCURSOR));
rest = YYLIMIT - start;
scdebug(("stopped in state %d at pos %d (%d:%c) %d\n", STATE, YYCURSOR - ctx->buf.c, *YYCURSOR, *YYCURSOR, rest));
/* XXX: Crash avoidance. Need to work with reporter to figure out what goes wrong */
if (rest < 0) rest = 0;
if (rest) memmove(ctx->buf.c, start, rest);
ctx->buf.len = rest;
}
/* }}} */
/* {{{ url_adapt_single_url
*/
/*
 * Detach and return the bytes currently held in the url_adapt_state_ex
 * buffer.
 *
 * newlen: receives the buffer length; it is written ONLY when a non-NULL
 *         pointer is returned.
 *
 * Returns the buffer pointer, or NULL when the buffer length is zero.
 * On a non-NULL return the state's buffer pointer and length are reset,
 * so the state no longer refers to the returned memory.
 */
char *url_adapt_flush(size_t *newlen)
{
	url_adapt_state_ex_t *ctx;
	char *pending;
	BLS_FETCH();

	ctx = &BG(url_adapt_state_ex);
	if (!ctx->buf.len) {
		return NULL;
	}

	pending = ctx->buf.c;
	*newlen = ctx->buf.len;

	/* Reset the state's view of the buffer; the memory itself is not freed. */
	ctx->buf.c = NULL;
	ctx->buf.len = 0;

	return pending;
}
char *url_adapt_single_url(const char *url, size_t urllen, const char *name, const char *value, size_t *newlen)
{
smart_str surl = {0};
@ -688,10 +839,7 @@ char *url_adapt_single_url(const char *url, size_t urllen, const char *name, con
return buf.c;
}
/* }}} */
/* {{{ url_adapt_ext
*/
char *url_adapt_ext(const char *src, size_t srclen, const char *name, const char *value, size_t *newlen)
{
char *ret;
@ -705,11 +853,12 @@ char *url_adapt_ext(const char *src, size_t srclen, const char *name, const char
mainloop(ctx, src, srclen);
*newlen = ctx->result.len;
if (!ctx->result.c)
smart_str_appendl(&ctx->result, "", 0);
smart_str_0(&ctx->result);
ctx->result.len = 0;
return ctx->result.c;
}
/* }}} */
PHP_RINIT_FUNCTION(url_scanner)
{
@ -762,12 +911,3 @@ PHP_MSHUTDOWN_FUNCTION(url_scanner)
}
#endif
/*
* Local variables:
* tab-width: 4
* c-basic-offset: 4
* End:
* vim600: sw=4 ts=4 tw=78 fdm=marker
* vim<600: sw=4 ts=4 tw=78
*/

View file

@ -28,6 +28,8 @@ char *url_adapt_ext_ex(const char *src, size_t srclen, const char *name, const c
char *url_adapt_single_url(const char *url, size_t urllen, const char *name, const char *value, size_t *newlen);
char *url_adapt_flush(size_t *);
#include "php_smart_str_public.h"
typedef struct {

View file

@ -87,30 +87,37 @@ PHP_INI_BEGIN()
STD_PHP_INI_ENTRY("url_rewriter.tags", "a=href,area=href,frame=src,form=fakeentry", PHP_INI_ALL, OnUpdateTags, url_adapt_state_ex, php_basic_globals, basic_globals)
PHP_INI_END()
/*!re2c
any = [\000-\377];
N = (any\[<]);
alpha = [a-zA-Z];
*/
#define YYFILL(n) goto done
#define YYCTYPE unsigned char
#define YYCURSOR p
#define YYLIMIT q
#define YYMARKER r
static inline void append_modified_url(smart_str *url, smart_str *dest, smart_str *name, smart_str *val, const char *separator)
{
register const char *p, *q;
register const char *p, *q, *r;
const char *bash = NULL;
const char *sep = "?";
q = url->c + url->len;
for (p = url->c; p < q; p++) {
switch(*p) {
case ':':
smart_str_append(dest, url);
return;
case '?':
sep = separator;
break;
case '#':
bash = p;
break;
}
}
q = (p = url->c) + url->len;
scan:
/*!re2c
":" { smart_str_append(dest, url); return; }
"?" { sep = separator; goto done; }
"#" { bash = p; goto done; }
(any\[:?#])+ { goto scan; }
*/
done:
/* Don't modify URLs of the format "#mark" */
if (bash - url->c == 0) {
if (bash && bash - url->c == 0) {
smart_str_append(dest, url);
return;
}
@ -129,6 +136,12 @@ static inline void append_modified_url(smart_str *url, smart_str *dest, smart_st
smart_str_appendl(dest, bash, q - bash);
}
#undef YYFILL
#undef YYCTYPE
#undef YYCURSOR
#undef YYLIMIT
#undef YYMARKER
static inline void tag_arg(url_adapt_state_ex_t *ctx, char quote PLS_DC)
{
char f = 0;
@ -146,7 +159,7 @@ static inline void tag_arg(url_adapt_state_ex_t *ctx, char quote PLS_DC)
}
enum {
STATE_PLAIN,
STATE_PLAIN = 0,
STATE_TAG,
STATE_NEXT_ARG,
STATE_ARG,
@ -232,68 +245,72 @@ static inline void mainloop(url_adapt_state_ex_t *ctx, const char *newdata, size
YYCURSOR = ctx->buf.c;
YYLIMIT = ctx->buf.c + ctx->buf.len;
/*!re2c
any = [\000-\377];
alpha = [a-zA-Z];
*/
switch (STATE) {
case STATE_PLAIN: goto state_plain;
case STATE_TAG: goto state_tag;
case STATE_NEXT_ARG: goto state_next_arg;
case STATE_ARG: goto state_arg;
case STATE_BEFORE_VAL: goto state_before_val;
case STATE_VAL: goto state_val;
}
while(1) {
start = YYCURSOR;
scdebug(("state %d at %s\n", STATE, YYCURSOR));
switch(STATE) {
case STATE_PLAIN:
/*!re2c
[<] { passthru(STD_ARGS); STATE = STATE_TAG; continue; }
(any\[<]) { passthru(STD_ARGS); continue; }
*/
break;
case STATE_TAG:
/*!re2c
alpha+ { handle_tag(STD_ARGS); /* Sets STATE */; passthru(STD_ARGS); continue; }
any { passthru(STD_ARGS); STATE = STATE_PLAIN; continue; }
*/
break;
case STATE_NEXT_ARG:
/*!re2c
">" { passthru(STD_ARGS); handle_form(STD_ARGS); STATE = STATE_PLAIN; continue; }
[ \n] { passthru(STD_ARGS); continue; }
alpha { YYCURSOR--; STATE = STATE_ARG; continue; }
any { passthru(STD_ARGS); STATE = STATE_PLAIN; continue; }
*/
break;
case STATE_ARG:
state_plain_begin:
STATE = STATE_PLAIN;
state_plain:
start = YYCURSOR;
/*!re2c
alpha+ { passthru(STD_ARGS); handle_arg(STD_ARGS); STATE = STATE_BEFORE_VAL; continue; }
any { passthru(STD_ARGS); STATE = STATE_NEXT_ARG; continue; }
"<" { passthru(STD_ARGS); STATE = STATE_TAG; goto state_tag; }
N+ { passthru(STD_ARGS); goto state_plain; }
*/
case STATE_BEFORE_VAL:
state_tag:
start = YYCURSOR;
/*!re2c
[ ]* "=" [ ]* { passthru(STD_ARGS); STATE = STATE_VAL; continue; }
any { YYCURSOR--; STATE = STATE_NEXT_ARG; continue; }
alpha+ { handle_tag(STD_ARGS); /* Sets STATE */; passthru(STD_ARGS); if (STATE == STATE_PLAIN) goto state_plain; else goto state_next_arg; }
any { passthru(STD_ARGS); goto state_plain_begin; }
*/
break;
case STATE_VAL:
state_next_arg_begin:
STATE = STATE_NEXT_ARG;
state_next_arg:
start = YYCURSOR;
/*!re2c
["] (any\[">])* ["] { handle_val(STD_ARGS, 1, '"'); STATE = STATE_NEXT_ARG; continue; }
['] (any\['>])* ['] { handle_val(STD_ARGS, 1, '\''); STATE = STATE_NEXT_ARG; continue; }
(any\[ \n>"])+ { handle_val(STD_ARGS, 0, '"'); STATE = STATE_NEXT_ARG; continue; }
any { passthru(STD_ARGS); STATE = STATE_NEXT_ARG; continue; }
">" { passthru(STD_ARGS); handle_form(STD_ARGS); goto state_plain_begin; }
[ \v\t\n]+ { passthru(STD_ARGS); goto state_next_arg; }
alpha { --YYCURSOR; STATE = STATE_ARG; goto state_arg; }
any { passthru(STD_ARGS); goto state_plain_begin; }
*/
state_arg:
start = YYCURSOR;
/*!re2c
alpha+ { passthru(STD_ARGS); handle_arg(STD_ARGS); STATE = STATE_BEFORE_VAL; goto state_before_val; }
any { passthru(STD_ARGS); STATE = STATE_NEXT_ARG; goto state_next_arg; }
*/
state_before_val:
start = YYCURSOR;
/*!re2c
[ ]* "=" [ ]* { passthru(STD_ARGS); STATE = STATE_VAL; goto state_val; }
any { --YYCURSOR; goto state_next_arg_begin; }
*/
state_val:
start = YYCURSOR;
/*!re2c
["] (any\[">])* ["] { handle_val(STD_ARGS, 1, '"'); goto state_next_arg_begin; }
['] (any\['>])* ['] { handle_val(STD_ARGS, 1, '\''); goto state_next_arg_begin; }
(any\[ \n>"'])+ { handle_val(STD_ARGS, 0, '"'); goto state_next_arg_begin; }
any { passthru(STD_ARGS); goto state_next_arg_begin; }
*/
break;
}
}
stop:
scdebug(("stopped in state %d at pos %d (%d:%c)\n", STATE, YYCURSOR - ctx->buf.c, *YYCURSOR, *YYCURSOR));
rest = YYLIMIT - start;
scdebug(("stopped in state %d at pos %d (%d:%c) %d\n", STATE, YYCURSOR - ctx->buf.c, *YYCURSOR, *YYCURSOR, rest));
/* XXX: Crash avoidance. Need to work with reporter to figure out what goes wrong */
if (rest < 0) rest = 0;
@ -301,6 +318,24 @@ stop:
ctx->buf.len = rest;
}
/*
 * Detach and return the bytes currently held in the url_adapt_state_ex
 * buffer.
 *
 * newlen: receives the buffer length; it is written ONLY when a non-NULL
 *         pointer is returned.
 *
 * Returns the buffer pointer, or NULL when the buffer length is zero.
 * On a non-NULL return the state's buffer pointer and length are reset,
 * so the state no longer refers to the returned memory.
 */
char *url_adapt_flush(size_t *newlen)
{
	url_adapt_state_ex_t *ctx;
	char *pending;
	BLS_FETCH();

	ctx = &BG(url_adapt_state_ex);
	if (!ctx->buf.len) {
		return NULL;
	}

	pending = ctx->buf.c;
	*newlen = ctx->buf.len;

	/* Reset the state's view of the buffer; the memory itself is not freed. */
	ctx->buf.c = NULL;
	ctx->buf.len = 0;

	return pending;
}
char *url_adapt_single_url(const char *url, size_t urllen, const char *name, const char *value, size_t *newlen)
{
smart_str surl = {0};
@ -334,6 +369,8 @@ char *url_adapt_ext(const char *src, size_t srclen, const char *name, const char
mainloop(ctx, src, srclen);
*newlen = ctx->result.len;
if (!ctx->result.c)
smart_str_appendl(&ctx->result, "", 0);
smart_str_0(&ctx->result);
ctx->result.len = 0;
return ctx->result.c;

View file

@ -254,10 +254,15 @@ PHPAPI void php_end_ob_buffer(zend_bool send_buffer, zend_bool just_flush)
/*
 * End all output buffers: php_end_ob_buffer() is called repeatedly until
 * the output nesting level is zero.
 *
 * send_buffer: forwarded to php_end_ob_buffer(); also gates the final
 *              session flush below.
 */
PHPAPI void php_end_ob_buffers(zend_bool send_buffer)
{
	OLS_FETCH();
	BLS_FETCH();

	while (OG(nesting_level)) {
		php_end_ob_buffer(send_buffer, 0);
	}

	if (!send_buffer || !BG(use_trans_sid)) {
		return;
	}

	/* Buffers were sent and use_trans_sid is set: let the session layer
	 * flush through the header-write callback. */
	session_adapt_flush(OG(php_header_write));
}
/* }}} */