Fix lexing of nested heredoc strings in token_get_all()

This fixes bug #60097.

Previously, two global variables, CG(heredoc) and CG(heredoc_len), were used
to track the current heredoc label. In order to support nested heredoc
strings, the *previous* heredoc label was stored as the token value of
T_START_HEREDOC, and the parser (language_parser.y) assigned it back to
CG(heredoc).

This made the lexer depend on the parser. As a result, the
token_get_all() function, which accesses the lexer directly without
also running the parser, was not able to tokenize nested heredoc strings
(and leaked memory). The same applies to the source-code highlighting
functions.

The new approach is to maintain a heredoc_label_stack in the lexer, which
contains all active heredoc labels.

As it is no longer required, T_START_HEREDOC and T_END_HEREDOC no longer
carry a token value.

To make working with zend_ptr_stack more convenient in this context, I
added a new function, zend_ptr_stack_top(), which retrieves the top element
of the stack without removing it (similar to zend_stack_top()).
This commit is contained in:
Nikita Popov 2012-03-30 20:41:44 +02:00
parent 15a98ece9f
commit 4cf90e06c9
12 changed files with 561 additions and 425 deletions

3
NEWS
View file

@ -41,4 +41,7 @@ PHP NEWS
- pgsql - pgsql
. Added pg_escape_literal() and pg_escape_identifier() (Yasuo) . Added pg_escape_literal() and pg_escape_identifier() (Yasuo)
- Tokenizer:
. Fixed bug #60097 (token_get_all fails to lex nested heredoc). (Nikita Popov)
<<< NOTE: Insert NEWS from last stable release here prior to actual release! >>> <<< NOTE: Insert NEWS from last stable release here prior to actual release! >>>

View file

@ -6708,9 +6708,6 @@ again:
case T_OPEN_TAG_WITH_ECHO: case T_OPEN_TAG_WITH_ECHO:
retval = T_ECHO; retval = T_ECHO;
break; break;
case T_END_HEREDOC:
efree(Z_STRVAL(zendlval->u.constant));
break;
} }
INIT_PZVAL(&zendlval->u.constant); INIT_PZVAL(&zendlval->u.constant);

View file

@ -89,9 +89,6 @@ struct _zend_compiler_globals {
int zend_lineno; int zend_lineno;
char *heredoc;
int heredoc_len;
zend_op_array *active_op_array; zend_op_array *active_op_array;
HashTable *function_table; /* function symbol table */ HashTable *function_table; /* function symbol table */
@ -297,6 +294,7 @@ struct _zend_php_scanner_globals {
unsigned char *yy_limit; unsigned char *yy_limit;
int yy_state; int yy_state;
zend_stack state_stack; zend_stack state_stack;
zend_ptr_stack heredoc_label_stack;
/* original (unfiltered) script */ /* original (unfiltered) script */
unsigned char *script_org; unsigned char *script_org;

View file

@ -153,8 +153,6 @@ ZEND_API void zend_highlight(zend_syntax_highlighter_ini *syntax_highlighter_ini
efree(token.value.str.val); efree(token.value.str.val);
break; break;
} }
} else if (token_type == T_END_HEREDOC) {
efree(token.value.str.val);
} }
token.type = 0; token.type = 0;
} }

View file

@ -911,8 +911,8 @@ common_scalar:
| T_METHOD_C { $$ = $1; } | T_METHOD_C { $$ = $1; }
| T_FUNC_C { $$ = $1; } | T_FUNC_C { $$ = $1; }
| T_NS_C { $$ = $1; } | T_NS_C { $$ = $1; }
| T_START_HEREDOC T_ENCAPSED_AND_WHITESPACE T_END_HEREDOC { $$ = $2; CG(heredoc) = Z_STRVAL($1.u.constant); CG(heredoc_len) = Z_STRLEN($1.u.constant); } | T_START_HEREDOC T_ENCAPSED_AND_WHITESPACE T_END_HEREDOC { $$ = $2; }
| T_START_HEREDOC T_END_HEREDOC { ZVAL_EMPTY_STRING(&$$.u.constant); INIT_PZVAL(&$$.u.constant); $$.op_type = IS_CONST; CG(heredoc) = Z_STRVAL($1.u.constant); CG(heredoc_len) = Z_STRLEN($1.u.constant); } | T_START_HEREDOC T_END_HEREDOC { ZVAL_EMPTY_STRING(&$$.u.constant); INIT_PZVAL(&$$.u.constant); $$.op_type = IS_CONST; }
; ;
@ -941,7 +941,7 @@ scalar:
| T_NS_SEPARATOR namespace_name { char *tmp = estrndup(Z_STRVAL($2.u.constant), Z_STRLEN($2.u.constant)+1); memcpy(&(tmp[1]), Z_STRVAL($2.u.constant), Z_STRLEN($2.u.constant)+1); tmp[0] = '\\'; efree(Z_STRVAL($2.u.constant)); Z_STRVAL($2.u.constant) = tmp; ++Z_STRLEN($2.u.constant); zend_do_fetch_constant(&$$, NULL, &$2, ZEND_RT, 0 TSRMLS_CC); } | T_NS_SEPARATOR namespace_name { char *tmp = estrndup(Z_STRVAL($2.u.constant), Z_STRLEN($2.u.constant)+1); memcpy(&(tmp[1]), Z_STRVAL($2.u.constant), Z_STRLEN($2.u.constant)+1); tmp[0] = '\\'; efree(Z_STRVAL($2.u.constant)); Z_STRVAL($2.u.constant) = tmp; ++Z_STRLEN($2.u.constant); zend_do_fetch_constant(&$$, NULL, &$2, ZEND_RT, 0 TSRMLS_CC); }
| common_scalar { $$ = $1; } | common_scalar { $$ = $1; }
| '"' encaps_list '"' { $$ = $2; } | '"' encaps_list '"' { $$ = $2; }
| T_START_HEREDOC encaps_list T_END_HEREDOC { $$ = $2; CG(heredoc) = Z_STRVAL($1.u.constant); CG(heredoc_len) = Z_STRLEN($1.u.constant); } | T_START_HEREDOC encaps_list T_END_HEREDOC { $$ = $2; }
| T_CLASS_C { if (Z_TYPE($1.u.constant) == IS_CONSTANT) {zend_do_fetch_constant(&$$, NULL, &$1, ZEND_RT, 1 TSRMLS_CC);} else {$$ = $1;} } | T_CLASS_C { if (Z_TYPE($1.u.constant) == IS_CONSTANT) {zend_do_fetch_constant(&$$, NULL, &$1, ZEND_RT, 1 TSRMLS_CC);} else {$$ = $1;} }
; ;

File diff suppressed because it is too large Load diff

View file

@ -31,6 +31,7 @@ typedef struct _zend_lex_state {
unsigned char *yy_limit; unsigned char *yy_limit;
int yy_state; int yy_state;
zend_stack state_stack; zend_stack state_stack;
zend_ptr_stack heredoc_label_stack;
zend_file_handle *in; zend_file_handle *in;
uint lineno; uint lineno;
@ -50,6 +51,10 @@ typedef struct _zend_lex_state {
const zend_encoding *script_encoding; const zend_encoding *script_encoding;
} zend_lex_state; } zend_lex_state;
/* Describes one currently open heredoc/nowdoc label.
 * The scanner allocates an entry when it scans a "<<<LABEL" opener and
 * pushes it onto SCNG(heredoc_label_stack); the entry is popped and freed
 * again when the matching closing label is scanned. */
typedef struct _zend_heredoc_label {
/* Scanner-owned copy of the bare label text (made with estrndup(), so
 * without the "<<<" opener, leading blanks or surrounding quotes);
 * released with efree(). */
char *label;
/* Length of `label` in bytes, as passed to estrndup(). */
int length;
} zend_heredoc_label;
BEGIN_EXTERN_C() BEGIN_EXTERN_C()
int zend_compare_file_handles(zend_file_handle *fh1, zend_file_handle *fh2); int zend_compare_file_handles(zend_file_handle *fh1, zend_file_handle *fh2);

View file

@ -175,22 +175,23 @@ static void yy_scan_buffer(char *str, unsigned int len TSRMLS_DC)
void startup_scanner(TSRMLS_D) void startup_scanner(TSRMLS_D)
{ {
CG(parse_error) = 0; CG(parse_error) = 0;
CG(heredoc) = NULL;
CG(heredoc_len) = 0;
CG(doc_comment) = NULL; CG(doc_comment) = NULL;
CG(doc_comment_len) = 0; CG(doc_comment_len) = 0;
zend_stack_init(&SCNG(state_stack)); zend_stack_init(&SCNG(state_stack));
zend_ptr_stack_init(&SCNG(heredoc_label_stack));
}
static void heredoc_label_dtor(zend_heredoc_label *heredoc_label) {
efree(heredoc_label->label);
} }
void shutdown_scanner(TSRMLS_D) void shutdown_scanner(TSRMLS_D)
{ {
if (CG(heredoc)) {
efree(CG(heredoc));
CG(heredoc_len)=0;
}
CG(parse_error) = 0; CG(parse_error) = 0;
zend_stack_destroy(&SCNG(state_stack));
RESET_DOC_COMMENT(); RESET_DOC_COMMENT();
zend_stack_destroy(&SCNG(state_stack));
zend_ptr_stack_clean(&SCNG(heredoc_label_stack), (void (*)(void *)) &heredoc_label_dtor, 1);
zend_ptr_stack_destroy(&SCNG(heredoc_label_stack));
} }
ZEND_API void zend_save_lexical_state(zend_lex_state *lex_state TSRMLS_DC) ZEND_API void zend_save_lexical_state(zend_lex_state *lex_state TSRMLS_DC)
@ -205,6 +206,9 @@ ZEND_API void zend_save_lexical_state(zend_lex_state *lex_state TSRMLS_DC)
lex_state->state_stack = SCNG(state_stack); lex_state->state_stack = SCNG(state_stack);
zend_stack_init(&SCNG(state_stack)); zend_stack_init(&SCNG(state_stack));
lex_state->heredoc_label_stack = SCNG(heredoc_label_stack);
zend_ptr_stack_init(&SCNG(heredoc_label_stack));
lex_state->in = SCNG(yy_in); lex_state->in = SCNG(yy_in);
lex_state->yy_state = YYSTATE; lex_state->yy_state = YYSTATE;
lex_state->filename = zend_get_compiled_filename(TSRMLS_C); lex_state->filename = zend_get_compiled_filename(TSRMLS_C);
@ -231,6 +235,10 @@ ZEND_API void zend_restore_lexical_state(zend_lex_state *lex_state TSRMLS_DC)
zend_stack_destroy(&SCNG(state_stack)); zend_stack_destroy(&SCNG(state_stack));
SCNG(state_stack) = lex_state->state_stack; SCNG(state_stack) = lex_state->state_stack;
zend_ptr_stack_clean(&SCNG(heredoc_label_stack), (void (*)(void *)) &heredoc_label_dtor, 1);
zend_ptr_stack_destroy(&SCNG(heredoc_label_stack));
SCNG(heredoc_label_stack) = lex_state->heredoc_label_stack;
SCNG(yy_in) = lex_state->in; SCNG(yy_in) = lex_state->in;
YYSETCONDITION(lex_state->yy_state); YYSETCONDITION(lex_state->yy_state);
CG(zend_lineno) = lex_state->lineno; CG(zend_lineno) = lex_state->lineno;
@ -247,12 +255,6 @@ ZEND_API void zend_restore_lexical_state(zend_lex_state *lex_state TSRMLS_DC)
SCNG(input_filter) = lex_state->input_filter; SCNG(input_filter) = lex_state->input_filter;
SCNG(output_filter) = lex_state->output_filter; SCNG(output_filter) = lex_state->output_filter;
SCNG(script_encoding) = lex_state->script_encoding; SCNG(script_encoding) = lex_state->script_encoding;
if (CG(heredoc)) {
efree(CG(heredoc));
CG(heredoc) = NULL;
CG(heredoc_len) = 0;
}
} }
ZEND_API void zend_destroy_file_handle(zend_file_handle *file_handle TSRMLS_DC) ZEND_API void zend_destroy_file_handle(zend_file_handle *file_handle TSRMLS_DC)
@ -2097,38 +2099,35 @@ inline_html:
<ST_IN_SCRIPTING>b?"<<<"{TABS_AND_SPACES}({LABEL}|([']{LABEL}['])|(["]{LABEL}["])){NEWLINE} { <ST_IN_SCRIPTING>b?"<<<"{TABS_AND_SPACES}({LABEL}|([']{LABEL}['])|(["]{LABEL}["])){NEWLINE} {
char *s; char *s;
int bprefix = (yytext[0] != '<') ? 1 : 0; int bprefix = (yytext[0] != '<') ? 1 : 0;
zend_heredoc_label *heredoc_label = emalloc(sizeof(zend_heredoc_label));
/* save old heredoc label */
Z_STRVAL_P(zendlval) = CG(heredoc);
Z_STRLEN_P(zendlval) = CG(heredoc_len);
CG(zend_lineno)++; CG(zend_lineno)++;
CG(heredoc_len) = yyleng-bprefix-3-1-(yytext[yyleng-2]=='\r'?1:0); heredoc_label->length = yyleng-bprefix-3-1-(yytext[yyleng-2]=='\r'?1:0);
s = yytext+bprefix+3; s = yytext+bprefix+3;
while ((*s == ' ') || (*s == '\t')) { while ((*s == ' ') || (*s == '\t')) {
s++; s++;
CG(heredoc_len)--; heredoc_label->length--;
} }
if (*s == '\'') { if (*s == '\'') {
s++; s++;
CG(heredoc_len) -= 2; heredoc_label->length -= 2;
BEGIN(ST_NOWDOC); BEGIN(ST_NOWDOC);
} else { } else {
if (*s == '"') { if (*s == '"') {
s++; s++;
CG(heredoc_len) -= 2; heredoc_label->length -= 2;
} }
BEGIN(ST_HEREDOC); BEGIN(ST_HEREDOC);
} }
CG(heredoc) = estrndup(s, CG(heredoc_len)); heredoc_label->label = estrndup(s, heredoc_label->length);
/* Check for ending label on the next line */ /* Check for ending label on the next line */
if (CG(heredoc_len) < YYLIMIT - YYCURSOR && !memcmp(YYCURSOR, s, CG(heredoc_len))) { if (heredoc_label->length < YYLIMIT - YYCURSOR && !memcmp(YYCURSOR, s, heredoc_label->length)) {
YYCTYPE *end = YYCURSOR + CG(heredoc_len); YYCTYPE *end = YYCURSOR + heredoc_label->length;
if (*end == ';') { if (*end == ';') {
end++; end++;
@ -2139,6 +2138,8 @@ inline_html:
} }
} }
zend_ptr_stack_push(&SCNG(heredoc_label_stack), (void *) heredoc_label);
return T_START_HEREDOC; return T_START_HEREDOC;
} }
@ -2150,13 +2151,14 @@ inline_html:
<ST_END_HEREDOC>{ANY_CHAR} { <ST_END_HEREDOC>{ANY_CHAR} {
YYCURSOR += CG(heredoc_len) - 1; zend_heredoc_label *heredoc_label = zend_ptr_stack_pop(&SCNG(heredoc_label_stack));
yyleng = CG(heredoc_len);
YYCURSOR += heredoc_label->length - 1;
yyleng = heredoc_label->length;
heredoc_label_dtor(heredoc_label);
efree(heredoc_label);
Z_STRVAL_P(zendlval) = CG(heredoc);
Z_STRLEN_P(zendlval) = CG(heredoc_len);
CG(heredoc) = NULL;
CG(heredoc_len) = 0;
BEGIN(ST_IN_SCRIPTING); BEGIN(ST_IN_SCRIPTING);
return T_END_HEREDOC; return T_END_HEREDOC;
} }
@ -2276,6 +2278,8 @@ double_quotes_scan_done:
<ST_HEREDOC>{ANY_CHAR} { <ST_HEREDOC>{ANY_CHAR} {
int newline = 0; int newline = 0;
zend_heredoc_label *heredoc_label = zend_ptr_stack_top(&SCNG(heredoc_label_stack));
if (YYCURSOR > YYLIMIT) { if (YYCURSOR > YYLIMIT) {
return 0; return 0;
} }
@ -2291,8 +2295,8 @@ double_quotes_scan_done:
/* fall through */ /* fall through */
case '\n': case '\n':
/* Check for ending label on the next line */ /* Check for ending label on the next line */
if (IS_LABEL_START(*YYCURSOR) && CG(heredoc_len) < YYLIMIT - YYCURSOR && !memcmp(YYCURSOR, CG(heredoc), CG(heredoc_len))) { if (IS_LABEL_START(*YYCURSOR) && heredoc_label->length < YYLIMIT - YYCURSOR && !memcmp(YYCURSOR, heredoc_label->label, heredoc_label->length)) {
YYCTYPE *end = YYCURSOR + CG(heredoc_len); YYCTYPE *end = YYCURSOR + heredoc_label->length;
if (*end == ';') { if (*end == ';') {
end++; end++;
@ -2348,6 +2352,8 @@ heredoc_scan_done:
<ST_NOWDOC>{ANY_CHAR} { <ST_NOWDOC>{ANY_CHAR} {
int newline = 0; int newline = 0;
zend_heredoc_label *heredoc_label = zend_ptr_stack_top(&SCNG(heredoc_label_stack));
if (YYCURSOR > YYLIMIT) { if (YYCURSOR > YYLIMIT) {
return 0; return 0;
} }
@ -2363,8 +2369,8 @@ heredoc_scan_done:
/* fall through */ /* fall through */
case '\n': case '\n':
/* Check for ending label on the next line */ /* Check for ending label on the next line */
if (IS_LABEL_START(*YYCURSOR) && CG(heredoc_len) < YYLIMIT - YYCURSOR && !memcmp(YYCURSOR, CG(heredoc), CG(heredoc_len))) { if (IS_LABEL_START(*YYCURSOR) && heredoc_label->length < YYLIMIT - YYCURSOR && !memcmp(YYCURSOR, heredoc_label->label, heredoc_label->length)) {
YYCTYPE *end = YYCURSOR + CG(heredoc_len); YYCTYPE *end = YYCURSOR + heredoc_label->length;
if (*end == ';') { if (*end == ';') {
end++; end++;

View file

@ -1,4 +1,4 @@
/* Generated by re2c 0.13.5 on Thu Mar 1 21:27:30 2012 */ /* Generated by re2c 0.13.5 on Sat Mar 31 21:29:29 2012 */
#line 3 "Zend/zend_language_scanner_defs.h" #line 3 "Zend/zend_language_scanner_defs.h"
enum YYCONDTYPE { enum YYCONDTYPE {

View file

@ -111,6 +111,11 @@ static zend_always_inline void *zend_ptr_stack_pop(zend_ptr_stack *stack)
return *(--stack->top_element); return *(--stack->top_element);
} }
/* Peek at the most recently pushed element of a pointer stack.
 *
 * Unlike zend_ptr_stack_pop(), the stack is left unmodified.
 * No emptiness check is performed: the caller must make sure the stack
 * holds at least one element before calling this. */
static inline void *zend_ptr_stack_top(zend_ptr_stack *stack)
{
	/* The last used slot sits one position below the current element count. */
	return *(stack->elements + (stack->top - 1));
}
#endif /* ZEND_PTR_STACK_H */ #endif /* ZEND_PTR_STACK_H */
/* /*

View file

@ -0,0 +1,121 @@
--TEST--
Bug 60097: token_get_all fails to lex nested heredoc
--FILE--
<?php
var_dump(token_get_all('<?php
<<<DOC1
{$s(<<<DOC2
DOC2
)}
DOC1;
'));
?>
--EXPECT--
array(14) {
[0]=>
array(3) {
[0]=>
int(372)
[1]=>
string(6) "<?php
"
[2]=>
int(1)
}
[1]=>
array(3) {
[0]=>
int(376)
[1]=>
string(8) "<<<DOC1
"
[2]=>
int(2)
}
[2]=>
array(3) {
[0]=>
int(379)
[1]=>
string(1) "{"
[2]=>
int(3)
}
[3]=>
array(3) {
[0]=>
int(309)
[1]=>
string(2) "$s"
[2]=>
int(3)
}
[4]=>
string(1) "("
[5]=>
array(3) {
[0]=>
int(376)
[1]=>
string(8) "<<<DOC2
"
[2]=>
int(3)
}
[6]=>
array(3) {
[0]=>
int(377)
[1]=>
string(4) "DOC2"
[2]=>
int(4)
}
[7]=>
array(3) {
[0]=>
int(375)
[1]=>
string(1) "
"
[2]=>
int(4)
}
[8]=>
string(1) ")"
[9]=>
string(1) "}"
[10]=>
array(3) {
[0]=>
int(314)
[1]=>
string(1) "
"
[2]=>
int(5)
}
[11]=>
array(3) {
[0]=>
int(377)
[1]=>
string(4) "DOC1"
[2]=>
int(6)
}
[12]=>
string(1) ";"
[13]=>
array(3) {
[0]=>
int(375)
[1]=>
string(1) "
"
[2]=>
int(6)
}
}

View file

@ -138,11 +138,8 @@ static void tokenize(zval *return_value TSRMLS_DC)
token_line = ++CG(zend_lineno); token_line = ++CG(zend_lineno);
CG(increment_lineno) = 0; CG(increment_lineno) = 0;
} }
add_next_index_stringl(keyword, Z_STRVAL(token), Z_STRLEN(token), 1);
efree(Z_STRVAL(token));
} else {
add_next_index_stringl(keyword, (char *)zendtext, zendleng, 1);
} }
add_next_index_stringl(keyword, (char *)zendtext, zendleng, 1);
add_next_index_long(keyword, token_line); add_next_index_long(keyword, token_line);
add_next_index_zval(return_value, keyword); add_next_index_zval(return_value, keyword);
} else { } else {