mirror of
https://github.com/php/php-src.git
synced 2025-08-16 05:58:45 +02:00
Fix lexing of nested heredoc strings in token_get_all()
This fixes bug #60097. Previously, two global variables, CG(heredoc) and CG(heredoc_len), were used to track the current heredoc label. To support nested heredoc strings, the *previous* heredoc label was assigned as the token value of T_START_HEREDOC, and language_parser.y assigned that value to CG(heredoc). This made the lexer depend on the parser. As a result, the token_get_all() function, which accesses the lexer directly without also running the parser, was not able to tokenize nested heredoc strings (and leaked memory). The same applies to the source-code highlighting functions. The new approach is to maintain a heredoc_label_stack in the lexer, which contains all active heredoc labels. Because it is no longer required, T_START_HEREDOC and T_END_HEREDOC no longer carry a token value. To make working with zend_ptr_stack more convenient in this context, I added a new function, zend_ptr_stack_top(), which retrieves the top element of the stack (similar to zend_stack_top()).
This commit is contained in:
parent
15a98ece9f
commit
4cf90e06c9
12 changed files with 561 additions and 425 deletions
3
NEWS
3
NEWS
|
@ -41,4 +41,7 @@ PHP NEWS
|
|||
- pgsql
|
||||
. Added pg_escape_literal() and pg_escape_identifier() (Yasuo)
|
||||
|
||||
- Tokenizer:
|
||||
. Fixed bug #60097 (token_get_all fails to lex nested heredoc). (Nikita Popov)
|
||||
|
||||
<<< NOTE: Insert NEWS from last stable release here prior to actual release! >>>
|
||||
|
|
|
@ -6708,9 +6708,6 @@ again:
|
|||
case T_OPEN_TAG_WITH_ECHO:
|
||||
retval = T_ECHO;
|
||||
break;
|
||||
case T_END_HEREDOC:
|
||||
efree(Z_STRVAL(zendlval->u.constant));
|
||||
break;
|
||||
}
|
||||
|
||||
INIT_PZVAL(&zendlval->u.constant);
|
||||
|
|
|
@ -89,9 +89,6 @@ struct _zend_compiler_globals {
|
|||
|
||||
int zend_lineno;
|
||||
|
||||
char *heredoc;
|
||||
int heredoc_len;
|
||||
|
||||
zend_op_array *active_op_array;
|
||||
|
||||
HashTable *function_table; /* function symbol table */
|
||||
|
@ -297,6 +294,7 @@ struct _zend_php_scanner_globals {
|
|||
unsigned char *yy_limit;
|
||||
int yy_state;
|
||||
zend_stack state_stack;
|
||||
zend_ptr_stack heredoc_label_stack;
|
||||
|
||||
/* original (unfiltered) script */
|
||||
unsigned char *script_org;
|
||||
|
|
|
@ -153,8 +153,6 @@ ZEND_API void zend_highlight(zend_syntax_highlighter_ini *syntax_highlighter_ini
|
|||
efree(token.value.str.val);
|
||||
break;
|
||||
}
|
||||
} else if (token_type == T_END_HEREDOC) {
|
||||
efree(token.value.str.val);
|
||||
}
|
||||
token.type = 0;
|
||||
}
|
||||
|
|
|
@ -911,8 +911,8 @@ common_scalar:
|
|||
| T_METHOD_C { $$ = $1; }
|
||||
| T_FUNC_C { $$ = $1; }
|
||||
| T_NS_C { $$ = $1; }
|
||||
| T_START_HEREDOC T_ENCAPSED_AND_WHITESPACE T_END_HEREDOC { $$ = $2; CG(heredoc) = Z_STRVAL($1.u.constant); CG(heredoc_len) = Z_STRLEN($1.u.constant); }
|
||||
| T_START_HEREDOC T_END_HEREDOC { ZVAL_EMPTY_STRING(&$$.u.constant); INIT_PZVAL(&$$.u.constant); $$.op_type = IS_CONST; CG(heredoc) = Z_STRVAL($1.u.constant); CG(heredoc_len) = Z_STRLEN($1.u.constant); }
|
||||
| T_START_HEREDOC T_ENCAPSED_AND_WHITESPACE T_END_HEREDOC { $$ = $2; }
|
||||
| T_START_HEREDOC T_END_HEREDOC { ZVAL_EMPTY_STRING(&$$.u.constant); INIT_PZVAL(&$$.u.constant); $$.op_type = IS_CONST; }
|
||||
;
|
||||
|
||||
|
||||
|
@ -941,7 +941,7 @@ scalar:
|
|||
| T_NS_SEPARATOR namespace_name { char *tmp = estrndup(Z_STRVAL($2.u.constant), Z_STRLEN($2.u.constant)+1); memcpy(&(tmp[1]), Z_STRVAL($2.u.constant), Z_STRLEN($2.u.constant)+1); tmp[0] = '\\'; efree(Z_STRVAL($2.u.constant)); Z_STRVAL($2.u.constant) = tmp; ++Z_STRLEN($2.u.constant); zend_do_fetch_constant(&$$, NULL, &$2, ZEND_RT, 0 TSRMLS_CC); }
|
||||
| common_scalar { $$ = $1; }
|
||||
| '"' encaps_list '"' { $$ = $2; }
|
||||
| T_START_HEREDOC encaps_list T_END_HEREDOC { $$ = $2; CG(heredoc) = Z_STRVAL($1.u.constant); CG(heredoc_len) = Z_STRLEN($1.u.constant); }
|
||||
| T_START_HEREDOC encaps_list T_END_HEREDOC { $$ = $2; }
|
||||
| T_CLASS_C { if (Z_TYPE($1.u.constant) == IS_CONSTANT) {zend_do_fetch_constant(&$$, NULL, &$1, ZEND_RT, 1 TSRMLS_CC);} else {$$ = $1;} }
|
||||
;
|
||||
|
||||
|
|
File diff suppressed because it is too large
Load diff
|
@ -31,6 +31,7 @@ typedef struct _zend_lex_state {
|
|||
unsigned char *yy_limit;
|
||||
int yy_state;
|
||||
zend_stack state_stack;
|
||||
zend_ptr_stack heredoc_label_stack;
|
||||
|
||||
zend_file_handle *in;
|
||||
uint lineno;
|
||||
|
@ -50,6 +51,10 @@ typedef struct _zend_lex_state {
|
|||
const zend_encoding *script_encoding;
|
||||
} zend_lex_state;
|
||||
|
||||
/* One active heredoc/nowdoc label tracked by the scanner; pointers to these are pushed on heredoc_label_stack. */
typedef struct _zend_heredoc_label {
|
||||
char *label; /* label text without quotes or leading whitespace; the scanner fills it with estrndup() and it is released with efree() */
|
||||
int length; /* length of label, as passed to estrndup() and memcmp() by the scanner */
|
||||
} zend_heredoc_label;
|
||||
|
||||
BEGIN_EXTERN_C()
|
||||
int zend_compare_file_handles(zend_file_handle *fh1, zend_file_handle *fh2);
|
||||
|
|
|
@ -175,22 +175,23 @@ static void yy_scan_buffer(char *str, unsigned int len TSRMLS_DC)
|
|||
void startup_scanner(TSRMLS_D)
|
||||
{
|
||||
CG(parse_error) = 0;
|
||||
CG(heredoc) = NULL;
|
||||
CG(heredoc_len) = 0;
|
||||
CG(doc_comment) = NULL;
|
||||
CG(doc_comment_len) = 0;
|
||||
zend_stack_init(&SCNG(state_stack));
|
||||
zend_ptr_stack_init(&SCNG(heredoc_label_stack));
|
||||
}
|
||||
|
||||
/*
 * Releases the label string held by heredoc_label.
 * The zend_heredoc_label struct itself is NOT freed here; the ST_END_HEREDOC
 * rule calls efree() on it separately after invoking this destructor.
 * NOTE(review): when used via zend_ptr_stack_clean(..., 1) the trailing 1
 * presumably makes the stack free each element - confirm against zend_ptr_stack.c.
 */
static void heredoc_label_dtor(zend_heredoc_label *heredoc_label) {
|
||||
efree(heredoc_label->label);
|
||||
}
|
||||
|
||||
void shutdown_scanner(TSRMLS_D)
|
||||
{
|
||||
if (CG(heredoc)) {
|
||||
efree(CG(heredoc));
|
||||
CG(heredoc_len)=0;
|
||||
}
|
||||
CG(parse_error) = 0;
|
||||
zend_stack_destroy(&SCNG(state_stack));
|
||||
RESET_DOC_COMMENT();
|
||||
zend_stack_destroy(&SCNG(state_stack));
|
||||
zend_ptr_stack_clean(&SCNG(heredoc_label_stack), (void (*)(void *)) &heredoc_label_dtor, 1);
|
||||
zend_ptr_stack_destroy(&SCNG(heredoc_label_stack));
|
||||
}
|
||||
|
||||
ZEND_API void zend_save_lexical_state(zend_lex_state *lex_state TSRMLS_DC)
|
||||
|
@ -205,6 +206,9 @@ ZEND_API void zend_save_lexical_state(zend_lex_state *lex_state TSRMLS_DC)
|
|||
lex_state->state_stack = SCNG(state_stack);
|
||||
zend_stack_init(&SCNG(state_stack));
|
||||
|
||||
lex_state->heredoc_label_stack = SCNG(heredoc_label_stack);
|
||||
zend_ptr_stack_init(&SCNG(heredoc_label_stack));
|
||||
|
||||
lex_state->in = SCNG(yy_in);
|
||||
lex_state->yy_state = YYSTATE;
|
||||
lex_state->filename = zend_get_compiled_filename(TSRMLS_C);
|
||||
|
@ -231,6 +235,10 @@ ZEND_API void zend_restore_lexical_state(zend_lex_state *lex_state TSRMLS_DC)
|
|||
zend_stack_destroy(&SCNG(state_stack));
|
||||
SCNG(state_stack) = lex_state->state_stack;
|
||||
|
||||
zend_ptr_stack_clean(&SCNG(heredoc_label_stack), (void (*)(void *)) &heredoc_label_dtor, 1);
|
||||
zend_ptr_stack_destroy(&SCNG(heredoc_label_stack));
|
||||
SCNG(heredoc_label_stack) = lex_state->heredoc_label_stack;
|
||||
|
||||
SCNG(yy_in) = lex_state->in;
|
||||
YYSETCONDITION(lex_state->yy_state);
|
||||
CG(zend_lineno) = lex_state->lineno;
|
||||
|
@ -247,12 +255,6 @@ ZEND_API void zend_restore_lexical_state(zend_lex_state *lex_state TSRMLS_DC)
|
|||
SCNG(input_filter) = lex_state->input_filter;
|
||||
SCNG(output_filter) = lex_state->output_filter;
|
||||
SCNG(script_encoding) = lex_state->script_encoding;
|
||||
|
||||
if (CG(heredoc)) {
|
||||
efree(CG(heredoc));
|
||||
CG(heredoc) = NULL;
|
||||
CG(heredoc_len) = 0;
|
||||
}
|
||||
}
|
||||
|
||||
ZEND_API void zend_destroy_file_handle(zend_file_handle *file_handle TSRMLS_DC)
|
||||
|
@ -2097,38 +2099,35 @@ inline_html:
|
|||
<ST_IN_SCRIPTING>b?"<<<"{TABS_AND_SPACES}({LABEL}|([']{LABEL}['])|(["]{LABEL}["])){NEWLINE} {
|
||||
char *s;
|
||||
int bprefix = (yytext[0] != '<') ? 1 : 0;
|
||||
|
||||
/* save old heredoc label */
|
||||
Z_STRVAL_P(zendlval) = CG(heredoc);
|
||||
Z_STRLEN_P(zendlval) = CG(heredoc_len);
|
||||
zend_heredoc_label *heredoc_label = emalloc(sizeof(zend_heredoc_label));
|
||||
|
||||
CG(zend_lineno)++;
|
||||
CG(heredoc_len) = yyleng-bprefix-3-1-(yytext[yyleng-2]=='\r'?1:0);
|
||||
heredoc_label->length = yyleng-bprefix-3-1-(yytext[yyleng-2]=='\r'?1:0);
|
||||
s = yytext+bprefix+3;
|
||||
while ((*s == ' ') || (*s == '\t')) {
|
||||
s++;
|
||||
CG(heredoc_len)--;
|
||||
heredoc_label->length--;
|
||||
}
|
||||
|
||||
if (*s == '\'') {
|
||||
s++;
|
||||
CG(heredoc_len) -= 2;
|
||||
heredoc_label->length -= 2;
|
||||
|
||||
BEGIN(ST_NOWDOC);
|
||||
} else {
|
||||
if (*s == '"') {
|
||||
s++;
|
||||
CG(heredoc_len) -= 2;
|
||||
heredoc_label->length -= 2;
|
||||
}
|
||||
|
||||
BEGIN(ST_HEREDOC);
|
||||
}
|
||||
|
||||
CG(heredoc) = estrndup(s, CG(heredoc_len));
|
||||
heredoc_label->label = estrndup(s, heredoc_label->length);
|
||||
|
||||
/* Check for ending label on the next line */
|
||||
if (CG(heredoc_len) < YYLIMIT - YYCURSOR && !memcmp(YYCURSOR, s, CG(heredoc_len))) {
|
||||
YYCTYPE *end = YYCURSOR + CG(heredoc_len);
|
||||
if (heredoc_label->length < YYLIMIT - YYCURSOR && !memcmp(YYCURSOR, s, heredoc_label->length)) {
|
||||
YYCTYPE *end = YYCURSOR + heredoc_label->length;
|
||||
|
||||
if (*end == ';') {
|
||||
end++;
|
||||
|
@ -2139,6 +2138,8 @@ inline_html:
|
|||
}
|
||||
}
|
||||
|
||||
zend_ptr_stack_push(&SCNG(heredoc_label_stack), (void *) heredoc_label);
|
||||
|
||||
return T_START_HEREDOC;
|
||||
}
|
||||
|
||||
|
@ -2150,13 +2151,14 @@ inline_html:
|
|||
|
||||
|
||||
<ST_END_HEREDOC>{ANY_CHAR} {
|
||||
YYCURSOR += CG(heredoc_len) - 1;
|
||||
yyleng = CG(heredoc_len);
|
||||
zend_heredoc_label *heredoc_label = zend_ptr_stack_pop(&SCNG(heredoc_label_stack));
|
||||
|
||||
YYCURSOR += heredoc_label->length - 1;
|
||||
yyleng = heredoc_label->length;
|
||||
|
||||
heredoc_label_dtor(heredoc_label);
|
||||
efree(heredoc_label);
|
||||
|
||||
Z_STRVAL_P(zendlval) = CG(heredoc);
|
||||
Z_STRLEN_P(zendlval) = CG(heredoc_len);
|
||||
CG(heredoc) = NULL;
|
||||
CG(heredoc_len) = 0;
|
||||
BEGIN(ST_IN_SCRIPTING);
|
||||
return T_END_HEREDOC;
|
||||
}
|
||||
|
@ -2276,6 +2278,8 @@ double_quotes_scan_done:
|
|||
<ST_HEREDOC>{ANY_CHAR} {
|
||||
int newline = 0;
|
||||
|
||||
zend_heredoc_label *heredoc_label = zend_ptr_stack_top(&SCNG(heredoc_label_stack));
|
||||
|
||||
if (YYCURSOR > YYLIMIT) {
|
||||
return 0;
|
||||
}
|
||||
|
@ -2291,8 +2295,8 @@ double_quotes_scan_done:
|
|||
/* fall through */
|
||||
case '\n':
|
||||
/* Check for ending label on the next line */
|
||||
if (IS_LABEL_START(*YYCURSOR) && CG(heredoc_len) < YYLIMIT - YYCURSOR && !memcmp(YYCURSOR, CG(heredoc), CG(heredoc_len))) {
|
||||
YYCTYPE *end = YYCURSOR + CG(heredoc_len);
|
||||
if (IS_LABEL_START(*YYCURSOR) && heredoc_label->length < YYLIMIT - YYCURSOR && !memcmp(YYCURSOR, heredoc_label->label, heredoc_label->length)) {
|
||||
YYCTYPE *end = YYCURSOR + heredoc_label->length;
|
||||
|
||||
if (*end == ';') {
|
||||
end++;
|
||||
|
@ -2348,6 +2352,8 @@ heredoc_scan_done:
|
|||
<ST_NOWDOC>{ANY_CHAR} {
|
||||
int newline = 0;
|
||||
|
||||
zend_heredoc_label *heredoc_label = zend_ptr_stack_top(&SCNG(heredoc_label_stack));
|
||||
|
||||
if (YYCURSOR > YYLIMIT) {
|
||||
return 0;
|
||||
}
|
||||
|
@ -2363,8 +2369,8 @@ heredoc_scan_done:
|
|||
/* fall through */
|
||||
case '\n':
|
||||
/* Check for ending label on the next line */
|
||||
if (IS_LABEL_START(*YYCURSOR) && CG(heredoc_len) < YYLIMIT - YYCURSOR && !memcmp(YYCURSOR, CG(heredoc), CG(heredoc_len))) {
|
||||
YYCTYPE *end = YYCURSOR + CG(heredoc_len);
|
||||
if (IS_LABEL_START(*YYCURSOR) && heredoc_label->length < YYLIMIT - YYCURSOR && !memcmp(YYCURSOR, heredoc_label->label, heredoc_label->length)) {
|
||||
YYCTYPE *end = YYCURSOR + heredoc_label->length;
|
||||
|
||||
if (*end == ';') {
|
||||
end++;
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
/* Generated by re2c 0.13.5 on Thu Mar 1 21:27:30 2012 */
|
||||
/* Generated by re2c 0.13.5 on Sat Mar 31 21:29:29 2012 */
|
||||
#line 3 "Zend/zend_language_scanner_defs.h"
|
||||
|
||||
enum YYCONDTYPE {
|
||||
|
|
|
@ -111,6 +111,11 @@ static zend_always_inline void *zend_ptr_stack_pop(zend_ptr_stack *stack)
|
|||
return *(--stack->top_element);
|
||||
}
|
||||
|
||||
/*
 * Returns the element at index top - 1 of the stack without removing it.
 * No emptiness check is performed: the caller must ensure the stack holds
 * at least one element, otherwise elements[-1] is read.
 * NOTE(review): the neighbouring zend_ptr_stack_pop() is declared
 * zend_always_inline while this helper uses plain inline - consider matching.
 */
static inline void *zend_ptr_stack_top(zend_ptr_stack *stack)
|
||||
{
|
||||
return stack->elements[stack->top - 1];
|
||||
}
|
||||
|
||||
#endif /* ZEND_PTR_STACK_H */
|
||||
|
||||
/*
|
||||
|
|
121
ext/tokenizer/tests/bug60097.phpt
Normal file
121
ext/tokenizer/tests/bug60097.phpt
Normal file
|
@ -0,0 +1,121 @@
|
|||
--TEST--
|
||||
Bug 60097: token_get_all fails to lex nested heredoc
|
||||
--FILE--
|
||||
<?php
|
||||
|
||||
var_dump(token_get_all('<?php
|
||||
<<<DOC1
|
||||
{$s(<<<DOC2
|
||||
DOC2
|
||||
)}
|
||||
DOC1;
|
||||
'));
|
||||
|
||||
?>
|
||||
--EXPECT--
|
||||
array(14) {
|
||||
[0]=>
|
||||
array(3) {
|
||||
[0]=>
|
||||
int(372)
|
||||
[1]=>
|
||||
string(6) "<?php
|
||||
"
|
||||
[2]=>
|
||||
int(1)
|
||||
}
|
||||
[1]=>
|
||||
array(3) {
|
||||
[0]=>
|
||||
int(376)
|
||||
[1]=>
|
||||
string(8) "<<<DOC1
|
||||
"
|
||||
[2]=>
|
||||
int(2)
|
||||
}
|
||||
[2]=>
|
||||
array(3) {
|
||||
[0]=>
|
||||
int(379)
|
||||
[1]=>
|
||||
string(1) "{"
|
||||
[2]=>
|
||||
int(3)
|
||||
}
|
||||
[3]=>
|
||||
array(3) {
|
||||
[0]=>
|
||||
int(309)
|
||||
[1]=>
|
||||
string(2) "$s"
|
||||
[2]=>
|
||||
int(3)
|
||||
}
|
||||
[4]=>
|
||||
string(1) "("
|
||||
[5]=>
|
||||
array(3) {
|
||||
[0]=>
|
||||
int(376)
|
||||
[1]=>
|
||||
string(8) "<<<DOC2
|
||||
"
|
||||
[2]=>
|
||||
int(3)
|
||||
}
|
||||
[6]=>
|
||||
array(3) {
|
||||
[0]=>
|
||||
int(377)
|
||||
[1]=>
|
||||
string(4) "DOC2"
|
||||
[2]=>
|
||||
int(4)
|
||||
}
|
||||
[7]=>
|
||||
array(3) {
|
||||
[0]=>
|
||||
int(375)
|
||||
[1]=>
|
||||
string(1) "
|
||||
"
|
||||
[2]=>
|
||||
int(4)
|
||||
}
|
||||
[8]=>
|
||||
string(1) ")"
|
||||
[9]=>
|
||||
string(1) "}"
|
||||
[10]=>
|
||||
array(3) {
|
||||
[0]=>
|
||||
int(314)
|
||||
[1]=>
|
||||
string(1) "
|
||||
"
|
||||
[2]=>
|
||||
int(5)
|
||||
}
|
||||
[11]=>
|
||||
array(3) {
|
||||
[0]=>
|
||||
int(377)
|
||||
[1]=>
|
||||
string(4) "DOC1"
|
||||
[2]=>
|
||||
int(6)
|
||||
}
|
||||
[12]=>
|
||||
string(1) ";"
|
||||
[13]=>
|
||||
array(3) {
|
||||
[0]=>
|
||||
int(375)
|
||||
[1]=>
|
||||
string(1) "
|
||||
"
|
||||
[2]=>
|
||||
int(6)
|
||||
}
|
||||
}
|
|
@ -138,11 +138,8 @@ static void tokenize(zval *return_value TSRMLS_DC)
|
|||
token_line = ++CG(zend_lineno);
|
||||
CG(increment_lineno) = 0;
|
||||
}
|
||||
add_next_index_stringl(keyword, Z_STRVAL(token), Z_STRLEN(token), 1);
|
||||
efree(Z_STRVAL(token));
|
||||
} else {
|
||||
add_next_index_stringl(keyword, (char *)zendtext, zendleng, 1);
|
||||
}
|
||||
add_next_index_stringl(keyword, (char *)zendtext, zendleng, 1);
|
||||
add_next_index_long(keyword, token_line);
|
||||
add_next_index_zval(return_value, keyword);
|
||||
} else {
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue