mirror of
https://github.com/php/php-src.git
synced 2025-08-16 05:58:45 +02:00
Fix bug #69430
Don't throw from token_get_all() unless the TOKEN_PARSE flag is used. Without that flag, lexer errors are reported as T_ERROR tokens in the returned array instead of as a ParseError exception.
This commit is contained in:
parent
8abc3022b0
commit
d91aad5966
7 changed files with 415 additions and 371 deletions
3
NEWS
3
NEWS
|
@ -12,6 +12,9 @@
|
||||||
- Standard:
|
- Standard:
|
||||||
. Fixed bug #70018 (exec does not strip all whitespace). (Laruence)
|
. Fixed bug #70018 (exec does not strip all whitespace). (Laruence)
|
||||||
|
|
||||||
|
- Tokenizer:
|
||||||
|
. Fixed bug #69430 (token_get_all has new irrecoverable errors). (Nikita)
|
||||||
|
|
||||||
09 Jul 2015, PHP 7.0.0 Beta 1
|
09 Jul 2015, PHP 7.0.0 Beta 1
|
||||||
|
|
||||||
- Core:
|
- Core:
|
||||||
|
|
File diff suppressed because it is too large
Load diff
|
@ -1011,6 +1011,7 @@ static int zend_scan_escape_string(zval *zendlval, char *str, int len, char quot
|
||||||
zend_throw_exception(zend_ce_parse_error,
|
zend_throw_exception(zend_ce_parse_error,
|
||||||
"Invalid UTF-8 codepoint escape sequence", 0);
|
"Invalid UTF-8 codepoint escape sequence", 0);
|
||||||
zval_ptr_dtor(zendlval);
|
zval_ptr_dtor(zendlval);
|
||||||
|
ZVAL_UNDEF(zendlval);
|
||||||
return FAILURE;
|
return FAILURE;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1022,6 +1023,7 @@ static int zend_scan_escape_string(zval *zendlval, char *str, int len, char quot
|
||||||
zend_throw_exception(zend_ce_parse_error,
|
zend_throw_exception(zend_ce_parse_error,
|
||||||
"Invalid UTF-8 codepoint escape sequence: Codepoint too large", 0);
|
"Invalid UTF-8 codepoint escape sequence: Codepoint too large", 0);
|
||||||
zval_ptr_dtor(zendlval);
|
zval_ptr_dtor(zendlval);
|
||||||
|
ZVAL_UNDEF(zendlval);
|
||||||
return FAILURE;
|
return FAILURE;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1659,6 +1661,7 @@ NEWLINE ("\r"|"\n"|"\r\n")
|
||||||
*/
|
*/
|
||||||
if (end != yytext + yyleng) {
|
if (end != yytext + yyleng) {
|
||||||
zend_throw_exception(zend_ce_parse_error, "Invalid numeric literal", 0);
|
zend_throw_exception(zend_ce_parse_error, "Invalid numeric literal", 0);
|
||||||
|
ZVAL_UNDEF(zendlval);
|
||||||
RETURN_TOKEN(T_ERROR);
|
RETURN_TOKEN(T_ERROR);
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
|
@ -1676,6 +1679,7 @@ NEWLINE ("\r"|"\n"|"\r\n")
|
||||||
if (end != yytext + yyleng) {
|
if (end != yytext + yyleng) {
|
||||||
zend_throw_exception(zend_ce_parse_error,
|
zend_throw_exception(zend_ce_parse_error,
|
||||||
"Invalid numeric literal", 0);
|
"Invalid numeric literal", 0);
|
||||||
|
ZVAL_UNDEF(zendlval);
|
||||||
RETURN_TOKEN(T_ERROR);
|
RETURN_TOKEN(T_ERROR);
|
||||||
}
|
}
|
||||||
ZEND_ASSERT(!errno);
|
ZEND_ASSERT(!errno);
|
||||||
|
@ -1684,6 +1688,7 @@ NEWLINE ("\r"|"\n"|"\r\n")
|
||||||
/* Also not an assert for the same reason */
|
/* Also not an assert for the same reason */
|
||||||
if (end != yytext + yyleng) {
|
if (end != yytext + yyleng) {
|
||||||
zend_throw_exception(zend_ce_parse_error, "Invalid numeric literal", 0);
|
zend_throw_exception(zend_ce_parse_error, "Invalid numeric literal", 0);
|
||||||
|
ZVAL_UNDEF(zendlval);
|
||||||
RETURN_TOKEN(T_ERROR);
|
RETURN_TOKEN(T_ERROR);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -7,10 +7,19 @@ Parse errors during token_get_all()
|
||||||
|
|
||||||
function test_parse_error($code) {
|
function test_parse_error($code) {
|
||||||
try {
|
try {
|
||||||
var_dump(token_get_all($code));
|
var_dump(token_get_all($code, TOKEN_PARSE));
|
||||||
} catch (ParseError $e) {
|
} catch (ParseError $e) {
|
||||||
echo $e->getMessage(), "\n";
|
echo $e->getMessage(), "\n";
|
||||||
}
|
}
|
||||||
|
|
||||||
|
foreach (token_get_all($code) as $token) {
|
||||||
|
if (is_array($token)) {
|
||||||
|
echo token_name($token[0]), " ($token[1])\n";
|
||||||
|
} else {
|
||||||
|
echo "$token\n";
|
||||||
|
}
|
||||||
|
}
|
||||||
|
echo "\n";
|
||||||
}
|
}
|
||||||
|
|
||||||
test_parse_error('<?php var_dump(078);');
|
test_parse_error('<?php var_dump(078);');
|
||||||
|
@ -21,6 +30,37 @@ test_parse_error('<?php var_dump(078 + 078);');
|
||||||
?>
|
?>
|
||||||
--EXPECT--
|
--EXPECT--
|
||||||
Invalid numeric literal
|
Invalid numeric literal
|
||||||
|
T_OPEN_TAG (<?php )
|
||||||
|
T_STRING (var_dump)
|
||||||
|
(
|
||||||
|
T_ERROR (078)
|
||||||
|
)
|
||||||
|
;
|
||||||
|
|
||||||
Invalid UTF-8 codepoint escape sequence
|
Invalid UTF-8 codepoint escape sequence
|
||||||
|
T_OPEN_TAG (<?php )
|
||||||
|
T_STRING (var_dump)
|
||||||
|
(
|
||||||
|
T_ERROR ("\u{xyz}")
|
||||||
|
)
|
||||||
|
;
|
||||||
|
|
||||||
Invalid UTF-8 codepoint escape sequence: Codepoint too large
|
Invalid UTF-8 codepoint escape sequence: Codepoint too large
|
||||||
|
T_OPEN_TAG (<?php )
|
||||||
|
T_STRING (var_dump)
|
||||||
|
(
|
||||||
|
T_ERROR ("\u{ffffff}")
|
||||||
|
)
|
||||||
|
;
|
||||||
|
|
||||||
Invalid numeric literal
|
Invalid numeric literal
|
||||||
|
T_OPEN_TAG (<?php )
|
||||||
|
T_STRING (var_dump)
|
||||||
|
(
|
||||||
|
T_ERROR (078)
|
||||||
|
T_WHITESPACE ( )
|
||||||
|
+
|
||||||
|
T_WHITESPACE ( )
|
||||||
|
T_ERROR (078)
|
||||||
|
)
|
||||||
|
;
|
||||||
|
|
|
@ -28,6 +28,7 @@
|
||||||
#include "php_tokenizer.h"
|
#include "php_tokenizer.h"
|
||||||
|
|
||||||
#include "zend.h"
|
#include "zend.h"
|
||||||
|
#include "zend_exceptions.h"
|
||||||
#include "zend_language_scanner.h"
|
#include "zend_language_scanner.h"
|
||||||
#include "zend_language_scanner_defs.h"
|
#include "zend_language_scanner_defs.h"
|
||||||
#include <zend_language_parser.h>
|
#include <zend_language_parser.h>
|
||||||
|
@ -112,7 +113,6 @@ static zend_bool tokenize(zval *return_value, zend_string *source)
|
||||||
zval token;
|
zval token;
|
||||||
zval keyword;
|
zval keyword;
|
||||||
int token_type;
|
int token_type;
|
||||||
zend_bool destroy;
|
|
||||||
int token_line = 1;
|
int token_line = 1;
|
||||||
int need_tokens = -1; /* for __halt_compiler lexing. -1 = disabled */
|
int need_tokens = -1; /* for __halt_compiler lexing. -1 = disabled */
|
||||||
|
|
||||||
|
@ -127,24 +127,10 @@ static zend_bool tokenize(zval *return_value, zend_string *source)
|
||||||
LANG_SCNG(yy_state) = yycINITIAL;
|
LANG_SCNG(yy_state) = yycINITIAL;
|
||||||
array_init(return_value);
|
array_init(return_value);
|
||||||
|
|
||||||
ZVAL_NULL(&token);
|
ZVAL_UNDEF(&token);
|
||||||
while ((token_type = lex_scan(&token))) {
|
while ((token_type = lex_scan(&token))) {
|
||||||
|
if (token_type == T_CLOSE_TAG && zendtext[zendleng - 1] != '>') {
|
||||||
if(token_type == T_ERROR) break;
|
CG(zend_lineno)++;
|
||||||
|
|
||||||
destroy = 1;
|
|
||||||
switch (token_type) {
|
|
||||||
case T_CLOSE_TAG:
|
|
||||||
if (zendtext[zendleng - 1] != '>') {
|
|
||||||
CG(zend_lineno)++;
|
|
||||||
}
|
|
||||||
case T_OPEN_TAG:
|
|
||||||
case T_OPEN_TAG_WITH_ECHO:
|
|
||||||
case T_WHITESPACE:
|
|
||||||
case T_COMMENT:
|
|
||||||
case T_DOC_COMMENT:
|
|
||||||
destroy = 0;
|
|
||||||
break;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if (token_type >= 256) {
|
if (token_type >= 256) {
|
||||||
|
@ -162,10 +148,11 @@ static zend_bool tokenize(zval *return_value, zend_string *source)
|
||||||
} else {
|
} else {
|
||||||
add_next_index_stringl(return_value, (char *)zendtext, zendleng);
|
add_next_index_stringl(return_value, (char *)zendtext, zendleng);
|
||||||
}
|
}
|
||||||
if (destroy && Z_TYPE(token) != IS_NULL) {
|
|
||||||
|
if (Z_TYPE(token) != IS_UNDEF) {
|
||||||
zval_dtor(&token);
|
zval_dtor(&token);
|
||||||
|
ZVAL_UNDEF(&token);
|
||||||
}
|
}
|
||||||
ZVAL_NULL(&token);
|
|
||||||
|
|
||||||
/* after T_HALT_COMPILER collect the next three non-dropped tokens */
|
/* after T_HALT_COMPILER collect the next three non-dropped tokens */
|
||||||
if (need_tokens != -1) {
|
if (need_tokens != -1) {
|
||||||
|
@ -294,6 +281,8 @@ PHP_FUNCTION(token_get_all)
|
||||||
success = tokenize_parse(return_value, source);
|
success = tokenize_parse(return_value, source);
|
||||||
} else {
|
} else {
|
||||||
success = tokenize(return_value, source);
|
success = tokenize(return_value, source);
|
||||||
|
/* Normal token_get_all() should never throw. Errors are indicated by T_ERROR tokens. */
|
||||||
|
zend_clear_exception();
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!success) RETURN_FALSE;
|
if (!success) RETURN_FALSE;
|
||||||
|
|
|
@ -162,6 +162,7 @@ void tokenizer_register_constants(INIT_FUNC_ARGS) {
|
||||||
REGISTER_LONG_CONSTANT("T_NS_C", T_NS_C, CONST_CS | CONST_PERSISTENT);
|
REGISTER_LONG_CONSTANT("T_NS_C", T_NS_C, CONST_CS | CONST_PERSISTENT);
|
||||||
REGISTER_LONG_CONSTANT("T_NS_SEPARATOR", T_NS_SEPARATOR, CONST_CS | CONST_PERSISTENT);
|
REGISTER_LONG_CONSTANT("T_NS_SEPARATOR", T_NS_SEPARATOR, CONST_CS | CONST_PERSISTENT);
|
||||||
REGISTER_LONG_CONSTANT("T_ELLIPSIS", T_ELLIPSIS, CONST_CS | CONST_PERSISTENT);
|
REGISTER_LONG_CONSTANT("T_ELLIPSIS", T_ELLIPSIS, CONST_CS | CONST_PERSISTENT);
|
||||||
|
REGISTER_LONG_CONSTANT("T_ERROR", T_ERROR, CONST_CS | CONST_PERSISTENT);
|
||||||
REGISTER_LONG_CONSTANT("T_DOUBLE_COLON", T_PAAMAYIM_NEKUDOTAYIM, CONST_CS | CONST_PERSISTENT);
|
REGISTER_LONG_CONSTANT("T_DOUBLE_COLON", T_PAAMAYIM_NEKUDOTAYIM, CONST_CS | CONST_PERSISTENT);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -302,6 +303,7 @@ char *get_token_type_name(int token_type)
|
||||||
case T_NS_C: return "T_NS_C";
|
case T_NS_C: return "T_NS_C";
|
||||||
case T_NS_SEPARATOR: return "T_NS_SEPARATOR";
|
case T_NS_SEPARATOR: return "T_NS_SEPARATOR";
|
||||||
case T_ELLIPSIS: return "T_ELLIPSIS";
|
case T_ELLIPSIS: return "T_ELLIPSIS";
|
||||||
|
case T_ERROR: return "T_ERROR";
|
||||||
|
|
||||||
}
|
}
|
||||||
return "UNKNOWN";
|
return "UNKNOWN";
|
||||||
|
|
|
@ -46,7 +46,7 @@ echo '/*
|
||||||
|
|
||||||
echo 'void tokenizer_register_constants(INIT_FUNC_ARGS) {' >> $OUTFILE
|
echo 'void tokenizer_register_constants(INIT_FUNC_ARGS) {' >> $OUTFILE
|
||||||
$AWK '
|
$AWK '
|
||||||
/^#define T_ERROR/ { next }
|
/^#define T_NOELSE/ { next }
|
||||||
/^#define T_/ { print " REGISTER_LONG_CONSTANT(\"" $2 "\", " $2 ", CONST_CS | CONST_PERSISTENT);" }
|
/^#define T_/ { print " REGISTER_LONG_CONSTANT(\"" $2 "\", " $2 ", CONST_CS | CONST_PERSISTENT);" }
|
||||||
' < $INFILE >> $OUTFILE
|
' < $INFILE >> $OUTFILE
|
||||||
echo ' REGISTER_LONG_CONSTANT("T_DOUBLE_COLON", T_PAAMAYIM_NEKUDOTAYIM, CONST_CS | CONST_PERSISTENT);' >> $OUTFILE
|
echo ' REGISTER_LONG_CONSTANT("T_DOUBLE_COLON", T_PAAMAYIM_NEKUDOTAYIM, CONST_CS | CONST_PERSISTENT);' >> $OUTFILE
|
||||||
|
@ -64,7 +64,7 @@ $AWK '
|
||||||
print " case T_PAAMAYIM_NEKUDOTAYIM: return \"T_DOUBLE_COLON\";"
|
print " case T_PAAMAYIM_NEKUDOTAYIM: return \"T_DOUBLE_COLON\";"
|
||||||
next
|
next
|
||||||
}
|
}
|
||||||
/^#define T_ERROR/ { next }
|
/^#define T_NOELSE/ { next }
|
||||||
/^#define T_/ {
|
/^#define T_/ {
|
||||||
print " case " $2 ": return \"" $2 "\";"
|
print " case " $2 ": return \"" $2 "\";"
|
||||||
}
|
}
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue