mirror of
https://github.com/php/php-src.git
synced 2025-08-16 05:58:45 +02:00
Fix bug #69430
Don't throw from token_get_all() unless TOKEN_PARSE is used. Errors are reported as T_ERROR tokens.
This commit is contained in:
parent
8abc3022b0
commit
d91aad5966
7 changed files with 415 additions and 371 deletions
3
NEWS
3
NEWS
|
@ -12,6 +12,9 @@
|
|||
- Standard:
|
||||
. Fixed bug #70018 (exec does not strip all whitespace). (Laruence)
|
||||
|
||||
- Tokenizer:
|
||||
. Fixed bug #69430 (token_get_all has new irrecoverable errors). (Nikita)
|
||||
|
||||
09 Jul 2015, PHP 7.0.0 Beta 1
|
||||
|
||||
- Core:
|
||||
|
|
File diff suppressed because it is too large
Load diff
|
@ -1011,6 +1011,7 @@ static int zend_scan_escape_string(zval *zendlval, char *str, int len, char quot
|
|||
zend_throw_exception(zend_ce_parse_error,
|
||||
"Invalid UTF-8 codepoint escape sequence", 0);
|
||||
zval_ptr_dtor(zendlval);
|
||||
ZVAL_UNDEF(zendlval);
|
||||
return FAILURE;
|
||||
}
|
||||
|
||||
|
@ -1022,6 +1023,7 @@ static int zend_scan_escape_string(zval *zendlval, char *str, int len, char quot
|
|||
zend_throw_exception(zend_ce_parse_error,
|
||||
"Invalid UTF-8 codepoint escape sequence: Codepoint too large", 0);
|
||||
zval_ptr_dtor(zendlval);
|
||||
ZVAL_UNDEF(zendlval);
|
||||
return FAILURE;
|
||||
}
|
||||
|
||||
|
@ -1659,6 +1661,7 @@ NEWLINE ("\r"|"\n"|"\r\n")
|
|||
*/
|
||||
if (end != yytext + yyleng) {
|
||||
zend_throw_exception(zend_ce_parse_error, "Invalid numeric literal", 0);
|
||||
ZVAL_UNDEF(zendlval);
|
||||
RETURN_TOKEN(T_ERROR);
|
||||
}
|
||||
} else {
|
||||
|
@ -1676,6 +1679,7 @@ NEWLINE ("\r"|"\n"|"\r\n")
|
|||
if (end != yytext + yyleng) {
|
||||
zend_throw_exception(zend_ce_parse_error,
|
||||
"Invalid numeric literal", 0);
|
||||
ZVAL_UNDEF(zendlval);
|
||||
RETURN_TOKEN(T_ERROR);
|
||||
}
|
||||
ZEND_ASSERT(!errno);
|
||||
|
@ -1684,6 +1688,7 @@ NEWLINE ("\r"|"\n"|"\r\n")
|
|||
/* Also not an assert for the same reason */
|
||||
if (end != yytext + yyleng) {
|
||||
zend_throw_exception(zend_ce_parse_error, "Invalid numeric literal", 0);
|
||||
ZVAL_UNDEF(zendlval);
|
||||
RETURN_TOKEN(T_ERROR);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -7,10 +7,19 @@ Parse errors during token_get_all()
|
|||
|
||||
function test_parse_error($code) {
|
||||
try {
|
||||
var_dump(token_get_all($code));
|
||||
var_dump(token_get_all($code, TOKEN_PARSE));
|
||||
} catch (ParseError $e) {
|
||||
echo $e->getMessage(), "\n";
|
||||
}
|
||||
|
||||
foreach (token_get_all($code) as $token) {
|
||||
if (is_array($token)) {
|
||||
echo token_name($token[0]), " ($token[1])\n";
|
||||
} else {
|
||||
echo "$token\n";
|
||||
}
|
||||
}
|
||||
echo "\n";
|
||||
}
|
||||
|
||||
test_parse_error('<?php var_dump(078);');
|
||||
|
@ -21,6 +30,37 @@ test_parse_error('<?php var_dump(078 + 078);');
|
|||
?>
|
||||
--EXPECT--
|
||||
Invalid numeric literal
|
||||
T_OPEN_TAG (<?php )
|
||||
T_STRING (var_dump)
|
||||
(
|
||||
T_ERROR (078)
|
||||
)
|
||||
;
|
||||
|
||||
Invalid UTF-8 codepoint escape sequence
|
||||
T_OPEN_TAG (<?php )
|
||||
T_STRING (var_dump)
|
||||
(
|
||||
T_ERROR ("\u{xyz}")
|
||||
)
|
||||
;
|
||||
|
||||
Invalid UTF-8 codepoint escape sequence: Codepoint too large
|
||||
T_OPEN_TAG (<?php )
|
||||
T_STRING (var_dump)
|
||||
(
|
||||
T_ERROR ("\u{ffffff}")
|
||||
)
|
||||
;
|
||||
|
||||
Invalid numeric literal
|
||||
T_OPEN_TAG (<?php )
|
||||
T_STRING (var_dump)
|
||||
(
|
||||
T_ERROR (078)
|
||||
T_WHITESPACE ( )
|
||||
+
|
||||
T_WHITESPACE ( )
|
||||
T_ERROR (078)
|
||||
)
|
||||
;
|
||||
|
|
|
@ -28,6 +28,7 @@
|
|||
#include "php_tokenizer.h"
|
||||
|
||||
#include "zend.h"
|
||||
#include "zend_exceptions.h"
|
||||
#include "zend_language_scanner.h"
|
||||
#include "zend_language_scanner_defs.h"
|
||||
#include <zend_language_parser.h>
|
||||
|
@ -112,7 +113,6 @@ static zend_bool tokenize(zval *return_value, zend_string *source)
|
|||
zval token;
|
||||
zval keyword;
|
||||
int token_type;
|
||||
zend_bool destroy;
|
||||
int token_line = 1;
|
||||
int need_tokens = -1; /* for __halt_compiler lexing. -1 = disabled */
|
||||
|
||||
|
@ -127,25 +127,11 @@ static zend_bool tokenize(zval *return_value, zend_string *source)
|
|||
LANG_SCNG(yy_state) = yycINITIAL;
|
||||
array_init(return_value);
|
||||
|
||||
ZVAL_NULL(&token);
|
||||
ZVAL_UNDEF(&token);
|
||||
while ((token_type = lex_scan(&token))) {
|
||||
|
||||
if(token_type == T_ERROR) break;
|
||||
|
||||
destroy = 1;
|
||||
switch (token_type) {
|
||||
case T_CLOSE_TAG:
|
||||
if (zendtext[zendleng - 1] != '>') {
|
||||
if (token_type == T_CLOSE_TAG && zendtext[zendleng - 1] != '>') {
|
||||
CG(zend_lineno)++;
|
||||
}
|
||||
case T_OPEN_TAG:
|
||||
case T_OPEN_TAG_WITH_ECHO:
|
||||
case T_WHITESPACE:
|
||||
case T_COMMENT:
|
||||
case T_DOC_COMMENT:
|
||||
destroy = 0;
|
||||
break;
|
||||
}
|
||||
|
||||
if (token_type >= 256) {
|
||||
array_init(&keyword);
|
||||
|
@ -162,10 +148,11 @@ static zend_bool tokenize(zval *return_value, zend_string *source)
|
|||
} else {
|
||||
add_next_index_stringl(return_value, (char *)zendtext, zendleng);
|
||||
}
|
||||
if (destroy && Z_TYPE(token) != IS_NULL) {
|
||||
|
||||
if (Z_TYPE(token) != IS_UNDEF) {
|
||||
zval_dtor(&token);
|
||||
ZVAL_UNDEF(&token);
|
||||
}
|
||||
ZVAL_NULL(&token);
|
||||
|
||||
/* after T_HALT_COMPILER collect the next three non-dropped tokens */
|
||||
if (need_tokens != -1) {
|
||||
|
@ -294,6 +281,8 @@ PHP_FUNCTION(token_get_all)
|
|||
success = tokenize_parse(return_value, source);
|
||||
} else {
|
||||
success = tokenize(return_value, source);
|
||||
/* Normal token_get_all() should never throw. Errors are indicated by T_ERROR tokens. */
|
||||
zend_clear_exception();
|
||||
}
|
||||
|
||||
if (!success) RETURN_FALSE;
|
||||
|
|
|
@ -162,6 +162,7 @@ void tokenizer_register_constants(INIT_FUNC_ARGS) {
|
|||
REGISTER_LONG_CONSTANT("T_NS_C", T_NS_C, CONST_CS | CONST_PERSISTENT);
|
||||
REGISTER_LONG_CONSTANT("T_NS_SEPARATOR", T_NS_SEPARATOR, CONST_CS | CONST_PERSISTENT);
|
||||
REGISTER_LONG_CONSTANT("T_ELLIPSIS", T_ELLIPSIS, CONST_CS | CONST_PERSISTENT);
|
||||
REGISTER_LONG_CONSTANT("T_ERROR", T_ERROR, CONST_CS | CONST_PERSISTENT);
|
||||
REGISTER_LONG_CONSTANT("T_DOUBLE_COLON", T_PAAMAYIM_NEKUDOTAYIM, CONST_CS | CONST_PERSISTENT);
|
||||
}
|
||||
|
||||
|
@ -302,6 +303,7 @@ char *get_token_type_name(int token_type)
|
|||
case T_NS_C: return "T_NS_C";
|
||||
case T_NS_SEPARATOR: return "T_NS_SEPARATOR";
|
||||
case T_ELLIPSIS: return "T_ELLIPSIS";
|
||||
case T_ERROR: return "T_ERROR";
|
||||
|
||||
}
|
||||
return "UNKNOWN";
|
||||
|
|
|
@ -46,7 +46,7 @@ echo '/*
|
|||
|
||||
echo 'void tokenizer_register_constants(INIT_FUNC_ARGS) {' >> $OUTFILE
|
||||
$AWK '
|
||||
/^#define T_ERROR/ { next }
|
||||
/^#define T_NOELSE/ { next }
|
||||
/^#define T_/ { print " REGISTER_LONG_CONSTANT(\"" $2 "\", " $2 ", CONST_CS | CONST_PERSISTENT);" }
|
||||
' < $INFILE >> $OUTFILE
|
||||
echo ' REGISTER_LONG_CONSTANT("T_DOUBLE_COLON", T_PAAMAYIM_NEKUDOTAYIM, CONST_CS | CONST_PERSISTENT);' >> $OUTFILE
|
||||
|
@ -64,7 +64,7 @@ $AWK '
|
|||
print " case T_PAAMAYIM_NEKUDOTAYIM: return \"T_DOUBLE_COLON\";"
|
||||
next
|
||||
}
|
||||
/^#define T_ERROR/ { next }
|
||||
/^#define T_NOELSE/ { next }
|
||||
/^#define T_/ {
|
||||
print " case " $2 ": return \"" $2 "\";"
|
||||
}
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue