Don't throw from token_get_all() unless TOKEN_PARSE is used. Errors
are reported as T_ERROR tokens.
This commit is contained in:
Nikita Popov 2015-07-09 19:10:06 +02:00
parent 8abc3022b0
commit d91aad5966
7 changed files with 415 additions and 371 deletions

3
NEWS
View file

@ -12,6 +12,9 @@
- Standard:
. Fixed bug #70018 (exec does not strip all whitespace). (Laruence)
- Tokenizer:
. Fixed bug #69430 (token_get_all has new irrecoverable errors). (Nikita)
09 Jul 2015, PHP 7.0.0 Beta 1
- Core:

File diff suppressed because it is too large Load diff

View file

@ -1011,6 +1011,7 @@ static int zend_scan_escape_string(zval *zendlval, char *str, int len, char quot
zend_throw_exception(zend_ce_parse_error,
"Invalid UTF-8 codepoint escape sequence", 0);
zval_ptr_dtor(zendlval);
ZVAL_UNDEF(zendlval);
return FAILURE;
}
@ -1022,6 +1023,7 @@ static int zend_scan_escape_string(zval *zendlval, char *str, int len, char quot
zend_throw_exception(zend_ce_parse_error,
"Invalid UTF-8 codepoint escape sequence: Codepoint too large", 0);
zval_ptr_dtor(zendlval);
ZVAL_UNDEF(zendlval);
return FAILURE;
}
@ -1659,6 +1661,7 @@ NEWLINE ("\r"|"\n"|"\r\n")
*/
if (end != yytext + yyleng) {
zend_throw_exception(zend_ce_parse_error, "Invalid numeric literal", 0);
ZVAL_UNDEF(zendlval);
RETURN_TOKEN(T_ERROR);
}
} else {
@ -1676,6 +1679,7 @@ NEWLINE ("\r"|"\n"|"\r\n")
if (end != yytext + yyleng) {
zend_throw_exception(zend_ce_parse_error,
"Invalid numeric literal", 0);
ZVAL_UNDEF(zendlval);
RETURN_TOKEN(T_ERROR);
}
ZEND_ASSERT(!errno);
@ -1684,6 +1688,7 @@ NEWLINE ("\r"|"\n"|"\r\n")
/* Also not an assert for the same reason */
if (end != yytext + yyleng) {
zend_throw_exception(zend_ce_parse_error, "Invalid numeric literal", 0);
ZVAL_UNDEF(zendlval);
RETURN_TOKEN(T_ERROR);
}
}

View file

@ -7,10 +7,19 @@ Parse errors during token_get_all()
function test_parse_error($code) {
try {
var_dump(token_get_all($code));
var_dump(token_get_all($code, TOKEN_PARSE));
} catch (ParseError $e) {
echo $e->getMessage(), "\n";
}
foreach (token_get_all($code) as $token) {
if (is_array($token)) {
echo token_name($token[0]), " ($token[1])\n";
} else {
echo "$token\n";
}
}
echo "\n";
}
test_parse_error('<?php var_dump(078);');
@ -21,6 +30,37 @@ test_parse_error('<?php var_dump(078 + 078);');
?>
--EXPECT--
Invalid numeric literal
T_OPEN_TAG (<?php )
T_STRING (var_dump)
(
T_ERROR (078)
)
;
Invalid UTF-8 codepoint escape sequence
T_OPEN_TAG (<?php )
T_STRING (var_dump)
(
T_ERROR ("\u{xyz}")
)
;
Invalid UTF-8 codepoint escape sequence: Codepoint too large
T_OPEN_TAG (<?php )
T_STRING (var_dump)
(
T_ERROR ("\u{ffffff}")
)
;
Invalid numeric literal
T_OPEN_TAG (<?php )
T_STRING (var_dump)
(
T_ERROR (078)
T_WHITESPACE ( )
+
T_WHITESPACE ( )
T_ERROR (078)
)
;

View file

@ -28,6 +28,7 @@
#include "php_tokenizer.h"
#include "zend.h"
#include "zend_exceptions.h"
#include "zend_language_scanner.h"
#include "zend_language_scanner_defs.h"
#include <zend_language_parser.h>
@ -112,7 +113,6 @@ static zend_bool tokenize(zval *return_value, zend_string *source)
zval token;
zval keyword;
int token_type;
zend_bool destroy;
int token_line = 1;
int need_tokens = -1; /* for __halt_compiler lexing. -1 = disabled */
@ -127,25 +127,11 @@ static zend_bool tokenize(zval *return_value, zend_string *source)
LANG_SCNG(yy_state) = yycINITIAL;
array_init(return_value);
ZVAL_NULL(&token);
ZVAL_UNDEF(&token);
while ((token_type = lex_scan(&token))) {
if(token_type == T_ERROR) break;
destroy = 1;
switch (token_type) {
case T_CLOSE_TAG:
if (zendtext[zendleng - 1] != '>') {
if (token_type == T_CLOSE_TAG && zendtext[zendleng - 1] != '>') {
CG(zend_lineno)++;
}
case T_OPEN_TAG:
case T_OPEN_TAG_WITH_ECHO:
case T_WHITESPACE:
case T_COMMENT:
case T_DOC_COMMENT:
destroy = 0;
break;
}
if (token_type >= 256) {
array_init(&keyword);
@ -162,10 +148,11 @@ static zend_bool tokenize(zval *return_value, zend_string *source)
} else {
add_next_index_stringl(return_value, (char *)zendtext, zendleng);
}
if (destroy && Z_TYPE(token) != IS_NULL) {
if (Z_TYPE(token) != IS_UNDEF) {
zval_dtor(&token);
ZVAL_UNDEF(&token);
}
ZVAL_NULL(&token);
/* after T_HALT_COMPILER collect the next three non-dropped tokens */
if (need_tokens != -1) {
@ -294,6 +281,8 @@ PHP_FUNCTION(token_get_all)
success = tokenize_parse(return_value, source);
} else {
success = tokenize(return_value, source);
/* Normal token_get_all() should never throw. Errors are indicated by T_ERROR tokens. */
zend_clear_exception();
}
if (!success) RETURN_FALSE;

View file

@ -162,6 +162,7 @@ void tokenizer_register_constants(INIT_FUNC_ARGS) {
REGISTER_LONG_CONSTANT("T_NS_C", T_NS_C, CONST_CS | CONST_PERSISTENT);
REGISTER_LONG_CONSTANT("T_NS_SEPARATOR", T_NS_SEPARATOR, CONST_CS | CONST_PERSISTENT);
REGISTER_LONG_CONSTANT("T_ELLIPSIS", T_ELLIPSIS, CONST_CS | CONST_PERSISTENT);
REGISTER_LONG_CONSTANT("T_ERROR", T_ERROR, CONST_CS | CONST_PERSISTENT);
REGISTER_LONG_CONSTANT("T_DOUBLE_COLON", T_PAAMAYIM_NEKUDOTAYIM, CONST_CS | CONST_PERSISTENT);
}
@ -302,6 +303,7 @@ char *get_token_type_name(int token_type)
case T_NS_C: return "T_NS_C";
case T_NS_SEPARATOR: return "T_NS_SEPARATOR";
case T_ELLIPSIS: return "T_ELLIPSIS";
case T_ERROR: return "T_ERROR";
}
return "UNKNOWN";

View file

@ -46,7 +46,7 @@ echo '/*
echo 'void tokenizer_register_constants(INIT_FUNC_ARGS) {' >> $OUTFILE
$AWK '
/^#define T_ERROR/ { next }
/^#define T_NOELSE/ { next }
/^#define T_/ { print " REGISTER_LONG_CONSTANT(\"" $2 "\", " $2 ", CONST_CS | CONST_PERSISTENT);" }
' < $INFILE >> $OUTFILE
echo ' REGISTER_LONG_CONSTANT("T_DOUBLE_COLON", T_PAAMAYIM_NEKUDOTAYIM, CONST_CS | CONST_PERSISTENT);' >> $OUTFILE
@ -64,7 +64,7 @@ $AWK '
print " case T_PAAMAYIM_NEKUDOTAYIM: return \"T_DOUBLE_COLON\";"
next
}
/^#define T_ERROR/ { next }
/^#define T_NOELSE/ { next }
/^#define T_/ {
print " case " $2 ": return \"" $2 "\";"
}