mirror of
https://github.com/php/php-src.git
synced 2025-08-15 21:48:51 +02:00
Syntax errors caused by unclosed {, [, ( mention specific location
Aside from a few very specific syntax errors for which detailed exceptions are thrown, generally PHP just emits the default error messages generated by bison on syntax error. These messages are very uninformative; they just say "Unexpected ... at line ...". This is most problematic with constructs which can span an arbitrary number of lines, such as blocks of code delimited by { }, 'if' conditions delimited by ( ), and so on. If a closing delimiter is missed, the block will run for the entire remainder of the source file (which could be thousands of lines), and then at the end, a parse error will be thrown with the dreaded words: "Unexpected end of file". Therefore, track the positions of opening and closing delimiters and ensure that they match up correctly. If any mismatch or missing delimiter is detected, immediately throw a parse error which points the user to the offending line. This is best done in the *lexer* and not in the parser. Thanks to Nikita Popov and George Peter Banyard for suggesting improvements. Fixes bug #79368. Closes GH-5364.
This commit is contained in:
parent
d4471c6aae
commit
80598f1250
10 changed files with 192 additions and 17 deletions
2
NEWS
2
NEWS
|
@ -18,6 +18,8 @@ PHP NEWS
|
|||
(Nikita)
|
||||
. Fixed bug #79462 (method_exists and property_exists incoherent behavior).
|
||||
(cmb)
|
||||
. Fixed bug #79368 ("Unexpected end of file" is not an acceptable error
|
||||
message). (Alex Dowad)
|
||||
|
||||
- CURL:
|
||||
. Bumped required libcurl version to 7.29.0. (cmb)
|
||||
|
|
|
@ -7,4 +7,4 @@ namespace foo {
|
|||
|
||||
?>
|
||||
--EXPECTF--
|
||||
Parse error: syntax error, unexpected end of file in %s on line %d
|
||||
Parse error: Unclosed '{' on line 2 in %s on line %d
|
||||
|
|
|
@ -12,4 +12,4 @@ EOC
|
|||
|
||||
?>
|
||||
--EXPECTF--
|
||||
Parse error: syntax error, unexpected end of file in %s(%d) : eval()'d code on line %d
|
||||
Parse error: Unclosed '{' in %s(%d) : eval()'d code on line %d
|
||||
|
|
|
@ -43,8 +43,8 @@ var_dump("\u{ffffff}");');
|
|||
?>
|
||||
--EXPECT--
|
||||
Deprecated: Directive 'allow_url_include' is deprecated in Unknown on line 0
|
||||
syntax error, unexpected end of file on line 2
|
||||
syntax error, unexpected end of file on line 3
|
||||
Unclosed '{' on line 2
|
||||
Unclosed '{' on line 3
|
||||
syntax error, unexpected end of file, expecting '(' on line 2
|
||||
Invalid numeric literal on line 2
|
||||
Invalid UTF-8 codepoint escape sequence on line 2
|
||||
|
|
|
@ -285,6 +285,7 @@ struct _zend_php_scanner_globals {
|
|||
int yy_state;
|
||||
zend_stack state_stack;
|
||||
zend_ptr_stack heredoc_label_stack;
|
||||
zend_stack nest_location_stack; /* for syntax error reporting */
|
||||
zend_bool heredoc_scan_ahead;
|
||||
int heredoc_indentation;
|
||||
zend_bool heredoc_indentation_uses_spaces;
|
||||
|
|
|
@ -30,6 +30,7 @@ typedef struct _zend_lex_state {
|
|||
int yy_state;
|
||||
zend_stack state_stack;
|
||||
zend_ptr_stack heredoc_label_stack;
|
||||
zend_stack nest_location_stack; /* for syntax error reporting */
|
||||
|
||||
zend_file_handle *in;
|
||||
uint32_t lineno;
|
||||
|
@ -63,6 +64,12 @@ typedef struct _zend_heredoc_label {
|
|||
zend_bool indentation_uses_spaces;
|
||||
} zend_heredoc_label;
|
||||
|
||||
/* Track locations of unclosed {, [, (, etc. for better syntax error reporting */
|
||||
typedef struct _zend_nest_location {
|
||||
char text;
|
||||
int lineno;
|
||||
} zend_nest_location;
|
||||
|
||||
BEGIN_EXTERN_C()
|
||||
ZEND_API void zend_save_lexical_state(zend_lex_state *lex_state);
|
||||
ZEND_API void zend_restore_lexical_state(zend_lex_state *lex_state);
|
||||
|
|
|
@ -192,6 +192,7 @@ void startup_scanner(void)
|
|||
CG(doc_comment) = NULL;
|
||||
CG(extra_fn_flags) = 0;
|
||||
zend_stack_init(&SCNG(state_stack), sizeof(int));
|
||||
zend_stack_init(&SCNG(nest_location_stack), sizeof(zend_nest_location));
|
||||
zend_ptr_stack_init(&SCNG(heredoc_label_stack));
|
||||
SCNG(heredoc_scan_ahead) = 0;
|
||||
}
|
||||
|
@ -205,6 +206,7 @@ void shutdown_scanner(void)
|
|||
CG(parse_error) = 0;
|
||||
RESET_DOC_COMMENT();
|
||||
zend_stack_destroy(&SCNG(state_stack));
|
||||
zend_stack_destroy(&SCNG(nest_location_stack));
|
||||
zend_ptr_stack_clean(&SCNG(heredoc_label_stack), (void (*)(void *)) &heredoc_label_dtor, 1);
|
||||
zend_ptr_stack_destroy(&SCNG(heredoc_label_stack));
|
||||
SCNG(heredoc_scan_ahead) = 0;
|
||||
|
@ -223,6 +225,9 @@ ZEND_API void zend_save_lexical_state(zend_lex_state *lex_state)
|
|||
lex_state->state_stack = SCNG(state_stack);
|
||||
zend_stack_init(&SCNG(state_stack), sizeof(int));
|
||||
|
||||
lex_state->nest_location_stack = SCNG(nest_location_stack);
|
||||
zend_stack_init(&SCNG(nest_location_stack), sizeof(zend_nest_location));
|
||||
|
||||
lex_state->heredoc_label_stack = SCNG(heredoc_label_stack);
|
||||
zend_ptr_stack_init(&SCNG(heredoc_label_stack));
|
||||
|
||||
|
@ -258,6 +263,9 @@ ZEND_API void zend_restore_lexical_state(zend_lex_state *lex_state)
|
|||
zend_stack_destroy(&SCNG(state_stack));
|
||||
SCNG(state_stack) = lex_state->state_stack;
|
||||
|
||||
zend_stack_destroy(&SCNG(nest_location_stack));
|
||||
SCNG(nest_location_stack) = lex_state->nest_location_stack;
|
||||
|
||||
zend_ptr_stack_clean(&SCNG(heredoc_label_stack), (void (*)(void *)) &heredoc_label_dtor, 1);
|
||||
zend_ptr_stack_destroy(&SCNG(heredoc_label_stack));
|
||||
SCNG(heredoc_label_stack) = lex_state->heredoc_label_stack;
|
||||
|
@ -1250,6 +1258,63 @@ static void copy_heredoc_label_stack(void *void_heredoc_label)
|
|||
zend_ptr_stack_push(&SCNG(heredoc_label_stack), (void *) new_heredoc_label);
|
||||
}
|
||||
|
||||
/* Check that { }, [ ], ( ) are nested correctly */
|
||||
static void report_bad_nesting(char opening, int opening_lineno, char closing)
|
||||
{
|
||||
char buf[256];
|
||||
size_t used = 0;
|
||||
|
||||
used = snprintf(buf, sizeof(buf), "Unclosed '%c'", opening);
|
||||
|
||||
if (opening_lineno != CG(zend_lineno)) {
|
||||
used += snprintf(buf + used, sizeof(buf) - used, " on line %d", opening_lineno);
|
||||
}
|
||||
|
||||
if (closing) { /* 'closing' will be 0 if at end of file */
|
||||
used += snprintf(buf + used, sizeof(buf) - used, " does not match '%c'", closing);
|
||||
}
|
||||
|
||||
zend_throw_exception(zend_ce_parse_error, buf, 0);
|
||||
}
|
||||
|
||||
static void enter_nesting(char opening)
|
||||
{
|
||||
zend_nest_location nest_loc = {opening, CG(zend_lineno)};
|
||||
zend_stack_push(&SCNG(nest_location_stack), &nest_loc);
|
||||
}
|
||||
|
||||
static int exit_nesting(char closing)
|
||||
{
|
||||
if (zend_stack_is_empty(&SCNG(nest_location_stack))) {
|
||||
zend_throw_exception_ex(zend_ce_parse_error, 0, "Unmatched '%c'", closing);
|
||||
return -1;
|
||||
}
|
||||
|
||||
zend_nest_location *nest_loc = zend_stack_top(&SCNG(nest_location_stack));
|
||||
char opening = nest_loc->text;
|
||||
|
||||
if ((opening == '{' && closing != '}') ||
|
||||
(opening == '[' && closing != ']') ||
|
||||
(opening == '(' && closing != ')')) {
|
||||
report_bad_nesting(opening, nest_loc->lineno, closing);
|
||||
return -1;
|
||||
}
|
||||
|
||||
zend_stack_del_top(&SCNG(nest_location_stack));
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int check_nesting_at_end()
|
||||
{
|
||||
if (!zend_stack_is_empty(&SCNG(nest_location_stack))) {
|
||||
zend_nest_location *nest_loc = zend_stack_top(&SCNG(nest_location_stack));
|
||||
report_bad_nesting(nest_loc->text, nest_loc->lineno, 0);
|
||||
return -1;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
#define PARSER_MODE() \
|
||||
EXPECTED(elem != NULL)
|
||||
|
||||
|
@ -1277,6 +1342,22 @@ static void copy_heredoc_label_stack(void *void_heredoc_label)
|
|||
goto emit_token; \
|
||||
} while (0)
|
||||
|
||||
#define RETURN_EXIT_NESTING_TOKEN(_token) do { \
|
||||
if (exit_nesting(_token) && PARSER_MODE()) { \
|
||||
RETURN_TOKEN(T_ERROR); \
|
||||
} else { \
|
||||
RETURN_TOKEN(_token); \
|
||||
} \
|
||||
} while(0)
|
||||
|
||||
#define RETURN_END_TOKEN do { \
|
||||
if (check_nesting_at_end() && PARSER_MODE()) { \
|
||||
RETURN_TOKEN(T_ERROR); \
|
||||
} else { \
|
||||
RETURN_TOKEN(END); \
|
||||
} \
|
||||
} while (0)
|
||||
|
||||
int ZEND_FASTCALL lex_scan(zval *zendlval, zend_parser_stack_elem *elem)
|
||||
{
|
||||
int token;
|
||||
|
@ -1297,7 +1378,7 @@ BNUM "0b"[01]+(_[01]+)*
|
|||
LABEL [a-zA-Z_\x80-\xff][a-zA-Z0-9_\x80-\xff]*
|
||||
WHITESPACE [ \n\r\t]+
|
||||
TABS_AND_SPACES [ \t]*
|
||||
TOKENS [;:,.\[\]()|^&+-/*=%!~$<>?@]
|
||||
TOKENS [;:,.|^&+-/*=%!~$<>?@]
|
||||
ANY_CHAR [^]
|
||||
NEWLINE ("\r"|"\n"|"\r\n")
|
||||
|
||||
|
@ -1770,6 +1851,16 @@ NEWLINE ("\r"|"\n"|"\r\n")
|
|||
RETURN_TOKEN(T_SR);
|
||||
}
|
||||
|
||||
<ST_IN_SCRIPTING>"]"|")" {
|
||||
/* Check that ] and ) match up properly with a preceding [ or ( */
|
||||
RETURN_EXIT_NESTING_TOKEN(yytext[0]);
|
||||
}
|
||||
|
||||
<ST_IN_SCRIPTING>"["|"(" {
|
||||
enter_nesting(yytext[0]);
|
||||
RETURN_TOKEN(yytext[0]);
|
||||
}
|
||||
|
||||
<ST_IN_SCRIPTING>{TOKENS} {
|
||||
RETURN_TOKEN(yytext[0]);
|
||||
}
|
||||
|
@ -1777,22 +1868,23 @@ NEWLINE ("\r"|"\n"|"\r\n")
|
|||
|
||||
<ST_IN_SCRIPTING>"{" {
|
||||
yy_push_state(ST_IN_SCRIPTING);
|
||||
enter_nesting('{');
|
||||
RETURN_TOKEN('{');
|
||||
}
|
||||
|
||||
|
||||
<ST_DOUBLE_QUOTES,ST_BACKQUOTE,ST_HEREDOC>"${" {
|
||||
yy_push_state(ST_LOOKING_FOR_VARNAME);
|
||||
enter_nesting('{');
|
||||
RETURN_TOKEN(T_DOLLAR_OPEN_CURLY_BRACES);
|
||||
}
|
||||
|
||||
|
||||
<ST_IN_SCRIPTING>"}" {
|
||||
RESET_DOC_COMMENT();
|
||||
if (!zend_stack_is_empty(&SCNG(state_stack))) {
|
||||
yy_pop_state();
|
||||
}
|
||||
RETURN_TOKEN('}');
|
||||
RETURN_EXIT_NESTING_TOKEN('}');
|
||||
}
|
||||
|
||||
|
||||
|
@ -2088,7 +2180,7 @@ string:
|
|||
|
||||
<INITIAL>{ANY_CHAR} {
|
||||
if (YYCURSOR > YYLIMIT) {
|
||||
RETURN_TOKEN(END);
|
||||
RETURN_END_TOKEN;
|
||||
}
|
||||
|
||||
inline_char_handler:
|
||||
|
@ -2165,7 +2257,7 @@ inline_char_handler:
|
|||
RETURN_TOKEN(']');
|
||||
}
|
||||
|
||||
<ST_VAR_OFFSET>{TOKENS}|[{}"`] {
|
||||
<ST_VAR_OFFSET>{TOKENS}|[[(){}"`] {
|
||||
/* Only '[' or '-' can be valid, but returning other tokens will allow a more explicit parse error */
|
||||
RETURN_TOKEN(yytext[0]);
|
||||
}
|
||||
|
@ -2569,6 +2661,7 @@ skip_escape_conversion:
|
|||
<ST_DOUBLE_QUOTES,ST_BACKQUOTE,ST_HEREDOC>"{$" {
|
||||
yy_push_state(ST_IN_SCRIPTING);
|
||||
yyless(1);
|
||||
enter_nesting('{');
|
||||
RETURN_TOKEN(T_CURLY_OPEN);
|
||||
}
|
||||
|
||||
|
@ -2593,7 +2686,7 @@ skip_escape_conversion:
|
|||
}
|
||||
|
||||
if (YYCURSOR > YYLIMIT) {
|
||||
RETURN_TOKEN(END);
|
||||
RETURN_END_TOKEN;
|
||||
}
|
||||
if (yytext[0] == '\\' && YYCURSOR < YYLIMIT) {
|
||||
YYCURSOR++;
|
||||
|
@ -2640,7 +2733,7 @@ double_quotes_scan_done:
|
|||
|
||||
<ST_BACKQUOTE>{ANY_CHAR} {
|
||||
if (YYCURSOR > YYLIMIT) {
|
||||
RETURN_TOKEN(END);
|
||||
RETURN_END_TOKEN;
|
||||
}
|
||||
if (yytext[0] == '\\' && YYCURSOR < YYLIMIT) {
|
||||
YYCURSOR++;
|
||||
|
@ -2689,7 +2782,7 @@ double_quotes_scan_done:
|
|||
int newline = 0, indentation = 0, spacing = 0;
|
||||
|
||||
if (YYCURSOR > YYLIMIT) {
|
||||
RETURN_TOKEN(END);
|
||||
RETURN_END_TOKEN;
|
||||
}
|
||||
|
||||
YYCURSOR--;
|
||||
|
@ -2813,7 +2906,7 @@ heredoc_scan_done:
|
|||
int newline = 0, indentation = 0, spacing = -1;
|
||||
|
||||
if (YYCURSOR > YYLIMIT) {
|
||||
RETURN_TOKEN(END);
|
||||
RETURN_END_TOKEN;
|
||||
}
|
||||
|
||||
YYCURSOR--;
|
||||
|
@ -2901,7 +2994,7 @@ nowdoc_scan_done:
|
|||
|
||||
<ST_IN_SCRIPTING,ST_VAR_OFFSET>{ANY_CHAR} {
|
||||
if (YYCURSOR > YYLIMIT) {
|
||||
RETURN_TOKEN(END);
|
||||
RETURN_END_TOKEN;
|
||||
}
|
||||
|
||||
RETURN_TOKEN(T_BAD_CHARACTER);
|
||||
|
|
|
@ -32,6 +32,7 @@ try {
|
|||
} catch(Exception $e) {
|
||||
echo "caught exception:";
|
||||
}
|
||||
}
|
||||
?>';
|
||||
$tokens = token_get_all($source);
|
||||
var_dump($tokens);
|
||||
|
@ -40,7 +41,7 @@ echo "Done"
|
|||
?>
|
||||
--EXPECTF--
|
||||
*** Testing token_get_all() : with exception keywords ***
|
||||
array(81) {
|
||||
array(83) {
|
||||
[0]=>
|
||||
array(3) {
|
||||
[0]=>
|
||||
|
@ -601,13 +602,25 @@ array(81) {
|
|||
int(14)
|
||||
}
|
||||
[80]=>
|
||||
string(1) "}"
|
||||
[81]=>
|
||||
array(3) {
|
||||
[0]=>
|
||||
int(%d)
|
||||
[1]=>
|
||||
string(1) "
|
||||
"
|
||||
[2]=>
|
||||
int(15)
|
||||
}
|
||||
[82]=>
|
||||
array(3) {
|
||||
[0]=>
|
||||
int(%d)
|
||||
[1]=>
|
||||
string(2) "?>"
|
||||
[2]=>
|
||||
int(15)
|
||||
int(16)
|
||||
}
|
||||
}
|
||||
Done
|
||||
|
|
|
@ -392,6 +392,8 @@ static zend_bool tokenize(zval *return_value, zend_string *source, zend_class_en
|
|||
array_init(return_value);
|
||||
|
||||
while ((token_type = lex_scan(&token, NULL))) {
|
||||
ZEND_ASSERT(token_type != T_ERROR);
|
||||
|
||||
add_token(
|
||||
return_value, token_type, zendtext, zendleng, token_line,
|
||||
token_class, &interned_strings);
|
||||
|
@ -408,7 +410,7 @@ static zend_bool tokenize(zval *return_value, zend_string *source, zend_class_en
|
|||
&& --need_tokens == 0
|
||||
) {
|
||||
/* fetch the rest into a T_INLINE_HTML */
|
||||
if (zendcursor != zendlimit) {
|
||||
if (zendcursor < zendlimit) {
|
||||
add_token(
|
||||
return_value, T_INLINE_HTML, zendcursor, zendlimit - zendcursor,
|
||||
token_line, token_class, &interned_strings);
|
||||
|
|
57
tests/lang/syntax_errors.phpt
Normal file
57
tests/lang/syntax_errors.phpt
Normal file
|
@ -0,0 +1,57 @@
|
|||
--TEST--
|
||||
Detailed reporting on specific types of syntax errors
|
||||
--FILE--
|
||||
<?
|
||||
$badCode = [
|
||||
"if(1 > 2", /* unclosed ( */
|
||||
"[1, 2,", /* unclosed [ */
|
||||
"if(1) { echo 'hello'; ", /* unclosed { */
|
||||
"(1 + 2));", /* too many ) */
|
||||
"[1, 2]]", /* too many ] */
|
||||
"if (1) { } }", /* too many } */
|
||||
"(1 + 2];", /* ] doesn't match ( */
|
||||
"[1, 2)];", /* ) doesn't match [ */
|
||||
"if(1) { echo 'a'; )}", /* ) doesn't match { */
|
||||
/* separately test cases where the faulty construct spans multiple lines,
|
||||
since the error message should refer to the starting line in those cases */
|
||||
"if(1 > 2) {\n echo '1';", /* unclosed (, spans multiple lines */
|
||||
"[1,\n2,\n3,", /* unclosed [, spans multiple lines */
|
||||
"{\n echo '1';\n echo '2';", /* unclosed {, spans multiple lines */
|
||||
"(1 +\n 2 +\n 3))", /* too many ), spans multiple lines */
|
||||
"[1,\n2,\n3]];", /* too many ], spans multiple lines */
|
||||
"if (1)\n {\n }}", /* too many }, spans multiple lines */
|
||||
"(1 +\n\n 2])", /* ] doesn't match (, spans multiple lines */
|
||||
"[1,\n2,\n3)]", /* ) doesn't match [, spans multiple lines */
|
||||
"if(1) {\n echo 'a';\n)}", /* ) doesn't match {, spans multiple lines */
|
||||
];
|
||||
|
||||
foreach ($badCode as $code) {
|
||||
try {
|
||||
eval($code);
|
||||
} catch (ParseError $e) {
|
||||
echo $e->getMessage(), "\n";
|
||||
}
|
||||
}
|
||||
|
||||
echo "==DONE==\n";
|
||||
?>
|
||||
--EXPECT--
|
||||
Unclosed '('
|
||||
Unclosed '['
|
||||
Unclosed '{'
|
||||
Unmatched ')'
|
||||
Unmatched ']'
|
||||
Unmatched '}'
|
||||
Unclosed '(' does not match ']'
|
||||
Unclosed '[' does not match ')'
|
||||
Unclosed '{' does not match ')'
|
||||
Unclosed '{' on line 1
|
||||
Unclosed '[' on line 1
|
||||
Unclosed '{' on line 1
|
||||
Unmatched ')'
|
||||
Unmatched ']'
|
||||
Unmatched '}'
|
||||
Unclosed '(' on line 1 does not match ']'
|
||||
Unclosed '[' on line 1 does not match ')'
|
||||
Unclosed '{' on line 1 does not match ')'
|
||||
==DONE==
|
Loading…
Add table
Add a link
Reference in a new issue