- Improved parse error messages

This commit is contained in:
Felipe Pena 2011-06-23 23:00:53 +00:00
parent f9f298cc0b
commit 0372e6ad80
9 changed files with 542 additions and 414 deletions

1
NEWS
View file

@ -91,6 +91,7 @@ PHP NEWS
. Changed $GLOBALS into a JIT autoglobal, so it's initialized only if used.
(this may affect opcode caches!)
. Improved performance of @ (silence) operator.
. Improved parse error messages. (Felipe)
. Simplified string offset reading. $str[1][0] is now a legal construct.
. Added caches to eliminate repeatable run-time bindings of functions,
classes, constants, methods and properties.

View file

@ -20,7 +20,7 @@ var_dump(
--EXPECTF--
Parse error: syntax error, unexpected '}' in %s(%d) : runtime-created function on line 1
Parse error: syntax error, unexpected $end in %s(%d) : runtime-created function on line 1
Parse error: syntax error, unexpected end of file in %s(%d) : runtime-created function on line 1
bool(false)
int(2)
bool(false)

View file

@ -5,5 +5,5 @@ Argument parsing error #001
function foo($arg1 string) {}
?>
--EXPECTF--
Parse error: syntax error, unexpected T_STRING, expecting ')' in %sfunction_arguments_001.php on line %d
Parse error: syntax error, unexpected 'string' (T_STRING), expecting ')' in %sfunction_arguments_001.php on line %d

View file

@ -101,6 +101,7 @@ struct _zend_compiler_globals {
HashTable *auto_globals;
zend_bool parse_error;
zend_bool in_compilation;
zend_bool short_tags;
zend_bool asp_tags;

View file

@ -35,6 +35,9 @@
#include "zend_API.h"
#include "zend_constants.h"
/* Size type used by the generated parser and by zend_yytnamerr()'s
   return value. */
#define YYSIZE_T size_t
/* Rename yytnamerr so that references to it resolve to the custom
   zend_yytnamerr() defined in the epilogue of this grammar file. */
#define yytnamerr zend_yytnamerr
/* Forward declaration: the definition lives after the rules section. */
static YYSIZE_T zend_yytnamerr(char*, const char*);
/* NOTE(review): presumably enables Bison's verbose syntax-error messages,
   which are the ones that go through yytnamerr - confirm against the Bison
   version in use. */
#define YYERROR_VERBOSE
#define YYSTYPE znode
@ -49,107 +52,160 @@
%pure_parser
%expect 2
%token END 0 "end of file"
%left T_INCLUDE T_INCLUDE_ONCE T_EVAL T_REQUIRE T_REQUIRE_ONCE
%token T_INCLUDE "include (T_INCLUDE)"
%token T_INCLUDE_ONCE "include_once (T_INCLUDE_ONCE)"
%token T_EVAL "eval (T_EVAL)"
%token T_REQUIRE "require (T_REQUIRE)"
%token T_REQUIRE_ONCE "require_once (T_REQUIRE_ONCE)"
%left ','
%left T_LOGICAL_OR
%token T_LOGICAL_OR "or (T_LOGICAL_OR)"
%left T_LOGICAL_XOR
%token T_LOGICAL_XOR "xor (T_LOGICAL_XOR)"
%left T_LOGICAL_AND
%token T_LOGICAL_AND "and (T_LOGICAL_AND)"
%right T_PRINT
%token T_PRINT "print (T_PRINT)"
%left '=' T_PLUS_EQUAL T_MINUS_EQUAL T_MUL_EQUAL T_DIV_EQUAL T_CONCAT_EQUAL T_MOD_EQUAL T_AND_EQUAL T_OR_EQUAL T_XOR_EQUAL T_SL_EQUAL T_SR_EQUAL
%token T_PLUS_EQUAL "+= (T_PLUS_EQUAL)"
%token T_MINUS_EQUAL "-= (T_MINUS_EQUAL)"
%token T_MUL_EQUAL "*= (T_MUL_EQUAL)"
%token T_DIV_EQUAL "/= (T_DIV_EQUAL)"
%token T_CONCAT_EQUAL ".= (T_CONCAT_EQUAL)"
%token T_MOD_EQUAL "%= (T_MOD_EQUAL)"
%token T_AND_EQUAL "&= (T_AND_EQUAL)"
%token T_OR_EQUAL "|= (T_OR_EQUAL)"
%token T_XOR_EQUAL "^= (T_XOR_EQUAL)"
%token T_SL_EQUAL "<<= (T_SL_EQUAL)"
%token T_SR_EQUAL ">>= (T_SR_EQUAL)"
%left '?' ':'
%left T_BOOLEAN_OR
%token T_BOOLEAN_OR "|| (T_BOOLEAN_OR)"
%left T_BOOLEAN_AND
%token T_BOOLEAN_AND "&& (T_BOOLEAN_AND)"
%left '|'
%left '^'
%left '&'
%nonassoc T_IS_EQUAL T_IS_NOT_EQUAL T_IS_IDENTICAL T_IS_NOT_IDENTICAL
%token T_IS_EQUAL "== (T_IS_EQUAL)"
%token T_IS_NOT_EQUAL "!= (T_IS_NOT_EQUAL)"
%token T_IS_IDENTICAL "=== (T_IS_IDENTICAL)"
%token T_IS_NOT_IDENTICAL "!== (T_IS_NOT_IDENTICAL)"
%nonassoc '<' T_IS_SMALLER_OR_EQUAL '>' T_IS_GREATER_OR_EQUAL
%token T_IS_SMALLER_OR_EQUAL "<= (T_IS_SMALLER_OR_EQUAL)"
%token T_IS_GREATER_OR_EQUAL ">= (T_IS_GREATER_OR_EQUAL)"
%left T_SL T_SR
%token T_SL "<< (T_SL)"
%token T_SR ">> (T_SR)"
%left '+' '-' '.'
%left '*' '/' '%'
%right '!'
%nonassoc T_INSTANCEOF
%token T_INSTANCEOF "instanceof (T_INSTANCEOF)"
%right '~' T_INC T_DEC T_INT_CAST T_DOUBLE_CAST T_STRING_CAST T_ARRAY_CAST T_OBJECT_CAST T_BOOL_CAST T_UNSET_CAST '@'
%token T_INC "++ (T_INC)"
%token T_DEC "-- (T_DEC)"
%token T_INT_CAST "(int) (T_INT_CAST)"
%token T_SCALAR_CAST "(scalar) (T_SCALAR_CAST)"
%token T_DOUBLE_CAST "(double) (T_DOUBLE_CAST)"
%token T_STRING_CAST "(string) (T_STRING_CAST)"
%token T_ARRAY_CAST "(array) (T_ARRAY_CAST)"
%token T_OBJECT_CAST "(object) (T_OBJECT_CAST)"
%token T_BOOL_CAST "(bool) (T_BOOL_CAST)"
%token T_UNSET_CAST "(unset) (T_UNSET_CAST)"
%right '['
%nonassoc T_NEW T_CLONE
%token T_EXIT
%token T_IF
%token T_NEW "new (T_NEW)"
%token T_CLONE "clone (T_CLONE)"
%token T_EXIT "exit (T_EXIT)"
%token T_IF "if (T_IF)"
%left T_ELSEIF
%token T_ELSEIF "elseif (T_ELSEIF)"
%left T_ELSE
%token T_ELSE "else (T_ELSE)"
%left T_ENDIF
%token T_LNUMBER
%token T_DNUMBER
%token T_STRING
%token T_STRING_VARNAME
%token T_VARIABLE
%token T_NUM_STRING
%token T_ENDIF "endif (T_ENDIF)"
%token T_LNUMBER "integer number (T_LNUMBER)"
%token T_DNUMBER "floating-point number (T_DNUMBER)"
%token T_STRING "identifier (T_STRING)"
%token T_STRING_VARNAME "variable name (T_STRING_VARNAME)"
%token T_VARIABLE "variable (T_VARIABLE)"
%token T_NUM_STRING "number (T_NUM_STRING)"
%token T_INLINE_HTML
%token T_CHARACTER
%token T_BAD_CHARACTER
%token T_ENCAPSED_AND_WHITESPACE
%token T_CONSTANT_ENCAPSED_STRING
%token T_ECHO
%token T_DO
%token T_WHILE
%token T_ENDWHILE
%token T_FOR
%token T_ENDFOR
%token T_FOREACH
%token T_ENDFOREACH
%token T_DECLARE
%token T_ENDDECLARE
%token T_AS
%token T_SWITCH
%token T_ENDSWITCH
%token T_CASE
%token T_DEFAULT
%token T_BREAK
%token T_CONTINUE
%token T_GOTO
%token T_FUNCTION
%token T_CONST
%token T_RETURN
%token T_TRY
%token T_CATCH
%token T_THROW
%token T_USE
%token T_INSTEADOF
%token T_GLOBAL
%token T_ENCAPSED_AND_WHITESPACE "quoted-string and whitespace (T_ENCAPSED_AND_WHITESPACE)"
%token T_CONSTANT_ENCAPSED_STRING "quoted-string (T_CONSTANT_ENCAPSED_STRING)"
%token T_ECHO "echo (T_ECHO)"
%token T_DO "do (T_DO)"
%token T_WHILE "while (T_WHILE)"
%token T_ENDWHILE "endwhile (T_ENDWHILE)"
%token T_FOR "for (T_FOR)"
%token T_ENDFOR "endfor (T_ENDFOR)"
%token T_FOREACH "foreach (T_FOREACH)"
%token T_ENDFOREACH "endforeach (T_ENDFOREACH)"
%token T_DECLARE "declare (T_DECLARE)"
%token T_ENDDECLARE "enddeclare (T_ENDDECLARE)"
%token T_AS "as (T_AS)"
%token T_SWITCH "switch (T_SWITCH)"
%token T_ENDSWITCH "endswitch (T_ENDSWITCH)"
%token T_CASE "case (T_CASE)"
%token T_DEFAULT "default (T_DEFAULT)"
%token T_BREAK "break (T_BREAK)"
%token T_CONTINUE "continue (T_CONTINUE)"
%token T_GOTO "goto (T_GOTO)"
%token T_FUNCTION "function (T_FUNCTION)"
%token T_CONST "const (T_CONST)"
%token T_RETURN "return (T_RETURN)"
%token T_TRY "try (T_TRY)"
%token T_CATCH "catch (T_CATCH)"
%token T_THROW "throw (T_THROW)"
%token T_USE "use (T_USE)"
%token T_INSTEADOF "insteadof (T_INSTEADOF)"
%token T_GLOBAL "global (T_GLOBAL)"
%right T_STATIC T_ABSTRACT T_FINAL T_PRIVATE T_PROTECTED T_PUBLIC
%token T_VAR
%token T_UNSET
%token T_ISSET
%token T_EMPTY
%token T_HALT_COMPILER
%token T_CLASS
%token T_TRAIT
%token T_INTERFACE
%token T_EXTENDS
%token T_IMPLEMENTS
%token T_OBJECT_OPERATOR
%token T_DOUBLE_ARROW
%token T_LIST
%token T_ARRAY
%token T_CLASS_C
%token T_METHOD_C
%token T_FUNC_C
%token T_LINE
%token T_FILE
%token T_COMMENT
%token T_DOC_COMMENT
%token T_OPEN_TAG
%token T_OPEN_TAG_WITH_ECHO
%token T_CLOSE_TAG
%token T_WHITESPACE
%token T_START_HEREDOC
%token T_END_HEREDOC
%token T_DOLLAR_OPEN_CURLY_BRACES
%token T_CURLY_OPEN
%token T_PAAMAYIM_NEKUDOTAYIM
%token T_NAMESPACE
%token T_NS_C
%token T_DIR
%token T_NS_SEPARATOR
%token T_STATIC "static (T_STATIC)"
%token T_ABSTRACT "abstract (T_ABSTRACT)"
%token T_FINAL "final (T_FINAL)"
%token T_PRIVATE "private (T_PRIVATE)"
%token T_PROTECTED "protected (T_PROTECTED)"
%token T_PUBLIC "public (T_PUBLIC)"
%token T_VAR "var (T_VAR)"
%token T_UNSET "unset (T_UNSET)"
%token T_ISSET "isset (T_ISSET)"
%token T_EMPTY "empty (T_EMPTY)"
%token T_HALT_COMPILER "__halt_compiler (T_HALT_COMPILER)"
%token T_CLASS "class (T_CLASS)"
%token T_TRAIT "trait (T_TRAIT)"
%token T_INTERFACE "interface (T_INTERFACE)"
%token T_EXTENDS "extends (T_EXTENDS)"
%token T_IMPLEMENTS "implements (T_IMPLEMENTS)"
%token T_OBJECT_OPERATOR "-> (T_OBJECT_OPERATOR)"
%token T_DOUBLE_ARROW "=> (T_DOUBLE_ARROW)"
%token T_LIST "list (T_LIST)"
%token T_ARRAY "array (T_ARRAY)"
%token T_CLASS_C "__CLASS__ (T_CLASS_C)"
%token T_METHOD_C "__METHOD__ (T_METHOD_C)"
%token T_FUNC_C "__FUNCTION__ (T_FUNC_C)"
%token T_LINE "__LINE__ (T_LINE)"
%token T_FILE "__FILE__ (T_FILE)"
%token T_COMMENT "comment (T_COMMENT)"
%token T_DOC_COMMENT "doc comment (T_DOC_COMMENT)"
%token T_OPEN_TAG "open tag (T_OPEN_TAG)"
%token T_OPEN_TAG_WITH_ECHO "open tag with echo (T_OPEN_TAG_WITH_ECHO)"
%token T_CLOSE_TAG "close tag (T_CLOSE_TAG)"
%token T_WHITESPACE "whitespace (T_WHITESPACE)"
%token T_START_HEREDOC "heredoc start (T_START_HEREDOC)"
%token T_END_HEREDOC "heredoc end (T_END_HEREDOC)"
%token T_DOLLAR_OPEN_CURLY_BRACES "${ (T_DOLLAR_OPEN_CURLY_BRACES)"
%token T_CURLY_OPEN "{$ (T_CURLY_OPEN)"
%token T_PAAMAYIM_NEKUDOTAYIM ":: (T_PAAMAYIM_NEKUDOTAYIM)"
%token T_NAMESPACE "namespace (T_NAMESPACE)"
%token T_NS_C "__NAMESPACE__ (T_NS_C)"
%token T_DIR "__DIR__ (T_DIR)"
%token T_NS_SEPARATOR "\\ (T_NS_SEPARATOR)"
%% /* Rules */
@ -1079,6 +1135,70 @@ class_constant:
%%
/* Copy to YYRES the contents of YYSTR after stripping away unnecessary
quotes and backslashes, so that it's suitable for yyerror. The
heuristic is that double-quoting is unnecessary unless the string
contains an apostrophe, a comma, or backslash (other than
backslash-backslash). YYSTR is taken from yytname. If YYRES is
null, do not copy; instead, return the length of what the result
would have been. */
static YYSIZE_T zend_yytnamerr(char *yyres, const char *yystr)
{
if (!yyres) {
return yystrlen(yystr);
}
{
TSRMLS_FETCH();
if (CG(parse_error) == 0) {
char buffer[120], *end, *str, *tok1 = NULL, *tok2 = NULL;
unsigned int len = 0, toklen = 0, yystr_len;
CG(parse_error) = 1;
if (LANG_SCNG(yy_text)[0] == 0 &&
LANG_SCNG(yy_leng) == 1 &&
memcmp(yystr, ZEND_STRL("\"end of file\"")) == 0) {
return yystpcpy(yyres, "end of file") - yyres;
}
str = LANG_SCNG(yy_text);
end = memchr(str, '\n', LANG_SCNG(yy_leng));
yystr_len = yystrlen(yystr);
if ((tok1 = memchr(yystr, '(', yystr_len)) != NULL
&& (tok2 = zend_memrchr(yystr, ')', yystr_len)) != NULL) {
toklen = (tok2 - tok1) + 1;
} else {
tok1 = tok2 = NULL;
toklen = 0;
}
if (end == NULL) {
len = LANG_SCNG(yy_leng) > 30 ? 30 : LANG_SCNG(yy_leng);
} else {
len = (end - str) > 30 ? 30 : (end - str);
}
if (toklen) {
snprintf(buffer, sizeof(buffer), "'%.*s' %.*s", len, str, toklen, tok1);
} else {
snprintf(buffer, sizeof(buffer), "'%.*s'", len, str);
}
return yystpcpy(yyres, buffer) - yyres;
}
}
if (*yystr == '"') {
YYSIZE_T yyn = 0;
const char *yyp = yystr;
for (; *++yyp != '"'; ++yyn) {
yyres[yyn] = *yyp;
}
yyres[yyn] = '\0';
return yyn;
}
return yystpcpy(yyres, yystr) - yyres;
}
/*
* Local variables:
* tab-width: 4

File diff suppressed because it is too large Load diff

View file

@ -174,6 +174,7 @@ static void yy_scan_buffer(char *str, unsigned int len TSRMLS_DC)
void startup_scanner(TSRMLS_D)
{
CG(parse_error) = 0;
CG(heredoc) = NULL;
CG(heredoc_len) = 0;
CG(doc_comment) = NULL;
@ -187,6 +188,7 @@ void shutdown_scanner(TSRMLS_D)
efree(CG(heredoc));
CG(heredoc_len)=0;
}
CG(parse_error) = 0;
zend_stack_destroy(&SCNG(state_stack));
RESET_DOC_COMMENT();
}

View file

@ -1,4 +1,4 @@
/* Generated by re2c 0.13.5 on Thu Jun 2 17:34:19 2011 */
/* Generated by re2c 0.13.5 on Thu Jun 23 19:47:38 2011 */
#line 3 "Zend/zend_language_scanner_defs.h"
enum YYCONDTYPE {

View file

@ -990,7 +990,7 @@ static void php_error_cb(int type, const char *error_filename, const uint error_
char *append_string = INI_STR("error_append_string");
if (PG(html_errors)) {
if (type == E_ERROR) {
if (type == E_ERROR || type == E_PARSE) {
size_t len;
char *buf = php_escape_html_entities(buffer, buffer_len, &len, 0, ENT_COMPAT, NULL TSRMLS_CC);
php_printf("%s<br />\n<b>%s</b>: %s in <b>%s</b> on line <b>%d</b><br />\n%s", STR_PRINT(prepend_string), error_type_str, buf, error_filename, error_lineno, STR_PRINT(append_string));
@ -1061,7 +1061,9 @@ static void php_error_cb(int type, const char *error_filename, const uint error_
sapi_header_op(SAPI_HEADER_REPLACE, &ctr TSRMLS_CC);
}
/* the parser would return 1 (failure), we can bail out nicely */
if (type != E_PARSE) {
if (type == E_PARSE) {
CG(parse_error) = 0;
} else {
/* restore memory limit */
zend_set_memory_limit(PG(memory_limit));
efree(buffer);