implement context sensitive language with lexical feedback

The implementation has no regression risks, has an even smaller footprint
compared to the previous attempt involving a pure lexical approach, and is highly
predictable and highly configurable.

To make a word semi-reserved you only need to edit the "SEMI_RESERVED" parser rule;
it is an inclusive list of all the words that should be matched as T_STRING in specific contexts.
Example:

```
method_modifiers function returns_ref identifier '(' parameter_list ')' ...
```

instead of:

```
method_modifiers function returns_ref T_STRING '(' parameter_list ')' ...
```

TODO: port ext tokenizer
This commit is contained in:
Márcio Almada 2015-03-09 02:24:50 -03:00
parent a54e1237ec
commit a75decd452
2 changed files with 63 additions and 17 deletions

View file

@ -35,6 +35,7 @@
#include "zend_globals.h"
#include "zend_API.h"
#include "zend_constants.h"
#include "zend_language_scanner_defs.h"
#define YYSIZE_T size_t
#define yytnamerr zend_yytnamerr
@ -48,6 +49,12 @@ static YYSIZE_T zend_yytnamerr(char*, const char*);
#define YYFREE free
#endif
/* REWIND: push the current scanner state onto the scanner's state stack,
 * switch the scanner to the yycST_LOOKING_FOR_SEMI_RESERVED_NAME condition,
 * and move the cursor back to yy_text with a zero token length so that the
 * text at yy_text is scanned again.
 * The expansion is a bare brace block with no trailing ';', which matches how
 * the grammar invokes it as a mid-rule action: `{ REWIND }`. */
#define REWIND { \
zend_stack_push(&LANG_SCNG(state_stack), (void *) &LANG_SCNG(yy_state)); \
LANG_SCNG(yy_state) = yycST_LOOKING_FOR_SEMI_RESERVED_NAME; \
LANG_SCNG(yy_cursor) = (unsigned char*)LANG_SCNG(yy_text); \
LANG_SCNG(yy_leng) = 0; }
%}
%pure_parser
@ -243,7 +250,7 @@ static YYSIZE_T zend_yytnamerr(char*, const char*);
%type <ast> absolute_trait_method_reference trait_method_reference property echo_expr
%type <ast> new_expr anonymous_class class_name class_name_reference simple_variable
%type <ast> internal_functions_in_yacc
%type <ast> exit_expr scalar backticks_expr lexical_var function_call member_name
%type <ast> exit_expr scalar backticks_expr lexical_var function_call member_name property_name
%type <ast> variable_class_name dereferencable_scalar class_name_scalar constant dereferencable
%type <ast> callable_expr callable_variable static_member new_variable
%type <ast> assignment_list_element array_pair encaps_var encaps_var_offset isset_variables
@ -252,10 +259,11 @@ static YYSIZE_T zend_yytnamerr(char*, const char*);
%type <ast> echo_expr_list unset_variables catch_list parameter_list class_statement_list
%type <ast> implements_list case_list if_stmt_without_else
%type <ast> non_empty_parameter_list argument_list non_empty_argument_list property_list
%type <ast> class_const_list name_list trait_adaptations method_body non_empty_for_exprs
%type <ast> class_const_list class_const_decl name_list trait_adaptations method_body non_empty_for_exprs
%type <ast> ctor_arguments alt_if_stmt_without_else trait_adaptation_list lexical_vars
%type <ast> lexical_var_list encaps_list array_pair_list non_empty_array_pair_list
%type <ast> assignment_list isset_variable type return_type
%type <ast> identifier
%type <num> returns_ref function is_reference is_variadic variable_modifiers
%type <num> method_modifiers trait_modifiers non_empty_member_modifiers member_modifier
@ -269,6 +277,22 @@ start:
top_statement_list { CG(ast) = $1; }
;
/* Inclusive list of keyword tokens that the `identifier` rule accepts and
 * rematches as T_STRING. To make another keyword usable as an identifier, add
 * its token here. The modifier tokens and T_CLASS below are commented out,
 * i.e. currently not part of the list. */
semi_reserved:
T_INCLUDE | T_INCLUDE_ONCE | T_EVAL | T_REQUIRE | T_REQUIRE_ONCE | T_LOGICAL_OR | T_LOGICAL_XOR | T_LOGICAL_AND
| T_INSTANCEOF | T_NEW | T_CLONE | T_EXIT | T_IF | T_ELSEIF | T_ELSE | T_ENDIF | T_ECHO | T_DO | T_WHILE | T_ENDWHILE
| T_FOR | T_ENDFOR | T_FOREACH | T_ENDFOREACH | T_DECLARE | T_ENDDECLARE | T_AS | T_TRY | T_CATCH | T_FINALLY
| T_THROW | T_USE | T_INSTEADOF | T_GLOBAL | T_VAR | T_UNSET | T_ISSET | T_EMPTY | T_CONTINUE | T_GOTO
| T_FUNCTION | T_CONST | T_RETURN | T_PRINT | T_YIELD | T_LIST | T_SWITCH | T_ENDSWITCH | T_CASE | T_DEFAULT | T_BREAK
| T_ARRAY | T_CALLABLE | T_EXTENDS | T_IMPLEMENTS | T_NAMESPACE | T_TRAIT | T_INTERFACE
// | T_STATIC | T_ABSTRACT | T_FINAL | T_PRIVATE | T_PROTECTED | T_PUBLIC
// | T_CLASS
;
/* A name that is either a plain T_STRING or a semi_reserved keyword that is
 * rewound and scanned again as T_STRING.
 * In the second alternative the mid-rule action { REWIND } occupies position
 * $2, so the rematched T_STRING is $3 (not $2). */
identifier:
T_STRING { $$ = $1; }
| /* if */ semi_reserved { REWIND } /* and rematch as */ T_STRING { $$ = $3; }
;
top_statement_list:
top_statement_list top_statement { $$ = zend_ast_list_add($1, $2); }
| /* empty */ { $$ = zend_ast_create_list(0, ZEND_AST_STMT_LIST); }
@ -673,7 +697,7 @@ class_statement:
{ $$ = $2; RESET_DOC_COMMENT(); }
| T_USE name_list trait_adaptations
{ $$ = zend_ast_create(ZEND_AST_USE_TRAIT, $2, $3); }
| method_modifiers function returns_ref T_STRING '(' parameter_list ')'
| method_modifiers function returns_ref identifier '(' parameter_list ')'
return_type backup_doc_comment method_body
{ $$ = zend_ast_create_decl(ZEND_AST_METHOD, $3 | $1, $2, $9,
zend_ast_get_str($4), $6, NULL, $10, $8); }
@ -708,20 +732,20 @@ trait_precedence:
;
trait_alias:
trait_method_reference T_AS trait_modifiers T_STRING
trait_method_reference T_AS trait_modifiers identifier
{ $$ = zend_ast_create_ex(ZEND_AST_TRAIT_ALIAS, $3, $1, $4); }
| trait_method_reference T_AS member_modifier
{ $$ = zend_ast_create_ex(ZEND_AST_TRAIT_ALIAS, $3, $1, NULL); }
;
trait_method_reference:
T_STRING
identifier
{ $$ = zend_ast_create(ZEND_AST_METHOD_REFERENCE, NULL, $1); }
| absolute_trait_method_reference { $$ = $1; }
;
absolute_trait_method_reference:
name T_PAAMAYIM_NEKUDOTAYIM T_STRING
name T_PAAMAYIM_NEKUDOTAYIM identifier
{ $$ = zend_ast_create(ZEND_AST_METHOD_REFERENCE, $1, $3); }
;
@ -773,8 +797,12 @@ property:
;
class_const_list:
class_const_list ',' const_decl { $$ = zend_ast_list_add($1, $3); }
| const_decl { $$ = zend_ast_create_list(1, ZEND_AST_CLASS_CONST_DECL, $1); }
class_const_list ',' class_const_decl { $$ = zend_ast_list_add($1, $3); }
| class_const_decl { $$ = zend_ast_create_list(1, ZEND_AST_CLASS_CONST_DECL, $1); }
;
/* One `name = value` element of a class constant list. The name goes through
 * `identifier`, so semi_reserved keywords are accepted as constant names here.
 * Builds a ZEND_AST_CONST_ELEM node from the name ($1) and the expression ($3). */
class_const_decl:
identifier '=' expr { $$ = zend_ast_create(ZEND_AST_CONST_ELEM, $1, $3); }
;
const_decl:
@ -1034,9 +1062,9 @@ scalar:
constant:
name { $$ = zend_ast_create(ZEND_AST_CONST, $1); }
| class_name T_PAAMAYIM_NEKUDOTAYIM T_STRING
| class_name T_PAAMAYIM_NEKUDOTAYIM identifier
{ $$ = zend_ast_create(ZEND_AST_CLASS_CONST, $1, $3); }
| variable_class_name T_PAAMAYIM_NEKUDOTAYIM T_STRING
| variable_class_name T_PAAMAYIM_NEKUDOTAYIM identifier
{ $$ = zend_ast_create(ZEND_AST_CLASS_CONST, $1, $3); }
;
@ -1080,7 +1108,7 @@ callable_variable:
{ $$ = zend_ast_create(ZEND_AST_DIM, $1, $3); }
| dereferencable '{' expr '}'
{ $$ = zend_ast_create(ZEND_AST_DIM, $1, $3); }
| dereferencable T_OBJECT_OPERATOR member_name argument_list
| dereferencable T_OBJECT_OPERATOR property_name argument_list
{ $$ = zend_ast_create(ZEND_AST_METHOD_CALL, $1, $3, $4); }
| function_call { $$ = $1; }
;
@ -1090,7 +1118,7 @@ variable:
{ $$ = $1; }
| static_member
{ $$ = $1; }
| dereferencable T_OBJECT_OPERATOR member_name
| dereferencable T_OBJECT_OPERATOR property_name
{ $$ = zend_ast_create(ZEND_AST_PROP, $1, $3); }
;
@ -1114,7 +1142,7 @@ new_variable:
{ $$ = zend_ast_create(ZEND_AST_DIM, $1, $3); }
| new_variable '{' expr '}'
{ $$ = zend_ast_create(ZEND_AST_DIM, $1, $3); }
| new_variable T_OBJECT_OPERATOR member_name
| new_variable T_OBJECT_OPERATOR property_name
{ $$ = zend_ast_create(ZEND_AST_PROP, $1, $3); }
| class_name T_PAAMAYIM_NEKUDOTAYIM simple_variable
{ $$ = zend_ast_create(ZEND_AST_STATIC_PROP, $1, $3); }
@ -1123,7 +1151,13 @@ new_variable:
;
member_name:
T_STRING { $$ = $1; }
identifier { $$ = $1; }
| '{' expr '}' { $$ = $2; }
| simple_variable { $$ = zend_ast_create(ZEND_AST_VAR, $1); }
;
/* Name following T_OBJECT_OPERATOR in the rules that reference property_name.
 * Three forms: a plain T_STRING (semi_reserved keywords are NOT accepted
 * here, unlike member_name which goes through `identifier`), a braced
 * expression, or a simple_variable wrapped in a ZEND_AST_VAR node. */
property_name:
T_STRING { $$ = $1; }
| '{' expr '}' { $$ = $2; }
| simple_variable { $$ = zend_ast_create(ZEND_AST_VAR, $1); }
;

View file

@ -1271,7 +1271,7 @@ NEWLINE ("\r"|"\n"|"\r\n")
return T_OBJECT_OPERATOR;
}
<ST_IN_SCRIPTING,ST_LOOKING_FOR_PROPERTY>{WHITESPACE}+ {
<ST_IN_SCRIPTING,ST_LOOKING_FOR_PROPERTY,ST_LOOKING_FOR_SEMI_RESERVED_NAME>{WHITESPACE}+ {
HANDLE_NEWLINES(yytext, yyleng);
return T_WHITESPACE;
}
@ -1875,7 +1875,7 @@ inline_char_handler:
}
<ST_IN_SCRIPTING>"#"|"//" {
<ST_IN_SCRIPTING,ST_LOOKING_FOR_SEMI_RESERVED_NAME>"#"|"//" {
while (YYCURSOR < YYLIMIT) {
switch (*YYCURSOR++) {
case '\r':
@ -1904,7 +1904,7 @@ inline_char_handler:
return T_COMMENT;
}
<ST_IN_SCRIPTING>"/*"|"/**"{WHITESPACE} {
<ST_IN_SCRIPTING,ST_LOOKING_FOR_SEMI_RESERVED_NAME>"/*"|"/**"{WHITESPACE} {
int doc_com;
if (yyleng > 2) {
@ -1937,6 +1937,18 @@ inline_char_handler:
return T_COMMENT;
}
<ST_LOOKING_FOR_SEMI_RESERVED_NAME>{LABEL} {
/* In the semi-reserved-name state a label is not matched against the keyword
 * rules: its text is copied into zendlval, the scanner state is popped, and
 * the token is reported as a plain T_STRING. */
zend_copy_value(zendlval, yytext, yyleng);
yy_pop_state();
return T_STRING;
}
<ST_LOOKING_FOR_SEMI_RESERVED_NAME>{ANY_CHAR} {
/* Fallback for input in this state that is not handled by another rule:
 * call yyless(0) (presumably gives the matched character back to the input;
 * confirm against the scanner's yyless definition), pop the scanner state,
 * and jump to `restart` to scan again. */
yyless(0);
yy_pop_state();
goto restart;
}
<ST_IN_SCRIPTING>"?>"{NEWLINE}? {
BEGIN(INITIAL);
return T_CLOSE_TAG; /* implicit ';' at php-end tag */