Add proper EOF handling for language scanner. Fixes bug #46817. Removes DOS newlines from a previous commit.

This commit is contained in:
Brian Shire 2009-03-11 22:11:41 +00:00
parent 350da73bbc
commit b7ff76c6da
5 changed files with 4859 additions and 4616 deletions

File diff suppressed because it is too large Load diff

View file

@ -48,7 +48,7 @@
#include "tsrm_config_common.h" #include "tsrm_config_common.h"
#define YYCTYPE unsigned char #define YYCTYPE unsigned char
#define YYFILL(n) { if (YYCURSOR >= YYLIMIT) return 0; } #define YYFILL(n) { if ((YYCURSOR + n) >= (YYLIMIT + ZEND_MMAP_AHEAD)) { zend_error(E_COMPILE_ERROR, "Exceeded YYLIMIT bounds during scanning. Please report this."); return 0; } }
#define YYCURSOR SCNG(yy_cursor) #define YYCURSOR SCNG(yy_cursor)
#define YYLIMIT SCNG(yy_limit) #define YYLIMIT SCNG(yy_limit)
#define YYMARKER SCNG(yy_marker) #define YYMARKER SCNG(yy_marker)
@ -1327,15 +1327,6 @@ restart:
yymore_restart: yymore_restart:
/* detect EOF */
if (YYCURSOR >= YYLIMIT) {
/* special case */
if (YYSTATE == STATE(ST_COMMENT) || YYSTATE == STATE(ST_DOC_COMMENT)) {
zend_error(E_COMPILE_WARNING, "Unterminated comment starting line %d", CG(zend_lineno));
}
return 0;
}
/*!re2c /*!re2c
re2c:yyfill:check = 0; re2c:yyfill:check = 0;
LNUM [0-9]+ LNUM [0-9]+
@ -1346,17 +1337,18 @@ LABEL [a-zA-Z_\x7f-\xff][a-zA-Z0-9_\x7f-\xff]*
WHITESPACE [ \n\r\t]+ WHITESPACE [ \n\r\t]+
TABS_AND_SPACES [ \t]* TABS_AND_SPACES [ \t]*
TOKENS [;:,.\[\]()|^&+-/*=%!~$<>?@] TOKENS [;:,.\[\]()|^&+-/*=%!~$<>?@]
ANY_CHAR [^] ANY_CHAR [^\x00]
NEWLINE ("\r"|"\n"|"\r\n") NEWLINE ("\r"|"\n"|"\r\n")
NULL [\x00]{1}
/* /*
* LITERAL_DOLLAR matches unescaped $ that aren't followed by a label character * LITERAL_DOLLAR matches unescaped $ that aren't followed by a label character
* or a { and therefore will be taken literally. The case of literal $ before * or a { and therefore will be taken literally. The case of literal $ before
* a variable or "${" is handled in a rule for each string type * a variable or "${" is handled in a rule for each string type
*/ */
DOUBLE_QUOTES_LITERAL_DOLLAR ("$"+([^a-zA-Z_\x7f-\xff$"\\{]|("\\"{ANY_CHAR}))) DOUBLE_QUOTES_LITERAL_DOLLAR ("$"+([^a-zA-Z_\x7f-\xff$"\\{\x00]|("\\"{ANY_CHAR})))
BACKQUOTE_LITERAL_DOLLAR ("$"+([^a-zA-Z_\x7f-\xff$`\\{]|("\\"{ANY_CHAR}))) BACKQUOTE_LITERAL_DOLLAR ("$"+([^a-zA-Z_\x7f-\xff$`\\{\x00]|("\\"{ANY_CHAR})))
HEREDOC_LITERAL_DOLLAR ("$"+([^a-zA-Z_\x7f-\xff$\n\r\\{]|("\\"[^\n\r]))) HEREDOC_LITERAL_DOLLAR ("$"+([^a-zA-Z_\x7f-\xff$\n\r\\{\x00]|("\\"[^\n\r\x00])))
/* /*
* Usually, HEREDOC_NEWLINE will just function like a simple NEWLINE, but some * Usually, HEREDOC_NEWLINE will just function like a simple NEWLINE, but some
@ -1373,7 +1365,7 @@ HEREDOC_NEWLINE ((({LABEL}";"?((("{"+|"$"+)"\\"?)|"\\"))|(("{"*|"$"*)"\\"?)){NEW
* This pattern is just used in the next 2 for matching { or literal $, and/or * This pattern is just used in the next 2 for matching { or literal $, and/or
* \ escape sequence immediately at the beginning of a line or after a label * \ escape sequence immediately at the beginning of a line or after a label
*/ */
HEREDOC_CURLY_OR_ESCAPE_OR_DOLLAR (("{"+[^$\n\r\\{])|("{"*"\\"[^\n\r])|{HEREDOC_LITERAL_DOLLAR}) HEREDOC_CURLY_OR_ESCAPE_OR_DOLLAR (("{"+[^$\n\r\\{\x00])|("{"*"\\"[^\n\r\x00])|{HEREDOC_LITERAL_DOLLAR})
/* /*
* These 2 label-related patterns allow HEREDOC_CHARS to continue "regular" * These 2 label-related patterns allow HEREDOC_CHARS to continue "regular"
@ -1382,12 +1374,12 @@ HEREDOC_CURLY_OR_ESCAPE_OR_DOLLAR (("{"+[^$\n\r\\{])|("{"*"\\"[^\n\r])|{HEREDOC_
* a variable or "{$" Matching a newline, and possibly label, up TO a variable * a variable or "{$" Matching a newline, and possibly label, up TO a variable
* or "{$", is handled in the heredoc rules * or "{$", is handled in the heredoc rules
* *
* The HEREDOC_LABEL_NO_NEWLINE pattern (";"[^$\n\r\\{]) handles cases where ; * The HEREDOC_LABEL_NO_NEWLINE pattern (";"[^$\n\r\\{\x00]) handles cases where ;
* follows a label. [^a-zA-Z0-9_\x7f-\xff;$\n\r\\{] is needed to prevent a label * follows a label. [^a-zA-Z0-9_\x7f-\xff;$\n\r\\{\x00] is needed to prevent a label
* character or ; from matching on a possible (real) ending label * character or ; from matching on a possible (real) ending label
*/ */
HEREDOC_NON_LABEL ([^a-zA-Z_\x7f-\xff$\n\r\\{]|{HEREDOC_CURLY_OR_ESCAPE_OR_DOLLAR}) HEREDOC_NON_LABEL ([^a-zA-Z_\x7f-\xff$\n\r\\{\x00]|{HEREDOC_CURLY_OR_ESCAPE_OR_DOLLAR})
HEREDOC_LABEL_NO_NEWLINE ({LABEL}([^a-zA-Z0-9_\x7f-\xff;$\n\r\\{]|(";"[^$\n\r\\{])|(";"?{HEREDOC_CURLY_OR_ESCAPE_OR_DOLLAR}))) HEREDOC_LABEL_NO_NEWLINE ({LABEL}([^a-zA-Z0-9_\x7f-\xff;$\n\r\\{\x00]|(";"[^$\n\r\\{\x00])|(";"?{HEREDOC_CURLY_OR_ESCAPE_OR_DOLLAR})))
/* /*
* CHARS matches everything up to a variable or "{$" * CHARS matches everything up to a variable or "{$"
@ -1397,11 +1389,11 @@ HEREDOC_LABEL_NO_NEWLINE ({LABEL}([^a-zA-Z0-9_\x7f-\xff;$\n\r\\{]|(";"[^$\n\r\\{
* For heredocs, matching continues across/after newlines if/when it's known * For heredocs, matching continues across/after newlines if/when it's known
* that the next line doesn't contain a possible ending label * that the next line doesn't contain a possible ending label
*/ */
DOUBLE_QUOTES_CHARS ("{"*([^$"\\{]|("\\"{ANY_CHAR}))|{DOUBLE_QUOTES_LITERAL_DOLLAR}) DOUBLE_QUOTES_CHARS ("{"*([^$"\\{\x00]|("\\"{ANY_CHAR}))|{DOUBLE_QUOTES_LITERAL_DOLLAR})
BACKQUOTE_CHARS ("{"*([^$`\\{]|("\\"{ANY_CHAR}))|{BACKQUOTE_LITERAL_DOLLAR}) BACKQUOTE_CHARS ("{"*([^$`\\{\x00]|("\\"{ANY_CHAR}))|{BACKQUOTE_LITERAL_DOLLAR})
HEREDOC_CHARS ("{"*([^$\n\r\\{]|("\\"[^\n\r]))|{HEREDOC_LITERAL_DOLLAR}|({HEREDOC_NEWLINE}+({HEREDOC_NON_LABEL}|{HEREDOC_LABEL_NO_NEWLINE}))) HEREDOC_CHARS ("{"*([^$\n\r\\{\x00]|("\\"[^\n\r\x00]))|{HEREDOC_LITERAL_DOLLAR}|({HEREDOC_NEWLINE}+({HEREDOC_NON_LABEL}|{HEREDOC_LABEL_NO_NEWLINE})))
NOWDOC_CHARS ([^\n\r]|{NEWLINE}+([^a-zA-Z_\x7f-\xff\n\r]|({LABEL}([^a-zA-Z0-9_\x7f-\xff;\n\r]|(";"[^\n\r]))))) NOWDOC_CHARS ([^\n\r\x00]|{NEWLINE}+([^a-zA-Z_\x7f-\xff\n\r\x00]|({LABEL}([^a-zA-Z0-9_\x7f-\xff;\n\r\x00]|(";"[^\n\r\x00])))))
/* compute yyleng before each rule */ /* compute yyleng before each rule */
<!*> := yyleng = YYCURSOR - SCNG(yy_text); <!*> := yyleng = YYCURSOR - SCNG(yy_text);
@ -2262,7 +2254,7 @@ inline_char_handler:
yymore(); yymore();
} }
<ST_ONE_LINE_COMMENT>[^\n\r?%>]*{ANY_CHAR} { <ST_ONE_LINE_COMMENT>[^\n\r?%>\x00]*{ANY_CHAR} {
switch (yytext[yyleng-1]) { switch (yytext[yyleng-1]) {
case '?': case '%': case '>': case '?': case '%': case '>':
yyless(yyleng-1); yyless(yyleng-1);
@ -2308,13 +2300,18 @@ inline_char_handler:
yymore(); yymore();
} }
<ST_COMMENT,ST_DOC_COMMENT>{NULL} {
zend_error(E_COMPILE_WARNING, "Unterminated comment starting line %d", CG(zend_lineno));
return 0;
}
<ST_IN_SCRIPTING>"/*" { <ST_IN_SCRIPTING>"/*" {
BEGIN(ST_COMMENT); BEGIN(ST_COMMENT);
yymore(); yymore();
} }
<ST_COMMENT,ST_DOC_COMMENT>[^*]+ { <ST_COMMENT,ST_DOC_COMMENT>[^*\x00]+ {
yymore(); yymore();
} }
@ -2657,6 +2654,7 @@ inline_char_handler:
return '`'; return '`';
} }
<*>{NULL} { return 0; } /* EOF */
<ST_IN_SCRIPTING,ST_VAR_OFFSET>{ANY_CHAR} { <ST_IN_SCRIPTING,ST_VAR_OFFSET>{ANY_CHAR} {
zend_error(E_COMPILE_WARNING,"Unexpected character in input: '%c' (ASCII=%d) state=%d", yytext[0], yytext[0], YYSTATE); zend_error(E_COMPILE_WARNING,"Unexpected character in input: '%c' (ASCII=%d) state=%d", yytext[0], yytext[0], YYSTATE);

View file

@ -1,5 +1,5 @@
/* Generated by re2c 0.13.5 on Fri Jan 9 12:18:37 2009 */ /* Generated by re2c 0.13.5 on Wed Mar 11 14:39:25 2009 */
#line 3 "./zend_language_scanner_defs.h" #line 3 "Zend/zend_language_scanner_defs.h"
enum YYCONDTYPE { enum YYCONDTYPE {
yycST_IN_SCRIPTING, yycST_IN_SCRIPTING,

View file

@ -29,7 +29,7 @@ typedef size_t (*zend_stream_fsizer_t)(void* handle TSRMLS_DC);
typedef size_t (*zend_stream_reader_t)(void* handle, char *buf, size_t len TSRMLS_DC); typedef size_t (*zend_stream_reader_t)(void* handle, char *buf, size_t len TSRMLS_DC);
typedef void (*zend_stream_closer_t)(void* handle TSRMLS_DC); typedef void (*zend_stream_closer_t)(void* handle TSRMLS_DC);
#define ZEND_MMAP_AHEAD 16 #define ZEND_MMAP_AHEAD 32
typedef enum { typedef enum {
ZEND_HANDLE_FILENAME, ZEND_HANDLE_FILENAME,

View file

@ -49,7 +49,7 @@ bool(false)
</span> </span>
</code>bool(true) </code>bool(true)
<code><span style="color: #000000"> <code><span style="color: #000000">
<span style="color: #0000BB">&lt;?php&nbsp;</span><span style="color: #007700">echo&nbsp;</span><span style="color: #FF9900">"test&nbsp;?&gt;</span> <span style="color: #0000BB">&lt;?php&nbsp;</span><span style="color: #007700">echo&nbsp;</span><span style="color: #DD0000">"test&nbsp;?&gt;</span>
</span> </span>
</code>bool(true) </code>bool(true)
<code><span style="color: #000000"> <code><span style="color: #000000">