MFH: Implemented manual scanning for strings/comments, plus misc. fixes

This commit is contained in:
Matt Wilmas 2009-05-05 01:35:44 +00:00
parent af442c0bde
commit 09034cf3f4
4 changed files with 386 additions and 311 deletions

2
NEWS
View file

@ -59,6 +59,8 @@ PHP NEWS
- Fixed bug #47038 (Memory leak in include). (Dmitry) - Fixed bug #47038 (Memory leak in include). (Dmitry)
- Fixed bug #47021 (SoapClient stumbles over WSDL delivered with - Fixed bug #47021 (SoapClient stumbles over WSDL delivered with
"Transfer-Encoding: chunked"). (Dmitry) "Transfer-Encoding: chunked"). (Dmitry)
- Fixed bug #46817 (tokenizer misses last single-line comment (PHP 5.3+, with
re2c lexer)). (Matt, Shire)
- Fixed bug #46108 (DateTime - Memory leak when unserializing). (Felipe) - Fixed bug #46108 (DateTime - Memory leak when unserializing). (Felipe)
- Fixed bug #44861 (scrollable cursor don't work with pgsql). (Matteo) - Fixed bug #44861 (scrollable cursor don't work with pgsql). (Matteo)
- Fixed bug #44409 (PDO::FETCH_SERIALIZE calls __construct()). (Matteo) - Fixed bug #44409 (PDO::FETCH_SERIALIZE calls __construct()). (Matteo)

View file

@ -142,14 +142,8 @@ ZEND_API void zend_highlight(zend_syntax_highlighter_ini *syntax_highlighter_ini
zend_printf("<span style=\"color: %s\">", last_color); zend_printf("<span style=\"color: %s\">", last_color);
} }
} }
switch (token_type) {
case T_END_HEREDOC: zend_html_puts(LANG_SCNG(yy_text), LANG_SCNG(yy_leng) TSRMLS_CC);
zend_html_puts(token.value.str.val, token.value.str.len TSRMLS_CC);
break;
default:
zend_html_puts(LANG_SCNG(yy_text), LANG_SCNG(yy_leng) TSRMLS_CC);
break;
}
if (token.type == IS_STRING) { if (token.type == IS_STRING) {
switch (token_type) { switch (token_type) {
@ -170,19 +164,6 @@ ZEND_API void zend_highlight(zend_syntax_highlighter_ini *syntax_highlighter_ini
token.type = 0; token.type = 0;
} }
/* handler for trailing comments, see bug #42767 */
if (LANG_SCNG(yy_leng) && LANG_SCNG(yy_text) < LANG_SCNG(yy_limit)) {
if (last_color != syntax_highlighter_ini->highlight_comment) {
if (last_color != syntax_highlighter_ini->highlight_html) {
zend_printf("</span>");
}
if (syntax_highlighter_ini->highlight_comment != syntax_highlighter_ini->highlight_html) {
zend_printf("<span style=\"color: %s\">", syntax_highlighter_ini->highlight_comment);
}
}
zend_html_puts(LANG_SCNG(yy_text), (LANG_SCNG(yy_limit) - LANG_SCNG(yy_text)) TSRMLS_CC);
}
if (last_color != syntax_highlighter_ini->highlight_html) { if (last_color != syntax_highlighter_ini->highlight_html) {
zend_printf("</span>\n"); zend_printf("</span>\n");
} }

View file

@ -109,6 +109,12 @@ do { \
} \ } \
} }
/* To save initial string length after scanning to first variable, CG(doc_comment_len) can be reused */
#define SET_DOUBLE_QUOTES_SCANNED_LENGTH(len) CG(doc_comment_len) = (len)
#define GET_DOUBLE_QUOTES_SCANNED_LENGTH() CG(doc_comment_len)
/* Non-zero when c may begin a label: letters, underscore, or any value >= 0x7F. This is the
 * same first-character class the LABEL pattern uses ([a-zA-Z_\x7f-\xff]).
 * NOTE(review): the (c) >= 0x7F test presumably relies on c being an unsigned character
 * (YYCTYPE); with a signed char, high bytes would compare negative - confirm at call sites.
 * c is evaluated several times, so pass an expression without side effects. */
#define IS_LABEL_START(c) (((c) >= 'a' && (c) <= 'z') || ((c) >= 'A' && (c) <= 'Z') || (c) == '_' || (c) >= 0x7F)
#define ZEND_IS_OCT(c) ((c)>='0' && (c)<='7') #define ZEND_IS_OCT(c) ((c)>='0' && (c)<='7')
#define ZEND_IS_HEX(c) (((c)>='0' && (c)<='9') || ((c)>='a' && (c)<='f') || ((c)>='A' && (c)<='F')) #define ZEND_IS_HEX(c) (((c)>='0' && (c)<='9') || ((c)>='a' && (c)<='f') || ((c)>='A' && (c)<='F'))
@ -835,63 +841,8 @@ LABEL [a-zA-Z_\x7f-\xff][a-zA-Z0-9_\x7f-\xff]*
WHITESPACE [ \n\r\t]+ WHITESPACE [ \n\r\t]+
TABS_AND_SPACES [ \t]* TABS_AND_SPACES [ \t]*
TOKENS [;:,.\[\]()|^&+-/*=%!~$<>?@] TOKENS [;:,.\[\]()|^&+-/*=%!~$<>?@]
ANY_CHAR [^\x00] ANY_CHAR [^]
NEWLINE ("\r"|"\n"|"\r\n") NEWLINE ("\r"|"\n"|"\r\n")
NULL [\x00]{1}
/*
* LITERAL_DOLLAR matches unescaped $ that aren't followed by a label character
* or a { and therefore will be taken literally. The case of literal $ before
* a variable or "${" is handled in a rule for each string type
*/
DOUBLE_QUOTES_LITERAL_DOLLAR ("$"+([^a-zA-Z_\x7f-\xff$"\\{]|("\\"{ANY_CHAR})))
BACKQUOTE_LITERAL_DOLLAR ("$"+([^a-zA-Z_\x7f-\xff$`\\{]|("\\"{ANY_CHAR})))
HEREDOC_LITERAL_DOLLAR ("$"+([^a-zA-Z_\x7f-\xff$\n\r\\{]|("\\"[^\n\r])))
/*
* Usually, HEREDOC_NEWLINE will just function like a simple NEWLINE, but some
* special cases need to be handled. HEREDOC_CHARS doesn't allow a line to
* match when { or $, and/or \ is at the end. (("{"*|"$"*)"\\"?) handles that,
* along with cases where { or $, and/or \ is the ONLY thing on a line
*
* The other case is when a line contains a label, followed by ONLY
* { or $, and/or \ Handled by ({LABEL}";"?((("{"+|"$"+)"\\"?)|"\\"))
*/
HEREDOC_NEWLINE ((({LABEL}";"?((("{"+|"$"+)"\\"?)|"\\"))|(("{"*|"$"*)"\\"?)){NEWLINE})
/*
* This pattern is just used in the next 2 for matching { or literal $, and/or
* \ escape sequence immediately at the beginning of a line or after a label
*/
HEREDOC_CURLY_OR_ESCAPE_OR_DOLLAR (("{"+[^$\n\r\\{])|("{"*"\\"[^\n\r])|{HEREDOC_LITERAL_DOLLAR})
/*
* These 2 label-related patterns allow HEREDOC_CHARS to continue "regular"
* matching after a newline that starts with either a non-label character or a
* label that isn't followed by a newline. Like HEREDOC_CHARS, they won't match
* a variable or "{$" Matching a newline, and possibly label, up TO a variable
* or "{$", is handled in the heredoc rules
*
* The HEREDOC_LABEL_NO_NEWLINE pattern (";"[^$\n\r\\{]) handles cases where ;
* follows a label. [^a-zA-Z0-9_\x7f-\xff;$\n\r\\{] is needed to prevent a label
* character or ; from matching on a possible (real) ending label
*/
HEREDOC_NON_LABEL ([^a-zA-Z_\x7f-\xff$\n\r\\{]|{HEREDOC_CURLY_OR_ESCAPE_OR_DOLLAR})
HEREDOC_LABEL_NO_NEWLINE ({LABEL}([^a-zA-Z0-9_\x7f-\xff;$\n\r\\{]|(";"[^$\n\r\\{])|(";"?{HEREDOC_CURLY_OR_ESCAPE_OR_DOLLAR})))
/*
* CHARS matches everything up to a variable or "{$"
* {'s are matched as long as they aren't followed by a $
* The case of { before "{$" is handled in a rule for each string type
*
* For heredocs, matching continues across/after newlines if/when it's known
* that the next line doesn't contain a possible ending label
*/
DOUBLE_QUOTES_CHARS ("{"*([^$"\\{]|("\\"{ANY_CHAR}))|{DOUBLE_QUOTES_LITERAL_DOLLAR})
BACKQUOTE_CHARS ("{"*([^$`\\{]|("\\"{ANY_CHAR}))|{BACKQUOTE_LITERAL_DOLLAR})
HEREDOC_CHARS ("{"*([^$\n\r\\{]|("\\"[^\n\r]))|{HEREDOC_LITERAL_DOLLAR}|({HEREDOC_NEWLINE}+({HEREDOC_NON_LABEL}|{HEREDOC_LABEL_NO_NEWLINE})))
NOWDOC_CHARS ([^\n\r]|{NEWLINE}+([^a-zA-Z_\x7f-\xff\n\r]|({LABEL}([^a-zA-Z0-9_\x7f-\xff;\n\r]|(";"[^\n\r])))))
/* compute yyleng before each rule */ /* compute yyleng before each rule */
<!*> := yyleng = YYCURSOR - SCNG(yy_text); <!*> := yyleng = YYCURSOR - SCNG(yy_text);
@ -1530,6 +1481,14 @@ NOWDOC_CHARS ([^\n\r]|{NEWLINE}+([^a-zA-Z_\x7f-\xff\n\r]|({LABEL}([^a-zA-
} }
<INITIAL>"<script"{WHITESPACE}+"language"{WHITESPACE}*"="{WHITESPACE}*("php"|"\"php\""|"'php'"){WHITESPACE}*">" { <INITIAL>"<script"{WHITESPACE}+"language"{WHITESPACE}*"="{WHITESPACE}*("php"|"\"php\""|"'php'"){WHITESPACE}*">" {
YYCTYPE *bracket = zend_memrchr(yytext, '<', yyleng - (sizeof("script language=php>") - 1));
if (bracket != SCNG(yy_text)) {
/* Handle previously scanned HTML, as possible <script> tags found are assumed to not be PHP's */
YYCURSOR = bracket;
goto inline_html;
}
HANDLE_NEWLINES(yytext, yyleng); HANDLE_NEWLINES(yytext, yyleng);
zendlval->value.str.val = yytext; /* no copying - intentional */ zendlval->value.str.val = yytext; /* no copying - intentional */
zendlval->value.str.len = yyleng; zendlval->value.str.len = yyleng;
@ -1601,29 +1560,48 @@ NOWDOC_CHARS ([^\n\r]|{NEWLINE}+([^a-zA-Z_\x7f-\xff\n\r]|({LABEL}([^a-zA-
} }
<INITIAL>{ANY_CHAR} { <INITIAL>{ANY_CHAR} {
if (YYCURSOR > YYLIMIT) {
return 0;
}
inline_char_handler: inline_char_handler:
while (1) { while (1) {
YYCTYPE *ptr = memchr(YYCURSOR, '<', YYLIMIT - YYCURSOR); YYCTYPE *ptr = memchr(YYCURSOR, '<', YYLIMIT - YYCURSOR);
if (ptr == NULL) { YYCURSOR = ptr ? ptr + 1 : YYLIMIT;
YYCURSOR = YYLIMIT;
yyleng = YYCURSOR - SCNG(yy_text);
break;
} else { if (YYCURSOR < YYLIMIT) {
YYCURSOR = ptr + 1; switch (*YYCURSOR) {
case '?':
/* stop if it may be an opening tag (<?, <%, <script>). this condition is not optimal though */ if (CG(short_tags) || !strncasecmp(YYCURSOR + 1, "php", 3)) { /* Assume [ \t\n\r] follows "php" */
if (YYCURSOR < YYLIMIT && (*YYCURSOR == '?' || *YYCURSOR == '%' || *YYCURSOR == 's')) { break;
--YYCURSOR; }
yyleng = YYCURSOR - SCNG(yy_text); continue;
break; case '%':
if (CG(asp_tags)) {
break;
}
continue;
case 's':
case 'S':
/* Probably NOT an opening PHP <script> tag, so don't end the HTML chunk yet
* If it is, the PHP <script> tag rule checks for any HTML scanned before it */
YYCURSOR--;
yymore();
default:
continue;
} }
YYCURSOR--;
} }
break;
} }
inline_html:
yyleng = YYCURSOR - SCNG(yy_text);
#ifdef ZEND_MULTIBYTE #ifdef ZEND_MULTIBYTE
if (SCNG(output_filter)) { if (SCNG(output_filter)) {
int readsize; int readsize;
@ -1688,7 +1666,6 @@ inline_char_handler:
/* Invalid rule to return a more explicit parse error with proper line number */ /* Invalid rule to return a more explicit parse error with proper line number */
yyless(0); yyless(0);
yy_pop_state(TSRMLS_C); yy_pop_state(TSRMLS_C);
ZVAL_EMPTY_STRING(zendlval); /* Empty since it won't be used */
return T_ENCAPSED_AND_WHITESPACE; return T_ENCAPSED_AND_WHITESPACE;
} }
@ -1700,93 +1677,73 @@ inline_char_handler:
<ST_IN_SCRIPTING>"#"|"//" { <ST_IN_SCRIPTING>"#"|"//" {
BEGIN(ST_ONE_LINE_COMMENT); while (YYCURSOR < YYLIMIT) {
yymore(); switch (*YYCURSOR++) {
} case '\r':
if (*YYCURSOR == '\n') {
YYCURSOR++;
}
/* fall through */
case '\n':
CG(zend_lineno)++;
break;
case '%':
if (!CG(asp_tags)) {
continue;
}
/* fall through */
case '?':
if (*YYCURSOR == '>') {
YYCURSOR--;
break;
}
/* fall through */
default:
continue;
}
<ST_ONE_LINE_COMMENT>"?"|"%"|">" { break;
yymore();
}
<ST_ONE_LINE_COMMENT>[^\n\r?%>]*{ANY_CHAR} {
switch (yytext[yyleng-1]) {
case '?': case '%': case '>':
yyless(yyleng-1);
yymore();
break;
case '\n':
CG(zend_lineno)++;
/* intentional fall through */
default:
zendlval->value.str.val = yytext; /* no copying - intentional */
zendlval->value.str.len = yyleng;
zendlval->type = IS_STRING;
BEGIN(ST_IN_SCRIPTING);
return T_COMMENT;
} }
}
<ST_ONE_LINE_COMMENT>{NEWLINE} { yyleng = YYCURSOR - SCNG(yy_text);
zendlval->value.str.val = yytext; /* no copying - intentional */
zendlval->value.str.len = yyleng;
zendlval->type = IS_STRING;
BEGIN(ST_IN_SCRIPTING);
CG(zend_lineno)++;
return T_COMMENT; return T_COMMENT;
} }
<ST_ONE_LINE_COMMENT>"?>"|"%>" { <ST_IN_SCRIPTING>"/*"|"/**"{WHITESPACE} {
if (CG(asp_tags) || yytext[yyleng-2] != '%') { /* asp comment? */ int doc_com;
zendlval->value.str.val = yytext; /* no copying - intentional */
zendlval->value.str.len = yyleng-2; if (yyleng > 2) {
zendlval->type = IS_STRING; doc_com = 1;
yyless(yyleng - 2); RESET_DOC_COMMENT();
BEGIN(ST_IN_SCRIPTING);
return T_COMMENT;
} else { } else {
yymore(); doc_com = 0;
} }
}
<ST_IN_SCRIPTING>"/**"{WHITESPACE} { while (YYCURSOR < YYLIMIT) {
RESET_DOC_COMMENT(); if (*YYCURSOR++ == '*' && *YYCURSOR == '/') {
BEGIN(ST_DOC_COMMENT); break;
yymore(); }
} }
<ST_COMMENT,ST_DOC_COMMENT>{NULL} { if (YYCURSOR < YYLIMIT) {
zend_error(E_COMPILE_WARNING, "Unterminated comment starting line %d", CG(zend_lineno)); YYCURSOR++;
return 0; } else {
} zend_error(E_COMPILE_WARNING, "Unterminated comment starting line %d", CG(zend_lineno));
}
<ST_IN_SCRIPTING>"/*" { yyleng = YYCURSOR - SCNG(yy_text);
BEGIN(ST_COMMENT);
yymore();
}
<ST_COMMENT,ST_DOC_COMMENT>[^*]+ {
yymore();
}
<ST_DOC_COMMENT>"*/" {
CG(doc_comment) = estrndup(yytext, yyleng);
CG(doc_comment_len) = yyleng;
HANDLE_NEWLINES(yytext, yyleng); HANDLE_NEWLINES(yytext, yyleng);
BEGIN(ST_IN_SCRIPTING);
return T_DOC_COMMENT;
}
<ST_COMMENT>"*/" { if (doc_com) {
HANDLE_NEWLINES(yytext, yyleng); CG(doc_comment) = estrndup(yytext, yyleng);
BEGIN(ST_IN_SCRIPTING); CG(doc_comment_len) = yyleng;
return T_DOC_COMMENT;
}
return T_COMMENT; return T_COMMENT;
} }
<ST_COMMENT,ST_DOC_COMMENT>"*" {
yymore();
}
<ST_IN_SCRIPTING>("?>"|"</script"{WHITESPACE}*">"){NEWLINE}? { <ST_IN_SCRIPTING>("?>"|"</script"{WHITESPACE}*">"){NEWLINE}? {
zendlval->value.str.val = yytext; /* no copying - intentional */ zendlval->value.str.val = yytext; /* no copying - intentional */
zendlval->value.str.len = yyleng; zendlval->value.str.len = yyleng;
@ -1810,21 +1767,31 @@ inline_char_handler:
} }
/* ("{"*|"$"*) handles { or $ at the end of a string (or the entire contents) <ST_IN_SCRIPTING>b?['] {
*/
<ST_IN_SCRIPTING>(b?["]{DOUBLE_QUOTES_CHARS}*("{"*|"$"*)["]) {
int bprefix = (yytext[0] != '"') ? 1 : 0;
zend_scan_escape_string(zendlval, yytext+bprefix+1, yyleng-bprefix-2, '"' TSRMLS_CC);
return T_CONSTANT_ENCAPSED_STRING;
}
<ST_IN_SCRIPTING>(b?[']([^'\\]|("\\"{ANY_CHAR}))*[']) {
register char *s, *t; register char *s, *t;
char *end; char *end;
int bprefix = (yytext[0] != '\'') ? 1 : 0; int bprefix = (yytext[0] != '\'') ? 1 : 0;
while (1) {
if (YYCURSOR < YYLIMIT) {
if (*YYCURSOR == '\'') {
YYCURSOR++;
yyleng = YYCURSOR - SCNG(yy_text);
break;
} else if (*YYCURSOR++ == '\\' && YYCURSOR < YYLIMIT) {
YYCURSOR++;
}
} else {
yyleng = YYLIMIT - SCNG(yy_text);
/* Unclosed single quotes; treat similar to double quotes, but without a separate token
* for ' (unrecognized by parser), instead of old flex fallback to "Unexpected character..."
* rule, which continued in ST_IN_SCRIPTING state after the quote */
return T_ENCAPSED_AND_WHITESPACE;
}
}
zendlval->value.str.val = estrndup(yytext+bprefix+1, yyleng-bprefix-2); zendlval->value.str.val = estrndup(yytext+bprefix+1, yyleng-bprefix-2);
zendlval->value.str.len = yyleng-bprefix-2; zendlval->value.str.len = yyleng-bprefix-2;
zendlval->type = IS_STRING; zendlval->type = IS_STRING;
@ -1872,6 +1839,42 @@ inline_char_handler:
<ST_IN_SCRIPTING>b?["] { <ST_IN_SCRIPTING>b?["] {
int bprefix = (yytext[0] != '"') ? 1 : 0;
while (YYCURSOR < YYLIMIT) {
switch (*YYCURSOR++) {
case '"':
yyleng = YYCURSOR - SCNG(yy_text);
zend_scan_escape_string(zendlval, yytext+bprefix+1, yyleng-bprefix-2, '"' TSRMLS_CC);
return T_CONSTANT_ENCAPSED_STRING;
case '$':
if (IS_LABEL_START(*YYCURSOR) || *YYCURSOR == '{') {
break;
}
continue;
case '{':
if (*YYCURSOR == '$') {
break;
}
continue;
case '\\':
if (YYCURSOR < YYLIMIT) {
YYCURSOR++;
}
/* fall through */
default:
continue;
}
YYCURSOR--;
break;
}
/* Remember how much was scanned to save rescanning */
SET_DOUBLE_QUOTES_SCANNED_LENGTH(YYCURSOR - SCNG(yy_text) - yyleng);
YYCURSOR = SCNG(yy_text) + yyleng;
BEGIN(ST_DOUBLE_QUOTES); BEGIN(ST_DOUBLE_QUOTES);
return '"'; return '"';
} }
@ -1911,7 +1914,7 @@ inline_char_handler:
/* Check for ending label on the next line */ /* Check for ending label on the next line */
if (CG(heredoc_len) < YYLIMIT - YYCURSOR && !memcmp(YYCURSOR, s, CG(heredoc_len))) { if (CG(heredoc_len) < YYLIMIT - YYCURSOR && !memcmp(YYCURSOR, s, CG(heredoc_len))) {
unsigned char *end = YYCURSOR + CG(heredoc_len); YYCTYPE *end = YYCURSOR + CG(heredoc_len);
if (*end == ';') { if (*end == ';') {
end++; end++;
@ -1932,49 +1935,6 @@ inline_char_handler:
} }
/* Match everything up to and including a possible ending label, so if the label
* doesn't match, it's kept with the rest of the string
*
* {HEREDOC_NEWLINE}+ handles the case of more than one newline sequence that
* couldn't be matched with HEREDOC_CHARS, because of the following label
*/
<ST_HEREDOC>{HEREDOC_CHARS}*{HEREDOC_NEWLINE}+{LABEL}";"?[\n\r] {
char *end = yytext + yyleng - 1;
if (end[-1] == ';') {
end--;
yyleng--;
}
if (yyleng > CG(heredoc_len) && !memcmp(end - CG(heredoc_len), CG(heredoc), CG(heredoc_len))) {
int len = yyleng - CG(heredoc_len) - 2; /* 2 for newline before and after label */
/* May have matched fooLABEL; make sure there's a newline before it */
if (yytext[len] != '\n') {
if (yytext[len] != '\r') {
yyless(yyleng - 1);
yymore();
}
} else if (len > 0 && yytext[len - 1] == '\r') {
len--; /* Windows newline */
}
/* Go back before label, to match in ST_END_HEREDOC state. yytext will include
* newline before label, for zend_highlight/strip, tokenizer, etc. */
yyless(yyleng - CG(heredoc_len) - 1); /* 1 for newline after label */
CG(increment_lineno) = 1; /* For newline before label */
BEGIN(ST_END_HEREDOC);
zend_scan_escape_string(zendlval, yytext, len, 0 TSRMLS_CC);
return T_ENCAPSED_AND_WHITESPACE;
} else {
/* Go back to end of label, so the next match works correctly in case of
* a variable or another label at the beginning of the next line */
yyless(yyleng - 1);
yymore();
}
}
<ST_END_HEREDOC>{ANY_CHAR} { <ST_END_HEREDOC>{ANY_CHAR} {
YYCURSOR += CG(heredoc_len) - 1; YYCURSOR += CG(heredoc_len) - 1;
yyleng = CG(heredoc_len); yyleng = CG(heredoc_len);
@ -1988,118 +1948,250 @@ inline_char_handler:
} }
/* Will only match when $ follows: "{$" */ <ST_DOUBLE_QUOTES,ST_BACKQUOTE,ST_HEREDOC>"{$" {
<ST_DOUBLE_QUOTES,ST_BACKQUOTE,ST_HEREDOC>"{" {
zendlval->value.lval = (long) '{'; zendlval->value.lval = (long) '{';
yy_push_state(ST_IN_SCRIPTING TSRMLS_CC); yy_push_state(ST_IN_SCRIPTING TSRMLS_CC);
yyless(1);
return T_CURLY_OPEN; return T_CURLY_OPEN;
} }
<ST_DOUBLE_QUOTES>{DOUBLE_QUOTES_CHARS}+ {
zend_scan_escape_string(zendlval, yytext, yyleng, '"' TSRMLS_CC);
return T_ENCAPSED_AND_WHITESPACE;
}
/* "{"{2,}|"$"{2,} handles { before "{$" or literal $ before a variable or "${"
* (("{"+|"$"+)["]) handles { or $ at the end of a string
*
* Same for backquotes and heredocs, except the second case doesn't apply to
* heredocs. yyless(yyleng - 1) is used to correct taking one character too many
*/
<ST_DOUBLE_QUOTES>{DOUBLE_QUOTES_CHARS}*("{"{2,}|"$"{2,}|(("{"+|"$"+)["])) {
yyless(yyleng - 1);
zend_scan_escape_string(zendlval, yytext, yyleng, '"' TSRMLS_CC);
return T_ENCAPSED_AND_WHITESPACE;
}
<ST_BACKQUOTE>{BACKQUOTE_CHARS}+ {
zend_scan_escape_string(zendlval, yytext, yyleng, '`' TSRMLS_CC);
return T_ENCAPSED_AND_WHITESPACE;
}
<ST_BACKQUOTE>{BACKQUOTE_CHARS}*("{"{2,}|"$"{2,}|(("{"+|"$"+)[`])) {
yyless(yyleng - 1);
zend_scan_escape_string(zendlval, yytext, yyleng, '`' TSRMLS_CC);
return T_ENCAPSED_AND_WHITESPACE;
}
/* ({HEREDOC_NEWLINE}+({LABEL}";"?)?)? handles the possible case of newline
* sequences, possibly followed by a label, that couldn't be matched with
* HEREDOC_CHARS because of a following variable or "{$"
*
* This doesn't affect real ending labels, as they are followed by a newline,
* which will result in a longer match for the correct rule if present
*/
<ST_HEREDOC>{HEREDOC_CHARS}*({HEREDOC_NEWLINE}+({LABEL}";"?)?)? {
zend_scan_escape_string(zendlval, yytext, yyleng, 0 TSRMLS_CC);
return T_ENCAPSED_AND_WHITESPACE;
}
<ST_HEREDOC>{HEREDOC_CHARS}*({HEREDOC_NEWLINE}+({LABEL}";"?)?)?("{"{2,}|"$"{2,}) {
yyless(yyleng - 1);
zend_scan_escape_string(zendlval, yytext, yyleng, 0 TSRMLS_CC);
return T_ENCAPSED_AND_WHITESPACE;
}
<ST_NOWDOC>({NOWDOC_CHARS}+{NEWLINE}+|{NEWLINE}+){LABEL}";"?[\n\r] {
char *end = yytext + yyleng - 1;
if (end[-1] == ';') {
end--;
yyleng--;
}
if (yyleng > CG(heredoc_len) && !memcmp(end - CG(heredoc_len), CG(heredoc), CG(heredoc_len))) {
int len = yyleng - CG(heredoc_len) - 2; /* 2 for newline before and after label */
/* May have matched fooLABEL; make sure there's a newline before it */
if (yytext[len] != '\n') {
if (yytext[len] != '\r') {
yyless(yyleng - 1);
yymore();
}
} else if (len > 0 && yytext[len - 1] == '\r') {
len--; /* Windows newline */
}
/* Go back before label, to match in ST_END_HEREDOC state. yytext will include
* newline before label, for zend_highlight/strip, tokenizer, etc. */
yyless(yyleng - CG(heredoc_len) - 1); /* 1 for newline after label */
CG(increment_lineno) = 1; /* For newline before label */
BEGIN(ST_END_HEREDOC);
zend_copy_value(zendlval, yytext, len);
zendlval->type = IS_STRING;
HANDLE_NEWLINES(yytext, len);
return T_ENCAPSED_AND_WHITESPACE;
} else {
/* Go back to end of label, so the next match works correctly in case of
* another label at the beginning of the next line */
yyless(yyleng - 1);
yymore();
}
}
<ST_DOUBLE_QUOTES>["] { <ST_DOUBLE_QUOTES>["] {
BEGIN(ST_IN_SCRIPTING); BEGIN(ST_IN_SCRIPTING);
return '"'; return '"';
} }
<ST_BACKQUOTE>[`] { <ST_BACKQUOTE>[`] {
BEGIN(ST_IN_SCRIPTING); BEGIN(ST_IN_SCRIPTING);
return '`'; return '`';
} }
<*>{NULL} { return 0; } /* EOF */
/* Manually scans literal text inside a double-quoted string. The rule matches one character and
 * then advances YYCURSOR by hand until the closing quote, a variable ("$" followed by a label
 * start or "{"), or "{$" is reached. The scanned text is handed to zend_scan_escape_string()
 * and returned as T_ENCAPSED_AND_WHITESPACE. */
<ST_DOUBLE_QUOTES>{ANY_CHAR} {
/* A non-zero saved length means this text was already measured and stored with
 * SET_DOUBLE_QUOTES_SCANNED_LENGTH(), so it does not have to be rescanned. One character has
 * already been consumed by the {ANY_CHAR} match, hence the "- 1". */
if (GET_DOUBLE_QUOTES_SCANNED_LENGTH()) {
YYCURSOR += GET_DOUBLE_QUOTES_SCANNED_LENGTH() - 1;
SET_DOUBLE_QUOTES_SCANNED_LENGTH(0);
goto double_quotes_scan_done;
}
/* The matched character lies past YYLIMIT.
 * NOTE(review): presumably this is the end-of-input case; 0 is returned instead of a token. */
if (YYCURSOR > YYLIMIT) {
return 0;
}
/* The matched character was a backslash: the character after it is escaped, so skip it
 * before the loop can treat it as a terminator. */
if (yytext[0] == '\\' && YYCURSOR < YYLIMIT) {
YYCURSOR++;
}
while (YYCURSOR < YYLIMIT) {
/* "break" inside the switch means a terminator was found; "continue" keeps scanning. */
switch (*YYCURSOR++) {
case '"':
break;
case '$':
/* Only "$" followed by a label start or "{" ends the literal text; any other "$" is literal. */
if (IS_LABEL_START(*YYCURSOR) || *YYCURSOR == '{') {
break;
}
continue;
case '{':
/* Only "{$" ends the literal text; a "{" followed by anything else is literal. */
if (*YYCURSOR == '$') {
break;
}
continue;
case '\\':
/* Skip the escaped character, if there is one. */
if (YYCURSOR < YYLIMIT) {
YYCURSOR++;
}
/* fall through */
default:
continue;
}
/* Reached only via "break" from the switch: step back so the terminating character is
 * left for the next match and is not part of this token. */
YYCURSOR--;
break;
}
double_quotes_scan_done:
yyleng = YYCURSOR - SCNG(yy_text);
zend_scan_escape_string(zendlval, yytext, yyleng, '"' TSRMLS_CC);
return T_ENCAPSED_AND_WHITESPACE;
}
/* Manually scans literal text inside a backquoted string. The rule matches one character and
 * then advances YYCURSOR by hand until the closing backquote, a variable ("$" followed by a
 * label start or "{"), or "{$" is reached. The scanned text is handed to
 * zend_scan_escape_string() and returned as T_ENCAPSED_AND_WHITESPACE. */
<ST_BACKQUOTE>{ANY_CHAR} {
/* The matched character lies past YYLIMIT.
 * NOTE(review): presumably this is the end-of-input case; 0 is returned instead of a token. */
if (YYCURSOR > YYLIMIT) {
return 0;
}
/* The matched character was a backslash: the character after it is escaped, so skip it
 * before the loop can treat it as a terminator. */
if (yytext[0] == '\\' && YYCURSOR < YYLIMIT) {
YYCURSOR++;
}
while (YYCURSOR < YYLIMIT) {
/* "break" inside the switch means a terminator was found; "continue" keeps scanning. */
switch (*YYCURSOR++) {
case '`':
break;
case '$':
/* Only "$" followed by a label start or "{" ends the literal text; any other "$" is literal. */
if (IS_LABEL_START(*YYCURSOR) || *YYCURSOR == '{') {
break;
}
continue;
case '{':
/* Only "{$" ends the literal text; a "{" followed by anything else is literal. */
if (*YYCURSOR == '$') {
break;
}
continue;
case '\\':
/* Skip the escaped character, if there is one. */
if (YYCURSOR < YYLIMIT) {
YYCURSOR++;
}
/* fall through */
default:
continue;
}
/* Reached only via "break" from the switch: step back so the terminating character is
 * left for the next match and is not part of this token. */
YYCURSOR--;
break;
}
yyleng = YYCURSOR - SCNG(yy_text);
zend_scan_escape_string(zendlval, yytext, yyleng, '`' TSRMLS_CC);
return T_ENCAPSED_AND_WHITESPACE;
}
/* Manually scans literal text inside a heredoc. Scanning stops at a variable ("$" followed by a
 * label start or "{"), at "{$", or at a newline that is followed by the ending label
 * (CG(heredoc), optionally followed by ";", then a newline). In the ending-label case the
 * scanner switches to ST_END_HEREDOC. The scanned text is handed to zend_scan_escape_string()
 * and returned as T_ENCAPSED_AND_WHITESPACE. */
<ST_HEREDOC>{ANY_CHAR} {
/* Length of the newline sequence directly before the ending label (0 when no label was found).
 * It stays in yytext/yyleng but is excluded from the string value. */
int newline = 0;
/* The matched character lies past YYLIMIT.
 * NOTE(review): presumably this is the end-of-input case; 0 is returned instead of a token. */
if (YYCURSOR > YYLIMIT) {
return 0;
}
/* Un-consume the matched character so the loop below inspects it too; it may itself be a
 * newline, "$", "{" or backslash. */
YYCURSOR--;
while (YYCURSOR < YYLIMIT) {
/* "break" inside the switch means a terminator was found; "continue" keeps scanning. */
switch (*YYCURSOR++) {
case '\r':
/* Treat "\r\n" as a single newline sequence. */
if (*YYCURSOR == '\n') {
YYCURSOR++;
}
/* fall through */
case '\n':
/* Check for ending label on the next line */
/* The length test requires at least one more character after the label, so *end below
 * is read from within the input. */
if (IS_LABEL_START(*YYCURSOR) && CG(heredoc_len) < YYLIMIT - YYCURSOR && !memcmp(YYCURSOR, CG(heredoc), CG(heredoc_len))) {
YYCTYPE *end = YYCURSOR + CG(heredoc_len);
/* An optional ";" may follow the label. */
if (*end == ';') {
end++;
}
/* Only a label that is itself followed by a newline terminates the heredoc. */
if (*end == '\n' || *end == '\r') {
/* newline before label will be subtracted from returned text, but
* yyleng/yytext will include it, for zend_highlight/strip, tokenizer, etc. */
if (YYCURSOR[-2] == '\r' && YYCURSOR[-1] == '\n') {
newline = 2; /* Windows newline */
} else {
newline = 1;
}
CG(increment_lineno) = 1; /* For newline before label */
BEGIN(ST_END_HEREDOC);
goto heredoc_scan_done;
}
}
continue;
case '$':
/* Only "$" followed by a label start or "{" ends the literal text; any other "$" is literal. */
if (IS_LABEL_START(*YYCURSOR) || *YYCURSOR == '{') {
break;
}
continue;
case '{':
/* Only "{$" ends the literal text; a "{" followed by anything else is literal. */
if (*YYCURSOR == '$') {
break;
}
continue;
case '\\':
/* Skip the escaped character, but never a newline: the newline must still reach the
 * ending-label check above. */
if (YYCURSOR < YYLIMIT && *YYCURSOR != '\n' && *YYCURSOR != '\r') {
YYCURSOR++;
}
/* fall through */
default:
continue;
}
/* Reached only via "break" from the switch: step back so the terminating character is
 * left for the next match and is not part of this token. */
YYCURSOR--;
break;
}
heredoc_scan_done:
yyleng = YYCURSOR - SCNG(yy_text);
/* The value excludes the newline before the ending label; yyleng still includes it. */
zend_scan_escape_string(zendlval, yytext, yyleng - newline, 0 TSRMLS_CC);
return T_ENCAPSED_AND_WHITESPACE;
}
/* Manually scans the body of a nowdoc. No variables or escape sequences are recognised here.
 * Scanning stops only at a newline that is followed by the ending label (CG(heredoc),
 * optionally followed by ";", then a newline), or when YYLIMIT is reached. In the ending-label
 * case the scanner switches to ST_END_HEREDOC. The text is copied verbatim with
 * zend_copy_value() and returned as T_ENCAPSED_AND_WHITESPACE. */
<ST_NOWDOC>{ANY_CHAR} {
/* Length of the newline sequence directly before the ending label (0 when no label was found).
 * It stays in yytext/yyleng but is excluded from the string value. */
int newline = 0;
/* The matched character lies past YYLIMIT.
 * NOTE(review): presumably this is the end-of-input case; 0 is returned instead of a token. */
if (YYCURSOR > YYLIMIT) {
return 0;
}
/* Un-consume the matched character so the loop below inspects it too; it may itself be
 * the newline that precedes the ending label. */
YYCURSOR--;
while (YYCURSOR < YYLIMIT) {
switch (*YYCURSOR++) {
case '\r':
/* Treat "\r\n" as a single newline sequence. */
if (*YYCURSOR == '\n') {
YYCURSOR++;
}
/* fall through */
case '\n':
/* Check for ending label on the next line */
/* The length test requires at least one more character after the label, so *end below
 * is read from within the input. */
if (IS_LABEL_START(*YYCURSOR) && CG(heredoc_len) < YYLIMIT - YYCURSOR && !memcmp(YYCURSOR, CG(heredoc), CG(heredoc_len))) {
YYCTYPE *end = YYCURSOR + CG(heredoc_len);
/* An optional ";" may follow the label. */
if (*end == ';') {
end++;
}
/* Only a label that is itself followed by a newline terminates the nowdoc. */
if (*end == '\n' || *end == '\r') {
/* newline before label will be subtracted from returned text, but
* yyleng/yytext will include it, for zend_highlight/strip, tokenizer, etc. */
if (YYCURSOR[-2] == '\r' && YYCURSOR[-1] == '\n') {
newline = 2; /* Windows newline */
} else {
newline = 1;
}
CG(increment_lineno) = 1; /* For newline before label */
BEGIN(ST_END_HEREDOC);
goto nowdoc_scan_done;
}
}
/* fall through */
default:
continue;
}
}
nowdoc_scan_done:
yyleng = YYCURSOR - SCNG(yy_text);
/* The copied value excludes the newline before the ending label; yyleng still includes it. */
zend_copy_value(zendlval, yytext, yyleng - newline);
zendlval->type = IS_STRING;
HANDLE_NEWLINES(yytext, yyleng - newline);
return T_ENCAPSED_AND_WHITESPACE;
}
<ST_IN_SCRIPTING,ST_VAR_OFFSET>{ANY_CHAR} { <ST_IN_SCRIPTING,ST_VAR_OFFSET>{ANY_CHAR} {
if (YYCURSOR > YYLIMIT) {
return 0;
}
zend_error(E_COMPILE_WARNING,"Unexpected character in input: '%c' (ASCII=%d) state=%d", yytext[0], yytext[0], YYSTATE); zend_error(E_COMPILE_WARNING,"Unexpected character in input: '%c' (ASCII=%d) state=%d", yytext[0], yytext[0], YYSTATE);
goto restart; goto restart;
} }

View file

@ -50,7 +50,7 @@ bool(false)
</span> </span>
</code>bool(true) </code>bool(true)
<code><span style="color: #000000"> <code><span style="color: #000000">
<span style="color: #0000BB">&lt;?php&nbsp;</span><span style="color: #007700">echo&nbsp;</span><span style="color: #FF9900">"test&nbsp;?&gt;</span> <span style="color: #0000BB">&lt;?php&nbsp;</span><span style="color: #007700">echo&nbsp;</span><span style="color: #DD0000">"test&nbsp;?&gt;</span>
</span> </span>
</code>bool(true) </code>bool(true)
<code><span style="color: #000000"> <code><span style="color: #000000">