Don't include trailing newline in comment token

Don't include a trailing newline in T_COMMENT tokens; instead, leave
it for a following T_WHITESPACE token. The newline does not logically
belong to the comment, and including it makes for an ugly special
case, as other tokens do not include trailing newlines.

Whitespace-sensitive tooling will want to either forward or backward
emulate this change.

Closes GH-5182.
This commit is contained in:
Nikita Popov 2020-02-15 17:54:02 +01:00
parent e12b9df05d
commit 47cf18ba4e
4 changed files with 149 additions and 127 deletions

View file

@ -470,6 +470,12 @@ PHP 8.0 UPGRADE NOTES
. The $use_include_path parameter, which was not used internally, has been
removed from tidy_repair_string().
- Tokenizer:
. T_COMMENT tokens will no longer include a trailing newline. The newline will
instead be part of a following T_WHITESPACE token. Note that T_COMMENT is not
always followed by whitespace; it may also be followed by T_CLOSE_TAG or
end-of-file.
- XML:
. xml_parser_create(_ns) will now return an XmlParser object rather than a
resource. Return value checks using is_resource() should be replaced with

View file

@ -2293,12 +2293,8 @@ inline_char_handler:
while (YYCURSOR < YYLIMIT) {
switch (*YYCURSOR++) {
case '\r':
if (*YYCURSOR == '\n') {
YYCURSOR++;
}
/* fall through */
case '\n':
CG(zend_lineno)++;
YYCURSOR--;
break;
case '?':
if (*YYCURSOR == '>') {

View file

@ -22,50 +22,51 @@ foreach ($tokens as $i => $token) {
}
// is() variations
$token = $tokens[5];
echo "\nSuccess:\n";
var_dump($tokens[4]->is(T_FUNCTION));
var_dump($tokens[4]->is('function'));
var_dump($tokens[4]->is(['class', T_FUNCTION]));
var_dump($tokens[4]->is([T_CLASS, 'function']));
var_dump($token->is(T_FUNCTION));
var_dump($token->is('function'));
var_dump($token->is(['class', T_FUNCTION]));
var_dump($token->is([T_CLASS, 'function']));
echo "\nFailure:\n";
var_dump($tokens[4]->is(T_CLASS));
var_dump($tokens[4]->is('class'));
var_dump($tokens[4]->is(['class', T_TRAIT]));
var_dump($tokens[4]->is([T_CLASS, 'trait']));
var_dump($token->is(T_CLASS));
var_dump($token->is('class'));
var_dump($token->is(['class', T_TRAIT]));
var_dump($token->is([T_CLASS, 'trait']));
echo "\nError:\n";
try {
$tokens[4]->is(3.141);
$token->is(3.141);
} catch (TypeError $e) {
echo $e->getMessage(), "\n";
}
try {
$tokens[4]->is([3.141]);
$token->is([3.141]);
} catch (TypeError $e) {
echo $e->getMessage(), "\n";
}
unset($tokens[4]->id);
unset($tokens[4]->text);
unset($token->id);
unset($token->text);
try {
$tokens[4]->is(T_FUNCTION);
$token->is(T_FUNCTION);
} catch (Error $e) {
echo $e->getMessage(), "\n";
}
try {
$tokens[4]->is('function');
$token->is('function');
} catch (Error $e) {
echo $e->getMessage(), "\n";
}
try {
$tokens[4]->is([T_FUNCTION]);
$token->is([T_FUNCTION]);
} catch (Error $e) {
echo $e->getMessage(), "\n";
}
try {
$tokens[4]->is(['function']);
$token->is(['function']);
} catch (Error $e) {
echo $e->getMessage(), "\n";
}
@ -78,22 +79,23 @@ var_dump($token->getTokenName());
--EXPECT--
[ 0] T_OPEN_TAG ignorable
[ 1] T_COMMENT ignorable
[ 2] T_DOC_COMMENT ignorable
[ 3] T_WHITESPACE ignorable
[ 4] T_FUNCTION meaningful
[ 5] T_WHITESPACE ignorable
[ 6] T_STRING meaningful
[ 7] ( meaningful
[ 8] ) meaningful
[ 9] T_WHITESPACE ignorable
[10] { meaningful
[11] T_WHITESPACE ignorable
[12] T_ECHO meaningful
[13] T_WHITESPACE ignorable
[14] T_CONSTANT_ENCAPSED_STRING meaningful
[15] ; meaningful
[16] T_WHITESPACE ignorable
[17] } meaningful
[ 2] T_WHITESPACE ignorable
[ 3] T_DOC_COMMENT ignorable
[ 4] T_WHITESPACE ignorable
[ 5] T_FUNCTION meaningful
[ 6] T_WHITESPACE ignorable
[ 7] T_STRING meaningful
[ 8] ( meaningful
[ 9] ) meaningful
[10] T_WHITESPACE ignorable
[11] { meaningful
[12] T_WHITESPACE ignorable
[13] T_ECHO meaningful
[14] T_WHITESPACE ignorable
[15] T_CONSTANT_ENCAPSED_STRING meaningful
[16] ; meaningful
[17] T_WHITESPACE ignorable
[18] } meaningful
Success:
bool(true)

View file

@ -40,7 +40,7 @@ echo "Done"
?>
--EXPECTF--
*** Testing token_get_all() : 'source' string with different comments ***
array(50) {
array(52) {
[0]=>
array(3) {
[0]=>
@ -78,8 +78,7 @@ array(50) {
[0]=>
int(%d)
[1]=>
string(13) "// int value
"
string(12) "// int value"
[2]=>
int(6)
}
@ -88,22 +87,21 @@ array(50) {
[0]=>
int(%d)
[1]=>
string(2) "$a"
string(1) "
"
[2]=>
int(7)
int(6)
}
[5]=>
array(3) {
[0]=>
int(%d)
[1]=>
string(1) " "
string(2) "$a"
[2]=>
int(7)
}
[6]=>
string(1) "="
[7]=>
array(3) {
[0]=>
int(%d)
@ -112,7 +110,18 @@ array(50) {
[2]=>
int(7)
}
[7]=>
string(1) "="
[8]=>
array(3) {
[0]=>
int(%d)
[1]=>
string(1) " "
[2]=>
int(7)
}
[9]=>
array(3) {
[0]=>
int(%d)
@ -121,9 +130,9 @@ array(50) {
[2]=>
int(7)
}
[9]=>
string(1) ";"
[10]=>
string(1) ";"
[11]=>
array(3) {
[0]=>
int(%d)
@ -133,7 +142,7 @@ array(50) {
[2]=>
int(7)
}
[11]=>
[12]=>
array(3) {
[0]=>
int(%d)
@ -142,18 +151,7 @@ array(50) {
[2]=>
int(8)
}
[12]=>
array(3) {
[0]=>
int(%d)
[1]=>
string(1) " "
[2]=>
int(8)
}
[13]=>
string(1) "="
[14]=>
array(3) {
[0]=>
int(%d)
@ -162,7 +160,18 @@ array(50) {
[2]=>
int(8)
}
[14]=>
string(1) "="
[15]=>
array(3) {
[0]=>
int(%d)
[1]=>
string(1) " "
[2]=>
int(8)
}
[16]=>
array(3) {
[0]=>
int(%d)
@ -171,9 +180,9 @@ array(50) {
[2]=>
int(8)
}
[16]=>
string(1) ";"
[17]=>
string(1) ";"
[18]=>
array(3) {
[0]=>
int(%d)
@ -183,7 +192,7 @@ array(50) {
[2]=>
int(8)
}
[18]=>
[19]=>
array(3) {
[0]=>
int(%d)
@ -192,18 +201,7 @@ array(50) {
[2]=>
int(9)
}
[19]=>
array(3) {
[0]=>
int(%d)
[1]=>
string(1) " "
[2]=>
int(9)
}
[20]=>
string(1) "="
[21]=>
array(3) {
[0]=>
int(%d)
@ -212,7 +210,18 @@ array(50) {
[2]=>
int(9)
}
[21]=>
string(1) "="
[22]=>
array(3) {
[0]=>
int(%d)
[1]=>
string(1) " "
[2]=>
int(9)
}
[23]=>
array(3) {
[0]=>
int(%d)
@ -221,9 +230,9 @@ array(50) {
[2]=>
int(9)
}
[23]=>
string(1) ";"
[24]=>
string(1) ";"
[25]=>
array(3) {
[0]=>
int(%d)
@ -232,27 +241,27 @@ array(50) {
[2]=>
int(9)
}
[25]=>
array(3) {
[0]=>
int(%d)
[1]=>
string(14) "// bool value
"
[2]=>
int(9)
}
[26]=>
array(3) {
[0]=>
int(%d)
[1]=>
string(1) "
"
string(13) "// bool value"
[2]=>
int(10)
int(9)
}
[27]=>
array(3) {
[0]=>
int(%d)
[1]=>
string(2) "
"
[2]=>
int(9)
}
[28]=>
array(3) {
[0]=>
int(%d)
@ -264,7 +273,7 @@ array(50) {
[2]=>
int(11)
}
[28]=>
[29]=>
array(3) {
[0]=>
int(%d)
@ -274,7 +283,7 @@ array(50) {
[2]=>
int(14)
}
[29]=>
[30]=>
array(3) {
[0]=>
int(%d)
@ -283,18 +292,7 @@ array(50) {
[2]=>
int(15)
}
[30]=>
array(3) {
[0]=>
int(%d)
[1]=>
string(1) " "
[2]=>
int(15)
}
[31]=>
string(1) "="
[32]=>
array(3) {
[0]=>
int(%d)
@ -303,12 +301,14 @@ array(50) {
[2]=>
int(15)
}
[32]=>
string(1) "="
[33]=>
array(3) {
[0]=>
int(%d)
[1]=>
string(2) "$a"
string(1) " "
[2]=>
int(15)
}
@ -317,13 +317,11 @@ array(50) {
[0]=>
int(%d)
[1]=>
string(1) " "
string(2) "$a"
[2]=>
int(15)
}
[35]=>
string(1) "+"
[36]=>
array(3) {
[0]=>
int(%d)
@ -332,7 +330,18 @@ array(50) {
[2]=>
int(15)
}
[36]=>
string(1) "+"
[37]=>
array(3) {
[0]=>
int(%d)
[1]=>
string(1) " "
[2]=>
int(15)
}
[38]=>
array(3) {
[0]=>
int(%d)
@ -341,9 +350,9 @@ array(50) {
[2]=>
int(15)
}
[38]=>
string(1) ";"
[39]=>
string(1) ";"
[40]=>
array(3) {
[0]=>
int(%d)
@ -353,7 +362,7 @@ array(50) {
[2]=>
int(15)
}
[40]=>
[41]=>
array(3) {
[0]=>
int(%d)
@ -362,9 +371,9 @@ array(50) {
[2]=>
int(16)
}
[41]=>
string(1) "("
[42]=>
string(1) "("
[43]=>
array(3) {
[0]=>
int(%d)
@ -373,11 +382,11 @@ array(50) {
[2]=>
int(16)
}
[43]=>
string(1) ")"
[44]=>
string(1) ";"
string(1) ")"
[45]=>
string(1) ";"
[46]=>
array(3) {
[0]=>
int(%d)
@ -386,37 +395,46 @@ array(50) {
[2]=>
int(16)
}
[46]=>
[47]=>
array(3) {
[0]=>
int(%d)
[1]=>
string(20) "# expected: int(%d)
string(19) "# expected: int(30)"
[2]=>
int(16)
}
[48]=>
array(3) {
[0]=>
int(%d)
[1]=>
string(2) "
"
[2]=>
int(16)
}
[47]=>
[49]=>
array(3) {
[0]=>
int(%d)
[1]=>
string(16) "# end of program"
[2]=>
int(18)
}
[50]=>
array(3) {
[0]=>
int(%d)
[1]=>
string(1) "
"
[2]=>
int(17)
}
[48]=>
array(3) {
[0]=>
int(%d)
[1]=>
string(17) "# end of program
"
[2]=>
int(18)
}
[49]=>
[51]=>
array(3) {
[0]=>
int(%d)