Fix #77423: parse_url() will deliver a wrong host to user

To avoid that `parse_url()` returns an erroneous host, which would be
valid for `FILTER_VALIDATE_URL`, we make sure that only userinfo which
is valid according to RFC 3986 is treated as such.

For consistency with the existing url parsing code, we use ctype
functions, although that is not necessarily correct.
This commit is contained in:
Christoph M. Becker 2020-05-13 09:36:52 +02:00 committed by Stanislav Malyshev
parent 9bf43c4590
commit 5174de7cd3
7 changed files with 60 additions and 16 deletions

View file

@ -589,15 +589,13 @@ $sample_urls = array (
string(16) "some_page_ref123"
}
--> http://secret@hideout@www.php.net:80/index.php?test=1&test2=char&test3=mixesCI#some_page_ref123: array(7) {
--> http://secret@hideout@www.php.net:80/index.php?test=1&test2=char&test3=mixesCI#some_page_ref123: array(6) {
["scheme"]=>
string(4) "http"
["host"]=>
string(11) "www.php.net"
string(26) "secret@hideout@www.php.net"
["port"]=>
int(80)
["user"]=>
string(14) "secret@hideout"
["path"]=>
string(10) "/index.php"
["query"]=>

View file

@ -0,0 +1,30 @@
--TEST--
Bug #77423 (parse_url() will deliver a wrong host to user)
--FILE--
<?php
$urls = array(
"http://php.net\@aliyun.com/aaa.do",
"https://example.com\uFF03@bing.com",
);
foreach ($urls as $url) {
var_dump(filter_var($url, FILTER_VALIDATE_URL));
var_dump(parse_url($url));
}
?>
--EXPECT--
bool(false)
array(3) {
["scheme"]=>
string(4) "http"
["host"]=>
string(19) "php.net\@aliyun.com"
["path"]=>
string(7) "/aaa.do"
}
bool(false)
array(2) {
["scheme"]=>
string(5) "https"
["host"]=>
string(26) "example.com\uFF03@bing.com"
}

View file

@ -514,15 +514,13 @@ echo "Done";
string(16) "some_page_ref123"
}
--> http://secret@hideout@www.php.net:80/index.php?test=1&test2=char&test3=mixesCI#some_page_ref123: array(7) {
--> http://secret@hideout@www.php.net:80/index.php?test=1&test2=char&test3=mixesCI#some_page_ref123: array(6) {
["scheme"]=>
string(4) "http"
["host"]=>
string(11) "www.php.net"
string(26) "secret@hideout@www.php.net"
["port"]=>
int(80)
["user"]=>
string(14) "secret@hideout"
["path"]=>
string(10) "/index.php"
["query"]=>

View file

@ -62,7 +62,7 @@ echo "Done";
--> http://secret:@www.php.net/index.php?test=1&test2=char&test3=mixesCI#some_page_ref123 : string(11) "www.php.net"
--> http://:hideout@www.php.net:80/index.php?test=1&test2=char&test3=mixesCI#some_page_ref123 : string(11) "www.php.net"
--> http://secret:hideout@www.php.net/index.php?test=1&test2=char&test3=mixesCI#some_page_ref123 : string(11) "www.php.net"
--> http://secret@hideout@www.php.net:80/index.php?test=1&test2=char&test3=mixesCI#some_page_ref123 : string(11) "www.php.net"
--> http://secret@hideout@www.php.net:80/index.php?test=1&test2=char&test3=mixesCI#some_page_ref123 : string(26) "secret@hideout@www.php.net"
--> http://secret:hid:out@www.php.net:80/index.php?test=1&test2=char&test3=mixesCI#some_page_ref123 : string(11) "www.php.net"
--> nntp://news.php.net : string(12) "news.php.net"
--> ftp://ftp.gnu.org/gnu/glic/glibc.tar.gz : string(11) "ftp.gnu.org"

View file

@ -62,7 +62,7 @@ echo "Done";
--> http://secret:@www.php.net/index.php?test=1&test2=char&test3=mixesCI#some_page_ref123 : string(6) "secret"
--> http://:hideout@www.php.net:80/index.php?test=1&test2=char&test3=mixesCI#some_page_ref123 : string(0) ""
--> http://secret:hideout@www.php.net/index.php?test=1&test2=char&test3=mixesCI#some_page_ref123 : string(6) "secret"
--> http://secret@hideout@www.php.net:80/index.php?test=1&test2=char&test3=mixesCI#some_page_ref123 : string(14) "secret@hideout"
--> http://secret@hideout@www.php.net:80/index.php?test=1&test2=char&test3=mixesCI#some_page_ref123 : NULL
--> http://secret:hid:out@www.php.net:80/index.php?test=1&test2=char&test3=mixesCI#some_page_ref123 : string(6) "secret"
--> nntp://news.php.net : NULL
--> ftp://ftp.gnu.org/gnu/glic/glibc.tar.gz : NULL

View file

@ -522,15 +522,13 @@ echo "Done";
string(16) "some_page_ref123"
}
--> http://secret@hideout@www.php.net:80/index.php?test=1&test2=char&test3=mixesCI#some_page_ref123: array(7) {
--> http://secret@hideout@www.php.net:80/index.php?test=1&test2=char&test3=mixesCI#some_page_ref123: array(6) {
["scheme"]=>
string(4) "http"
["host"]=>
string(11) "www.php.net"
string(26) "secret@hideout@www.php.net"
["port"]=>
int(80)
["user"]=>
string(14) "secret@hideout"
["path"]=>
string(10) "/index.php"
["query"]=>

View file

@ -92,6 +92,22 @@ static const char *binary_strcspn(const char *s, const char *e, const char *char
return e;
}
static int is_userinfo_valid(const char *str, size_t len)
{
const char *valid = "-._~!$&'()*+,;=:";
const char *p = str;
while (p - str < len) {
if (isalpha(*p) || isdigit(*p) || strchr(valid, *p)) {
p++;
} else if (*p == '%' && p - str <= len - 3 && isdigit(*(p+1)) && isxdigit(*(p+2))) {
p += 3;
} else {
return 0;
}
}
return 1;
}
/* {{{ php_url_parse */
PHPAPI php_url *php_url_parse_ex(char const *str, size_t length)
{
@ -233,13 +249,17 @@ parse_host:
ret->pass = zend_string_init(pp, (p-pp), 0);
php_replace_controlchars_ex(ZSTR_VAL(ret->pass), ZSTR_LEN(ret->pass));
} else {
ret->user = zend_string_init(s, (p-s), 0);
php_replace_controlchars_ex(ZSTR_VAL(ret->user), ZSTR_LEN(ret->user));
if (!is_userinfo_valid(s, p-s)) {
goto check_port;
}
ret->user = zend_string_init(s, (p-s), 0);
php_replace_controlchars_ex(ZSTR_VAL(ret->user), ZSTR_LEN(ret->user));
}
s = p + 1;
}
check_port:
/* check for port */
if (s < ue && *s == '[' && *(e-1) == ']') {
/* Short circuit portscan,