Implement new custom object serialization mechanism

RFC: https://wiki.php.net/rfc/custom_object_serialization
This commit is contained in:
Nikita Popov 2019-01-22 17:47:16 +01:00
parent e7e2056d1a
commit d373c11e71
8 changed files with 459 additions and 43 deletions

View file

@ -107,6 +107,7 @@ PHP 7.4 UPGRADE NOTES
This will enforce that $user->id can only be assigned integer and
$user->name can only be assigned strings. For more information see the
RFC: https://wiki.php.net/rfc/typed_properties_v2
. Added support for coalesce assign (??=) operator. For example:
$array['key'] ??= computeDefault();
@ -156,6 +157,20 @@ PHP 7.4 UPGRADE NOTES
. strip_tags() now also accepts an array of allowed tags: Instead of
strip_tags($str, '<a><p>') you can now write strip_tags($str, ['a', 'p']).
. A new mechanism for custom object serialization has been added, which
uses two new magic methods:
// Returns an array containing all the necessary state of the object.
public function __serialize(): array;
// Restores the object state from the given data array.
public function __unserialize(array $data): void;
The new serialization mechanism supersedes the Serializable interface,
which will be deprecated in the future.
RFC: https://wiki.php.net/rfc/custom_object_serialization
========================================
3. Changes in SAPI modules
========================================

View file

@ -0,0 +1,32 @@
--TEST--
__serialize() mechanism (001): Basics
--FILE--
<?php
/**
 * Fixture with two public properties that round-trips its state through
 * __serialize()/__unserialize(), using one string key and one integer key.
 */
class Test {
    public $prop;
    public $prop2;

    /** Exports both properties: $prop under "value", $prop2 under 42. */
    public function __serialize() {
        $state = [];
        $state["value"] = $this->prop;
        $state[42] = $this->prop2;
        return $state;
    }

    /** Restores both properties from an array shaped like __serialize()'s result. */
    public function __unserialize(array $data) {
        ["value" => $this->prop, 42 => $this->prop2] = $data;
    }
}
// Round-trip a populated instance: dump the serialized string first, then the
// object rebuilt from that same string.
$test = new Test;
$test->prop = "foobar";
$test->prop2 = "barfoo";
var_dump($s = serialize($test));
var_dump(unserialize($s));
?>
--EXPECT--
string(58) "O:4:"Test":2:{s:5:"value";s:6:"foobar";i:42;s:6:"barfoo";}"
object(Test)#2 (2) {
["prop"]=>
string(6) "foobar"
["prop2"]=>
string(6) "barfoo"
}

View file

@ -0,0 +1,20 @@
--TEST--
__serialize() mechanism (002): TypeError on invalid return type
--FILE--
<?php
// Fixture whose __serialize() deliberately returns something other than an array.
class Test {
public function __serialize() {
// Returning the object itself (not an array) is the invalid case under test.
return $this;
}
}
// serialize() is expected to raise a TypeError for this fixture; only the
// exception message is printed so it can be compared against the EXPECT section.
try {
serialize(new Test);
} catch (TypeError $e) {
echo $e->getMessage(), "\n";
}
?>
--EXPECT--
__serialize() must return an array

View file

@ -0,0 +1,56 @@
--TEST--
__serialize() mechanism (003): Interoperability of different serialization mechanisms
--FILE--
<?php
// Fixture that defines every serialization hook at once: the Serializable
// interface methods, __sleep()/__wakeup(), and __serialize()/__unserialize().
// Each hook prints its own name so the output records which ones actually run.
class Test implements Serializable {
// Does not appear in this test's expected output. Note that it returns nothing.
public function __sleep() {
echo "__sleep() called\n";
}
// Does not appear in this test's expected output.
public function __wakeup() {
echo "__wakeup() called\n";
}
// Announces itself and returns a fixed one-element state array.
public function __serialize() {
echo "__serialize() called\n";
return ["key" => "value"];
}
// Announces itself and dumps the array it receives; no state is stored.
public function __unserialize(array $data) {
echo "__unserialize() called\n";
var_dump($data);
}
// Serializable::serialize(): announces itself and returns a fixed string.
public function serialize() {
echo "serialize() called\n";
return "payload";
}
// Serializable::unserialize(): announces itself and dumps the received string.
public function unserialize($payload) {
echo "unserialize() called\n";
var_dump($payload);
}
}
// First round-trip the object through serialize()/unserialize(), then feed
// unserialize() a hand-written payload in the "C:" format. The expected output
// shows __serialize()/__unserialize() for the round-trip and the
// Serializable::unserialize() method for the "C:" payload.
$test = new Test;
var_dump($s = serialize($test));
var_dump(unserialize($s));
var_dump(unserialize('C:4:"Test":7:{payload}'));
?>
--EXPECT--
__serialize() called
string(37) "O:4:"Test":1:{s:3:"key";s:5:"value";}"
__unserialize() called
array(1) {
["key"]=>
string(5) "value"
}
object(Test)#2 (0) {
}
unserialize() called
string(7) "payload"
object(Test)#2 (0) {
}

View file

@ -0,0 +1,131 @@
--TEST--
__serialize() mechanism (004): Delayed __unserialize() calls
--FILE--
<?php
// Fixture restored through __wakeup(). It declares only $data; the woken_up
// flag is a dynamic property created inside __wakeup().
class Wakeup {
public $data;
public function __construct(array $data) {
$this->data = $data;
}
// Prints a marker and dumps $data BEFORE setting the flag, so the dump shows
// the state of any nested objects at the moment this hook runs.
public function __wakeup() {
echo "__wakeup() called\n";
var_dump($this->data);
$this->woken_up = true;
}
}
// Fixture restored through __unserialize(). It declares only $data; the
// unserialized flag is a dynamic property created inside __unserialize().
class Unserialize {
public $data;
public function __construct(array $data) {
$this->data = $data;
}
// The serialized state is the raw $data array itself.
public function __serialize() {
return $this->data;
}
// Stores the received array, prints a marker, dumps the stored array, and only
// then sets the flag, so the dump shows nested objects as they are at call time.
public function __unserialize(array $data) {
$this->data = $data;
echo "__unserialize() called\n";
var_dump($this->data);
$this->unserialized = true;
}
}
// Build a four-level chain Wakeup -> Unserialize -> Wakeup -> Unserialize (each
// level wrapped in a one-element array) and round-trip it. The expected output
// lists the hook calls starting from the innermost object and moving outwards.
$obj = new Wakeup([new Unserialize([new Wakeup([new Unserialize([])])])]);
var_dump($s = serialize($obj));
var_dump(unserialize($s));
?>
--EXPECT--
string(126) "O:6:"Wakeup":1:{s:4:"data";a:1:{i:0;O:11:"Unserialize":1:{i:0;O:6:"Wakeup":1:{s:4:"data";a:1:{i:0;O:11:"Unserialize":0:{}}}}}}"
__unserialize() called
array(0) {
}
__wakeup() called
array(1) {
[0]=>
object(Unserialize)#8 (2) {
["data"]=>
array(0) {
}
["unserialized"]=>
bool(true)
}
}
__unserialize() called
array(1) {
[0]=>
object(Wakeup)#7 (2) {
["data"]=>
array(1) {
[0]=>
object(Unserialize)#8 (2) {
["data"]=>
array(0) {
}
["unserialized"]=>
bool(true)
}
}
["woken_up"]=>
bool(true)
}
}
__wakeup() called
array(1) {
[0]=>
object(Unserialize)#6 (2) {
["data"]=>
array(1) {
[0]=>
object(Wakeup)#7 (2) {
["data"]=>
array(1) {
[0]=>
object(Unserialize)#8 (2) {
["data"]=>
array(0) {
}
["unserialized"]=>
bool(true)
}
}
["woken_up"]=>
bool(true)
}
}
["unserialized"]=>
bool(true)
}
}
object(Wakeup)#5 (2) {
["data"]=>
array(1) {
[0]=>
object(Unserialize)#6 (2) {
["data"]=>
array(1) {
[0]=>
object(Wakeup)#7 (2) {
["data"]=>
array(1) {
[0]=>
object(Unserialize)#8 (2) {
["data"]=>
array(0) {
}
["unserialized"]=>
bool(true)
}
}
["woken_up"]=>
bool(true)
}
}
["unserialized"]=>
bool(true)
}
}
["woken_up"]=>
bool(true)
}

View file

@ -0,0 +1,56 @@
--TEST--
__serialize() mechanism (005): parent::__unserialize() is safe
--FILE--
<?php
// Base fixture: keeps a private $data array and uses that array directly as
// its serialized state.
class A {
private $data;
public function __construct(array $data) {
$this->data = $data;
}
// Returns the private array unchanged.
public function __serialize() {
return $this->data;
}
// Replaces the private array with the given one.
public function __unserialize(array $data) {
$this->data = $data;
}
}
/**
 * Child fixture: adds its own private $data2 array and nests the parent's
 * serialized state inside its own payload.
 */
class B extends A {
    private $data2;

    /** Hands $data to the parent constructor and keeps $data2 locally. */
    public function __construct(array $data, array $data2) {
        parent::__construct($data);
        $this->data2 = $data2;
    }

    /** Payload layout: index 0 holds this class's state, index 1 the parent's. */
    public function __serialize() {
        $ownState = $this->data2;
        $parentState = parent::__serialize();
        return [$ownState, $parentState];
    }

    /** Splits the payload and passes the parent's part to parent::__unserialize(). */
    public function __unserialize(array $payload) {
        $data2 = $payload[0];
        $data = $payload[1];
        parent::__unserialize($data);
        $this->data2 = $data2;
    }
}
// The same stdClass instance is placed in both arrays. The expected serialized
// string encodes its second occurrence as a back-reference (r:3), and the
// restored object shows the same handle in both arrays.
$common = new stdClass;
$obj = new B([$common], [$common]);
var_dump($s = serialize($obj));
var_dump(unserialize($s));
?>
--EXPECT--
string(63) "O:1:"B":2:{i:0;a:1:{i:0;O:8:"stdClass":0:{}}i:1;a:1:{i:0;r:3;}}"
object(B)#3 (2) {
["data2":"B":private]=>
array(1) {
[0]=>
object(stdClass)#4 (0) {
}
}
["data":"A":private]=>
array(1) {
[0]=>
object(stdClass)#4 (0) {
}
}
}

View file

@ -745,6 +745,32 @@ static int php_var_serialize_call_sleep(zval *retval, zval *struc) /* {{{ */
}
/* }}} */
/* Calls the object's __serialize() method and stores the result in retval.
 *
 * Returns SUCCESS only when the call went through and produced an array.
 * In every other case retval is destroyed and FAILURE is returned; a result
 * that is set but not an array additionally raises a type error. */
static int php_var_serialize_call_magic_serialize(zval *retval, zval *obj) /* {{{ */
{
	zval method_name;
	int call_status;

	ZVAL_STRINGL(&method_name, "__serialize", sizeof("__serialize") - 1);

	/* serialize_lock is raised for the duration of the user call.
	 * NOTE(review): presumably this isolates nested serialize() calls made by
	 * user code from the outer serialization state -- confirm. */
	BG(serialize_lock)++;
	call_status = call_user_function(CG(function_table), obj, &method_name, retval, 0, 0);
	BG(serialize_lock)--;

	zval_ptr_dtor_str(&method_name);

	if (call_status != FAILURE && !Z_ISUNDEF_P(retval)) {
		if (Z_TYPE_P(retval) == IS_ARRAY) {
			return SUCCESS;
		}

		/* Release the bogus value first, then report the contract violation. */
		zval_ptr_dtor(retval);
		zend_type_error("__serialize() must return an array");
		return FAILURE;
	}

	/* The call failed or left retval undefined. */
	zval_ptr_dtor(retval);
	return FAILURE;
}
/* }}} */
static void php_var_serialize_collect_names(HashTable *ht, HashTable *src) /* {{{ */
{
zval *val;
@ -915,6 +941,43 @@ again:
case IS_OBJECT: {
zend_class_entry *ce = Z_OBJCE_P(struc);
if (zend_hash_str_exists(&ce->function_table, "__serialize", sizeof("__serialize")-1)) {
zval retval, obj;
zend_string *key;
zval *data;
zend_ulong index;
ZVAL_COPY(&obj, struc);
if (php_var_serialize_call_magic_serialize(&retval, &obj) == FAILURE) {
if (!EG(exception)) {
smart_str_appendl(buf, "N;", 2);
}
zval_ptr_dtor(&obj);
return;
}
php_var_serialize_class_name(buf, &obj);
smart_str_append_unsigned(buf, zend_array_count(Z_ARRVAL(retval)));
smart_str_appendl(buf, ":{", 2);
ZEND_HASH_FOREACH_KEY_VAL_IND(Z_ARRVAL(retval), index, key, data) {
if (!key) {
php_var_serialize_long(buf, index);
} else {
php_var_serialize_string(buf, ZSTR_VAL(key), ZSTR_LEN(key));
}
if (Z_ISREF_P(data) && Z_REFCOUNT_P(data) == 1) {
data = Z_REFVAL_P(data);
}
php_var_serialize_intern(buf, data, var_hash);
} ZEND_HASH_FOREACH_END();
smart_str_appendc(buf, '}');
zval_ptr_dtor(&obj);
zval_ptr_dtor(&retval);
return;
}
if (ce->serialize != NULL) {
/* has custom handler */
unsigned char *serialized_data = NULL;

View file

@ -26,8 +26,10 @@
#define VAR_DTOR_ENTRIES_MAX 255 /* 256 - offsetof(var_dtor_entries, data) / sizeof(zval) */
#define VAR_ENTRIES_DBG 0
/* VAR_FLAG used in var_dtor entries to signify an entry on which __wakeup should be called */
/* VAR_FLAG used in var_dtor entries to signify an entry on which
* __wakeup/__unserialize should be called */
#define VAR_WAKEUP_FLAG 1
#define VAR_UNSERIALIZE_FLAG 2
typedef struct {
zend_long used_slots;
@ -191,9 +193,10 @@ PHPAPI void var_destroy(php_unserialize_data_t *var_hashx)
zend_long i;
var_entries *var_hash = (*var_hashx)->entries.next;
var_dtor_entries *var_dtor_hash = (*var_hashx)->first_dtor;
zend_bool wakeup_failed = 0;
zval wakeup_name;
zend_bool delayed_call_failed = 0;
zval wakeup_name, unserialize_name;
ZVAL_UNDEF(&wakeup_name);
ZVAL_UNDEF(&unserialize_name);
#if VAR_ENTRIES_DBG
fprintf(stderr, "var_destroy(%ld)\n", var_hash?var_hash->used_slots:-1L);
@ -212,9 +215,9 @@ PHPAPI void var_destroy(php_unserialize_data_t *var_hashx)
fprintf(stderr, "var_destroy dtor(%p, %ld)\n", var_dtor_hash->data[i], Z_REFCOUNT_P(var_dtor_hash->data[i]));
#endif
/* Perform delayed __wakeup calls */
if (Z_EXTRA_P(zv) == VAR_WAKEUP_FLAG) {
if (!wakeup_failed) {
/* Perform delayed __wakeup calls */
if (!delayed_call_failed) {
zval retval;
if (Z_ISUNDEF(wakeup_name)) {
ZVAL_STRINGL(&wakeup_name, "__wakeup", sizeof("__wakeup") - 1);
@ -222,11 +225,33 @@ PHPAPI void var_destroy(php_unserialize_data_t *var_hashx)
BG(serialize_lock)++;
if (call_user_function(NULL, zv, &wakeup_name, &retval, 0, 0) == FAILURE || Z_ISUNDEF(retval)) {
wakeup_failed = 1;
delayed_call_failed = 1;
GC_ADD_FLAGS(Z_OBJ_P(zv), IS_OBJ_DESTRUCTOR_CALLED);
}
BG(serialize_lock)--;
zval_ptr_dtor(&retval);
} else {
GC_ADD_FLAGS(Z_OBJ_P(zv), IS_OBJ_DESTRUCTOR_CALLED);
}
} else if (Z_EXTRA_P(zv) == VAR_UNSERIALIZE_FLAG) {
/* Perform delayed __unserialize calls */
if (!delayed_call_failed) {
zval retval, param;
ZVAL_COPY(&param, &var_dtor_hash->data[i + 1]);
if (Z_ISUNDEF(unserialize_name)) {
ZVAL_STRINGL(&unserialize_name, "__unserialize", sizeof("__unserialize") - 1);
}
BG(serialize_lock)++;
if (call_user_function(CG(function_table), zv, &unserialize_name, &retval, 1, &param) == FAILURE || Z_ISUNDEF(retval)) {
delayed_call_failed = 1;
GC_ADD_FLAGS(Z_OBJ_P(zv), IS_OBJ_DESTRUCTOR_CALLED);
}
BG(serialize_lock)--;
zval_ptr_dtor(&param);
zval_ptr_dtor(&retval);
} else {
GC_ADD_FLAGS(Z_OBJ_P(zv), IS_OBJ_DESTRUCTOR_CALLED);
@ -241,6 +266,7 @@ PHPAPI void var_destroy(php_unserialize_data_t *var_hashx)
}
zval_ptr_dtor_nogc(&wakeup_name);
zval_ptr_dtor_nogc(&unserialize_name);
if ((*var_hashx)->ref_props) {
zend_hash_destroy((*var_hashx)->ref_props);
@ -601,43 +627,40 @@ static inline int object_custom(UNSERIALIZE_PARAMETER, zend_class_entry *ce)
return 1;
}
/* Parses the element count of a serialized object and instantiates the object
 * into rval. Returns the parsed count, or -1 after emitting a warning when the
 * cursor is within two bytes of the end of input or when the class has a
 * custom serialize handler.
 * NOTE(review): p, max and rval presumably come from UNSERIALIZE_PARAMETER. */
static inline zend_long object_common1(UNSERIALIZE_PARAMETER, zend_class_entry *ce)
{
	zend_long count;

	if (*p >= max - 2) {
		zend_error(E_WARNING, "Bad unserialize data");
		return -1;
	}

	count = parse_iv2((*p) + 2, p);
	(*p) += 2;

	if (ce->serialize != NULL) {
		/* If this class implements Serializable, it should not land here but in object_custom(). The passed string
		obviously doesn't descend from the regular serializer. */
		zend_error(E_WARNING, "Erroneous data format for unserializing '%s'", ZSTR_VAL(ce->name));
		return -1;
	}

	object_init_ex(rval, ce);
	return count;
}
#ifdef PHP_WIN32
# pragma optimize("", off)
#endif
static inline int object_common2(UNSERIALIZE_PARAMETER, zend_long elements)
static inline int object_common(UNSERIALIZE_PARAMETER, zend_long elements, zend_bool has_unserialize)
{
HashTable *ht;
zend_bool has_wakeup;
if (Z_TYPE_P(rval) != IS_OBJECT) {
if (has_unserialize) {
zval ary, *tmp;
if (elements >= HT_MAX_SIZE) {
return 0;
}
array_init_size(&ary, elements);
if (!process_nested_data(UNSERIALIZE_PASSTHRU, Z_ARRVAL(ary), elements, NULL)) {
ZVAL_DEREF(rval);
GC_ADD_FLAGS(Z_OBJ_P(rval), IS_OBJ_DESTRUCTOR_CALLED);
return 0;
}
/* Delay __unserialize() call until end of unserialization. We use two slots here to
* store both the object and the unserialized data array. */
ZVAL_DEREF(rval);
tmp = var_tmp_var(var_hash);
ZVAL_COPY(tmp, rval);
Z_EXTRA_P(tmp) = VAR_UNSERIALIZE_FLAG;
tmp = var_tmp_var(var_hash);
ZVAL_COPY_VALUE(tmp, &ary);
return finish_nested_data(UNSERIALIZE_PASSTHRU);
}
has_wakeup = Z_OBJCE_P(rval) != PHP_IC_ENTRY
&& zend_hash_str_exists(&Z_OBJCE_P(rval)->function_table, "__wakeup", sizeof("__wakeup")-1);
@ -954,9 +977,9 @@ object ":" uiv ":" ["] {
char *str;
zend_string *class_name;
zend_class_entry *ce;
int incomplete_class = 0;
int custom_object = 0;
zend_bool incomplete_class = 0;
zend_bool custom_object = 0;
zend_bool has_unserialize = 0;
zval user_func;
zval retval;
@ -1085,19 +1108,39 @@ object ":" uiv ":" ["] {
return ret;
}
elements = object_common1(UNSERIALIZE_PASSTHRU, ce);
if (elements < 0) {
if (*p >= max - 2) {
zend_error(E_WARNING, "Bad unserialize data");
zend_string_release_ex(class_name, 0);
return 0;
}
elements = parse_iv2(*p + 2, p);
if (elements < 0) {
zend_string_release_ex(class_name, 0);
return 0;
}
*p += 2;
has_unserialize = !incomplete_class
&& zend_hash_str_exists(&ce->function_table, "__unserialize", sizeof("__unserialize")-1);
/* If this class implements Serializable, it should not land here but in object_custom().
* The passed string obviously doesn't descend from the regular serializer. However, if
* there is both Serializable::unserialize() and __unserialize(), then both may be used,
* depending on the serialization format. */
if (ce->serialize != NULL && !has_unserialize) {
zend_error(E_WARNING, "Erroneous data format for unserializing '%s'", ZSTR_VAL(ce->name));
zend_string_release_ex(class_name, 0);
return 0;
}
object_init_ex(rval, ce);
if (incomplete_class) {
php_store_class_name(rval, ZSTR_VAL(class_name), len2);
}
zend_string_release_ex(class_name, 0);
return object_common2(UNSERIALIZE_PASSTHRU, elements);
return object_common(UNSERIALIZE_PASSTHRU, elements, has_unserialize);
}
"}" {