random: Use branchless implementation for mask generation in Randomizer::getBytesFromString() (#10522)

* random: Add `max_offset` local to Randomizer::getBytesFromString()

* random: Use branchless implementation for mask generation in Randomizer::getBytesFromString()

This was benchmarked against clzl with a standalone script with random inputs
and is slightly faster. clzl requires an additional branch to handle the
source_length = 1 / max_offset = 0 case.

* Improve comment for masking in Randomizer::getBytesFromString()
This commit is contained in:
Tim Düsterhus 2023-02-08 09:36:12 +01:00 committed by GitHub
parent eabb9b7dea
commit 0cfc45b667
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
2 changed files with 62 additions and 23 deletions

View file

@ -388,6 +388,7 @@ PHP_METHOD(Random_Randomizer, getBytesFromString)
ZEND_PARSE_PARAMETERS_END(); ZEND_PARSE_PARAMETERS_END();
const size_t source_length = ZSTR_LEN(source); const size_t source_length = ZSTR_LEN(source);
const size_t max_offset = source_length - 1;
if (source_length < 1) { if (source_length < 1) {
zend_argument_value_error(1, "cannot be empty"); zend_argument_value_error(1, "cannot be empty");
@ -401,9 +402,9 @@ PHP_METHOD(Random_Randomizer, getBytesFromString)
retval = zend_string_alloc(length, 0); retval = zend_string_alloc(length, 0);
if (source_length > 0x100) { if (max_offset > 0xff) {
while (total_size < length) { while (total_size < length) {
uint64_t offset = randomizer->algo->range(randomizer->status, 0, source_length - 1); uint64_t offset = randomizer->algo->range(randomizer->status, 0, max_offset);
if (EG(exception)) { if (EG(exception)) {
zend_string_free(retval); zend_string_free(retval);
@ -413,26 +414,14 @@ PHP_METHOD(Random_Randomizer, getBytesFromString)
ZSTR_VAL(retval)[total_size++] = ZSTR_VAL(source)[offset]; ZSTR_VAL(retval)[total_size++] = ZSTR_VAL(source)[offset];
} }
} else { } else {
uint64_t mask; uint64_t mask = max_offset;
if (source_length <= 0x1) { // Copy the top-most bit into all lower bits.
mask = 0x0; // Shifting by 4 is sufficient, because max_offset
} else if (source_length <= 0x2) { // is guaranteed to fit in an 8-bit integer at this
mask = 0x1; // point.
} else if (source_length <= 0x4) { mask |= mask >> 1;
mask = 0x3; mask |= mask >> 2;
} else if (source_length <= 0x8) { mask |= mask >> 4;
mask = 0x7;
} else if (source_length <= 0x10) {
mask = 0xF;
} else if (source_length <= 0x20) {
mask = 0x1F;
} else if (source_length <= 0x40) {
mask = 0x3F;
} else if (source_length <= 0x80) {
mask = 0x7F;
} else {
mask = 0xFF;
}
int failures = 0; int failures = 0;
while (total_size < length) { while (total_size < length) {
@ -445,7 +434,7 @@ PHP_METHOD(Random_Randomizer, getBytesFromString)
for (size_t i = 0; i < randomizer->status->last_generated_size; i++) { for (size_t i = 0; i < randomizer->status->last_generated_size; i++) {
uint64_t offset = (result >> (i * 8)) & mask; uint64_t offset = (result >> (i * 8)) & mask;
if (offset >= source_length) { if (offset > max_offset) {
if (++failures > PHP_RANDOM_RANGE_ATTEMPTS) { if (++failures > PHP_RANDOM_RANGE_ATTEMPTS) {
zend_string_free(retval); zend_string_free(retval);
zend_throw_error(random_ce_Random_BrokenRandomEngineError, "Failed to generate an acceptable random number in %d attempts", PHP_RANDOM_RANGE_ATTEMPTS); zend_throw_error(random_ce_Random_BrokenRandomEngineError, "Failed to generate an acceptable random number in %d attempts", PHP_RANDOM_RANGE_ATTEMPTS);

View file

@ -47,6 +47,32 @@ for ($i = 1; $i <= strlen($allBytes); $i *= 2) {
echo PHP_EOL; echo PHP_EOL;
} }
// Test lengths that are one more than the powers of two. For these
// the maximum offset will be a power of two and thus a minimal number
// of bits will be set in the offset.
for ($i = 1; ($i + 1) <= strlen($allBytes); $i *= 2) {
$oneMore = $i + 1;
echo "{$oneMore}:", PHP_EOL;
$wrapper = new TestWrapperEngine($xoshiro);
$r = new Randomizer($wrapper);
$result = $r->getBytesFromString(substr($allBytes, 0, $oneMore), 20000);
$count = [];
for ($j = 0; $j < strlen($result); $j++) {
$b = $result[$j];
$count[ord($b)] ??= 0;
$count[ord($b)]++;
}
// We expect that each possible value appears at least once, if
// not is is very likely that some bits were erroneously masked away.
var_dump(count($count));
echo PHP_EOL;
}
echo "Slow Path:", PHP_EOL; echo "Slow Path:", PHP_EOL;
$wrapper = new TestWrapperEngine($xoshiro); $wrapper = new TestWrapperEngine($xoshiro);
@ -107,6 +133,30 @@ int(128)
int(2500) int(2500)
int(256) int(256)
2:
int(2)
3:
int(3)
5:
int(5)
9:
int(9)
17:
int(17)
33:
int(33)
65:
int(65)
129:
int(129)
Slow Path: Slow Path:
int(20000) int(20000)
int(256) int(256)