mirror of
https://github.com/php/php-src.git
synced 2025-08-15 21:48:51 +02:00
Refactor BCMath _bc_do_sub
(#14132)
_bc_do_sub now uses SIMD to perform calculations at high speed. Moved the macros used for SIMD to `private.h`, and added some constants and macros.
This commit is contained in:
parent
bb21d195c1
commit
02732007f7
4 changed files with 126 additions and 17 deletions
|
@ -16,16 +16,11 @@
|
|||
|
||||
#include "bcmath.h"
|
||||
#include "convert.h"
|
||||
#include "private.h"
|
||||
#ifdef __SSE2__
|
||||
# include <emmintrin.h>
|
||||
#endif
|
||||
|
||||
/* This will be 0x01010101 for 32-bit and 0x0101010101010101 */
|
||||
#define SWAR_ONES (~((size_t) 0) / 0xFF)
|
||||
/* This repeats a byte `x` into an entire 32/64-bit word.
|
||||
* Example: SWAR_REPEAT(0xAB) will be 0xABABABAB for 32-bit and 0xABABABABABABABAB for 64-bit. */
|
||||
#define SWAR_REPEAT(x) (SWAR_ONES * (x))
|
||||
|
||||
static char *bc_copy_and_shift_numbers(char *restrict dest, const char *source, const char *source_end, unsigned char shift, bool add)
|
||||
{
|
||||
size_t bulk_shift = SWAR_REPEAT(shift);
|
||||
|
|
|
@ -124,17 +124,19 @@ bc_num _bc_do_add(bc_num n1, bc_num n2, size_t scale_min)
|
|||
bc_num _bc_do_sub(bc_num n1, bc_num n2, size_t scale_min)
|
||||
{
|
||||
bc_num diff;
|
||||
size_t diff_scale, diff_len;
|
||||
size_t min_scale, min_len;
|
||||
size_t borrow, count;
|
||||
/* The caller guarantees that n1 is always the larger number. */
|
||||
size_t diff_len = EXPECTED(n1->n_len >= n2->n_len) ? n1->n_len : n2->n_len;
|
||||
size_t diff_scale = MAX(n1->n_scale, n2->n_scale);
|
||||
/* Same condition as EXPECTED before, but using EXPECTED again will make it slower. */
|
||||
size_t min_len = n1->n_len >= n2->n_len ? n2->n_len : n1->n_len;
|
||||
size_t min_scale = MIN(n1->n_scale, n2->n_scale);
|
||||
size_t min_bytes = min_len + min_scale;
|
||||
size_t borrow = 0;
|
||||
size_t count;
|
||||
int val;
|
||||
char *n1ptr, *n2ptr, *diffptr;
|
||||
|
||||
/* Allocate temporary storage. */
|
||||
diff_len = MAX(n1->n_len, n2->n_len);
|
||||
diff_scale = MAX(n1->n_scale, n2->n_scale);
|
||||
min_len = MIN(n1->n_len, n2->n_len);
|
||||
min_scale = MIN(n1->n_scale, n2->n_scale);
|
||||
diff = bc_new_num (diff_len, MAX(diff_scale, scale_min));
|
||||
|
||||
/* Initialize the subtract. */
|
||||
|
@ -142,9 +144,6 @@ bc_num _bc_do_sub(bc_num n1, bc_num n2, size_t scale_min)
|
|||
n2ptr = (char *) (n2->n_value + n2->n_len + n2->n_scale - 1);
|
||||
diffptr = (char *) (diff->n_value + diff_len + diff_scale - 1);
|
||||
|
||||
/* Subtract the numbers. */
|
||||
borrow = 0;
|
||||
|
||||
/* Take care of the longer scaled number. */
|
||||
if (n1->n_scale != min_scale) {
|
||||
/* n1 has the longer scale */
|
||||
|
@ -166,7 +165,59 @@ bc_num _bc_do_sub(bc_num n1, bc_num n2, size_t scale_min)
|
|||
}
|
||||
|
||||
/* Now do the equal length scale and integer parts. */
|
||||
for (count = 0; count < min_len + min_scale; count++) {
|
||||
count = 0;
|
||||
/* Use SWAR (SIMD within a register) to subtract sizeof(BC_UINT_T) digits per iteration. */
|
||||
if (min_bytes >= sizeof(BC_UINT_T)) {
|
||||
diffptr++;
|
||||
n1ptr++;
|
||||
n2ptr++;
|
||||
while (count + sizeof(BC_UINT_T) <= min_bytes) {
|
||||
diffptr -= sizeof(BC_UINT_T);
|
||||
n1ptr -= sizeof(BC_UINT_T);
|
||||
n2ptr -= sizeof(BC_UINT_T);
|
||||
|
||||
BC_UINT_T n1bytes;
|
||||
BC_UINT_T n2bytes;
|
||||
memcpy(&n1bytes, n1ptr, sizeof(n1bytes));
|
||||
memcpy(&n2bytes, n2ptr, sizeof(n2bytes));
|
||||
|
||||
#if BC_LITTLE_ENDIAN
|
||||
/* Little endian requires changing the order of bytes. */
|
||||
n1bytes = BC_BSWAP(n1bytes);
|
||||
n2bytes = BC_BSWAP(n2bytes);
|
||||
#endif
|
||||
|
||||
n1bytes -= n2bytes + borrow;
|
||||
/* If the most significant bit is 1, a borrow out of the most significant digit has occurred. */
|
||||
bool tmp_borrow = n1bytes & ((BC_UINT_T) 1 << (8 * sizeof(BC_UINT_T) - 1));
|
||||
|
||||
/*
 * Check the most significant bit of each byte: if it is 1, that digit went negative and a
 * borrow has occurred. A byte wraps around in binary (a multiple of 16) rather than in
 * decimal (10), so the lower 4 bits of a borrowed byte are 6 larger than the correct digit.
 * Therefore, clear the upper 4 bits of every byte and subtract 6 from the lower 4 bits of
 * each byte that borrowed, which adjusts it to the correct value as a decimal digit.
 */
|
||||
BC_UINT_T borrow_mask = ((n1bytes & SWAR_REPEAT(0x80)) >> 7) * 0x06;
|
||||
n1bytes = (n1bytes & SWAR_REPEAT(0x0F)) - borrow_mask;
|
||||
|
||||
#if BC_LITTLE_ENDIAN
|
||||
/* Little endian requires changing the order of bytes back. */
|
||||
n1bytes = BC_BSWAP(n1bytes);
|
||||
#endif
|
||||
|
||||
memcpy(diffptr, &n1bytes, sizeof(n1bytes));
|
||||
|
||||
borrow = tmp_borrow;
|
||||
count += sizeof(BC_UINT_T);
|
||||
}
|
||||
diffptr--;
|
||||
n1ptr--;
|
||||
n2ptr--;
|
||||
}
|
||||
|
||||
/* Calculate the remaining bytes that are less than the size of BC_UINT_T using a normal loop. */
|
||||
for (; count < min_bytes; count++) {
|
||||
val = *n1ptr-- - *n2ptr-- - borrow;
|
||||
if (val < 0) {
|
||||
val += BASE;
|
||||
|
|
|
@ -34,6 +34,68 @@
|
|||
#include <stdbool.h>
|
||||
#include <stddef.h>
|
||||
|
||||
/* This will be 0x01010101 for 32-bit and 0x0101010101010101 for 64-bit */
|
||||
#define SWAR_ONES (~((size_t) 0) / 0xFF)
|
||||
/* This repeats a byte `x` into an entire 32/64-bit word.
|
||||
* Example: SWAR_REPEAT(0xAB) will be 0xABABABAB for 32-bit and 0xABABABABABABABAB for 64-bit. */
|
||||
#define SWAR_REPEAT(x) (SWAR_ONES * (x))
|
||||
|
||||
/* Byte swap */
|
||||
#if defined(_MSC_VER)
|
||||
# include <stdlib.h>
|
||||
# define BSWAP32(u) _byteswap_ulong(u)
|
||||
# define BSWAP64(u) _byteswap_uint64(u)
|
||||
#else
|
||||
# ifdef __has_builtin
|
||||
# if __has_builtin(__builtin_bswap32)
|
||||
# define BSWAP32(u) __builtin_bswap32(u)
|
||||
# endif // __has_builtin(__builtin_bswap32)
|
||||
# if __has_builtin(__builtin_bswap64)
|
||||
# define BSWAP64(u) __builtin_bswap64(u)
|
||||
# endif // __has_builtin(__builtin_bswap64)
|
||||
# elif defined(__GNUC__)
|
||||
# define BSWAP32(u) __builtin_bswap32(u)
|
||||
# define BSWAP64(u) __builtin_bswap64(u)
|
||||
# endif // __has_builtin
|
||||
#endif // defined(_MSC_VER)
|
||||
#ifndef BSWAP32
|
||||
/*
 * Portable fallback used when no compiler byte-swap intrinsic was selected above.
 * Returns `u` with the order of its four bytes reversed.
 *
 * Declared `static inline`: a plain `inline` definition in a header provides no external
 * definition in C99 and later, so a call that the compiler chooses not to inline
 * (e.g. in unoptimized builds) would fail to link.
 */
static inline uint32_t BSWAP32(uint32_t u)
{
	return (((u & 0xff000000U) >> 24)
		| ((u & 0x00ff0000U) >> 8)
		| ((u & 0x0000ff00U) << 8)
		| ((u & 0x000000ffU) << 24));
}
|
||||
#endif
|
||||
#ifndef BSWAP64
|
||||
/*
 * Portable fallback used when no compiler byte-swap intrinsic was selected above.
 * Returns `u` with the order of its eight bytes reversed.
 *
 * Declared `static inline`: a plain `inline` definition in a header provides no external
 * definition in C99 and later, so a call that the compiler chooses not to inline
 * (e.g. in unoptimized builds) would fail to link.
 */
static inline uint64_t BSWAP64(uint64_t u)
{
	return (((u & 0xff00000000000000ULL) >> 56)
		| ((u & 0x00ff000000000000ULL) >> 40)
		| ((u & 0x0000ff0000000000ULL) >> 24)
		| ((u & 0x000000ff00000000ULL) >> 8)
		| ((u & 0x00000000ff000000ULL) << 8)
		| ((u & 0x0000000000ff0000ULL) << 24)
		| ((u & 0x000000000000ff00ULL) << 40)
		| ((u & 0x00000000000000ffULL) << 56));
}
|
||||
#endif
|
||||
|
||||
#if SIZEOF_SIZE_T >= 8
|
||||
#define BC_BSWAP(u) BSWAP64(u)
|
||||
#define BC_UINT_T uint64_t
|
||||
#else
|
||||
#define BC_BSWAP(u) BSWAP32(u)
|
||||
#define BC_UINT_T uint32_t
|
||||
#endif
|
||||
|
||||
#ifdef WORDS_BIGENDIAN
|
||||
#define BC_LITTLE_ENDIAN 0
|
||||
#else
|
||||
#define BC_LITTLE_ENDIAN 1
|
||||
#endif
|
||||
|
||||
|
||||
/* routines */
|
||||
int _bc_do_compare (bc_num n1, bc_num n2, bool use_sign);
|
||||
bc_num _bc_do_add (bc_num n1, bc_num n2, size_t scale_min);
|
||||
|
|
|
@ -31,6 +31,7 @@
|
|||
|
||||
#include "bcmath.h"
|
||||
#include "convert.h"
|
||||
#include "private.h"
|
||||
#include <stdbool.h>
|
||||
#include <stddef.h>
|
||||
#ifdef __SSE2__
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue