ext/bcmath: Optimize bcdiv processing (#14660)

Co-authored-by: Niels Dossche <7771979+nielsdos@users.noreply.github.com> Co-authored-by: Gina Peter Banyard <girgias@php.net>
2025-08-15 21:48:51 +02:00 · 2024-08-18 17:57:27 +09:00 · 2024-08-18 17:57:27 +09:00 · 8c704ab401
commit 8c704ab401
parent 7e5171d1f6
2 changed files with 428 additions and 192 deletions
--- a/ext/bcmath/libbcmath/src/bcmath.h
+++ b/ext/bcmath/libbcmath/src/bcmath.h
@ -146,7 +146,7 @@ bc_num bc_multiply(bc_num n1, bc_num n2, size_t scale);
 	*(result) = mul_ex;                             \
 } while (0)

-bool bc_divide(bc_num n1, bc_num n2, bc_num *quot, int scale);
+bool bc_divide(bc_num n1, bc_num n2, bc_num *quot, size_t scale);

 bool bc_modulo(bc_num num1, bc_num num2, bc_num *resul, size_t scale);

--- a/ext/bcmath/libbcmath/src/div.c
+++ b/ext/bcmath/libbcmath/src/div.c
@ -31,219 +31,455 @@

 #include "bcmath.h"
 #include "private.h"
+#include "convert.h"
 #include <stdbool.h>
 #include <stddef.h>
 #include <string.h>
 #include "zend_alloc.h"

+static const BC_VECTOR POW_10_LUT[9] = {
+	1, 10, 100, 1000, 10000, 100000, 1000000, 10000000, 100000000
+};

-/* Some utility routines for the divide:  First a one digit multiply.
-   NUM (with SIZE digits) is multiplied by DIGIT and the result is
-   placed into RESULT.  It is written so that NUM and RESULT can be
-   the same pointers.  */
-
-static void _one_mult(unsigned char *num, size_t size, int digit, unsigned char *result)
+/*
+ * This function should be used when the divisor is not split into multiple chunks, i.e. when the size of the array is one.
+ * This is because the algorithm can be simplified.
+ * This function is therefore an optimized version of bc_standard_div().
+ */
+static inline void bc_fast_div(
+	BC_VECTOR *numerator_vectors, size_t numerator_arr_size, BC_VECTOR divisor_vector, BC_VECTOR *quot_vectors, size_t quot_arr_size)
 {
-	size_t carry, value;
-	unsigned char *nptr, *rptr;
+	size_t numerator_top_index = numerator_arr_size - 1;
+	size_t quot_top_index = quot_arr_size - 1;
+	for (size_t i = 0; i < quot_arr_size - 1; i++) {
+		if (numerator_vectors[numerator_top_index - i] < divisor_vector) {
+			quot_vectors[quot_top_index - i] = 0;
+			/* numerator_vectors[numerator_top_index - i] < divisor_vector, so there will be no overflow. */
+			numerator_vectors[numerator_top_index - i - 1] += numerator_vectors[numerator_top_index - i] * BC_VECTOR_BOUNDARY_NUM;
+			continue;
+		}
+		quot_vectors[quot_top_index - i] = numerator_vectors[numerator_top_index - i] / divisor_vector;
+		numerator_vectors[numerator_top_index - i] -= divisor_vector * quot_vectors[quot_top_index - i];
+		numerator_vectors[numerator_top_index - i - 1] += numerator_vectors[numerator_top_index - i] * BC_VECTOR_BOUNDARY_NUM;
+		numerator_vectors[numerator_top_index - i] = 0;
+	}
+	/* last */
+	quot_vectors[0] = numerator_vectors[0] / divisor_vector;
+}

-	if (digit == 0) {
-		memset(result, 0, size);
+/*
+ * Used when the divisor is split into 2 or more chunks.
+ * This use the restoring division algorithm.
+ * https://en.wikipedia.org/wiki/Division_algorithm#Restoring_division
+ */
+static inline void bc_standard_div(
+	BC_VECTOR *numerator_vectors, size_t numerator_arr_size,
+	BC_VECTOR *divisor_vectors, size_t divisor_arr_size, size_t divisor_len,
+	BC_VECTOR *quot_vectors, size_t quot_arr_size
+) {
+	size_t numerator_top_index = numerator_arr_size - 1;
+	size_t divisor_top_index = divisor_arr_size - 1;
+	size_t quot_top_index = quot_arr_size - 1;
+
+	BC_VECTOR div_carry = 0;
+
+	/*
+	 * Errors might occur between the true quotient and the temporary quotient calculated using only the high order digits.
+	 * For example, the true quotient of 240 / 121 is 1.
+	 * However, if it is calculated using only the high-order digits (24 / 12), we would get 2.
+	 * We refer to this value of 2 as the temporary quotient.
+	 * We also define E to be error between the true quotient and the temporary quotient,
+	 * which in this case, is 1.
+	 *
+	 * Another example: Calculating 999_0000_0000 / 1000_9999 with base 10000.
+	 * The true quotient is 9980, but if it is calculated using only the high-order digits (999 / 1000), we would get 0
+	 * If the temporary quotient is 0, we need to carry the digits to the next division, which is 999_0000 / 1000.
+	 * The new temporary quotient we get is 9990, with error E = 10.
+	 *
+	 * Because we use the restoring division we need to perform E restorations, which can be significant if E is large.
+	 *
+	 * Therefore, for the error E to be at most 1 we adjust the number of high-order digits used to calculate the temporary quotient as follows:
+	 * - Include BC_VECTOR_SIZE + 1 digits of the divisor used in the calculation of the temporary quotient.
+	      The missing digits are filled in from the next array element.
+	 * - Adjust the number of digits in the numerator similarly to what was done for the divisor.
+	 *
+	 * e.g.
+	 * numerator = 123456780000
+	 * divisor = 123456789
+	 * base = 10000
+	 * numerator_arr = [0, 5678, 1234]
+	 * divisor_arr = [6789, 2345, 1]
+	 * numerator_top = 1234
+	 * divisor_top = 1
+	 * divisor_top_tmp = 12345 (+4 digits)
+	 * numerator_top_tmp = 12345678 (+4 digits)
+	 * tmp_quot = numerator_top_tmp / divisor_top_tmp = 1000
+	 * tmp_rem = -9000
+	 * tmp_quot is too large, so tmp_quot--, tmp_rem += divisor (restoring)
+	 * quot = 999
+	 * rem = 123447789
+	 *
+	 * Details:
+	 * Suppose that when calculating the temporary quotient, only the high-order elements of the BC_VECTOR array are used.
+	 * At this time, numerator and divisor can be considered to be decomposed as follows. (Here, B = 10^b, any b ∈ ℕ, any k ∈ ℕ)
+	 * numerator = numerator_high * B^k + numerator_low
+	 * divisor = divisor_high * B^k + divisor_low
+	 *
+	 * The error E can be expressed by the following formula.
+	 *
+	 * E = (numerator_high * B^k) / (divisor_high * B^k) - (numerator_high * B^k + numerator_low) / (divisor_high * B^k + divisor_low)
+	 * <=> E = numerator_high / divisor_high - (numerator_high * B^k + numerator_low) / (divisor_high * B^k + divisor_low)
+	 * <=> E = (numerator_high * (divisor_high * B^k + divisor_low) - (numerator_high * B^k + numerator_low) * divisor_high) / (divisor_high * (divisor_high * B^k + divisor_low))
+	 * <=> E = (numerator_high * divisor_low - divisor_high * numerator_low) / (divisor_high * (divisor_high * B^k + divisor_low))
+	 *
+	 * Find the error MAX_E when the error E is maximum (0 <= E <= MAX_E).
+	 * First, numerator_high, which only exists in the numerator, uses its maximum value.
+	 * Considering carry-back, numerator_high can be expressed as follows.
+	 * numerator_high = divisor_high * B
+	 * Also, numerator_low is only present in the numerator, but since this is a subtraction, use the smallest possible value here, 0.
+	 * numerator_low = 0
+	 *
+	 * MAX_E = (numerator_high * divisor_low - divisor_high * numerator_low) / (divisor_high * (divisor_high * B^k + divisor_low))
+	 * <=> MAX_E = (divisor_high * B * divisor_low) / (divisor_high * (divisor_high * B^k + divisor_low))
+	 *
+	 * divisor_low uses the maximum value.
+	 * divisor_low = B^k - 1
+	 * MAX_E = (divisor_high * B * divisor_low) / (divisor_high * (divisor_high * B^k + divisor_low))
+	 * <=> MAX_E = (divisor_high * B * (B^k - 1)) / (divisor_high * (divisor_high * B^k + B^k - 1))
+	 *
+	 * divisor_high uses the minimum value, but want to see how the number of digits affects the error, so represent
+	 * the minimum value as:
+	 * divisor_high = 10^x (any x ∈ ℕ)
+	 * Since B = 10^b, the formula becomes:
+	 * MAX_E = (divisor_high * B * (B^k - 1)) / (divisor_high * (divisor_high * B^k + B^k - 1))
+	 * <=> MAX_E = (10^x * 10^b * ((10^b)^k - 1)) / (10^x * (10^x * (10^b)^k + (10^b)^k - 1))
+	 *
+	 * Now let's make an approximation. Remove -1 from the numerator. Approximate the numerator to be
+	 * large and the denominator to be small, such that MAX_E is less than this expression.
+	 * Also, the denominator "(10^b)^k - 1" is never a negative value, so delete it.
+	 *
+	 * MAX_E = (10^x * 10^b * ((10^b)^k - 1)) / (10^x * (10^x * (10^b)^k + (10^b)^k - 1))
+	 * MAX_E < (10^x * 10^b * (10^b)^k) / (10^x * 10^x * (10^b)^k)
+	 * <=> MAX_E < 10^b / 10^x
+	 * <=> MAX_E < 10^(b - x)
+	 *
+	 * Therefore, found that if the number of digits in base B and divisor_high are equal, the error will be less
+	 * than 1 regardless of the number of digits in the value of k.
+	 *
+	 * Here, B is BC_VECTOR_BOUNDARY_NUM, so adjust the number of digits in high part of divisor to be BC_VECTOR_SIZE + 1.
+	 */
+	/* the number of usable digits, thus divisor_len % BC_VECTOR_SIZE == 0 implies we have BC_VECTOR_SIZE usable digits */
+	size_t divisor_top_digits = divisor_len % BC_VECTOR_SIZE;
+	if (divisor_top_digits == 0) {
+		divisor_top_digits = BC_VECTOR_SIZE;
+	}
+
+	size_t high_part_shift = POW_10_LUT[BC_VECTOR_SIZE - divisor_top_digits + 1];
+	size_t low_part_shift = POW_10_LUT[divisor_top_digits - 1];
+	BC_VECTOR divisor_high_part = divisor_vectors[divisor_top_index] * high_part_shift + divisor_vectors[divisor_top_index - 1] / low_part_shift;
+	for (size_t i = 0; i < quot_arr_size; i++) {
+		BC_VECTOR numerator_high_part = numerator_vectors[numerator_top_index - i] * high_part_shift + numerator_vectors[numerator_top_index - i - 1] / low_part_shift;
+
+		/*
+		 * Determine the temporary quotient.
+		 * The maximum value of numerator_high is divisor_high * B in the previous example, so here numerator_high_part is
+		 * divisor_high_part * BC_VECTOR_BOUNDARY_NUM.
+		 * The number of digits in divisor_high_part is BC_VECTOR_SIZE + 1, so even if divisor_high_part is at its maximum value,
+		 * it will never overflow here.
+		 */
+		numerator_high_part += div_carry * BC_VECTOR_BOUNDARY_NUM * high_part_shift;
+
+		/* numerator_high_part < divisor_high_part => quotient == 0 */
+		if (numerator_high_part < divisor_high_part) {
+			quot_vectors[quot_top_index - i] = 0;
+			div_carry = numerator_vectors[numerator_top_index - i];
+			numerator_vectors[numerator_top_index - i] = 0;
+			continue;
+		}
+
+		BC_VECTOR quot_guess = numerator_high_part / divisor_high_part;
+
+		/* For sub, add the remainder to the high-order digit */
+		numerator_vectors[numerator_top_index - i] += div_carry * BC_VECTOR_BOUNDARY_NUM;
+
+		/*
+		 * It is impossible for the temporary quotient to underestimate the true quotient.
+		 * Therefore a temporary quotient of 0 implies the true quotient is also 0.
+		 */
+		if (quot_guess == 0) {
+			quot_vectors[quot_top_index - i] = 0;
+			div_carry = numerator_vectors[numerator_top_index - i];
+			numerator_vectors[numerator_top_index - i] = 0;
+			continue;
+		}
+
+		/* sub */
+		BC_VECTOR sub;
+		BC_VECTOR borrow = 0;
+		BC_VECTOR *numerator_calc_bottom = numerator_vectors + numerator_arr_size - divisor_arr_size - i;
+		size_t j;
+		for (j = 0; j < divisor_arr_size - 1; j++) {
+			sub = divisor_vectors[j] * quot_guess + borrow;
+			BC_VECTOR sub_low = sub % BC_VECTOR_BOUNDARY_NUM;
+			borrow = sub / BC_VECTOR_BOUNDARY_NUM;
+
+			if (numerator_calc_bottom[j] >= sub_low) {
+				numerator_calc_bottom[j] -= sub_low;
 			} else {
-		if (digit == 1) {
-			memcpy(result, num, size);
+				numerator_calc_bottom[j] += BC_VECTOR_BOUNDARY_NUM - sub_low;
+				borrow++;
+			}
+		}
+		/* last digit sub */
+		sub = divisor_vectors[j] * quot_guess + borrow;
+		bool neg_remainder = numerator_calc_bottom[j] < sub;
+		numerator_calc_bottom[j] -= sub;
+
+		/* If the temporary quotient is too large, add back divisor */
+		BC_VECTOR carry = 0;
+		if (neg_remainder) {
+			quot_guess--;
+			for (j = 0; j < divisor_arr_size - 1; j++) {
+				numerator_calc_bottom[j] += divisor_vectors[j] + carry;
+				carry = numerator_calc_bottom[j] / BC_VECTOR_BOUNDARY_NUM;
+				numerator_calc_bottom[j] %= BC_VECTOR_BOUNDARY_NUM;
+			}
+			/* last add */
+			numerator_calc_bottom[j] += divisor_vectors[j] + carry;
+		}
+
+		quot_vectors[quot_top_index - i] = quot_guess;
+		div_carry = numerator_vectors[numerator_top_index - i];
+		numerator_vectors[numerator_top_index - i] = 0;
+	}
+}
+
+static void bc_do_div(
+	const char *numerator, size_t numerator_readable_len, size_t numerator_bottom_extension,
+	const char *divisor, size_t divisor_len, bc_num *quot, size_t quot_len
+) {
+	size_t divisor_arr_size = (divisor_len + BC_VECTOR_SIZE - 1) / BC_VECTOR_SIZE;
+	size_t numerator_arr_size = (numerator_readable_len + numerator_bottom_extension + BC_VECTOR_SIZE - 1) / BC_VECTOR_SIZE;
+	size_t quot_arr_size = numerator_arr_size - divisor_arr_size + 1;
+	size_t quot_real_arr_size = MIN(quot_arr_size, (quot_len + BC_VECTOR_SIZE - 1) / BC_VECTOR_SIZE);
+
+	BC_VECTOR *numerator_vectors = safe_emalloc(numerator_arr_size + divisor_arr_size + quot_arr_size, sizeof(BC_VECTOR), 0);
+	BC_VECTOR *divisor_vectors = numerator_vectors + numerator_arr_size;
+	BC_VECTOR *quot_vectors = divisor_vectors + divisor_arr_size;
+
+	/* Fill with zeros and convert as many vector elements as needed */
+	size_t numerator_vector_count = 0;
+	while (numerator_bottom_extension >= BC_VECTOR_SIZE) {
+		numerator_vectors[numerator_vector_count] = 0;
+		numerator_bottom_extension -= BC_VECTOR_SIZE;
+		numerator_vector_count++;
+	}
+
+	size_t numerator_bottom_read_len = BC_VECTOR_SIZE - numerator_bottom_extension;
+
+	size_t base;
+	size_t numerator_read = 0;
+	if (numerator_bottom_read_len < BC_VECTOR_SIZE) {
+		numerator_read = MIN(numerator_bottom_read_len, numerator_readable_len);
+		base = POW_10_LUT[numerator_bottom_extension];
+		numerator_vectors[numerator_vector_count] = 0;
+		for (size_t i = 0; i < numerator_read; i++) {
+			numerator_vectors[numerator_vector_count] += *numerator * base;
+			base *= BASE;
+			numerator--;
+		}
+		numerator_vector_count++;
+	}
+
+	/* Bulk convert numerator and divisor to vectors */
+	if (numerator_readable_len > numerator_read) {
+		bc_convert_to_vector(numerator_vectors + numerator_vector_count, numerator, numerator_readable_len - numerator_read);
+	}
+	bc_convert_to_vector(divisor_vectors, divisor, divisor_len);
+
+	/* Do the division */
+	if (divisor_arr_size == 1) {
+		bc_fast_div(numerator_vectors, numerator_arr_size, divisor_vectors[0], quot_vectors, quot_arr_size);
 	} else {
-			/* Initialize */
-			nptr = (unsigned char *) (num + size - 1);
-			rptr = (unsigned char *) (result + size - 1);
-			carry = 0;
-
-			while (size-- > 0) {
-				value = *nptr-- * digit + carry;
-				*rptr-- = value % BASE;
-				carry = value / BASE;
+		bc_standard_div(numerator_vectors, numerator_arr_size, divisor_vectors, divisor_arr_size, divisor_len, quot_vectors, quot_arr_size);
 	}

-			if (carry != 0) {
-				*rptr = carry;
-			}
-		}
-	}
+	/* Convert to bc_num */
+	char *qptr = (*quot)->n_value;
+	char *qend = qptr + quot_len - 1;
+
+	size_t i;
+	for (i = 0; i < quot_real_arr_size - 1; i++) {
+#if BC_VECTOR_SIZE == 4
+		bc_write_bcd_representation(quot_vectors[i], qend - 3);
+		qend -= 4;
+#else
+		bc_write_bcd_representation(quot_vectors[i] / 10000, qend - 7);
+		bc_write_bcd_representation(quot_vectors[i] % 10000, qend - 3);
+		qend -= 8;
+#endif
 	}

+	while (qend >= qptr) {
+		*qend-- = quot_vectors[i] % BASE;
+		quot_vectors[i] /= BASE;
+	}

-/* The full division routine. This computes N1 / N2.  It returns
-   true if the division is ok and the result is in QUOT.  The number of
-   digits after the decimal point is SCALE. It returns false if division
-   by zero is tried. The algorithm is found in Knuth Vol 2. p237. */
-bool bc_divide(bc_num n1, bc_num n2, bc_num *quot, int scale)
+	efree(numerator_vectors);
+}
+
+bool bc_divide(bc_num numerator, bc_num divisor, bc_num *quot, size_t scale)
 {
-	bc_num qval;
-	unsigned char *num1, *num2;
-	unsigned char *ptr1, *ptr2, *n2ptr, *qptr;
-	int scale1, val;
-	unsigned int len1, len2, scale2, qdigits, extra, count;
-	unsigned int qdig, qguess, borrow, carry;
-	unsigned char *mval;
-	unsigned int norm;
-	bool zero;
-
-	/* Test for divide by zero. */
-	if (bc_is_zero(n2)) {
+	/* divide by zero */
+	if (bc_is_zero(divisor)) {
 		return false;
 	}

-	/* Test for divide by 1. If it is we must truncate. */
-	if (n2->n_scale == 0 && n2->n_len == 1 && *n2->n_value == 1) {
-		qval = bc_new_num (n1->n_len, scale);
-		qval->n_sign = (n1->n_sign == n2->n_sign ? PLUS : MINUS);
-		memcpy(qval->n_value, n1->n_value, n1->n_len + MIN(n1->n_scale, scale));
 	bc_free_num(quot);
-		*quot = qval;
-	}

-	/* Set up the divide.  Move the decimal point on n1 by n2's scale.
-	   Remember, zeros on the end of num2 are wasted effort for dividing. */
-	scale2 = n2->n_scale;
-	n2ptr = (unsigned char *) n2->n_value + n2->n_len + scale2 - 1;
-	while ((scale2 > 0) && (*n2ptr == 0)) {
-		scale2--;
-		n2ptr--;
-	}
-
-	len1 = n1->n_len + scale2;
-	scale1 = n1->n_scale - scale2;
-	extra = MAX(scale - scale1, 0);
-
-	num1 = (unsigned char *) safe_emalloc(1, n1->n_len + n1->n_scale, extra + 2);
-	memset(num1, 0, n1->n_len + n1->n_scale + extra + 2);
-	memcpy(num1 + 1, n1->n_value, n1->n_len + n1->n_scale);
-
-	len2 = n2->n_len + scale2;
-	num2 = (unsigned char *) safe_emalloc(1, len2, 1);
-	memcpy(num2, n2->n_value, len2);
-	*(num2 + len2) = 0;
-	n2ptr = num2;
-	while (*n2ptr == 0) {
-		n2ptr++;
-		len2--;
-	}
-
-	/* Calculate the number of quotient digits. */
-	if (len2 > len1 + scale) {
-		qdigits = scale + 1;
-		zero = true;
-	} else {
-		zero = false;
-		if (len2 > len1) {
-			/* One for the zero integer part. */
-			qdigits = scale + 1;
-		} else {
-			qdigits = len1 - len2 + scale + 1;
-		}
-	}
-
-	/* Allocate and zero the storage for the quotient. */
-	qval = bc_new_num (qdigits - scale, scale);
-
-	/* Allocate storage for the temporary storage mval. */
-	mval = (unsigned char *) safe_emalloc(1, len2, 1);
-
-	/* Now for the full divide algorithm. */
-	if (!zero) {
-		/* Normalize */
-		norm = 10 / ((int) *n2ptr + 1);
-		if (norm != 1) {
-			_one_mult(num1, len1 + scale1 + extra + 1, norm, num1);
-			_one_mult(n2ptr, len2, norm, n2ptr);
-		}
-
-		/* Initialize divide loop. */
-		qdig = 0;
-		if (len2 > len1) {
-			qptr = (unsigned char *) qval->n_value + len2 - len1;
-		} else {
-			qptr = (unsigned char *) qval->n_value;
-		}
-
-		/* Loop */
-		while (qdig <= len1 + scale - len2) {
-			/* Calculate the quotient digit guess. */
-			if (*n2ptr == num1[qdig]) {
-				qguess = 9;
-			} else {
-				qguess = (num1[qdig] * 10 + num1[qdig + 1]) / *n2ptr;
-			}
-
-			/* Test qguess. */
-			if (n2ptr[1] * qguess > (num1[qdig] * 10 + num1[qdig + 1] - *n2ptr * qguess) * 10 + num1[qdig + 2]) {
-				qguess--;
-				/* And again. */
-				if (n2ptr[1] * qguess > (num1[qdig] * 10 + num1[qdig + 1] - *n2ptr * qguess) * 10 + num1[qdig + 2]) {
-					qguess--;
-				}
-			}
-
-			/* Multiply and subtract. */
-			borrow = 0;
-			if (qguess != 0) {
-				*mval = 0;
-				_one_mult(n2ptr, len2, qguess, mval + 1);
-				ptr1 = (unsigned char *) num1 + qdig + len2;
-				ptr2 = (unsigned char *) mval + len2;
-				for (count = 0; count < len2 + 1; count++) {
-					val = (int) *ptr1 - (int) *ptr2-- - borrow;
-					if (val < 0) {
-						val += 10;
-						borrow = 1;
-					} else {
-						borrow = 0;
-					}
-					*ptr1-- = val;
-				}
-			}
-
-			/* Test for negative result. */
-			if (borrow == 1) {
-				qguess--;
-				ptr1 = (unsigned char *) num1 + qdig + len2;
-				ptr2 = (unsigned char *) n2ptr + len2 - 1;
-				carry = 0;
-				for (count = 0; count < len2; count++) {
-					val = (int) *ptr1 + (int) *ptr2-- + carry;
-					if (val > 9) {
-						val -= 10;
-						carry = 1;
-					} else {
-						carry = 0;
-					}
-					*ptr1-- = val;
-				}
-				if (carry == 1) {
-					*ptr1 = (*ptr1 + 1) % 10;
-				}
-			}
-
-			/* We now know the quotient digit. */
-			*qptr++ = qguess;
-			qdig++;
-		}
-	}
-
-	/* Clean up and return the number. */
-	qval->n_sign = (n1->n_sign == n2->n_sign ? PLUS : MINUS);
-	if (bc_is_zero(qval)) {
-		qval->n_sign = PLUS;
-	}
-	_bc_rm_leading_zeros(qval);
-	bc_free_num(quot);
-	*quot = qval;
-
-	/* Clean up temporary storage. */
-	efree(mval);
-	efree(num1);
-	efree(num2);
-
-	/* Everything is OK. */
+	/* If numerator is zero, the quotient is always zero. */
+	if (bc_is_zero(numerator)) {
+		*quot = bc_copy_num(BCG(_zero_));
+		return true;
+	}
+
+	/* If divisor is 1 / -1, the quotient's n_value is equal to numerator's n_value. */
+	if (_bc_do_compare(divisor, BCG(_one_), scale, false) == BCMATH_EQUAL) {
+		size_t quot_scale = MIN(numerator->n_scale, scale);
+		*quot = bc_new_num_nonzeroed(numerator->n_len, quot_scale);
+		char *qptr = (*quot)->n_value;
+		memcpy(qptr, numerator->n_value, numerator->n_len + quot_scale);
+		(*quot)->n_sign = numerator->n_sign == divisor->n_sign ? PLUS : MINUS;
+		_bc_rm_leading_zeros(*quot);
+		return true;
+	}
+
+	char *numeratorptr = numerator->n_value;
+	char *numeratorend = numeratorptr + numerator->n_len + numerator->n_scale - 1;
+	size_t numerator_len = numerator->n_len;
+	size_t numerator_scale = numerator->n_scale;
+
+	char *divisorptr = divisor->n_value;
+	char *divisorend = divisorptr + divisor->n_len + divisor->n_scale - 1;
+	size_t divisor_len = divisor->n_len;
+	size_t divisor_scale = divisor->n_scale;
+	size_t divisor_int_right_zeros = 0;
+
+	/* remove divisor trailing zeros */
+	while (*divisorend == 0 && divisor_scale > 0) {
+		divisorend--;
+		divisor_scale--;
+	}
+	while (*divisorend == 0) {
+		divisorend--;
+		divisor_int_right_zeros++;
+	}
+
+	if (*numeratorptr == 0 && numerator_len == 1) {
+		numeratorptr++;
+		numerator_len = 0;
+	}
+
+	size_t numerator_top_extension = 0;
+	size_t numerator_bottom_extension = 0;
+	if (divisor_scale > 0) {
+		/*
+		 * e.g. divisor_scale = 4
+		 * divisor = .0002, to be 2 or divisor = 200.001, to be 200001
+		 * numerator = .03, to be 300 or numerator = .000003, to be .03
+		 * numerator may become longer than the original data length due to the addition of
+		 * trailing zeros in the integer part.
+		 */
+		numerator_len += divisor_scale;
+		numerator_bottom_extension = numerator_scale < divisor_scale ? divisor_scale - numerator_scale : 0;
+		numerator_scale = numerator_scale > divisor_scale ? numerator_scale - divisor_scale : 0;
+		divisor_len += divisor_scale;
+		divisor_scale = 0;
+	} else if (divisor_int_right_zeros > 0) {
+		/*
+		 * e.g. divisor_int_right_zeros = 4
+		 * divisor = 2000, to be 2
+		 * numerator = 30, to be .03 or numerator = 30000, to be 30
+		 * Also, numerator may become longer than the original data length due to the addition of
+		 * leading zeros in the fractional part.
+		 */
+		numerator_top_extension = numerator_len < divisor_int_right_zeros ? divisor_int_right_zeros - numerator_len : 0;
+		numerator_len = numerator_len > divisor_int_right_zeros ? numerator_len - divisor_int_right_zeros : 0;
+		numerator_scale += divisor_int_right_zeros;
+		divisor_len -= divisor_int_right_zeros;
+		divisor_scale = 0;
+	}
+
+	/* remove numerator leading zeros */
+	while (*numeratorptr == 0 && numerator_len > 0) {
+		numeratorptr++;
+		numerator_len--;
+	}
+	/* remove divisor leading zeros */
+	while (*divisorptr == 0) {
+		divisorptr++;
+		divisor_len--;
+	}
+
+	/* Considering the scale specification, the quotient is always 0 if this condition is met */
+	if (divisor_len > numerator_len + scale) {
+		*quot = bc_copy_num(BCG(_zero_));
+		return true;
+	}
+
+	/* set scale to numerator */
+	if (numerator_scale > scale) {
+		size_t scale_diff = numerator_scale - scale;
+		if (numerator_bottom_extension > scale_diff) {
+			numerator_bottom_extension -= scale_diff;
+		} else {
+			numerator_bottom_extension = 0;
+			numeratorend -= scale_diff - numerator_bottom_extension;
+		}
+	} else {
+		numerator_bottom_extension += scale - numerator_scale;
+	}
+	numerator_scale = scale;
+
+	/* Length of numerator data that can be read */
+	size_t numerator_readable_len = numeratorend - numeratorptr + 1;
+
+	/* If divisor is 1 here, return the result of adjusting the decimal point position of numerator. */
+	if (divisor_len == 1 && *divisorptr == 1) {
+		if (numerator_len == 0) {
+			numerator_len = 1;
+			numerator_top_extension++;
+		}
+		size_t quot_scale = numerator_scale > numerator_bottom_extension ? numerator_scale - numerator_bottom_extension : 0;
+		numerator_bottom_extension = numerator_scale < numerator_bottom_extension ? numerator_bottom_extension - numerator_scale : 0;
+
+		*quot = bc_new_num_nonzeroed(numerator_len, quot_scale);
+		char *qptr = (*quot)->n_value;
+		for (size_t i = 0; i < numerator_top_extension; i++) {
+			*qptr++ = 0;
+		}
+		memcpy(qptr, numeratorptr, numerator_readable_len);
+		qptr += numerator_readable_len;
+		for (size_t i = 0; i < numerator_bottom_extension; i++) {
+			*qptr++ = 0;
+		}
+		(*quot)->n_sign = numerator->n_sign == divisor->n_sign ? PLUS : MINUS;
+		return true;
+	}
+
+	size_t quot_full_len;
+	if (divisor_len > numerator_len) {
+		*quot = bc_new_num_nonzeroed(1, scale);
+		quot_full_len = 1 + scale;
+	} else {
+		*quot = bc_new_num_nonzeroed(numerator_len - divisor_len + 1, scale);
+		quot_full_len = numerator_len - divisor_len + 1 + scale;
+	}
+
+	/* do divide */
+	bc_do_div(numeratorend, numerator_readable_len, numerator_bottom_extension, divisorend, divisor_len, quot, quot_full_len);
+	(*quot)->n_sign = numerator->n_sign == divisor->n_sign ? PLUS : MINUS;
+	_bc_rm_leading_zeros(*quot);
+
 	return true;
 }