📄 bignumber.cpp

📁 绝对好东东
💻 CPP
📖 第 1 页 / 共 5 页
字号:
上一页 1 2 3 45
			dh = dh << shift | dl >> (32-shift);
			dl <<= shift;


		}


		for (shift = 0; (dh & (BNWORD32)1 << 32-1-shift)) == 0;
								shift++)
			;
		if (shift) {
		}
		dh = dh << shift | dl >> (32-shift);
		shift = 0;
		while (dh
	}
#endif

	dh = BIGLITTLE(*(d-dlen),*(d+(dlen-1)));
	ASSERT(dh);

	/* Normalize the divisor */
	shift = 0;
	r = dh;
	i = 32/2;
	do {
		if (r >> i)
			r >>= i;
		else
			shift += i;
	} while ((i /= 2) != 0);

	nh = 0;
	if (shift) {
		bniLshift_32(d, dlen, shift);
		dh = BIGLITTLE(*(d-dlen),*(d+(dlen-1)));
		nh = bniLshift_32(n, nlen, shift);
	}

	/* Assert that dh is now normalized */
	ASSERT(dh >> (32-1));

	/* Also get the second-most significant word of the divisor */
	dl = BIGLITTLE(*(d-(dlen-1)),*(d+(dlen-2)));

	/*
	 * Adjust pointers: n to point to least significant end of first
	 * first subtract, and q to one the most-significant end of the
	 * quotient array.
	 */
	BIGLITTLE(n -= qlen,n += qlen);
	BIGLITTLE(q -= qlen,q += qlen);

	/* Fetch the most significant stored word of the dividend */
	nm = BIGLITTLE(*(n-dlen),*(n+(dlen-1)));

	/*
	 * Compute the first digit of the quotient, based on the
	 * first two words of the dividend (the most significant of which
	 * is the overflow word h).
	 */
	if (nh) {
		ASSERT(nh < dh);
		r = bniDiv21_32(&qhat, nh, nm, dh);
	} else if (nm >= dh) {
		qhat = nm/dh;
		r = nm % dh;
	} else {	/* Quotient is zero */
		qhigh = 0;
		goto divloop;
	}

	/* Now get the third most significant word of the dividend */
	nl = BIGLITTLE(*(n-(dlen-1)),*(n+(dlen-2)));

	/*
	 * Correct qhat, the estimate of quotient digit.
	 * qhat can only be high, and at most two words high,
	 * so the loop can be unrolled and abbreviated.
	 */
#ifdef mul32_ppmm
	mul32_ppmm(nm, t32, qhat, dl);
	if (nm > r || (nm == r && t32 > nl)) {
		/* Decrement qhat and adjust comparison parameters */
		qhat--;
		if ((r += dh) >= dh) {
			nm -= (t32 < dl);
			t32 -= dl;
			if (nm > r || (nm == r && t32 > nl))
				qhat--;
		}
	}
#elif defined(BNWORD64)
	t64 = (BNWORD64)qhat * dl;
	if (t64 > ((BNWORD64)r << 32) + nl) {
		/* Decrement qhat and adjust comparison parameters */
		qhat--;
		if ((r += dh) > dh) {
			t64 -= dl;
			if (t64 > ((BNWORD64)r << 32) + nl)
				qhat--;
		}
	}
#else /* Use bniMulN1_32 */
	bniMulN1_32(BIGLITTLE(t2+2,t2), &dl, 1, qhat);
	if (t2high > r || (t2high == r && t2low > nl)) {
		/* Decrement qhat and adjust comparison parameters */
		qhat--;
		if ((r += dh) >= dh) {
			t2high -= (t2low < dl);
			t2low -= dl;
			if (t2high > r || (t2high == r && t2low > nl))
				qhat--;
		}
	}
#endif

	/* Do the multiply and subtract */
	r = bniMulSub1_32(n, d, dlen, qhat);
	/* If there was a borrow, add back once. */
	if (r > nh) {	/* Borrow? */
		(void)bniAddN_32(n, d, dlen);
		qhat--;
	}

	/* Remember the first quotient digit. */
	qhigh = qhat;

	/* Now, the main division loop: */
divloop:
	while (qlen--) {

		/* Advance n */
		nh = BIGLITTLE(*(n-dlen),*(n+(dlen-1)));
		BIGLITTLE(++n,--n);
		nm = BIGLITTLE(*(n-dlen),*(n+(dlen-1)));

		if (nh == dh) {
			qhat = ~(BNWORD32)0;
			/* Optimized computation of r = (nh,nm) - qhat * dh */
			r = nh + nm;
			if (r < nh)
				goto subtract;
		} else {
			ASSERT(nh < dh);
			r = bniDiv21_32(&qhat, nh, nm, dh);
		}

		nl = BIGLITTLE(*(n-(dlen-1)),*(n+(dlen-2)));
#ifdef mul32_ppmm
		mul32_ppmm(nm, t32, qhat, dl);
		if (nm > r || (nm == r && t32 > nl)) {
			/* Decrement qhat and adjust comparison parameters */
			qhat--;
			if ((r += dh) >= dh) {
				nm -= (t32 < dl);
				t32 -= dl;
				if (nm > r || (nm == r && t32 > nl))
					qhat--;
			}
		}
#elif defined(BNWORD64)
		t64 = (BNWORD64)qhat * dl;
		if (t64 > ((BNWORD64)r<<32) + nl) {
			/* Decrement qhat and adjust comparison parameters */
			qhat--;
			if ((r += dh) >= dh) {
				t64 -= dl;
				if (t64 > ((BNWORD64)r << 32) + nl)
					qhat--;
			}
		}
#else /* Use bniMulN1_32 */
		bniMulN1_32(BIGLITTLE(t2+2,t2), &dl, 1, qhat);
		if (t2high > r || (t2high == r && t2low > nl)) {
			/* Decrement qhat and adjust comparison parameters */
			qhat--;
			if ((r += dh) >= dh) {
				t2high -= (t2low < dl);
				t2low -= dl;
				if (t2high > r || (t2high == r && t2low > nl))
					qhat--;
			}
		}
#endif

		/*
		 * As a point of interest, note that it is not worth checking
		 * for qhat of 0 or 1 and installing special-case code.  These
		 * occur with probability 2^-32, so spending 1 cycle to check
		 * for them is only worth it if we save more than 2^15 cycles,
		 * and a multiply-and-subtract for numbers in the 1024-bit
		 * range just doesn't take that long.
		 */
subtract:
		/*
		 * n points to the least significant end of the substring
		 * of n to be subtracted from.  qhat is either exact or
		 * one too large.  If the subtract gets a borrow, it was
		 * one too large and the divisor is added back in.  It's
		 * a dlen+1 word add which is guaranteed to produce a
		 * carry out, so it can be done very simply.
		 */
		r = bniMulSub1_32(n, d, dlen, qhat);
		if (r > nh) {	/* Borrow? */
			(void)bniAddN_32(n, d, dlen);
			qhat--;
		}
		/* Store the quotient digit */
		BIGLITTLE(*q++,*--q) = qhat;
	}
	/* Tah dah! */

	if (shift) {
		bniRshift_32(d, dlen, shift);
		bniRshift_32(n, dlen, shift);
	}

	return qhigh;
}
#endif

/*
 * Find the negative multiplicative inverse of x (x must be odd!) modulo 2^32.
 *
 * This just performs Newton's iteration until it gets the
 * inverse.  The initial estimate is always correct to 3 bits, and
 * sometimes 4.  The number of valid bits doubles each iteration.
 * (To prove it, assume x * y == 1 (mod 2^n), and introduce a variable
 * for the error mod 2^2n.  x * y == 1 + k*2^n (mod 2^2n) and follow
 * the iteration through.)
 */
#ifndef bniMontInv1_32
BNWORD32
bniMontInv1_32(BNWORD32 const x)
{
        BNWORD32 y = x, z;

	ASSERT(x & 1);
 
        while ((z = x*y) != 1)
                y *= 2 - z;
        return -y;
}
#endif /* !bniMontInv1_32 */

#if defined(BNWORD64) && PRODUCT_SCAN
/*
 * Test code for product-scanning Montgomery reduction.
 * This seems to slow the C code down rather than speed it up.
 *
 * The first loop computes the Montgomery multipliers, storing them over
 * the low half of the number n.
 *
 * The second half multiplies the upper half, adding in the modulus
 * times the Montgomery multipliers.  The results of this multiply
 * are stored.
 */
void
bniMontReduce_32(BNWORD32 *n, BNWORD32 const *mod, unsigned mlen, BNWORD32 inv)
{
	BNWORD64 x, y;
	BNWORD32 const *pm;
	BNWORD32 *pn;
	BNWORD32 t;
	unsigned carry;
	unsigned i, j;

	/* Special case of zero */
	if (!mlen)
		return;

	/* Pass 1 - compute Montgomery multipliers */
	/* First iteration can have certain simplifications. */
	t = BIGLITTLE(n[-1],n[0]);
	x = t;
	t *= inv;
	BIGLITTLE(n[-1], n[0]) = t;
	x += (BNWORD64)t * BIGLITTLE(mod[-1],mod[0]); /* Can't overflow */
	ASSERT((BNWORD32)x == 0);
	x = x >> 32;

	for (i = 1; i < mlen; i++) {
		carry = 0;
		pn = n;
		pm = BIGLITTLE(mod-i-1,mod+i+1);
		for (j = 0; j < i; j++) {
			y = (BNWORD64)BIGLITTLE(*--pn * *pm++, *pn++ * *--pm);
			x += y;
			carry += (x < y);
		}
		ASSERT(BIGLITTLE(pn == n-i, pn == n+i));
		y = t = BIGLITTLE(pn[-1], pn[0]);
		x += y;
		carry += (x < y);
		BIGLITTLE(pn[-1], pn[0]) = t = inv * (BNWORD32)x;
		ASSERT(BIGLITTLE(pm == mod-1, pm == mod+1));
		y = (BNWORD64)t * BIGLITTLE(pm[0],pm[-1]);
		x += y;
		carry += (x < y);
		ASSERT((BNWORD32)x == 0);
		x = x >> 32 | (BNWORD64)carry << 32;
	}

	BIGLITTLE(n -= mlen, n += mlen);

	/* Pass 2 - compute upper words and add to n */
	for (i = 1; i < mlen; i++) {
		carry = 0;
		pm = BIGLITTLE(mod-i,mod+i);
		pn = n;
		for (j = i; j < mlen; j++) {
			y = (BNWORD64)BIGLITTLE(*--pm * *pn++, *pm++ * *--pn);
			x += y;
			carry += (x < y);
		}
		ASSERT(BIGLITTLE(pm == mod-mlen, pm == mod+mlen));
		ASSERT(BIGLITTLE(pn == n+mlen-i, pn == n-mlen+i));
		y = t = BIGLITTLE(*(n-i),*(n+i-1));
		x += y;
		carry += (x < y);
		BIGLITTLE(*(n-i),*(n+i-1)) = (BNWORD32)x;
		x = (x >> 32) | (BNWORD64)carry << 32;
	}

	/* Last round of second half, simplified. */
	t = BIGLITTLE(*(n-mlen),*(n+mlen-1));
	x += t;
	BIGLITTLE(*(n-mlen),*(n+mlen-1)) = (BNWORD32)x;
	carry = (unsigned)(x >> 32);

	while (carry)
		carry -= bniSubN_32(n, mod, mlen);
	while (bniCmp_32(n, mod, mlen) >= 0)
		(void)bniSubN_32(n, mod, mlen);
}
#define bniMontReduce_32 bniMontReduce_32
#endif

/*
 * Montgomery reduce n, modulo mod.  This reduces modulo mod and divides by
 * 2^(32*mlen).  Returns the result in the *top* mlen words of the argument n.
 * This is ready for another multiplication using bniMul_32.
 *
 * Montgomery representation is a very useful way to encode numbers when
 * you're doing lots of modular reduction.  What you do is pick a multiplier
 * R which is relatively prime to the modulus and very easy to divide by.
 * Since the modulus is odd, R is closen as a power of 2, so the division
 * is a shift.  In fact, it's a shift of an integral number of words,
 * so the shift can be implicit - just drop the low-order words.
 *
 * Now, choose R *larger* than the modulus m, 2^(32*mlen).  Then convert
 * all numbers a, b, etc. to Montgomery form M(a), M(b), etc using the
 * relationship M(a) = a*R mod m, M(b) = b*R mod m, etc.  Note that:
 * - The Montgomery form of a number depends on the modulus m.
 *   A fixed modulus m is assumed throughout this discussion.
 * - Since R is relaitvely prime to m, multiplication by R is invertible;
 *   no information about the numbers is lost, they're just scrambled.
 * - Adding (and subtracting) numbers in this form works just as usual.
 *   M(a+b) = (a+b)*R mod m = (a*R + b*R) mod m = (M(a) + M(b)) mod m
 * - Multiplying numbers in this form produces a*b*R*R.  The problem
 *   is to divide out the excess factor of R, modulo m as well as to
 *   reduce to the given length mlen.  It turns out that this can be
 *   done *faster* than a normal divide, which is where the speedup
 *   in Montgomery division comes from.
 *
 * Normal reduction chooses a most-significant quotient digit q and then
 * subtracts q*m from the number to be reduced.  Choosing q is tricky
 * and involved (just look at bniDiv_32 to see!) and is usually
 * imperfect, requiring a check for correction after the subtraction.
 *
 * Montgomery reduction *adds* a multiple of m to the *low-order* part
 * of the number to be reduced.  This multiple is chosen to make the
 * low-order part of the number come out to zero.  This can be done
 * with no trickery or error using a precomputed inverse of the modulus.
 * In this code, the "part" is one word, but any width can be used.
 *
 * Repeating this step sufficiently often results in a value which
 * is a multiple of R (a power of two, remember) but is still (since
 * the additions were to the low-order part and thus did not increase
 * the value of the number being reduced very much) still not much
 * larger than m*R.  Then implicitly divide by R and subtract off
 * m until the result is in the correct range.
 *
 * Since the low-order part being cancelled is less than R, the
 * multiple of m added must have a multiplier which is at most R-1.
 * Assuming that the input is at most m*R-1, the final number is
 * at most m*(2*R-1)-1 = 2*m*R - m - 1, so subtracting m once from
 * the high-order part, equivalent to subtracting m*R from the
 * while number, produces a result which is at most m*R - m - 1,
 * which divided by R is at most m-1.
 *
 * To convert *to* Montgomery form, you need a regular remainder
 * routine, although you can just compute R*R (mod m) and do the
 * conversion using Montgomery multiplication.  To convert *from*
 * Montgomery form, just Montgomery reduce the number to
 * remove the extra factor of R.
 * 
 * TODO: Change to a full inverse and use Karatsuba's multiplication
 * rather than this word-at-a-time.
 */
#ifndef bniMontReduce_32
void
bniMontReduce_32(BNWORD32 *n, BNWORD32 const *mod, unsigned const mlen,
                BNWORD32 inv)
{
	BNWORD32 t;
	BNWORD32 c = 0;
	unsigned len = mlen;

	/* inv must be the negative inverse of mod's least significant word */
	ASSERT((BNWORD32)(inv * BIGLITTLE(mod[-1],mod[0])) == (BNWORD32)-1);

	ASSERT(len);

	do {
		t = bniMulAdd1_32(n, mod, mlen, inv * BIGLITTLE(n[-1],n[0]));
		c += bniAdd1_32(BIGLITTLE(n-mlen,n+mlen), len, t);
		BIGLITTLE(--n,++n);
	} while (--len);

	/*
	 * All that adding can cause an overflow past the modulus size,
	 * but it's unusual, and never by much, so a subtraction loop
	 * is the right way to deal with it.
	 * This subtraction happens infrequently - I've only ever seen it
	 * invoked once per reduction, and then just under 22.5% of the time.
	 */
	while (c)
		c -= bniSubN_32(n, mod, mlen);
	while (bniCmp_32(n, mod, mlen) >= 0)
		(void)bniSubN_32(n, mod, mlen);
}
#endif /* !bniMontReduce_32 */

/*
 * A couple of helpers that you might want to implement atomically
 * in asm sometime.
 */
#ifndef bniMontMul_32
/*
 * Multiply "num1" by "num2", modulo "mod", all of length "len", and
 * place the result in the high half of "prod".  "inv" is the inverse
 * of the least-significant word of the modulus, modulo 2^32.
 * This uses numbers in Montgomery form.  Reduce using "len" and "inv".
 *
 * This is implemented as a macro to win on compilers that don't do
 * inlining, since it's so trivial.
 */
#define bniMontMul_32(prod, n1, n2, mod, len, inv) \
	(bniMulX_32(prod, n1, n2, len), bniMontReduce_32(prod, mod, len, inv))
#endif /* !bniMontMul_32 */

#ifndef bniMontSquare_32
/*
 * Square "num", modulo "mod", both of length "len", and place the result
 * in the high half of "prod".  "inv" is the inverse of the least-significant
 * word of the modulus, modulo 2^32.
 * This uses numb
上一页 1 2 3 45
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -