📄 bignumber.cpp

📁 绝对好东东
💻 CPP
📖 第 1 页 / 共 5 页
字号:
/*
 * An optimized version of the above, for shifts of 1.
 * Some machines can use add-with-carry tricks for this.
 */
#ifndef bniDouble_32
BNWORD32
bniDouble_32(BNWORD32 *num, unsigned len)
{
	BNWORD32 x, carry;

	carry = 0;
	while (len--) {
		BIG(--num;)
		x = *num;
		*num = (x<<1) | carry;
		LITTLE(num++;)
		carry = x >> (32-1);
	}
	return carry;
}
#endif /* !bniDouble_32 */

/*
 * Shift n words right "shift" bits.  0 < shift < 32.  Returns the
 * carry, any bits shifted off the right-hand side (0 <= carry < 2^shift).
 */
#ifndef bniRshift_32
BNWORD32
bniRshift_32(BNWORD32 *num, unsigned len, unsigned shift)
{
	BNWORD32 x, carry = 0;

	ASSERT(shift > 0);
	ASSERT(shift < 32);

	BIGLITTLE(num -= len, num += len);

	while (len--) {
		LITTLE(--num;)
		x = *num;
		*num = (x>>shift) | carry;
		BIG(num++;)
		carry = x << (32-shift);
	}
	return carry >> (32-shift);
}
#endif /* !bniRshift_32 */

/* 
 * Multiply two numbers of the given lengths.  prod and num2 may overlap,
 * provided that the low len1 bits of prod are free.  (This corresponds
 * nicely to the place the result is returned from bniMontReduce_32.)
 *
 * TODO: Use Karatsuba multiply.  The overlap constraints may have
 * to get rewhacked.
 */
#ifndef bniMul_32
void
bniMul_32(BNWORD32 *prod, BNWORD32 const *num1, unsigned len1,
                          BNWORD32 const *num2, unsigned len2)
{
	/* Special case of zero */
	if (!len1 || !len2) {
		bniZero_32(prod, len1+len2);
		return;
	}

	/* Multiply first word */
	bniMulN1_32(prod, num1, len1, BIGLITTLE(*--num2,*num2++));

	/*
	 * Add in subsequent words, storing the most significant word,
	 * which is new each time.
	 */
	while (--len2) {
		BIGLITTLE(--prod,prod++);
		BIGLITTLE(*(prod-len1-1),*(prod+len1)) =
			bniMulAdd1_32(prod, num1, len1,
				BIGLITTLE(*--num2,*num2++));
	}
}
#endif /* !bniMul_32 */

/*
 * bniMulX_32 is a square multiply - both inputs are the same length.
 * It's normally just a macro wrapper around the general multiply,
 * but might be implementable in assembly more efficiently (such as
 * when product scanning).
 */
#ifndef bniMulX_32
#if defined(BNWORD64) && PRODUCT_SCAN
/*
 * Test code to see whether product scanning is any faster.  It seems
 * to make the C code slower, so PRODUCT_SCAN is not defined.
 */
static void
bniMulX_32(BNWORD32 *prod, BNWORD32 const *num1, BNWORD32 const *num2,
	unsigned len)
{
	BNWORD64 x, y;
	BNWORD32 const *p1, *p2;
	unsigned carry;
	unsigned i, j;

	/* Special case of zero */
	if (!len)
		return;

	x = (BNWORD64)BIGLITTLE(num1[-1] * num2[-1], num1[0] * num2[0]);
	BIGLITTLE(*--prod, *prod++) = (BNWORD32)x;
	x >>= 32;

	for (i = 1; i < len; i++) {
		carry = 0;
		p1 = num1;
		p2 = BIGLITTLE(num2-i-1,num2+i+1);
		for (j = 0; j <= i; j++) {
			BIG(y = (BNWORD64)*--p1 * *p2++;)
			LITTLE(y = (BNWORD64)*p1++ * *--p2;)
			x += y;
			carry += (x < y);
		}
		BIGLITTLE(*--prod,*prod++) = (BNWORD32)x;
		x = (x >> 32) | (BNWORD64)carry << 32;
	}
	for (i = 1; i < len; i++) {
		carry = 0;
		p1 = BIGLITTLE(num1-i,num1+i);
		p2 = BIGLITTLE(num2-len,num2+len);
		for (j = i; j < len; j++) {
			BIG(y = (BNWORD64)*--p1 * *p2++;)
			LITTLE(y = (BNWORD64)*p1++ * *--p2;)
			x += y;
			carry += (x < y);
		}
		BIGLITTLE(*--prod,*prod++) = (BNWORD32)x;
		x = (x >> 32) | (BNWORD64)carry << 32;
	}
	
	BIGLITTLE(*--prod,*prod) = (BNWORD32)x;
}
#else /* !defined(BNWORD64) || !PRODUCT_SCAN */
/* Default trivial macro definition */
#define bniMulX_32(prod, num1, num2, len) bniMul_32(prod, num1, len, num2, len)
#endif /* !defined(BNWORD64) || !PRODUCT_SCAN */
#endif /* !lbmMulX_32 */

#if !defined(bniMontMul_32) && defined(BNWORD64) && PRODUCT_SCAN
/*
 * Test code for product-scanning multiply.  This seems to slow the C
 * code down rather than speed it up.
 * This does a multiply and Montgomery reduction together, using the
 * same loops.  The outer loop scans across the product, twice.
 * The first pass computes the low half of the product and the
 * Montgomery multipliers.  These are stored in the product array,
 * which contains no data as of yet.  x and carry add up the columns
 * and propagate carries forward.
 *
 * The second half multiplies the upper half, adding in the modulus
 * times the Montgomery multipliers.  The results of this multiply
 * are stored.
 */
static void
bniMontMul_32(BNWORD32 *prod, BNWORD32 const *num1, BNWORD32 const *num2,
	BNWORD32 const *mod, unsigned len, BNWORD32 inv)
{
	BNWORD64 x, y;
	BNWORD32 const *p1, *p2, *pm;
	BNWORD32 *pp;
	BNWORD32 t;
	unsigned carry;
	unsigned i, j;

	/* Special case of zero */
	if (!len)
		return;

	/*
	 * This computes directly into the high half of prod, so just
	 * shift the pointer and consider prod only "len" elements long
	 * for the rest of the code.
	 */
	BIGLITTLE(prod -= len, prod += len);

	/* Pass 1 - compute Montgomery multipliers */
	/* First iteration can have certain simplifications. */
	x = (BNWORD64)BIGLITTLE(num1[-1] * num2[-1], num1[0] * num2[0]);
	BIGLITTLE(prod[-1], prod[0]) = t = inv * (BNWORD32)x;
	y = (BNWORD64)t * BIGLITTLE(mod[-1],mod[0]);
	x += y;
	/* Note: GCC 2.6.3 has a bug if you try to eliminate "carry" */
	carry = (x < y);
	ASSERT((BNWORD32)x == 0);
	x = x >> 32 | (BNWORD64)carry << 32;

	for (i = 1; i < len; i++) {
		carry = 0;
		p1 = num1;
		p2 = BIGLITTLE(num2-i-1,num2+i+1);
		pp = prod;
		pm = BIGLITTLE(mod-i-1,mod+i+1);
		for (j = 0; j < i; j++) {
			y = (BNWORD64)BIGLITTLE(*--p1 * *p2++, *p1++ * *--p2);
			x += y;
			carry += (x < y);
			y = (BNWORD64)BIGLITTLE(*--pp * *pm++, *pp++ * *--pm);
			x += y;
			carry += (x < y);
		}
		y = (BNWORD64)BIGLITTLE(p1[-1] * p2[0], p1[0] * p2[-1]);
		x += y;
		carry += (x < y);
		ASSERT(BIGLITTLE(pp == prod-i, pp == prod+i));
		BIGLITTLE(pp[-1], pp[0]) = t = inv * (BNWORD32)x;
		ASSERT(BIGLITTLE(pm == mod-1, pm == mod+1));
		y = (BNWORD64)t * BIGLITTLE(pm[0],pm[-1]);
		x += y;
		carry += (x < y);
		ASSERT((BNWORD32)x == 0);
		x = x >> 32 | (BNWORD64)carry << 32;
	}

	/* Pass 2 - compute reduced product and store */
	for (i = 1; i < len; i++) {
		carry = 0;
		p1 = BIGLITTLE(num1-i,num1+i);
		p2 = BIGLITTLE(num2-len,num2+len);
		pm = BIGLITTLE(mod-i,mod+i);
		pp = BIGLITTLE(prod-len,prod+len);
		for (j = i; j < len; j++) {
			y = (BNWORD64)BIGLITTLE(*--p1 * *p2++, *p1++ * *--p2);
			x += y;
			carry += (x < y);
			y = (BNWORD64)BIGLITTLE(*--pm * *pp++, *pm++ * *--pp);
			x += y;
			carry += (x < y);
		}
		ASSERT(BIGLITTLE(pm == mod-len, pm == mod+len));
		ASSERT(BIGLITTLE(pp == prod-i, pp == prod+i));
		BIGLITTLE(pp[0],pp[-1]) = (BNWORD32)x;
		x = (x >> 32) | (BNWORD64)carry << 32;
	}

	/* Last round of second half, simplified. */
	BIGLITTLE(*(prod-len),*(prod+len-1)) = (BNWORD32)x;
	carry = (x >> 32);

	while (carry)
		carry -= bniSubN_32(prod, mod, len);
	while (bniCmp_32(prod, mod, len) >= 0)
		(void)bniSubN_32(prod, mod, len);
}
/* Suppress later definition */
#define bniMontMul_32 bniMontMul_32
#endif

#if !defined(bniSquare_32) && defined(BNWORD64) && PRODUCT_SCAN
/*
 * Trial code for product-scanning squaring.  This seems to slow the C
 * code down rather than speed it up.
 */
void
bniSquare_32(BNWORD32 *prod, BNWORD32 const *num, unsigned len)
{
	BNWORD64 x, y, z;
	BNWORD32 const *p1, *p2;
	unsigned carry;
	unsigned i, j;

	/* Special case of zero */
	if (!len)
		return;

	/* Word 0 of product */
	x = (BNWORD64)BIGLITTLE(num[-1] * num[-1], num[0] * num[0]);
	BIGLITTLE(*--prod, *prod++) = (BNWORD32)x;
	x >>= 32;

	/* Words 1 through len-1 */
	for (i = 1; i < len; i++) {
		carry = 0;
		y = 0;
		p1 = num;
		p2 = BIGLITTLE(num-i-1,num+i+1);
		for (j = 0; j < (i+1)/2; j++) {
			BIG(z = (BNWORD64)*--p1 * *p2++;)
			LITTLE(z = (BNWORD64)*p1++ * *--p2;)
			y += z;
			carry += (y < z);
		}
		y += z = y;
		carry += carry + (y < z);
		if ((i & 1) == 0) {
			ASSERT(BIGLITTLE(p1-1 == p2, p1 == p2-1));
			BIG(z = (BNWORD64)*p2 * *p2;)
			LITTLE(z = (BNWORD64)*p1 * *p1;)
			y += z;
			carry += (y < z);
		}
		x += y;
		carry += (x < y);
		BIGLITTLE(*--prod,*prod++) = (BNWORD32)x;
		x = (x >> 32) | (BNWORD64)carry << 32;
	}
	/* Words len through 2*len-2 */
	for (i = 1; i < len; i++) {
		carry = 0;
		y = 0;
		p1 = BIGLITTLE(num-i,num+i);
		p2 = BIGLITTLE(num-len,num+len);
		for (j = 0; j < (len-i)/2; j++) {
			BIG(z = (BNWORD64)*--p1 * *p2++;)
			LITTLE(z = (BNWORD64)*p1++ * *--p2;)
			y += z;
			carry += (y < z);
		}
		y += z = y;
		carry += carry + (y < z);
		if ((len-i) & 1) {
			ASSERT(BIGLITTLE(p1-1 == p2, p1 == p2-1));
			BIG(z = (BNWORD64)*p2 * *p2;)
			LITTLE(z = (BNWORD64)*p1 * *p1;)
			y += z;
			carry += (y < z);
		}
		x += y;
		carry += (x < y);
		BIGLITTLE(*--prod,*prod++) = (BNWORD32)x;
		x = (x >> 32) | (BNWORD64)carry << 32;
	}
	
	/* Word 2*len-1 */
	BIGLITTLE(*--prod,*prod) = (BNWORD32)x;
}
/* Suppress later definition */
#define bniSquare_32 bniSquare_32
#endif

/*
 * Square a number, using optimized squaring to reduce the number of
 * primitive multiples that are executed.  There may not be any
 * overlap of the input and output.
 *
 * Technique: Consider the partial products in the multiplication
 * of "abcde" by itself:
 *
 *               a  b  c  d  e
 *            *  a  b  c  d  e
 *          ==================
 *              ae be ce de ee
 *           ad bd cd dd de
 *        ac bc cc cd ce
 *     ab bb bc bd be
 *  aa ab ac ad ae
 *
 * Note that everything above the main diagonal:
 *              ae be ce de = (abcd) * e
 *           ad bd cd       = (abc) * d
 *        ac bc             = (ab) * c
 *     ab                   = (a) * b
 *
 * is a copy of everything below the main diagonal:
 *                       de
 *                 cd ce
 *           bc bd be
 *     ab ac ad ae
 *
 * Thus, the sum is 2 * (off the diagonal) + diagonal.
 *
 * This is accumulated beginning with the diagonal (which
 * consist of the squares of the digits of the input), which is then
 * divided by two, the off-diagonal added, and multiplied by two
 * again.  The low bit is simply a copy of the low bit of the
 * input, so it doesn't need special care.
 *
 * TODO: Merge the shift by 1 with the squaring loop.
 * TODO: Use Karatsuba.  (a*W+b)^2 = a^2 * (W^2+W) + b^2 * (W+1) - (a-b)^2 * W.
 */
#ifndef bniSquare_32
void
bniSquare_32(BNWORD32 *prod, BNWORD32 const *num, unsigned len)
{
	BNWORD32 t;
	BNWORD32 *prodx = prod;		/* Working copy of the argument */
	BNWORD32 const *numx = num;	/* Working copy of the argument */
	unsigned lenx = len;		/* Working copy of the argument */

	if (!len)
		return;

	/* First, store all the squares */
	while (lenx--) {
#ifdef mul32_ppmm
		BNWORD32 ph, pl;
		t = BIGLITTLE(*--numx,*numx++);
		mul32_ppmm(ph,pl,t,t);
		BIGLITTLE(*--prodx,*prodx++) = pl;
		BIGLITTLE(*--prodx,*prodx++) = ph;
#elif defined(BNWORD64) /* use BNWORD64 */
		BNWORD64 p;
		t = BIGLITTLE(*--numx,*numx++);
		p = (BNWORD64)t * t;
		BIGLITTLE(*--prodx,*prodx++) = (BNWORD32)p;
		BIGLITTLE(*--prodx,*prodx++) = (BNWORD32)(p>>32);
#else	/* Use bniMulN1_32 */
		t = BIGLITTLE(numx[-1],*numx);
		bniMulN1_32(prodx, numx, 1, t);
		BIGLITTLE(--numx,numx++);
		BIGLITTLE(prodx -= 2, prodx += 2);
#endif
	}
	/* Then, shift right 1 bit */
	(void)bniRshift_32(prod, 2*len, 1);

	/* Then, add in the off-diagonal sums */
	lenx = len;
	numx = num;
	prodx = prod;
	while (--lenx) {
		t = BIGLITTLE(*--numx,*numx++);
		BIGLITTLE(--prodx,prodx++);
		t = bniMulAdd1_32(prodx, numx, lenx, t);
		bniAdd1_32(BIGLITTLE(prodx-lenx,prodx+lenx), lenx+1, t);
		BIGLITTLE(--prodx,prodx++);
	}

	/* Shift it back up */
	bniDouble_32(prod, 2*len);

	/* And set the low bit appropriately */
	BIGLITTLE(prod[-1],prod[0]) |= BIGLITTLE(num[-1],num[0]) & 1;
}
#endif /* !bniSquare_32 */

/*
 * bniNorm_32 - given a number, return a modified length such that the
 * most significant digit is non-zero.  Zero-length input is okay.
 */
#ifndef bniNorm_32
unsigned
bniNorm_32(BNWORD32 const *num, unsigned len)
{
	BIGLITTLE(num -= len,num += len);
	while (len && BIGLITTLE(*num++,*--num) == 0)
		--len;
	return len;
}
#endif /* bniNorm_32 */

/*
 * bniBits_32 - return the number of significant bits in the array.
 * It starts by normalizing the array.  Zero-length input is okay.
 * Then assuming there's anything to it, it fetches the high word,
 * generates a bit length by multiplying the word length by 32, and
 * subtracts off 32/2, 32/4, 32/8, ... bits if the high bits are clear.
 */
#ifndef bniBits_32
unsigned
bniBits_32(BNWORD32 const *num, unsigned len)
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -