📄 bni32.c

📁 著名的加密软件的应用于电子邮件中
💻 C
📖 第 1 页 / 共 5 页
字号:
* times the Montgomery multipliers. The results of this multiply
* are stored.
*/
static void
bniMontMul_32(BNWORD32 *prod, BNWORD32 const *num1, BNWORD32 const *num2,
			BNWORD32 const *mod, unsigned len, BNWORD32 inv)
	{
			BNWORD64 x, y;
			BNWORD32 const *p1, *p2, *pm;
			BNWORD32 *pp;
			BNWORD32 t;
			unsigned carry;
			unsigned i, j;

			/* Special case of zero */
			if (!len)
				 return;

			/*
			* This computes directly into the high half of prod, so just
			* shift the pointer and consider prod only "len" elements long
			* for the rest of the code.
			*/
			BIGLITTLE(prod -= len, prod += len);

			/* Pass 1 - compute Montgomery multipliers */
			/* First iteration can have certain simplifications. */
			x = (BNWORD64)BIGLITTLE(num1[-1] * num2[-1], num1[0] * num2[0]);
			BIGLITTLE(prod[-1], prod[0]) = t = inv * (BNWORD32)x;
			y = (BNWORD64)t * BIGLITTLE(mod[-1],mod[0]);
			x += y;
			/* Note: GCC 2.6.3 has a bug if you try to eliminate "carry" */
			carry = (x < y);
			pgpAssert((BNWORD32)x == 0);
			x = x >> 32 | (BNWORD64)carry << 32;

			for (i = 1; i < len; i++) {
					carry = 0;
					p1 = num1;
					p2 = BIGLITTLE(num2-i-1,num2+i+1);
					pp = prod;
					pm = BIGLITTLE(mod-i-1,mod+i+1);
					for (j = 0; j < i; j++) {
							y = (BNWORD64)BIGLITTLE(*--p1 * *p2++, *p1++ * *--p2);
							x += y;
							carry += (x < y);
							y = (BNWORD64)BIGLITTLE(*--pp * *pm++, *pp++ * *--pm);
							x += y;
							carry += (x < y);
					}
					y = (BNWORD64)BIGLITTLE(p1[-1] * p2[0], p1[0] * p2[-1]);
					x += y;
					carry += (x < y);
					pgpAssert(BIGLITTLE(pp == prod-i, pp == prod+i));
					BIGLITTLE(pp[-1], pp[0]) = t = inv * (BNWORD32)x;
					pgpAssert(BIGLITTLE(pm == mod-1, pm == mod+1));
					y = (BNWORD64)t * BIGLITTLE(pm[0],pm[-1]);
					x += y;
					carry += (x < y);
					pgpAssert((BNWORD32)x == 0);
					x = x >> 32 | (BNWORD64)carry << 32;
			}

			/* Pass 2 - compute reduced product and store */
			for (i = 1; i < len; i++) {
					carry = 0;
					p1 = BIGLITTLE(num1-i,num1+i);
				p2 = BIGLITTLE(num2-len,num2+len);
					pm = BIGLITTLE(mod-i,mod+i);
					pp = BIGLITTLE(prod-len,prod+len);
					for (j = i; j < len; j++) {
						y = (BNWORD64)BIGLITTLE(*--p1 * *p2++, *p1++ * *--p2);
						x += y;
						carry += (x < y);
						y = (BNWORD64)BIGLITTLE(*--pm * *pp++, *pm++ * *--pp);
						x += y;
						carry += (x < y);
					}
					pgpAssert(BIGLITTLE(pm == mod-len, pm == mod+len));
					pgpAssert(BIGLITTLE(pp == prod-i, pp == prod+i));
					BIGLITTLE(pp[0],pp[-1]) = (BNWORD32)x;
					x = (x >> 32) | (BNWORD64)carry << 32;
			}

			/* Last round of second half, simplified. */
			BIGLITTLE(*(prod-len),*(prod+len-1)) = (BNWORD32)x;
			carry = (x >> 32);

			while (carry)
				 carry -= bniSubN_32(prod, mod, len);
			while (bniCmp_32(prod, mod, len) >= 0)
				 (void)bniSubN_32(prod, mod, len);
	}
/* Suppress later definition */
#define bniMontMul_32 bniMontMul_32
#endif

#if !defined(bniSquare_32) && defined(BNWORD64) && PRODUCT_SCAN
/*
* Trial code for product-scanning squaring. This seems to slow the C
* code down rather than speed it up.
*/
void
bniSquare_32(BNWORD32 *prod, BNWORD32 const *num, unsigned len)
	{
			BNWORD64 x, y, z;
			BNWORD32 const *p1, *p2;
			unsigned carry;
			unsigned i, j;

			/* Special case of zero */
			if (!len)
				 return;

/* Word 0 of product */
x = (BNWORD64)BIGLITTLE(num[-1] * num[-1], num[0] * num[0]);
BIGLITTLE(*--prod, *prod++) = (BNWORD32)x;
x >>= 32;

			/* Words 1 through len-1 */
			for (i = 1; i < len; i++) {
					carry = 0;
					y = 0;
					p1 = num;
					p2 = BIGLITTLE(num-i-1,num+i+1);
					for (j = 0; j < (i+1)/2; j++) {
							BIG(z = (BNWORD64)*--p1 * *p2++;)
							LITTLE(z = (BNWORD64)*p1++ * *--p2;)
							y += z;
							carry += (y < z);
					}
					y += z = y;
					carry += carry + (y < z);
					if ((i & 1) == 0) {
							pgpAssert(BIGLITTLE(p1-1 == p2, p1 == p2-1));
						BIG(z = (BNWORD64)*p2 * *p2;)
							LITTLE(z = (BNWORD64)*p1 * *p1;)
							y += z;
							carry += (y < z);
					}
					x += y;
					carry += (x < y);
					BIGLITTLE(*--prod,*prod++) = (BNWORD32)x;
					x = (x >> 32) | (BNWORD64)carry << 32;
			}
			/* Words len through 2*len-2 */
			for (i = 1; i < len; i++) {
					carry = 0;
					y = 0;
					p1 = BIGLITTLE(num-i,num+i);
					p2 = BIGLITTLE(num-len,num+len);
					for (j = 0; j < (len-i)/2; j++) {
							BIG(z = (BNWORD64)*--p1 * *p2++;)
							LITTLE(z = (BNWORD64)*p1++ * *--p2;)
							y += z;
							carry += (y < z);
					}
					y += z = y;
					carry += carry + (y < z);
					if ((len-i) & 1) {
							pgpAssert(BIGLITTLE(p1-1 == p2, p1 == p2-1));
							BIG(z = (BNWORD64)*p2 * *p2;)
							LITTLE(z = (BNWORD64)*p1 * *p1;)
							y += z;
							carry += (y < z);
				}
					x += y;
					carry += (x < y);
					BIGLITTLE(*--prod,*prod++) = (BNWORD32)x;
					x = (x >> 32) | (BNWORD64)carry << 32;
		}
		
			/* Word 2*len-1 */
			BIGLITTLE(*--prod,*prod) = (BNWORD32)x;
	}
/* Suppress later definition */
#define bniSquare_32 bniSquare_32
#endif

/*
* Square a number, using optimized squaring to reduce the number of
* primitive multiples that are executed. There may not be any
* overlap of the input and output.
*
* Technique: Consider the partial products in the multiplication
* of "abcde" by itself:
*
* 	        a  b  c  d  e
* 	     *  a  b  c  d  e
* 	   ==================
* 	       ae be ce de ee
* 	    ad bd cd dd de
* 	 ac bc cc cd ce
*     ab bb bc bd be
*  aa ab ac ad ae
*
* Note that everything above the main diagonal:
*	       ae be ce de = (abcd) * e
*	    ad bd cd       = (abc) * d
*	 ac bc             = (ab) * c
*     ab	           = (a) * b
*
* is a copy of everything below the main diagonal:
*			de
*	         cd ce
*	   bc bd be
*    ab ac ad ae
*
* Thus, the sum is 2 * (off the diagonal) + diagonal.
*
* This is accumulated beginning with the diagonal (which
* consist of the squares of the digits of the input), which is then
* divided by two, the off-diagonal added, and multiplied by two
* again. The low bit is simply a copy of the low bit of the
* input, so it doesn't need special care.
*
* TODO: Merge the shift by 1 with the squaring loop.
* TODO: Use Karatsuba. (a*W+b)^2 = a^2 * (W^2+W) + b^2 * (W+1) - (a-b)^2 * W.
	*/
#ifndef bniSquare_32
void
bniSquare_32(BNWORD32 *prod, BNWORD32 const *num, unsigned len)
	{
			BNWORD32 t;
			BNWORD32 *prodx = prod;			/* Working copy of the argument */
			BNWORD32 const *numx = num;		/* Working copy of the argument */
			unsigned lenx = len;	 		/* Working copy of the argument */

if (!len)
	return;

			/* First, store all the squares */
			while (lenx--) {
#ifdef mul32_ppmm
					BNWORD32 ph, pl;
					t = BIGLITTLE(*--numx,*numx++);
					mul32_ppmm(ph,pl,t,t);
					BIGLITTLE(*--prodx,*prodx++) = pl;
					BIGLITTLE(*--prodx,*prodx++) = ph;
#elif defined(BNWORD64) /* use BNWORD64 */
					BNWORD64 p;
					t = BIGLITTLE(*--numx,*numx++);
					p = (BNWORD64)t * t;
					BIGLITTLE(*--prodx,*prodx++) = (BNWORD32)p;
					BIGLITTLE(*--prodx,*prodx++) = (BNWORD32)(p>>32);
#else	/* Use bniMulN1_32 */
		t = BIGLITTLE(numx[-1],*numx);
		bniMulN1_32(prodx, numx, 1, t);
	BIGLITTLE(--numx,numx++);
	BIGLITTLE(prodx -= 2, prodx += 2);
#endif
}
/* Then, shift right 1 bit */
(void)bniRshift_32(prod, 2*len, 1);

/* Then, add in the off-diagonal sums */
lenx = len;
numx = num;
prodx = prod;
while (--lenx) {
			t = BIGLITTLE(*--numx,*numx++);
			BIGLITTLE(--prodx,prodx++);
			t = bniMulAdd1_32(prodx, numx, lenx, t);
			bniAdd1_32(BIGLITTLE(prodx-lenx,prodx+lenx), lenx+1, t);
			BIGLITTLE(--prodx,prodx++);
	}

/* Shift it back up */
bniDouble_32(prod, 2*len);

/* And set the low bit appropriately */
BIGLITTLE(prod[-1],prod[0]) |= BIGLITTLE(num[-1],num[0]) & 1;
}
#endif /* !bniSquare_32 */

/*
* bniNorm_32 - given a number, return a modified length such that the
* most significant digit is non-zero. Zero-length input is okay.
	*/
#ifndef bniNorm_32
unsigned
bniNorm_32(BNWORD32 const *num, unsigned len)
	{
			BIGLITTLE(num -= len,num += len);
			while (len && BIGLITTLE(*num++,*--num) == 0)
				 --len;
			return len;
	}
#endif /* bniNorm_32 */

/*
* bniBits_32 - return the number of significant bits in the array.
* It starts by normalizing the array. Zero-length input is okay.
* Then assuming there's anything to it, it fetches the high word,
* generates a bit length by multiplying the word length by 32, and
* subtracts off 32/2, 32/4, 32/8, ... bits if the high bits are clear.
	*/
#ifndef bniBits_32
unsigned
bniBits_32(BNWORD32 const *num, unsigned len)
	{
			BNWORD32 t;
			unsigned i;

len = bniNorm_32(num, len);
if (len) {
t = BIGLITTLE(*(num-len),*(num+(len-1)));
pgpAssert(t);
len *= 32;
i = 32/2;
	do {
			if (t >> i)
				 t >>= i;
			else
				 len -= i;
} while ((i /= 2) != 0);
}
return len;
}
#endif /* bniBits_32 */

/*
* If defined, use hand-rolled divide rather than compiler's native.
* If the machine doesn't do it in line, the manual code is probably
* faster, since it can assume normalization and the fact that the
* quotient will fit into 32 bits, which a general 64-bit divide
* in a compiler's run-time library can't do.
*/
#ifndef BN_SLOW_DIVIDE_64
/* Assume that divisors of more than thirty-two bits are slow */
#define BN_SLOW_DIVIDE_64 (64 > 0x20)
#endif

/*
* Return (nh<<32|nl) % d, and place the quotient digit into *q.
* It is guaranteed that nh < d, and that d is normalized (with its high
* bit set). If we have a double-width type, it's easy. If not, ooh,
* yuk!
	*/
#ifndef bniDiv21_32
#if defined(BNWORD64) && !BN_SLOW_DIVIDE_64
BNWORD32
bniDiv21_32(BNWORD32 *q, BNWORD32 nh, BNWORD32 nl, BNWORD32 d)
	{
			BNWORD64 n = (BNWORD64)nh << 32 | nl;

			/* Divisor must be normalized */
			pgpAssert(d >> (32-1) == 1);

*q = n / d;
return n % d;
}
#else
/*
* This is where it gets ugly.
*
* Do the division in two halves, using Algorithm D from section 4.3.1
* of Knuth. Note Theorem B from that section, that the quotient estimate
* is never more than the true quotient, and is never more than two
* too low.
*
* The mapping onto conventional long division is (everything a half word):
*       _____________qh___ql_
* dh dl ) nh.h nh.l nl.h nl.l
*	 - (qh * d)
*	-----------
*	   rrrr rrrr nl.l
*	       - (ql * d)
* 	     -----------
*	       rrrr rrrr
*
* The implicit 3/2-digit d*qh and d*ql subtractors are computed this way:
* First, estimate a q digit so that nh/dh works. Subtracting qh*dh from
* the (nh.h nh.l) list leaves a 1/2-word remainder r. Then compute the
* low part of the subtractor, qh * dl. This also needs to be subtracted
* from (nh.h nh.l nl.h) to get the final remainder. So we take the
* remainder, which is (nh.h nh.l) - qh*dl, shift it and add in nl.h, and
* try to subtract qh * dl from that. Since the remainder is 1/2-word
* long, shifting and adding nl.h results in a single word r.
* It is possible that the remainder we're working with, r, is less than
* the product qh * dl, if we estimated qh too high. The estimation
* technique can produce a qh that is too large (never too small), leading
* to r which is too small. In that case, decrement the digit qh, add
* shifted dh to r (to correct for that error), and subtract dl from the
* product we're comparing r with. That's the "correct" way to do it, but
* just adding dl to r instead of subtracting it from the product is
* equivalent and a lot simpler. You just have to watch out for overflow.
*
* The process is repeated with (rrrr rrrr nl.l) for the low digit of the
* quotient ql.
*
* The various uses of 32/2 for shifts are because of the note about
* automatic editing of this file at the very top of the file.
*/
#define highhalf(x) ( (x) >> 32/2 )
#define lowhalf(x) ( (x) & (((BNWORD32)1 << 32/2)-1) )
BNWORD32
bniDiv21_32(BNWORD32 *q, BNWORD32 nh, BNWORD32 nl, BNWORD32 d)
	{
			BNWORD32 dh = highhalf(d), dl = lowhalf(d);
			BNWORD32 qh, ql, prod, r;

			/* Divisor must be normalized */
			pgpAssert((d >> (32-1)) == 1);

			/* Do first half-word of division */
			qh = nh / dh;
			r = nh % dh;
			prod = qh * dl;

			/*
			* Add next half-word of numerator to remainder and correct.
			* qh may be up to two too large.
			*/
			r = (r << (32/2)) | highhalf(nl);
			if (r < prod) {
				--qh; r += d;
				if (r >= d && r < prod) {
						--qh; r += d;
					}
			}
			r -= prod;

/* Do second half-word of division */
ql = r / dh;
r = r % dh;
prod = ql * dl;
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -