📄 bni32.c

📁 著名的加密软件的应用于电子邮件中
💻 C
📖 第 1 页 / 共 5 页
字号:
上一页 1 2 3 45
					/* Decrement qhat and adjust comparison parameters */
					qhat--;
					if ((r += dh) > dh) {
							t64 -= dl;
							if (t64 > ((BNWORD64)r << 32) + nl)
								 qhat--;
					}
			}
#else /* Use bniMulN1_32 */
			bniMulN1_32(BIGLITTLE(t2+2,t2), &dl, 1, qhat);
			if (t2high > r || (t2high == r && t2low > nl)) {
					/* Decrement qhat and adjust comparison parameters */
					qhat--;
					if ((r += dh) >= dh) {
							t2high -= (t2low < dl);
							t2low -= dl;
							if (t2high > r || (t2high == r && t2low > nl))
								 qhat--;
					}
			}
#endif

			/* Do the multiply and subtract */
			r = bniMulSub1_32(n, d, dlen, qhat);
			/* If there was a borrow, add back once. */
			if (r > nh) {	/* Borrow? */
				 (void)bniAddN_32(n, d, dlen);
				 qhat--;
			}

			/* Remember the first quotient digit. */
			qhigh = qhat;

	/* Now, the main division loop: */
divloop:
			while (qlen--) {

					/* Advance n */
					nh = BIGLITTLE(*(n-dlen),*(n+(dlen-1)));
					BIGLITTLE(++n,--n);
					nm = BIGLITTLE(*(n-dlen),*(n+(dlen-1)));

					if (nh == dh) {
							qhat = ~(BNWORD32)0;
							/* Optimized computation of r = (nh,nm) - qhat * dh */
							r = nh + nm;
							if (r < nh)
								 goto subtract;
					} else {
						pgpAssert(nh < dh);
						r = bniDiv21_32(&qhat, nh, nm, dh);
					}

					nl = BIGLITTLE(*(n-(dlen-1)),*(n+(dlen-2)));
#ifdef mul32_ppmm
				mul32_ppmm(nm, t32, qhat, dl);
					if (nm > r || (nm == r && t32 > nl)) {
							/* Decrement qhat and adjust comparison parameters */
							qhat--;
							if ((r += dh) >= dh) {
								nm -= (t32 < dl);
									t32 -= dl;
									if (nm > r || (nm == r && t32 > nl))
										qhat--;
						}
					}
#elif defined(BNWORD64)
					t64 = (BNWORD64)qhat * dl;
					if (t64 > ((BNWORD64)r<<32) + nl) {
							/* Decrement qhat and adjust comparison parameters */
							qhat--;
							if ((r += dh) >= dh) {
									t64 -= dl;
									if (t64 > ((BNWORD64)r << 32) + nl)
									qhat--;
						}
					}
#else /* Use bniMulN1_32 */
					bniMulN1_32(BIGLITTLE(t2+2,t2), &dl, 1, qhat);
					if (t2high > r || (t2high == r && t2low > nl)) {
							/* Decrement qhat and adjust comparison parameters */
							qhat--;
							if ((r += dh) >= dh) {
									t2high -= (t2low < dl);
									t2low -= dl;
									if (t2high > r || (t2high == r && t2low > nl))
										qhat--;
							}
				}
#endif

				/*
				* As a point of interest, note that it is not worth checking
				* for qhat of 0 or 1 and installing special-case code. These
				* occur with probability 2^-32, so spending 1 cycle to check
				* for them is only worth it if we save more than 2^15 cycles,
				* and a multiply-and-subtract for numbers in the 1024-bit
				* range just doesn't take that long.
				*/
subtract:
					/*
					* n points to the least significant end of the substring
					* of n to be subtracted from. qhat is either exact or
					* one too large. If the subtract gets a borrow, it was
					* one too large and the divisor is added back in. It's
					* a dlen+1 word add which is guaranteed to produce a
					* carry out, so it can be done very simply.
					*/
					r = bniMulSub1_32(n, d, dlen, qhat);
					if (r > nh) {	/* Borrow? */
						(void)bniAddN_32(n, d, dlen);
						qhat--;
					}
					/* Store the quotient digit */
					BIGLITTLE(*q++,*--q) = qhat;
			}
			/* Tah dah! */

			if (shift) {
				 bniRshift_32(d, dlen, shift);
				 bniRshift_32(n, dlen, shift);
			}

			return qhigh;
	}
	#endif

/*
* Find the negative multiplicative inverse of x (x must be odd!) modulo 2^32.
*
* This just performs Newton's iteration until it gets the
* inverse. The initial estimate is always correct to 3 bits, and
* sometimes 4. The number of valid bits doubles each iteration.
* (To prove it, assume x * y == 1 (mod 2^n), and introduce a variable
* for the error mod 2^2n. x * y == 1 + k*2^n (mod 2^2n) and follow
* the iteration through.)
	*/
#ifndef bniMontInv1_32
BNWORD32
bniMontInv1_32(BNWORD32 const x)
{
BNWORD32 y = x, z;

			pgpAssert(x & 1);

while ((z = x*y) != 1)
y *= 2 - z;
return -y;
}
#endif /* !bniMontInv1_32 */

#if defined(BNWORD64) && PRODUCT_SCAN
/*
* Test code for product-scanning Montgomery reduction.
* This seems to slow the C code down rather than speed it up.
*
* The first loop computes the Montgomery multipliers, storing them over
* the low half of the number n.
*
* The second half multiplies the upper half, adding in the modulus
* times the Montgomery multipliers. The results of this multiply
* are stored.
*/
void
bniMontReduce_32(BNWORD32 *n, BNWORD32 const *mod, unsigned mlen, BNWORD32 inv)
	{
			BNWORD64 x, y;
		BNWORD32 const *pm;
		BNWORD32 *pn;
			BNWORD32 t;
			unsigned carry;
			unsigned i, j;

			/* Special case of zero */
			if (!mlen)
				 return;

			/* Pass 1 - compute Montgomery multipliers */
			/* First iteration can have certain simplifications. */
			t = BIGLITTLE(n[-1],n[0]);
			x = t;
		t *= inv;
			BIGLITTLE(n[-1], n[0]) = t;
			x += (BNWORD64)t * BIGLITTLE(mod[-1],mod[0]); /* Can't overflow */
			pgpAssert((BNWORD32)x == 0);
			x = x >> 32;

			for (i = 1; i < mlen; i++) {
					carry = 0;
					pn = n;
					pm = BIGLITTLE(mod-i-1,mod+i+1);
					for (j = 0; j < i; j++) {
						y = (BNWORD64)BIGLITTLE(*--pn * *pm++, *pn++ * *--pm);
						x += y;
						carry += (x < y);
					}
					pgpAssert(BIGLITTLE(pn == n-i, pn == n+i));
					y = t = BIGLITTLE(pn[-1], pn[0]);
					x += y;
					carry += (x < y);
					BIGLITTLE(pn[-1], pn[0]) = t = inv * (BNWORD32)x;
					pgpAssert(BIGLITTLE(pm == mod-1, pm == mod+1));
					y = (BNWORD64)t * BIGLITTLE(pm[0],pm[-1]);
					x += y;
				carry += (x < y);
					pgpAssert((BNWORD32)x == 0);
x = x >> 32 | (BNWORD64)carry << 32;
}

BIGLITTLE(n -= mlen, n += mlen);

/* Pass 2 - compute upper words and add to n */
for (i = 1; i < mlen; i++) {
			carry = 0;
			pm = BIGLITTLE(mod-i,mod+i);
			pn = n;
			for (j = i; j < mlen; j++) {
				 y = (BNWORD64)BIGLITTLE(*--pm * *pn++, *pm++ * *--pn);
				 x += y;
				 carry += (x < y);
			}
			pgpAssert(BIGLITTLE(pm == mod-mlen, pm == mod+mlen));
			pgpAssert(BIGLITTLE(pn == n+mlen-i, pn == n-mlen+i));
			y = t = BIGLITTLE(*(n-i),*(n+i-1));
			x += y;
			carry += (x < y);
			BIGLITTLE(*(n-i),*(n+i-1)) = (BNWORD32)x;
			x = (x >> 32) | (BNWORD64)carry << 32;
	}

			/* Last round of second half, simplified. */
			t = BIGLITTLE(*(n-mlen),*(n+mlen-1));
			x += t;
		BIGLITTLE(*(n-mlen),*(n+mlen-1)) = (BNWORD32)x;
			carry = (unsigned)(x >> 32);

			while (carry)
			carry -= bniSubN_32(n, mod, mlen);
			while (bniCmp_32(n, mod, mlen) >= 0)
				 (void)bniSubN_32(n, mod, mlen);
	}
#define bniMontReduce_32 bniMontReduce_32
#endif

/*
* Montgomery reduce n, modulo mod. This reduces modulo mod and divides by
* 2^(32*mlen). Returns the result in the *top* mlen words of the argument n.
* This is ready for another multiplication using bniMul_32.
*
* Montgomery representation is a very useful way to encode numbers when
* you're doing lots of modular reduction. What you do is pick a multiplier
* R which is relatively prime to the modulus and very easy to divide by.
* Since the modulus is odd, R is closen as a power of 2, so the division
* is a shift. In fact, it's a shift of an integral number of words,
* so the shift can be implicit - just drop the low-order words.
*
* Now, choose R *larger* than the modulus m, 2^(32*mlen). Then convert
* all numbers a, b, etc. to Montgomery form M(a), M(b), etc using the
* relationship M(a) = a*R mod m, M(b) = b*R mod m, etc. Note that:
* - The Montgomery form of a number depends on the modulus m.
* A fixed modulus m is assumed throughout this discussion.
* - Since R is relaitvely prime to m, multiplication by R is invertible;
* no information about the numbers is lost, they're just scrambled.
* - Adding (and subtracting) numbers in this form works just as usual.
* M(a+b) = (a+b)*R mod m = (a*R + b*R) mod m = (M(a) + M(b)) mod m
* - Multiplying numbers in this form produces a*b*R*R. The problem
* is to divide out the excess factor of R, modulo m as well as to
* reduce to the given length mlen. It turns out that this can be
* done *faster* than a normal divide, which is where the speedup
* in Montgomery division comes from.
*
* Normal reduction chooses a most-significant quotient digit q and then
* subtracts q*m from the number to be reduced. Choosing q is tricky
* and involved (just look at bniDiv_32 to see!) and is usually
* imperfect, requiring a check for correction after the subtraction.
*
* Montgomery reduction *adds* a multiple of m to the *low-order* part
* of the number to be reduced. This multiple is chosen to make the
* low-order part of the number come out to zero. This can be done
* with no trickery or error using a precomputed inverse of the modulus.
* In this code, the "part" is one word, but any width can be used.
*
* Repeating this step sufficiently often results in a value which
* is a multiple of R (a power of two, remember) but is still (since
* the additions were to the low-order part and thus did not increase
* the value of the number being reduced very much) still not much
* larger than m*R. Then implicitly divide by R and subtract off
* m until the result is in the correct range.
*
* Since the low-order part being cancelled is less than R, the
* multiple of m added must have a multiplier which is at most R-1.
* Assuming that the input is at most m*R-1, the final number is
* at most m*(2*R-1)-1 = 2*m*R - m - 1, so subtracting m once from
* the high-order part, equivalent to subtracting m*R from the
* while number, produces a result which is at most m*R - m - 1,
* which divided by R is at most m-1.
*
* To convert *to* Montgomery form, you need a regular remainder
* routine, although you can just compute R*R (mod m) and do the
* conversion using Montgomery multiplication. To convert *from*
* Montgomery form, just Montgomery reduce the number to
* remove the extra factor of R.
*
* TODO: Change to a full inverse and use Karatsuba's multiplication
* rather than this word-at-a-time.
	*/
#ifndef bniMontReduce_32
void
bniMontReduce_32(BNWORD32 *n, BNWORD32 const *mod, unsigned const mlen,
BNWORD32 inv)
{
BNWORD32 t;
BNWORD32 c = 0;
unsigned len = mlen;

/* inv must be the negative inverse of mod's least significant word */
pgpAssert((BNWORD32)(inv * BIGLITTLE(mod[-1],mod[0])) == (BNWORD32)-1);

pgpAssert(len);

			do {
				 t = bniMulAdd1_32(n, mod, mlen, inv * BIGLITTLE(n[-1],n[0]));
	c += bniAdd1_32(BIGLITTLE(n-mlen,n+mlen), len, t);
	BIGLITTLE(--n,++n);
} while (--len);

			/*
			* All that adding can cause an overflow past the modulus size,
			* but it's unusual, and never by much, so a subtraction loop
			* is the right way to deal with it.
			* This subtraction happens infrequently - I've only ever seen it
			* invoked once per reduction, and then just under 22.5% of the time.
			*/
			while (c)
				 c -= bniSubN_32(n, mod, mlen);
			while (bniCmp_32(n, mod, mlen) >= 0)
				 (void)bniSubN_32(n, mod, mlen);
	}
#endif /* !bniMontReduce_32 */

/*
* A couple of helpers that you might want to implement atomically
* in asm sometime.
*/
#ifndef bniMontMul_32
/*
* Multiply "num1" by "num2", modulo "mod", all of length "len", and
* place the result in the high half of "prod". "inv" is the inverse
* of the least-significant word of the modulus, modulo 2^32.
* This uses numbers in Montgomery form. Reduce using "len" and "inv".
*
* This is implemented as a macro to win on compilers that don't do
* inlining, since it's so trivial.
*/
#define bniMontMul_32(prod, n1, n2, mod, len, inv) \
	(bniMulX_32(prod, n1, n2, len), bniMontReduce_32(prod, mod, len, inv))
#endif /* !bniMontMul_32 */

#ifndef bniMontSquare_32
/*
* Square "num", modulo "mod", both of length "len", and place the result
* in the high half of "prod". "inv" is the inverse of the least-significant
* word of the modulus, modulo 2^32.
* This uses numbers in Montgomery form. Reduce using "len" and "inv".
*
* This is implemented as a macro to win on compilers that don't do
* inlining, since it's so trivial.
*/
#define bniMontSquare_32(prod, n, mod, len, inv) \
	(bniSquare_32(prod, n, len), bniMontReduce_32(prod, mod, len, inv))
	
#endif /* !bniMontSquare_32 */

/*
* Convert a number to Montgomery form - requires mlen + nlen words
* of memory in "n".
	*/
void
bniToMont_32(BNWORD32 *n, unsigned nlen, BNWORD32 *mod, unsigned mlen)
{
/* Move n up "mlen" words */
bniCopy_32(BIGLITTLE(n-mlen,n+mlen), n, nlen);
bniZero_32(n, mlen);
/* Do the division - dump the quotient in the high-order words */
(void)bniDiv_32(BIGLITTLE(n-mlen,n+mlen), n, mlen+nlen, mod, mlen);
}

/*
* Convert from Montgomery form. Montgomery reduction is all that is
* needed.
*/
void
bniFromMont_32(BNWORD32 *n, BNWORD32 *mod, unsigned len)
	{
			/* Zero the high words of n */
			bniZero_32(BIGLITTLE(n-len,n+len), len);
			bniMontReduce_32(n, mod, len, bniMontInv1_32(mod[BIGLITTLE(-1,0)]));
			/* Move n down len words */
			bniCopy_32(n, BIGLITTLE(n-len,n+len), len);
	}

/*
* The windowed exponentiation algorithm, precomputes a table of odd
* powers of n up to 2^k. See the comment in bnExpMod_32 below for
* an explanation of how it actually works works.
*
* It takes 2^(k-1)-1 multiplies to compute the table, and (e-1)/(k+1)
* multiplies (on average) to perform the exponentiation. To minimize
* the sum, k must vary with e. The optimal window sizes vary with the
* exponent length. Here are some selected values and the boundary cases.
* (An underscore _ has been inserted into some of the numbers to ensure
* that magic strings like 32 do not appear in this table. It should be
* ignored.)
*
* At e =	1 bits, k=1 (0.000000) is best
* At e =	2 bits, k=1 (0.500000) is best
* At e =	4 bits, k=1 (1.500000) is best
* At e =	8 bits, k=2 (3.333333) < k=1	(3.500000)
* At e = 1_6 bits, k=2 (6.000000) is best
* At e = 26 bits, k=3 (9.250000) < k=2		(9.333333)
* At e = 3_2 bits, k=3 (10.750000) is best
* At e = 6_4 bits, k=3 (18.750000) is best
* At e =	82 bits, k=4 (23.200000) < k=3 (23.250000)
* At e =	128 bits, k=4 (3_2.4
上一页 1 2 3 45
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -