📄 bni16.c

📁 PGP—Pretty Good Privacy
💻 C
📖 第 1 页 / 共 5 页
字号:
上一页 1 2 3 45
		/* Decrement qhat and adjust comparison parameters */
		qhat--;
		if ((r += dh) >= dh) {
			t2high -= (t2low < dl);
			t2low -= dl;
			if (t2high > r || (t2high == r && t2low > nl))
				qhat--;
		}
	}
#endif

	/* Do the multiply and subtract */
	r = bniMulSub1_16(n, d, dlen, qhat);
	/* If there was a borrow, add back once. */
	if (r > nh) {	/* Borrow? */
		(void)bniAddN_16(n, d, dlen);
		qhat--;
	}

	/* Remember the first quotient digit. */
	qhigh = qhat;

	/* Now, the main division loop: */
divloop:
	while (qlen--) {

		/* Advance n */
		nh = BIGLITTLE(*(n-dlen),*(n+(dlen-1)));
		BIGLITTLE(++n,--n);
		nm = BIGLITTLE(*(n-dlen),*(n+(dlen-1)));

		if (nh == dh) {
			qhat = ~(BNWORD16)0;
			/* Optimized computation of r = (nh,nm) - qhat * dh */
			r = nh + nm;
			if (r < nh)
				goto subtract;
		} else {
			pgpAssert(nh < dh);
			r = bniDiv21_16(&qhat, nh, nm, dh);
		}

		nl = BIGLITTLE(*(n-(dlen-1)),*(n+(dlen-2)));
#ifdef mul16_ppmm
		mul16_ppmm(nm, t16, qhat, dl);
		if (nm > r || (nm == r && t16 > nl)) {
			/* Decrement qhat and adjust comparison parameters */
			qhat--;
			if ((r += dh) >= dh) {
				nm -= (t16 < dl);
				t16 -= dl;
				if (nm > r || (nm == r && t16 > nl))
					qhat--;
			}
		}
#elif defined(BNWORD32)
		t32 = (BNWORD32)qhat * dl;
		if (t32 > ((BNWORD32)r<<16) + nl) {
			/* Decrement qhat and adjust comparison parameters */
			qhat--;
			if ((r += dh) >= dh) {
				t32 -= dl;
				if (t32 > ((BNWORD32)r << 16) + nl)
					qhat--;
			}
		}
#else /* Use bniMulN1_16 */
		bniMulN1_16(BIGLITTLE(t2+2,t2), &dl, 1, qhat);
		if (t2high > r || (t2high == r && t2low > nl)) {
			/* Decrement qhat and adjust comparison parameters */
			qhat--;
			if ((r += dh) >= dh) {
				t2high -= (t2low < dl);
				t2low -= dl;
				if (t2high > r || (t2high == r && t2low > nl))
					qhat--;
			}
		}
#endif

		/*
		 * As a point of interest, note that it is not worth checking
		 * for qhat of 0 or 1 and installing special-case code.  These
		 * occur with probability 2^-16, so spending 1 cycle to check
		 * for them is only worth it if we save more than 2^15 cycles,
		 * and a multiply-and-subtract for numbers in the 1024-bit
		 * range just doesn't take that long.
		 */
subtract:
		/*
		 * n points to the least significant end of the substring
		 * of n to be subtracted from.  qhat is either exact or
		 * one too large.  If the subtract gets a borrow, it was
		 * one too large and the divisor is added back in.  It's
		 * a dlen+1 word add which is guaranteed to produce a
		 * carry out, so it can be done very simply.
		 */
		r = bniMulSub1_16(n, d, dlen, qhat);
		if (r > nh) {	/* Borrow? */
			(void)bniAddN_16(n, d, dlen);
			qhat--;
		}
		/* Store the quotient digit */
		BIGLITTLE(*q++,*--q) = qhat;
	}
	/* Tah dah! */

	if (shift) {
		bniRshift_16(d, dlen, shift);
		bniRshift_16(n, dlen, shift);
	}

	return qhigh;
}
#endif

/*
 * Find the negative multiplicative inverse of x (x must be odd!) modulo 2^16.
 *
 * This just performs Newton's iteration until it gets the
 * inverse.  The initial estimate is always correct to 3 bits, and
 * sometimes 4.  The number of valid bits doubles each iteration.
 * (To prove it, assume x * y == 1 (mod 2^n), and introduce a variable
 * for the error mod 2^2n.  x * y == 1 + k*2^n (mod 2^2n) and follow
 * the iteration through.)
 */
#ifndef bniMontInv1_16
BNWORD16
bniMontInv1_16(BNWORD16 const x)
{
        BNWORD16 y = x, z;

	pgpAssert(x & 1);
 
        while ((z = x*y) != 1)
                y *= 2 - z;
        return -y;
}
#endif /* !bniMontInv1_16 */

#if defined(BNWORD32) && PRODUCT_SCAN
/*
 * Test code for product-scanning Montgomery reduction.
 * This seems to slow the C code down rather than speed it up.
 *
 * The first loop computes the Montgomery multipliers, storing them over
 * the low half of the number n.
 *
 * The second half multiplies the upper half, adding in the modulus
 * times the Montgomery multipliers.  The results of this multiply
 * are stored.
 */
void
bniMontReduce_16(BNWORD16 *n, BNWORD16 const *mod, unsigned mlen, BNWORD16 inv)
{
	BNWORD32 x, y;
	BNWORD16 const *pm;
	BNWORD16 *pn;
	BNWORD16 t;
	unsigned carry;
	unsigned i, j;

	/* Special case of zero */
	if (!mlen)
		return;

	/* Pass 1 - compute Montgomery multipliers */
	/* First iteration can have certain simplifications. */
	t = BIGLITTLE(n[-1],n[0]);
	x = t;
	t *= inv;
	BIGLITTLE(n[-1], n[0]) = t;
	x += (BNWORD32)t * BIGLITTLE(mod[-1],mod[0]); /* Can't overflow */
	pgpAssert((BNWORD16)x == 0);
	x = x >> 16;

	for (i = 1; i < mlen; i++) {
		carry = 0;
		pn = n;
		pm = BIGLITTLE(mod-i-1,mod+i+1);
		for (j = 0; j < i; j++) {
			y = (BNWORD32)BIGLITTLE(*--pn * *pm++, *pn++ * *--pm);
			x += y;
			carry += (x < y);
		}
		pgpAssert(BIGLITTLE(pn == n-i, pn == n+i));
		y = t = BIGLITTLE(pn[-1], pn[0]);
		x += y;
		carry += (x < y);
		BIGLITTLE(pn[-1], pn[0]) = t = inv * (BNWORD16)x;
		pgpAssert(BIGLITTLE(pm == mod-1, pm == mod+1));
		y = (BNWORD32)t * BIGLITTLE(pm[0],pm[-1]);
		x += y;
		carry += (x < y);
		pgpAssert((BNWORD16)x == 0);
		x = x >> 16 | (BNWORD32)carry << 16;
	}

	BIGLITTLE(n -= mlen, n += mlen);

	/* Pass 2 - compute upper words and add to n */
	for (i = 1; i < mlen; i++) {
		carry = 0;
		pm = BIGLITTLE(mod-i,mod+i);
		pn = n;
		for (j = i; j < mlen; j++) {
			y = (BNWORD32)BIGLITTLE(*--pm * *pn++, *pm++ * *--pn);
			x += y;
			carry += (x < y);
		}
		pgpAssert(BIGLITTLE(pm == mod-mlen, pm == mod+mlen));
		pgpAssert(BIGLITTLE(pn == n+mlen-i, pn == n-mlen+i));
		y = t = BIGLITTLE(*(n-i),*(n+i-1));
		x += y;
		carry += (x < y);
		BIGLITTLE(*(n-i),*(n+i-1)) = (BNWORD16)x;
		x = (x >> 16) | (BNWORD32)carry << 16;
	}

	/* Last round of second half, simplified. */
	t = BIGLITTLE(*(n-mlen),*(n+mlen-1));
	x += t;
	BIGLITTLE(*(n-mlen),*(n+mlen-1)) = (BNWORD16)x;
	carry = (unsigned)(x >> 16);

	while (carry)
		carry -= bniSubN_16(n, mod, mlen);
	while (bniCmp_16(n, mod, mlen) >= 0)
		(void)bniSubN_16(n, mod, mlen);
}
#define bniMontReduce_16 bniMontReduce_16
#endif

/*
 * Montgomery reduce n, modulo mod.  This reduces modulo mod and divides by
 * 2^(16*mlen).  Returns the result in the *top* mlen words of the argument n.
 * This is ready for another multiplication using bniMul_16.
 *
 * Montgomery representation is a very useful way to encode numbers when
 * you're doing lots of modular reduction.  What you do is pick a multiplier
 * R which is relatively prime to the modulus and very easy to divide by.
 * Since the modulus is odd, R is closen as a power of 2, so the division
 * is a shift.  In fact, it's a shift of an integral number of words,
 * so the shift can be implicit - just drop the low-order words.
 *
 * Now, choose R *larger* than the modulus m, 2^(16*mlen).  Then convert
 * all numbers a, b, etc. to Montgomery form M(a), M(b), etc using the
 * relationship M(a) = a*R mod m, M(b) = b*R mod m, etc.  Note that:
 * - The Montgomery form of a number depends on the modulus m.
 *   A fixed modulus m is assumed throughout this discussion.
 * - Since R is relaitvely prime to m, multiplication by R is invertible;
 *   no information about the numbers is lost, they're just scrambled.
 * - Adding (and subtracting) numbers in this form works just as usual.
 *   M(a+b) = (a+b)*R mod m = (a*R + b*R) mod m = (M(a) + M(b)) mod m
 * - Multiplying numbers in this form produces a*b*R*R.  The problem
 *   is to divide out the excess factor of R, modulo m as well as to
 *   reduce to the given length mlen.  It turns out that this can be
 *   done *faster* than a normal divide, which is where the speedup
 *   in Montgomery division comes from.
 *
 * Normal reduction chooses a most-significant quotient digit q and then
 * subtracts q*m from the number to be reduced.  Choosing q is tricky
 * and involved (just look at bniDiv_16 to see!) and is usually
 * imperfect, requiring a check for correction after the subtraction.
 *
 * Montgomery reduction *adds* a multiple of m to the *low-order* part
 * of the number to be reduced.  This multiple is chosen to make the
 * low-order part of the number come out to zero.  This can be done
 * with no trickery or error using a precomputed inverse of the modulus.
 * In this code, the "part" is one word, but any width can be used.
 *
 * Repeating this step sufficiently often results in a value which
 * is a multiple of R (a power of two, remember) but is still (since
 * the additions were to the low-order part and thus did not increase
 * the value of the number being reduced very much) still not much
 * larger than m*R.  Then implicitly divide by R and subtract off
 * m until the result is in the correct range.
 *
 * Since the low-order part being cancelled is less than R, the
 * multiple of m added must have a multiplier which is at most R-1.
 * Assuming that the input is at most m*R-1, the final number is
 * at most m*(2*R-1)-1 = 2*m*R - m - 1, so subtracting m once from
 * the high-order part, equivalent to subtracting m*R from the
 * while number, produces a result which is at most m*R - m - 1,
 * which divided by R is at most m-1.
 *
 * To convert *to* Montgomery form, you need a regular remainder
 * routine, although you can just compute R*R (mod m) and do the
 * conversion using Montgomery multiplication.  To convert *from*
 * Montgomery form, just Montgomery reduce the number to
 * remove the extra factor of R.
 * 
 * TODO: Change to a full inverse and use Karatsuba's multiplication
 * rather than this word-at-a-time.
 */
#ifndef bniMontReduce_16
void
bniMontReduce_16(BNWORD16 *n, BNWORD16 const *mod, unsigned const mlen,
                BNWORD16 inv)
{
	BNWORD16 t;
	BNWORD16 c = 0;
	unsigned len = mlen;

	/* inv must be the negative inverse of mod's least significant word */
	pgpAssert((BNWORD16)(inv * BIGLITTLE(mod[-1],mod[0])) == (BNWORD16)-1);

	pgpAssert(len);

	do {
		t = bniMulAdd1_16(n, mod, mlen, inv * BIGLITTLE(n[-1],n[0]));
		c += bniAdd1_16(BIGLITTLE(n-mlen,n+mlen), len, t);
		BIGLITTLE(--n,++n);
	} while (--len);

	/*
	 * All that adding can cause an overflow past the modulus size,
	 * but it's unusual, and never by much, so a subtraction loop
	 * is the right way to deal with it.
	 * This subtraction happens infrequently - I've only ever seen it
	 * invoked once per reduction, and then just under 22.5% of the time.
	 */
	while (c)
		c -= bniSubN_16(n, mod, mlen);
	while (bniCmp_16(n, mod, mlen) >= 0)
		(void)bniSubN_16(n, mod, mlen);
}
#endif /* !bniMontReduce_16 */

/*
 * A couple of helpers that you might want to implement atomically
 * in asm sometime.
 */
#ifndef bniMontMul_16
/*
 * Multiply "num1" by "num2", modulo "mod", all of length "len", and
 * place the result in the high half of "prod".  "inv" is the inverse
 * of the least-significant word of the modulus, modulo 2^16.
 * This uses numbers in Montgomery form.  Reduce using "len" and "inv".
 *
 * This is implemented as a macro to win on compilers that don't do
 * inlining, since it's so trivial.
 */
#define bniMontMul_16(prod, n1, n2, mod, len, inv) \
	(bniMulX_16(prod, n1, n2, len), bniMontReduce_16(prod, mod, len, inv))
#endif /* !bniMontMul_16 */

#ifndef bniMontSquare_16
/*
 * Square "num", modulo "mod", both of length "len", and place the result
 * in the high half of "prod".  "inv" is the inverse of the least-significant
 * word of the modulus, modulo 2^16.
 * This uses numbers in Montgomery form.  Reduce using "len" and "inv".
 *
 * This is implemented as a macro to win on compilers that don't do
 * inlining, since it's so trivial.
 */
#define bniMontSquare_16(prod, n, mod, len, inv) \
	(bniSquare_16(prod, n, len), bniMontReduce_16(prod, mod, len, inv))
	
#endif /* !bniMontSquare_16 */

/*
 * Convert a number to Montgomery form - requires mlen + nlen words
 * of memory in "n".
 */
void
bniToMont_16(BNWORD16 *n, unsigned nlen, BNWORD16 *mod, unsigned mlen)
{
	/* Move n up "mlen" words */
	bniCopy_16(BIGLITTLE(n-mlen,n+mlen), n, nlen);
	bniZero_16(n, mlen);
	/* Do the division - dump the quotient in the high-order words */
	(void)bniDiv_16(BIGLITTLE(n-mlen,n+mlen), n, mlen+nlen, mod, mlen);
}

/*
 * Convert from Montgomery form.  Montgomery reduction is all that is
 * needed.
 */
void
bniFromMont_16(BNWORD16 *n, BNWORD16 *mod, unsigned len)
{
	/* Zero the high words of n */
	bniZero_16(BIGLITTLE(n-len,n+len), len);
	bniMontReduce_16(n, mod, len, bniMontInv1_16(mod[BIGLITTLE(-1,0)]));
	/* Move n down len words */
	bniCopy_16(n, BIGLITTLE(n-len,n+len), len);
}

/*
 * The windowed exponentiation algorithm, precomputes a table of odd
 * powers of n up to 2^k.  See the comment in bnExpMod_16 below for
 * an explanation of how it actually works works.
 *
 * It takes 2^(k-1)-1 multiplies to compute the table, and (e-1)/(k+1)
 * multiplies (on average) to perform the exponentiation.  To minimize
 * the sum, k must vary with e.  The optimal window sizes vary with the
 * exponent length.  Here are some selected values and the boundary cases.
 * (An underscore _ has been inserted into some of the numbers to ensure
 * that magic strings like 16 do not appear in this table.  It should be
 * ignored.)
 *
 * At e =    1 bits, k=1   (0.000000) is best
 * At e =    2 bits, k=1   (0.500000) is best
 * At e =    4 bits, k=1   (1.500000) is best
 * At e =    8 bits, k=2   (3.333333) < k=1   (3.500000)
 * At e =  1_6 bits, k=2   (6.000000) is best
 * At e =   26 bits, k=3   (9.250000) < k=2   (9.333333)
 * At e =  3_2 bits, k=3  (10.750000) is best
 * At e =  6_4 bits, k=3  (18.750000) is best
 * At e =   82 bits, k=4  (23.200000) < k=3  (23.250000)
 * At e =  128 bits, k=4 (3_2.400000) is best
 * At e =  242 bits, k=5  (55.1_66667) < k=4 (55.200000)
 * At e =  256 bits, k=5  (57.500000) is best
 * At e =  512 bits, k=5 (100.1_66667) is best
 * At e =  674 bits, k=6 (127.142857) < k=5 (127.1_66667)
 * At e = 1024 bits, k=6 (177.142857) is best
 * At e = 1794 bits, k=7 (287.125000) < k=6 (287.142857)
 * At e = 2048 bits, k=7 (318.875000) is best
 * At e = 4096 bits, k=7 (574.875000) is best
 *
 * The numbers in parentheses are the expected number of multiplications
 * needed to do the computation.  The normal russian-peasant modular
 * exponentiation technique always uses (e-1)/2.  For exponents as
 * small as 192 bits (below the range of current factoring algorithms),
 * half of the multiplies are eliminated, 45.2 as opposed to the naive
 *
上一页 1 2 3 45
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -