📄 bni32.c

📁 vc环境下的pgp源码
💻 C
📖 第 1 页 / 共 5 页
字号:
#ifndef bniCmp_32
/*
 * bniCmp_32: compare two bignums of equal length, returning the sign of
 * num1 - num2. (-1, 0 or +1).
 * 
 * Technique: Change the little-endian pointers to big-endian pointers
 * and compare from the most-significant end until a difference if found.
 * When it is, figure out the sign of the difference and return it.
 */
int
bniCmp_32(BNWORD32 const *num1, BNWORD32 const *num2, unsigned len)
{
	BIGLITTLE(num1 -= len, num1 += len);
	BIGLITTLE(num2 -= len, num2 += len);

	while (len--) {
		if (BIGLITTLE(*num1++ != *num2++, *--num1 != *--num2)) {
			if (BIGLITTLE(num1[-1] < num2[-1], *num1 < *num2))
				return -1;
			else
				return 1;
		}
	}
	return 0;
}
#endif /* !bniCmp_32 */

/*
 * mul32_ppmmaa(ph,pl,x,y,a,b) is an optional routine that
 * computes (ph,pl) = x * y + a + b.  mul32_ppmma and mul32_ppmm
 * are simpler versions.  If you want to be lazy, all of these
 * can be defined in terms of the others, so here we create any
 * that have not been defined in terms of the ones that have been.
 */

/* Define ones with fewer a's in terms of ones with more a's */
#if !defined(mul32_ppmma) && defined(mul32_ppmmaa)
#define mul32_ppmma(ph,pl,x,y,a) mul32_ppmmaa(ph,pl,x,y,a,0)
#endif

#if !defined(mul32_ppmm) && defined(mul32_ppmma)
#define mul32_ppmm(ph,pl,x,y) mul32_ppmma(ph,pl,x,y,0)
#endif

/*
 * Use this definition to test the mul32_ppmm-based operations on machines
 * that do not provide mul32_ppmm.  Change the final "0" to a "1" to
 * enable it.
 */
#if !defined(mul32_ppmm) && defined(BNWORD64) && 0	/* Debugging */
#define mul32_ppmm(ph,pl,x,y) \
	({BNWORD64 _ = (BNWORD64)(x)*(y); (pl) = _; (ph) = _>>32;})
#endif

#if defined(mul32_ppmm) && !defined(mul32_ppmma)
#define mul32_ppmma(ph,pl,x,y,a) \
	(mul32_ppmm(ph,pl,x,y), (ph) += ((pl) += (a)) < (a))
#endif

#if defined(mul32_ppmma) && !defined(mul32_ppmmaa)
#define mul32_ppmmaa(ph,pl,x,y,a,b) \
	(mul32_ppmma(ph,pl,x,y,a), (ph) += ((pl) += (b)) < (b))
#endif

/*
 * bniMulN1_32: Multiply an n-word input by a 1-word input and store the
 * n+1-word product.  This uses either the mul32_ppmm and mul32_ppmma
 * macros, or C multiplication with the BNWORD64 type.  This uses mul32_ppmma
 * if available, assuming you won't bother defining it unless you can do
 * better than the normal multiplication.
 */
#ifndef bniMulN1_32
#ifdef bniMulAdd1_32	/* If we have this asm primitive, use it. */
void
bniMulN1_32(BNWORD32 *out, BNWORD32 const *in, unsigned len, BNWORD32 k)
{
	bniZero_32(out, len);
	BIGLITTLE(*(out-len),*(out+len)) = bniMulAdd1_32(out, in, len, k);
}
#elif defined(mul32_ppmm)
void
bniMulN1_32(BNWORD32 *out, BNWORD32 const *in, unsigned len, BNWORD32 k)
{
	BNWORD32 prod, carry, carryin;

	pgpAssert(len > 0);

	BIG(--out;--in;);
	mul32_ppmm(carry, *out, *in, k);
	LITTLE(out++;in++;)

	while (--len) {
		BIG(--out;--in;)
		carryin = carry;
		mul32_ppmma(carry, *out, *in, k, carryin);
		LITTLE(out++;in++;)
	}
	BIGLITTLE(*--out,*out) = carry;
}
#elif defined(BNWORD64)
void
bniMulN1_32(BNWORD32 *out, BNWORD32 const *in, unsigned len, BNWORD32 k)
{
	BNWORD64 p;

	pgpAssert(len > 0);

	p = (BNWORD64)BIGLITTLE(*--in,*in++) * k;
	BIGLITTLE(*--out,*out++) = (BNWORD32)p;

	while (--len) {
		p = (BNWORD64)BIGLITTLE(*--in,*in++) * k + (BNWORD32)(p >> 32);
		BIGLITTLE(*--out,*out++) = (BNWORD32)p;
	}
	BIGLITTLE(*--out,*out) = (BNWORD32)(p >> 32);
}
#else
#error No 32x32 -> 64 multiply available for 32-bit bignum package
#endif
#endif /* bniMulN1_32 */

/*
 * bniMulAdd1_32: Multiply an n-word input by a 1-word input and add the
 * low n words of the product to the destination.  *Returns the n+1st word
 * of the product.*  (That turns out to be more convenient than adding
 * it into the destination and dealing with a possible unit carry out
 * of *that*.)  This uses either the mul32_ppmma and mul32_ppmmaa macros,
 * or C multiplication with the BNWORD64 type.
 *
 * If you're going to write assembly primitives, this is the one to
 * start with.  It is by far the most commonly called function.
 */
#ifndef bniMulAdd1_32
#if defined(mul32_ppmm)
BNWORD32
bniMulAdd1_32(BNWORD32 *out, BNWORD32 const *in, unsigned len, BNWORD32 k)
{
	BNWORD32 prod, carry, carryin;

	pgpAssert(len > 0);

	BIG(--out;--in;);
	carryin = *out;
	mul32_ppmma(carry, *out, *in, k, carryin);
	LITTLE(out++;in++;)

	while (--len) {
		BIG(--out;--in;);
		carryin = carry;
		mul32_ppmmaa(carry, prod, *in, k, carryin, *out);
		*out = prod;
		LITTLE(out++;in++;)
	}

	return carry;
}
#elif defined(BNWORD64)
BNWORD32
bniMulAdd1_32(BNWORD32 *out, BNWORD32 const *in, unsigned len, BNWORD32 k)
{
	BNWORD64 p;

	pgpAssert(len > 0);

	p = (BNWORD64)BIGLITTLE(*--in,*in++) * k + BIGLITTLE(*--out,*out);
	BIGLITTLE(*out,*out++) = (BNWORD32)p;

	while (--len) {
		p = (BNWORD64)BIGLITTLE(*--in,*in++) * k +
		    (BNWORD32)(p >> 32) + BIGLITTLE(*--out,*out);
		BIGLITTLE(*out,*out++) = (BNWORD32)p;
	}

	return (BNWORD32)(p >> 32);
}
#else
#error No 32x32 -> 64 multiply available for 32-bit bignum package
#endif
#endif /* bniMulAdd1_32 */

/*
 * bniMulSub1_32: Multiply an n-word input by a 1-word input and subtract the
 * n-word product from the destination.  Returns the n+1st word of the product.
 * This uses either the mul32_ppmm and mul32_ppmma macros, or
 * C multiplication with the BNWORD64 type.
 *
 * This is rather uglier than adding, but fortunately it's only used in
 * division which is not used too heavily.
 */
#ifndef bniMulSub1_32
#if defined(mul32_ppmm)
BNWORD32
bniMulSub1_32(BNWORD32 *out, BNWORD32 const *in, unsigned len, BNWORD32 k)
{
	BNWORD32 prod, carry, carryin;

	pgpAssert(len > 0);

	BIG(--in;)
	mul32_ppmm(carry, prod, *in, k);
	LITTLE(in++;)
	carry += (BIGLITTLE(*--out,*out++) -= prod) > (BNWORD32)~prod;

	while (--len) {
		BIG(--in;);
		carryin = carry;
		mul32_ppmma(carry, prod, *in, k, carryin);
		LITTLE(in++;)
		carry += (BIGLITTLE(*--out,*out++) -= prod) > (BNWORD32)~prod;
	}

	return carry;
}
#elif defined(BNWORD64)
BNWORD32
bniMulSub1_32(BNWORD32 *out, BNWORD32 const *in, unsigned len, BNWORD32 k)
{
	BNWORD64 p;
	BNWORD32 carry, t;

	pgpAssert(len > 0);

	p = (BNWORD64)BIGLITTLE(*--in,*in++) * k;
	t = BIGLITTLE(*--out,*out);
	carry = (BNWORD32)(p>>32) +
			((BIGLITTLE(*out,*out++)=t-(BNWORD32)p) > t);

	while (--len) {
		p = (BNWORD64)BIGLITTLE(*--in,*in++) * k + carry;
		t = BIGLITTLE(*--out,*out);
		carry = (BNWORD32)(p>>32) +
			( (BIGLITTLE(*out,*out++)=t-(BNWORD32)p) > t );
	}

	return carry;
}
#else
#error No 32x32 -> 64 multiply available for 32-bit bignum package
#endif
#endif /* !bniMulSub1_32 */

/*
 * Shift n words left "shift" bits.  0 < shift < 32.  Returns the
 * carry, any bits shifted off the left-hand side (0 <= carry < 2^shift).
 */
#ifndef bniLshift_32
BNWORD32
bniLshift_32(BNWORD32 *num, unsigned len, unsigned shift)
{
	BNWORD32 x, carry;

	pgpAssert(shift > 0);
	pgpAssert(shift < 32);

	carry = 0;
	while (len--) {
		BIG(--num;)
		x = *num;
		*num = (x<<shift) | carry;
		LITTLE(num++;)
		carry = x >> (32-shift);
	}
	return carry;
}
#endif /* !bniLshift_32 */

/*
 * An optimized version of the above, for shifts of 1.
 * Some machines can use add-with-carry tricks for this.
 */
#ifndef bniDouble_32
BNWORD32
bniDouble_32(BNWORD32 *num, unsigned len)
{
	BNWORD32 x, carry;

	carry = 0;
	while (len--) {
		BIG(--num;)
		x = *num;
		*num = (x<<1) | carry;
		LITTLE(num++;)
		carry = x >> (32-1);
	}
	return carry;
}
#endif /* !bniDouble_32 */

/*
 * Shift n words right "shift" bits.  0 < shift < 32.  Returns the
 * carry, any bits shifted off the right-hand side (0 <= carry < 2^shift).
 */
#ifndef bniRshift_32
BNWORD32
bniRshift_32(BNWORD32 *num, unsigned len, unsigned shift)
{
	BNWORD32 x, carry = 0;

	pgpAssert(shift > 0);
	pgpAssert(shift < 32);

	BIGLITTLE(num -= len, num += len);

	while (len--) {
		LITTLE(--num;)
		x = *num;
		*num = (x>>shift) | carry;
		BIG(num++;)
		carry = x << (32-shift);
	}
	return carry >> (32-shift);
}
#endif /* !bniRshift_32 */

/* 
 * Multiply two numbers of the given lengths.  prod and num2 may overlap,
 * provided that the low len1 bits of prod are free.  (This corresponds
 * nicely to the place the result is returned from bniMontReduce_32.)
 *
 * TODO: Use Karatsuba multiply.  The overlap constraints may have
 * to get rewhacked.
 */
#ifndef bniMul_32
void
bniMul_32(BNWORD32 *prod, BNWORD32 const *num1, unsigned len1,
                          BNWORD32 const *num2, unsigned len2)
{
	/* Special case of zero */
	if (!len1 || !len2) {
		bniZero_32(prod, len1+len2);
		return;
	}

	/* Multiply first word */
	bniMulN1_32(prod, num1, len1, BIGLITTLE(*--num2,*num2++));

	/*
	 * Add in subsequent words, storing the most significant word,
	 * which is new each time.
	 */
	while (--len2) {
		BIGLITTLE(--prod,prod++);
		BIGLITTLE(*(prod-len1-1),*(prod+len1)) =
			bniMulAdd1_32(prod, num1, len1,
				BIGLITTLE(*--num2,*num2++));
	}
}
#endif /* !bniMul_32 */

/*
 * bniMulX_32 is a square multiply - both inputs are the same length.
 * It's normally just a macro wrapper around the general multiply,
 * but might be implementable in assembly more efficiently (such as
 * when product scanning).
 */
#ifndef bniMulX_32
#if defined(BNWORD64) && PRODUCT_SCAN
/*
 * Test code to see whether product scanning is any faster.  It seems
 * to make the C code slower, so PRODUCT_SCAN is not defined.
 */
static void
bniMulX_32(BNWORD32 *prod, BNWORD32 const *num1, BNWORD32 const *num2,
	unsigned len)
{
	BNWORD64 x, y;
	BNWORD32 const *p1, *p2;
	unsigned carry;
	unsigned i, j;

	/* Special case of zero */
	if (!len)
		return;

	x = (BNWORD64)BIGLITTLE(num1[-1] * num2[-1], num1[0] * num2[0]);
	BIGLITTLE(*--prod, *prod++) = (BNWORD32)x;
	x >>= 32;

	for (i = 1; i < len; i++) {
		carry = 0;
		p1 = num1;
		p2 = BIGLITTLE(num2-i-1,num2+i+1);
		for (j = 0; j <= i; j++) {
			BIG(y = (BNWORD64)*--p1 * *p2++;)
			LITTLE(y = (BNWORD64)*p1++ * *--p2;)
			x += y;
			carry += (x < y);
		}
		BIGLITTLE(*--prod,*prod++) = (BNWORD32)x;
		x = (x >> 32) | (BNWORD64)carry << 32;
	}
	for (i = 1; i < len; i++) {
		carry = 0;
		p1 = BIGLITTLE(num1-i,num1+i);
		p2 = BIGLITTLE(num2-len,num2+len);
		for (j = i; j < len; j++) {
			BIG(y = (BNWORD64)*--p1 * *p2++;)
			LITTLE(y = (BNWORD64)*p1++ * *--p2;)
			x += y;
			carry += (x < y);
		}
		BIGLITTLE(*--prod,*prod++) = (BNWORD32)x;
		x = (x >> 32) | (BNWORD64)carry << 32;
	}
	
	BIGLITTLE(*--prod,*prod) = (BNWORD32)x;
}
#else /* !defined(BNWORD64) || !PRODUCT_SCAN */
/* Default trivial macro definition */
#define bniMulX_32(prod, num1, num2, len) bniMul_32(prod, num1, len, num2, len)
#endif /* !defined(BNWORD64) || !PRODUCT_SCAN */
#endif /* !lbmMulX_32 */

#if !defined(bniMontMul_32) && defined(BNWORD64) && PRODUCT_SCAN
/*
 * Test code for product-scanning multiply.  This seems to slow the C
 * code down rather than speed it up.
 * This does a multiply and Montgomery reduction together, using the
 * same loops.  The outer loop scans across the product, twice.
 * The first pass computes the low half of the product and the
 * Montgomery multipliers.  These are stored in the product array,
 * which contains no data as of yet.  x and carry add up the columns
 * and propagate carries forward.
 *
 * The second half multiplies the upper half, adding in the modulus
 * times the Montgomery multipliers.  The results of this multiply
 * are stored.
 */
static void
bniMontMul_32(BNWORD32 *prod, BNWORD32 const *num1, BNWORD32 const *num2,
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -