📄 bni64.c

📁 vc环境下的pgp源码
💻 C
📖 第 1 页 / 共 5 页
字号:
 * and compare from the most-significant end until a difference if found.
 * When it is, figure out the sign of the difference and return it.
 */
int
bniCmp_64(BNWORD64 const *num1, BNWORD64 const *num2, unsigned len)
{
	BIGLITTLE(num1 -= len, num1 += len);
	BIGLITTLE(num2 -= len, num2 += len);

	while (len--) {
		if (BIGLITTLE(*num1++ != *num2++, *--num1 != *--num2)) {
			if (BIGLITTLE(num1[-1] < num2[-1], *num1 < *num2))
				return -1;
			else
				return 1;
		}
	}
	return 0;
}
#endif /* !bniCmp_64 */

/*
 * mul64_ppmmaa(ph,pl,x,y,a,b) is an optional routine that
 * computes (ph,pl) = x * y + a + b.  mul64_ppmma and mul64_ppmm
 * are simpler versions.  If you want to be lazy, all of these
 * can be defined in terms of the others, so here we create any
 * that have not been defined in terms of the ones that have been.
 */

/* Define ones with fewer a's in terms of ones with more a's */
#if !defined(mul64_ppmma) && defined(mul64_ppmmaa)
#define mul64_ppmma(ph,pl,x,y,a) mul64_ppmmaa(ph,pl,x,y,a,0)
#endif

#if !defined(mul64_ppmm) && defined(mul64_ppmma)
#define mul64_ppmm(ph,pl,x,y) mul64_ppmma(ph,pl,x,y,0)
#endif

/*
 * Use this definition to test the mul64_ppmm-based operations on machines
 * that do not provide mul64_ppmm.  Change the final "0" to a "1" to
 * enable it.
 */
#if !defined(mul64_ppmm) && defined(BNWORD128) && 0	/* Debugging */
#define mul64_ppmm(ph,pl,x,y) \
	({BNWORD128 _ = (BNWORD128)(x)*(y); (pl) = _; (ph) = _>>64;})
#endif

#if defined(mul64_ppmm) && !defined(mul64_ppmma)
#define mul64_ppmma(ph,pl,x,y,a) \
	(mul64_ppmm(ph,pl,x,y), (ph) += ((pl) += (a)) < (a))
#endif

#if defined(mul64_ppmma) && !defined(mul64_ppmmaa)
#define mul64_ppmmaa(ph,pl,x,y,a,b) \
	(mul64_ppmma(ph,pl,x,y,a), (ph) += ((pl) += (b)) < (b))
#endif

/*
 * bniMulN1_64: Multiply an n-word input by a 1-word input and store the
 * n+1-word product.  This uses either the mul64_ppmm and mul64_ppmma
 * macros, or C multiplication with the BNWORD128 type.  This uses mul64_ppmma
 * if available, assuming you won't bother defining it unless you can do
 * better than the normal multiplication.
 */
#ifndef bniMulN1_64
#ifdef bniMulAdd1_64	/* If we have this asm primitive, use it. */
void
bniMulN1_64(BNWORD64 *out, BNWORD64 const *in, unsigned len, BNWORD64 k)
{
	bniZero_64(out, len);
	BIGLITTLE(*(out-len),*(out+len)) = bniMulAdd1_64(out, in, len, k);
}
#elif defined(mul64_ppmm)
void
bniMulN1_64(BNWORD64 *out, BNWORD64 const *in, unsigned len, BNWORD64 k)
{
	BNWORD64 prod, carry, carryin;

	pgpAssert(len > 0);

	BIG(--out;--in;);
	mul64_ppmm(carry, *out, *in, k);
	LITTLE(out++;in++;)

	while (--len) {
		BIG(--out;--in;)
		carryin = carry;
		mul64_ppmma(carry, *out, *in, k, carryin);
		LITTLE(out++;in++;)
	}
	BIGLITTLE(*--out,*out) = carry;
}
#elif defined(BNWORD128)
void
bniMulN1_64(BNWORD64 *out, BNWORD64 const *in, unsigned len, BNWORD64 k)
{
	BNWORD128 p;

	pgpAssert(len > 0);

	p = (BNWORD128)BIGLITTLE(*--in,*in++) * k;
	BIGLITTLE(*--out,*out++) = (BNWORD64)p;

	while (--len) {
		p = (BNWORD128)BIGLITTLE(*--in,*in++) * k + (BNWORD64)(p >> 64);
		BIGLITTLE(*--out,*out++) = (BNWORD64)p;
	}
	BIGLITTLE(*--out,*out) = (BNWORD64)(p >> 64);
}
#else
#error No 64x64 -> 128 multiply available for 64-bit bignum package
#endif
#endif /* bniMulN1_64 */

/*
 * bniMulAdd1_64: Multiply an n-word input by a 1-word input and add the
 * low n words of the product to the destination.  *Returns the n+1st word
 * of the product.*  (That turns out to be more convenient than adding
 * it into the destination and dealing with a possible unit carry out
 * of *that*.)  This uses either the mul64_ppmma and mul64_ppmmaa macros,
 * or C multiplication with the BNWORD128 type.
 *
 * If you're going to write assembly primitives, this is the one to
 * start with.  It is by far the most commonly called function.
 */
#ifndef bniMulAdd1_64
#if defined(mul64_ppmm)
BNWORD64
bniMulAdd1_64(BNWORD64 *out, BNWORD64 const *in, unsigned len, BNWORD64 k)
{
	BNWORD64 prod, carry, carryin;

	pgpAssert(len > 0);

	BIG(--out;--in;);
	carryin = *out;
	mul64_ppmma(carry, *out, *in, k, carryin);
	LITTLE(out++;in++;)

	while (--len) {
		BIG(--out;--in;);
		carryin = carry;
		mul64_ppmmaa(carry, prod, *in, k, carryin, *out);
		*out = prod;
		LITTLE(out++;in++;)
	}

	return carry;
}
#elif defined(BNWORD128)
BNWORD64
bniMulAdd1_64(BNWORD64 *out, BNWORD64 const *in, unsigned len, BNWORD64 k)
{
	BNWORD128 p;

	pgpAssert(len > 0);

	p = (BNWORD128)BIGLITTLE(*--in,*in++) * k + BIGLITTLE(*--out,*out);
	BIGLITTLE(*out,*out++) = (BNWORD64)p;

	while (--len) {
		p = (BNWORD128)BIGLITTLE(*--in,*in++) * k +
		    (BNWORD64)(p >> 64) + BIGLITTLE(*--out,*out);
		BIGLITTLE(*out,*out++) = (BNWORD64)p;
	}

	return (BNWORD64)(p >> 64);
}
#else
#error No 64x64 -> 128 multiply available for 64-bit bignum package
#endif
#endif /* bniMulAdd1_64 */

/*
 * bniMulSub1_64: Multiply an n-word input by a 1-word input and subtract the
 * n-word product from the destination.  Returns the n+1st word of the product.
 * This uses either the mul64_ppmm and mul64_ppmma macros, or
 * C multiplication with the BNWORD128 type.
 *
 * This is rather uglier than adding, but fortunately it's only used in
 * division which is not used too heavily.
 */
#ifndef bniMulSub1_64
#if defined(mul64_ppmm)
BNWORD64
bniMulSub1_64(BNWORD64 *out, BNWORD64 const *in, unsigned len, BNWORD64 k)
{
	BNWORD64 prod, carry, carryin;

	pgpAssert(len > 0);

	BIG(--in;)
	mul64_ppmm(carry, prod, *in, k);
	LITTLE(in++;)
	carry += (BIGLITTLE(*--out,*out++) -= prod) > (BNWORD64)~prod;

	while (--len) {
		BIG(--in;);
		carryin = carry;
		mul64_ppmma(carry, prod, *in, k, carryin);
		LITTLE(in++;)
		carry += (BIGLITTLE(*--out,*out++) -= prod) > (BNWORD64)~prod;
	}

	return carry;
}
#elif defined(BNWORD128)
BNWORD64
bniMulSub1_64(BNWORD64 *out, BNWORD64 const *in, unsigned len, BNWORD64 k)
{
	BNWORD128 p;
	BNWORD64 carry, t;

	pgpAssert(len > 0);

	p = (BNWORD128)BIGLITTLE(*--in,*in++) * k;
	t = BIGLITTLE(*--out,*out);
	carry = (BNWORD64)(p>>64) +
			((BIGLITTLE(*out,*out++)=t-(BNWORD64)p) > t);

	while (--len) {
		p = (BNWORD128)BIGLITTLE(*--in,*in++) * k + carry;
		t = BIGLITTLE(*--out,*out);
		carry = (BNWORD64)(p>>64) +
			( (BIGLITTLE(*out,*out++)=t-(BNWORD64)p) > t );
	}

	return carry;
}
#else
#error No 64x64 -> 128 multiply available for 64-bit bignum package
#endif
#endif /* !bniMulSub1_64 */

/*
 * Shift n words left "shift" bits.  0 < shift < 64.  Returns the
 * carry, any bits shifted off the left-hand side (0 <= carry < 2^shift).
 */
#ifndef bniLshift_64
BNWORD64
bniLshift_64(BNWORD64 *num, unsigned len, unsigned shift)
{
	BNWORD64 x, carry;

	pgpAssert(shift > 0);
	pgpAssert(shift < 64);

	carry = 0;
	while (len--) {
		BIG(--num;)
		x = *num;
		*num = (x<<shift) | carry;
		LITTLE(num++;)
		carry = x >> (64-shift);
	}
	return carry;
}
#endif /* !bniLshift_64 */

/*
 * An optimized version of the above, for shifts of 1.
 * Some machines can use add-with-carry tricks for this.
 */
#ifndef bniDouble_64
BNWORD64
bniDouble_64(BNWORD64 *num, unsigned len)
{
	BNWORD64 x, carry;

	carry = 0;
	while (len--) {
		BIG(--num;)
		x = *num;
		*num = (x<<1) | carry;
		LITTLE(num++;)
		carry = x >> (64-1);
	}
	return carry;
}
#endif /* !bniDouble_64 */

/*
 * Shift n words right "shift" bits.  0 < shift < 64.  Returns the
 * carry, any bits shifted off the right-hand side (0 <= carry < 2^shift).
 */
#ifndef bniRshift_64
BNWORD64
bniRshift_64(BNWORD64 *num, unsigned len, unsigned shift)
{
	BNWORD64 x, carry = 0;

	pgpAssert(shift > 0);
	pgpAssert(shift < 64);

	BIGLITTLE(num -= len, num += len);

	while (len--) {
		LITTLE(--num;)
		x = *num;
		*num = (x>>shift) | carry;
		BIG(num++;)
		carry = x << (64-shift);
	}
	return carry >> (64-shift);
}
#endif /* !bniRshift_64 */

/* 
 * Multiply two numbers of the given lengths.  prod and num2 may overlap,
 * provided that the low len1 bits of prod are free.  (This corresponds
 * nicely to the place the result is returned from bniMontReduce_64.)
 *
 * TODO: Use Karatsuba multiply.  The overlap constraints may have
 * to get rewhacked.
 */
#ifndef bniMul_64
void
bniMul_64(BNWORD64 *prod, BNWORD64 const *num1, unsigned len1,
                          BNWORD64 const *num2, unsigned len2)
{
	/* Special case of zero */
	if (!len1 || !len2) {
		bniZero_64(prod, len1+len2);
		return;
	}

	/* Multiply first word */
	bniMulN1_64(prod, num1, len1, BIGLITTLE(*--num2,*num2++));

	/*
	 * Add in subsequent words, storing the most significant word,
	 * which is new each time.
	 */
	while (--len2) {
		BIGLITTLE(--prod,prod++);
		BIGLITTLE(*(prod-len1-1),*(prod+len1)) =
			bniMulAdd1_64(prod, num1, len1,
				BIGLITTLE(*--num2,*num2++));
	}
}
#endif /* !bniMul_64 */

/*
 * bniMulX_64 is a square multiply - both inputs are the same length.
 * It's normally just a macro wrapper around the general multiply,
 * but might be implementable in assembly more efficiently (such as
 * when product scanning).
 */
#ifndef bniMulX_64
#if defined(BNWORD128) && PRODUCT_SCAN
/*
 * Test code to see whether product scanning is any faster.  It seems
 * to make the C code slower, so PRODUCT_SCAN is not defined.
 */
static void
bniMulX_64(BNWORD64 *prod, BNWORD64 const *num1, BNWORD64 const *num2,
	unsigned len)
{
	BNWORD128 x, y;
	BNWORD64 const *p1, *p2;
	unsigned carry;
	unsigned i, j;

	/* Special case of zero */
	if (!len)
		return;

	x = (BNWORD128)BIGLITTLE(num1[-1] * num2[-1], num1[0] * num2[0]);
	BIGLITTLE(*--prod, *prod++) = (BNWORD64)x;
	x >>= 64;

	for (i = 1; i < len; i++) {
		carry = 0;
		p1 = num1;
		p2 = BIGLITTLE(num2-i-1,num2+i+1);
		for (j = 0; j <= i; j++) {
			BIG(y = (BNWORD128)*--p1 * *p2++;)
			LITTLE(y = (BNWORD128)*p1++ * *--p2;)
			x += y;
			carry += (x < y);
		}
		BIGLITTLE(*--prod,*prod++) = (BNWORD64)x;
		x = (x >> 64) | (BNWORD128)carry << 64;
	}
	for (i = 1; i < len; i++) {
		carry = 0;
		p1 = BIGLITTLE(num1-i,num1+i);
		p2 = BIGLITTLE(num2-len,num2+len);
		for (j = i; j < len; j++) {
			BIG(y = (BNWORD128)*--p1 * *p2++;)
			LITTLE(y = (BNWORD128)*p1++ * *--p2;)
			x += y;
			carry += (x < y);
		}
		BIGLITTLE(*--prod,*prod++) = (BNWORD64)x;
		x = (x >> 64) | (BNWORD128)carry << 64;
	}
	
	BIGLITTLE(*--prod,*prod) = (BNWORD64)x;
}
#else /* !defined(BNWORD128) || !PRODUCT_SCAN */
/* Default trivial macro definition */
#define bniMulX_64(prod, num1, num2, len) bniMul_64(prod, num1, len, num2, len)
#endif /* !defined(BNWORD128) || !PRODUCT_SCAN */
#endif /* !lbmMulX_64 */

#if !defined(bniMontMul_64) && defined(BNWORD128) && PRODUCT_SCAN
/*
 * Test code for product-scanning multiply.  This seems to slow the C
 * code down rather than speed it up.
 * This does a multiply and Montgomery reduction together, using the
 * same loops.  The outer loop scans across the product, twice.
 * The first pass computes the low half of the product and the
 * Montgomery multipliers.  These are stored in the product array,
 * which contains no data as of yet.  x and carry add up the columns
 * and propagate carries forward.
 *
 * The second half multiplies the upper half, adding in the modulus
 * times the Montgomery multipliers.  The results of this multiply
 * are stored.
 */
static void
bniMontMul_64(BNWORD64 *prod, BNWORD64 const *num1, BNWORD64 const *num2,
	BNWORD64 const *mod, unsigned len, BNWORD64 inv)
{
	BNWORD128 x, y;
	BNWORD64 const *p1, *p2, *pm;
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -