📄 bignumber.cpp
字号:
dh = dh << shift | dl >> (32-shift);
dl <<= shift;
}
for (shift = 0; (dh & (BNWORD32)1 << 32-1-shift)) == 0;
shift++)
;
if (shift) {
}
dh = dh << shift | dl >> (32-shift);
shift = 0;
while (dh
}
#endif
dh = BIGLITTLE(*(d-dlen),*(d+(dlen-1)));
ASSERT(dh);
/* Normalize the divisor */
shift = 0;
r = dh;
i = 32/2;
do {
if (r >> i)
r >>= i;
else
shift += i;
} while ((i /= 2) != 0);
nh = 0;
if (shift) {
bniLshift_32(d, dlen, shift);
dh = BIGLITTLE(*(d-dlen),*(d+(dlen-1)));
nh = bniLshift_32(n, nlen, shift);
}
/* Assert that dh is now normalized */
ASSERT(dh >> (32-1));
/* Also get the second-most significant word of the divisor */
dl = BIGLITTLE(*(d-(dlen-1)),*(d+(dlen-2)));
/*
* Adjust pointers: n to point to least significant end of first
* first subtract, and q to one the most-significant end of the
* quotient array.
*/
BIGLITTLE(n -= qlen,n += qlen);
BIGLITTLE(q -= qlen,q += qlen);
/* Fetch the most significant stored word of the dividend */
nm = BIGLITTLE(*(n-dlen),*(n+(dlen-1)));
/*
* Compute the first digit of the quotient, based on the
* first two words of the dividend (the most significant of which
* is the overflow word h).
*/
if (nh) {
ASSERT(nh < dh);
r = bniDiv21_32(&qhat, nh, nm, dh);
} else if (nm >= dh) {
qhat = nm/dh;
r = nm % dh;
} else { /* Quotient is zero */
qhigh = 0;
goto divloop;
}
/* Now get the third most significant word of the dividend */
nl = BIGLITTLE(*(n-(dlen-1)),*(n+(dlen-2)));
/*
* Correct qhat, the estimate of quotient digit.
* qhat can only be high, and at most two words high,
* so the loop can be unrolled and abbreviated.
*/
#ifdef mul32_ppmm
mul32_ppmm(nm, t32, qhat, dl);
if (nm > r || (nm == r && t32 > nl)) {
/* Decrement qhat and adjust comparison parameters */
qhat--;
if ((r += dh) >= dh) {
nm -= (t32 < dl);
t32 -= dl;
if (nm > r || (nm == r && t32 > nl))
qhat--;
}
}
#elif defined(BNWORD64)
t64 = (BNWORD64)qhat * dl;
if (t64 > ((BNWORD64)r << 32) + nl) {
/* Decrement qhat and adjust comparison parameters */
qhat--;
if ((r += dh) > dh) {
t64 -= dl;
if (t64 > ((BNWORD64)r << 32) + nl)
qhat--;
}
}
#else /* Use bniMulN1_32 */
bniMulN1_32(BIGLITTLE(t2+2,t2), &dl, 1, qhat);
if (t2high > r || (t2high == r && t2low > nl)) {
/* Decrement qhat and adjust comparison parameters */
qhat--;
if ((r += dh) >= dh) {
t2high -= (t2low < dl);
t2low -= dl;
if (t2high > r || (t2high == r && t2low > nl))
qhat--;
}
}
#endif
/* Do the multiply and subtract */
r = bniMulSub1_32(n, d, dlen, qhat);
/* If there was a borrow, add back once. */
if (r > nh) { /* Borrow? */
(void)bniAddN_32(n, d, dlen);
qhat--;
}
/* Remember the first quotient digit. */
qhigh = qhat;
/* Now, the main division loop: */
divloop:
while (qlen--) {
/* Advance n */
nh = BIGLITTLE(*(n-dlen),*(n+(dlen-1)));
BIGLITTLE(++n,--n);
nm = BIGLITTLE(*(n-dlen),*(n+(dlen-1)));
if (nh == dh) {
qhat = ~(BNWORD32)0;
/* Optimized computation of r = (nh,nm) - qhat * dh */
r = nh + nm;
if (r < nh)
goto subtract;
} else {
ASSERT(nh < dh);
r = bniDiv21_32(&qhat, nh, nm, dh);
}
nl = BIGLITTLE(*(n-(dlen-1)),*(n+(dlen-2)));
#ifdef mul32_ppmm
mul32_ppmm(nm, t32, qhat, dl);
if (nm > r || (nm == r && t32 > nl)) {
/* Decrement qhat and adjust comparison parameters */
qhat--;
if ((r += dh) >= dh) {
nm -= (t32 < dl);
t32 -= dl;
if (nm > r || (nm == r && t32 > nl))
qhat--;
}
}
#elif defined(BNWORD64)
t64 = (BNWORD64)qhat * dl;
if (t64 > ((BNWORD64)r<<32) + nl) {
/* Decrement qhat and adjust comparison parameters */
qhat--;
if ((r += dh) >= dh) {
t64 -= dl;
if (t64 > ((BNWORD64)r << 32) + nl)
qhat--;
}
}
#else /* Use bniMulN1_32 */
bniMulN1_32(BIGLITTLE(t2+2,t2), &dl, 1, qhat);
if (t2high > r || (t2high == r && t2low > nl)) {
/* Decrement qhat and adjust comparison parameters */
qhat--;
if ((r += dh) >= dh) {
t2high -= (t2low < dl);
t2low -= dl;
if (t2high > r || (t2high == r && t2low > nl))
qhat--;
}
}
#endif
/*
* As a point of interest, note that it is not worth checking
* for qhat of 0 or 1 and installing special-case code. These
* occur with probability 2^-32, so spending 1 cycle to check
* for them is only worth it if we save more than 2^15 cycles,
* and a multiply-and-subtract for numbers in the 1024-bit
* range just doesn't take that long.
*/
subtract:
/*
* n points to the least significant end of the substring
* of n to be subtracted from. qhat is either exact or
* one too large. If the subtract gets a borrow, it was
* one too large and the divisor is added back in. It's
* a dlen+1 word add which is guaranteed to produce a
* carry out, so it can be done very simply.
*/
r = bniMulSub1_32(n, d, dlen, qhat);
if (r > nh) { /* Borrow? */
(void)bniAddN_32(n, d, dlen);
qhat--;
}
/* Store the quotient digit */
BIGLITTLE(*q++,*--q) = qhat;
}
/* Tah dah! */
if (shift) {
bniRshift_32(d, dlen, shift);
bniRshift_32(n, dlen, shift);
}
return qhigh;
}
#endif
/*
* Find the negative multiplicative inverse of x (x must be odd!) modulo 2^32.
*
* This just performs Newton's iteration until it gets the
* inverse. The initial estimate is always correct to 3 bits, and
* sometimes 4. The number of valid bits doubles each iteration.
* (To prove it, assume x * y == 1 (mod 2^n), and introduce a variable
* for the error mod 2^2n. x * y == 1 + k*2^n (mod 2^2n) and follow
* the iteration through.)
*/
#ifndef bniMontInv1_32
BNWORD32
bniMontInv1_32(BNWORD32 const x)
{
BNWORD32 y = x, z;
ASSERT(x & 1);
while ((z = x*y) != 1)
y *= 2 - z;
return -y;
}
#endif /* !bniMontInv1_32 */
#if defined(BNWORD64) && PRODUCT_SCAN
/*
* Test code for product-scanning Montgomery reduction.
* This seems to slow the C code down rather than speed it up.
*
* The first loop computes the Montgomery multipliers, storing them over
* the low half of the number n.
*
* The second half multiplies the upper half, adding in the modulus
* times the Montgomery multipliers. The results of this multiply
* are stored.
*/
void
bniMontReduce_32(BNWORD32 *n, BNWORD32 const *mod, unsigned mlen, BNWORD32 inv)
{
BNWORD64 x, y;
BNWORD32 const *pm;
BNWORD32 *pn;
BNWORD32 t;
unsigned carry;
unsigned i, j;
/* Special case of zero */
if (!mlen)
return;
/* Pass 1 - compute Montgomery multipliers */
/* First iteration can have certain simplifications. */
t = BIGLITTLE(n[-1],n[0]);
x = t;
t *= inv;
BIGLITTLE(n[-1], n[0]) = t;
x += (BNWORD64)t * BIGLITTLE(mod[-1],mod[0]); /* Can't overflow */
ASSERT((BNWORD32)x == 0);
x = x >> 32;
for (i = 1; i < mlen; i++) {
carry = 0;
pn = n;
pm = BIGLITTLE(mod-i-1,mod+i+1);
for (j = 0; j < i; j++) {
y = (BNWORD64)BIGLITTLE(*--pn * *pm++, *pn++ * *--pm);
x += y;
carry += (x < y);
}
ASSERT(BIGLITTLE(pn == n-i, pn == n+i));
y = t = BIGLITTLE(pn[-1], pn[0]);
x += y;
carry += (x < y);
BIGLITTLE(pn[-1], pn[0]) = t = inv * (BNWORD32)x;
ASSERT(BIGLITTLE(pm == mod-1, pm == mod+1));
y = (BNWORD64)t * BIGLITTLE(pm[0],pm[-1]);
x += y;
carry += (x < y);
ASSERT((BNWORD32)x == 0);
x = x >> 32 | (BNWORD64)carry << 32;
}
BIGLITTLE(n -= mlen, n += mlen);
/* Pass 2 - compute upper words and add to n */
for (i = 1; i < mlen; i++) {
carry = 0;
pm = BIGLITTLE(mod-i,mod+i);
pn = n;
for (j = i; j < mlen; j++) {
y = (BNWORD64)BIGLITTLE(*--pm * *pn++, *pm++ * *--pn);
x += y;
carry += (x < y);
}
ASSERT(BIGLITTLE(pm == mod-mlen, pm == mod+mlen));
ASSERT(BIGLITTLE(pn == n+mlen-i, pn == n-mlen+i));
y = t = BIGLITTLE(*(n-i),*(n+i-1));
x += y;
carry += (x < y);
BIGLITTLE(*(n-i),*(n+i-1)) = (BNWORD32)x;
x = (x >> 32) | (BNWORD64)carry << 32;
}
/* Last round of second half, simplified. */
t = BIGLITTLE(*(n-mlen),*(n+mlen-1));
x += t;
BIGLITTLE(*(n-mlen),*(n+mlen-1)) = (BNWORD32)x;
carry = (unsigned)(x >> 32);
while (carry)
carry -= bniSubN_32(n, mod, mlen);
while (bniCmp_32(n, mod, mlen) >= 0)
(void)bniSubN_32(n, mod, mlen);
}
#define bniMontReduce_32 bniMontReduce_32
#endif
/*
* Montgomery reduce n, modulo mod. This reduces modulo mod and divides by
* 2^(32*mlen). Returns the result in the *top* mlen words of the argument n.
* This is ready for another multiplication using bniMul_32.
*
* Montgomery representation is a very useful way to encode numbers when
* you're doing lots of modular reduction. What you do is pick a multiplier
* R which is relatively prime to the modulus and very easy to divide by.
* Since the modulus is odd, R is closen as a power of 2, so the division
* is a shift. In fact, it's a shift of an integral number of words,
* so the shift can be implicit - just drop the low-order words.
*
* Now, choose R *larger* than the modulus m, 2^(32*mlen). Then convert
* all numbers a, b, etc. to Montgomery form M(a), M(b), etc using the
* relationship M(a) = a*R mod m, M(b) = b*R mod m, etc. Note that:
* - The Montgomery form of a number depends on the modulus m.
* A fixed modulus m is assumed throughout this discussion.
* - Since R is relaitvely prime to m, multiplication by R is invertible;
* no information about the numbers is lost, they're just scrambled.
* - Adding (and subtracting) numbers in this form works just as usual.
* M(a+b) = (a+b)*R mod m = (a*R + b*R) mod m = (M(a) + M(b)) mod m
* - Multiplying numbers in this form produces a*b*R*R. The problem
* is to divide out the excess factor of R, modulo m as well as to
* reduce to the given length mlen. It turns out that this can be
* done *faster* than a normal divide, which is where the speedup
* in Montgomery division comes from.
*
* Normal reduction chooses a most-significant quotient digit q and then
* subtracts q*m from the number to be reduced. Choosing q is tricky
* and involved (just look at bniDiv_32 to see!) and is usually
* imperfect, requiring a check for correction after the subtraction.
*
* Montgomery reduction *adds* a multiple of m to the *low-order* part
* of the number to be reduced. This multiple is chosen to make the
* low-order part of the number come out to zero. This can be done
* with no trickery or error using a precomputed inverse of the modulus.
* In this code, the "part" is one word, but any width can be used.
*
* Repeating this step sufficiently often results in a value which
* is a multiple of R (a power of two, remember) but is still (since
* the additions were to the low-order part and thus did not increase
* the value of the number being reduced very much) still not much
* larger than m*R. Then implicitly divide by R and subtract off
* m until the result is in the correct range.
*
* Since the low-order part being cancelled is less than R, the
* multiple of m added must have a multiplier which is at most R-1.
* Assuming that the input is at most m*R-1, the final number is
* at most m*(2*R-1)-1 = 2*m*R - m - 1, so subtracting m once from
* the high-order part, equivalent to subtracting m*R from the
* while number, produces a result which is at most m*R - m - 1,
* which divided by R is at most m-1.
*
* To convert *to* Montgomery form, you need a regular remainder
* routine, although you can just compute R*R (mod m) and do the
* conversion using Montgomery multiplication. To convert *from*
* Montgomery form, just Montgomery reduce the number to
* remove the extra factor of R.
*
* TODO: Change to a full inverse and use Karatsuba's multiplication
* rather than this word-at-a-time.
*/
#ifndef bniMontReduce_32
void
bniMontReduce_32(BNWORD32 *n, BNWORD32 const *mod, unsigned const mlen,
BNWORD32 inv)
{
BNWORD32 t;
BNWORD32 c = 0;
unsigned len = mlen;
/* inv must be the negative inverse of mod's least significant word */
ASSERT((BNWORD32)(inv * BIGLITTLE(mod[-1],mod[0])) == (BNWORD32)-1);
ASSERT(len);
do {
t = bniMulAdd1_32(n, mod, mlen, inv * BIGLITTLE(n[-1],n[0]));
c += bniAdd1_32(BIGLITTLE(n-mlen,n+mlen), len, t);
BIGLITTLE(--n,++n);
} while (--len);
/*
* All that adding can cause an overflow past the modulus size,
* but it's unusual, and never by much, so a subtraction loop
* is the right way to deal with it.
* This subtraction happens infrequently - I've only ever seen it
* invoked once per reduction, and then just under 22.5% of the time.
*/
while (c)
c -= bniSubN_32(n, mod, mlen);
while (bniCmp_32(n, mod, mlen) >= 0)
(void)bniSubN_32(n, mod, mlen);
}
#endif /* !bniMontReduce_32 */
/*
* A couple of helpers that you might want to implement atomically
* in asm sometime.
*/
#ifndef bniMontMul_32
/*
* Multiply "num1" by "num2", modulo "mod", all of length "len", and
* place the result in the high half of "prod". "inv" is the inverse
* of the least-significant word of the modulus, modulo 2^32.
* This uses numbers in Montgomery form. Reduce using "len" and "inv".
*
* This is implemented as a macro to win on compilers that don't do
* inlining, since it's so trivial.
*/
#define bniMontMul_32(prod, n1, n2, mod, len, inv) \
(bniMulX_32(prod, n1, n2, len), bniMontReduce_32(prod, mod, len, inv))
#endif /* !bniMontMul_32 */
#ifndef bniMontSquare_32
/*
* Square "num", modulo "mod", both of length "len", and place the result
* in the high half of "prod". "inv" is the inverse of the least-significant
* word of the modulus, modulo 2^32.
* This uses numb
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -