libtommath.c
  /* If x < 0, add b**(k+1) to it */
  if (mp_cmp_d (x, 0) == MP_LT) {
    mp_set (&q, 1);
    if ((res = mp_lshd (&q, um + 1)) != MP_OKAY) {
      goto CLEANUP;
    }
    if ((res = mp_add (x, &q, x)) != MP_OKAY) {
      goto CLEANUP;
    }
  }

  /* Back off if it's too big */
  while (mp_cmp (x, m) != MP_LT) {
    if ((res = s_mp_sub (x, m, x)) != MP_OKAY) {
      goto CLEANUP;
    }
  }

CLEANUP:
  mp_clear (&q);

  return res;
}

/* multiplies |a| * |b| and only computes up to digs digits of result.
 * HAC pp. 595, Algorithm 14.12.  Modified so you can control how
 * many digits of output are created.
 */
static int s_mp_mul_digs (mp_int * a, mp_int * b, mp_int * c, int digs)
{
  mp_int   t;
  int      res, pa, pb, ix, iy;
  mp_digit u;
  mp_word  r;
  mp_digit tmpx, *tmpt, *tmpy;

  /* can we use the fast multiplier? */
  if (((digs) < MP_WARRAY) &&
      MIN (a->used, b->used) <
          (1 << ((CHAR_BIT * sizeof (mp_word)) - (2 * DIGIT_BIT)))) {
    return fast_s_mp_mul_digs (a, b, c, digs);
  }

  if ((res = mp_init_size (&t, digs)) != MP_OKAY) {
    return res;
  }
  t.used = digs;

  /* compute the digits of the product directly */
  pa = a->used;
  for (ix = 0; ix < pa; ix++) {
    /* set the carry to zero */
    u = 0;

    /* limit ourselves to making digs digits of output */
    pb = MIN (b->used, digs - ix);

    /* setup some aliases */
    /* copy of the digit from a used within the nested loop */
    tmpx = a->dp[ix];

    /* an alias for the destination shifted ix places */
    tmpt = t.dp + ix;

    /* an alias for the digits of b */
    tmpy = b->dp;

    /* compute the columns of the output and propagate the carry */
    for (iy = 0; iy < pb; iy++) {
      /* compute the column as a mp_word */
      r       = ((mp_word)*tmpt) +
                ((mp_word)tmpx) * ((mp_word)*tmpy++) +
                ((mp_word) u);

      /* the new column is the lower part of the result */
      *tmpt++ = (mp_digit) (r & ((mp_word) MP_MASK));

      /* get the carry word from the result */
      u       = (mp_digit) (r >> ((mp_word) DIGIT_BIT));
    }
    /* set carry if it is placed below digs */
    if (ix + iy < digs) {
      *tmpt = u;
    }
  }

  mp_clamp (&t);
  mp_exch (&t, c);

  mp_clear (&t);
  return MP_OKAY;
}
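/* A minimal standalone sketch (not part of libtommath.c) of the same
 * "compute only the low `digs` digits" pattern used by s_mp_mul_digs above,
 * which is the feature Barrett reduction relies on.  The demo_digit/demo_word
 * types, the fixed 16-bit digit size and the guard macro are assumptions made
 * purely for this illustration; they only mimic the mp_digit/mp_word pair.
 */
#ifdef DEMO_MUL_DIGS_SKETCH
#include <stdint.h>
#include <stdio.h>

typedef uint16_t demo_digit;          /* one "digit" of the demo numbers   */
typedef uint32_t demo_word;           /* holds a full digit*digit product  */
#define DEMO_DIGIT_BIT  16
#define DEMO_DIGIT_MASK 0xFFFFu

/* c[0..digs-1] = low `digs` digits of a[0..an-1] * b[0..bn-1] */
static void demo_mul_digs(const demo_digit *a, int an,
                          const demo_digit *b, int bn,
                          demo_digit *c, int digs)
{
  int ix, iy;
  for (ix = 0; ix < digs; ix++) {
    c[ix] = 0;
  }
  for (ix = 0; ix < an && ix < digs; ix++) {
    demo_word u = 0;                  /* running carry for this row        */
    int pb = bn < (digs - ix) ? bn : (digs - ix);
    for (iy = 0; iy < pb; iy++) {
      /* column = existing digit + a[ix]*b[iy] + carry, as a double word   */
      demo_word r = (demo_word)c[ix + iy] +
                    (demo_word)a[ix] * (demo_word)b[iy] + u;
      c[ix + iy] = (demo_digit)(r & DEMO_DIGIT_MASK);
      u          = r >> DEMO_DIGIT_BIT;
    }
    if (ix + iy < digs) {             /* keep the carry only if it fits    */
      c[ix + iy] = (demo_digit)u;
    }
  }
}

int main(void)
{
  /* 0x00020001 * 0x00030001 = 0x0000000600050001; ask for 2 digits only */
  demo_digit a[2] = { 0x0001, 0x0002 };
  demo_digit b[2] = { 0x0001, 0x0003 };
  demo_digit c[2];
  demo_mul_digs(a, 2, b, 2, c, 2);
  printf("low digits: %04x %04x\n", (unsigned)c[1], (unsigned)c[0]); /* 0005 0001 */
  return 0;
}
#endif /* DEMO_MUL_DIGS_SKETCH */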
/* Fast (comba) multiplier
 *
 * This is the fast column-array [comba] multiplier.  It is
 * designed to compute the columns of the product first
 * then handle the carries afterwards.  This has the effect
 * of making the nested loops that compute the columns very
 * simple and schedulable on super-scalar processors.
 *
 * This has been modified to produce a variable number of
 * digits of output so if say only a half-product is required
 * you don't have to compute the upper half (a feature
 * required for fast Barrett reduction).
 *
 * Based on Algorithm 14.12 on pp.595 of HAC.
 *
 */
static int fast_s_mp_mul_digs (mp_int * a, mp_int * b, mp_int * c, int digs)
{
  int      olduse, res, pa, ix, iz;
  mp_digit W[MP_WARRAY];
  register mp_word _W;

  /* grow the destination as required */
  if (c->alloc < digs) {
    if ((res = mp_grow (c, digs)) != MP_OKAY) {
      return res;
    }
  }

  /* number of output digits to produce */
  pa = MIN(digs, a->used + b->used);

  /* clear the carry */
  _W = 0;
  for (ix = 0; ix < pa; ix++) {
      int      tx, ty;
      int      iy;
      mp_digit *tmpx, *tmpy;

      /* get offsets into the two bignums */
      ty = MIN(b->used-1, ix);
      tx = ix - ty;

      /* setup temp aliases */
      tmpx = a->dp + tx;
      tmpy = b->dp + ty;

      /* this is the number of times the loop will iterate, essentially
         while (tx++ < a->used && ty-- >= 0) { ... }
       */
      iy = MIN(a->used-tx, ty+1);

      /* execute loop */
      for (iz = 0; iz < iy; ++iz) {
         _W += ((mp_word)*tmpx++)*((mp_word)*tmpy--);
      }

      /* store term */
      W[ix] = ((mp_digit)_W) & MP_MASK;

      /* make next carry */
      _W = _W >> ((mp_word)DIGIT_BIT);
  }

  /* setup dest */
  olduse  = c->used;
  c->used = pa;

  {
    register mp_digit *tmpc;
    tmpc = c->dp;
    for (ix = 0; ix < pa+1; ix++) {
      /* now extract the previous digit [below the carry] */
      *tmpc++ = W[ix];
    }

    /* clear unused digits [that existed in the old copy of c] */
    for (; ix < olduse; ix++) {
      *tmpc++ = 0;
    }
  }
  mp_clamp (c);
  return MP_OKAY;
}
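/* A minimal standalone sketch (not part of libtommath.c) of the comba idea
 * used above: accumulate an entire column in one wide accumulator and split
 * off the carry only once per column, walking the digit pairs (tx, ty) with
 * tx + ty == ix via the same MIN() windowing.  The 16-bit demo digits, the
 * uint64_t accumulator and the guard macro are assumptions made for this
 * example only; they just mirror the mp_digit/mp_word relationship.
 */
#ifdef DEMO_COMBA_SKETCH
#include <stdint.h>
#include <stdio.h>

#define DEMO_MIN(x, y)   ((x) < (y) ? (x) : (y))
#define DEMO_DIGIT_BIT   16
#define DEMO_DIGIT_MASK  0xFFFFu

/* c[0..an+bn-1] = a[0..an-1] * b[0..bn-1], computed column by column */
static void demo_comba_mul(const uint16_t *a, int an,
                           const uint16_t *b, int bn, uint16_t *c)
{
  uint64_t acc = 0;                     /* running column sum plus carry     */
  int ix, iz;

  for (ix = 0; ix < an + bn; ix++) {
    /* window of digit pairs (tx, ty) with tx + ty == ix */
    int ty = DEMO_MIN(bn - 1, ix);
    int tx = ix - ty;
    int iy = DEMO_MIN(an - tx, ty + 1);

    for (iz = 0; iz < iy; iz++) {
      acc += (uint64_t)a[tx + iz] * (uint64_t)b[ty - iz];
    }
    c[ix] = (uint16_t)(acc & DEMO_DIGIT_MASK);   /* this column's digit      */
    acc >>= DEMO_DIGIT_BIT;                      /* carry into next column   */
  }
}

int main(void)
{
  /* 0x00020001 * 0x00030001 = 0x0000000600050001 */
  uint16_t a[2] = { 0x0001, 0x0002 };
  uint16_t b[2] = { 0x0001, 0x0003 };
  uint16_t c[4];
  demo_comba_mul(a, 2, b, 2, c);
  printf("%04x %04x %04x %04x\n", (unsigned)c[3], (unsigned)c[2],
         (unsigned)c[1], (unsigned)c[0]);        /* 0000 0006 0005 0001      */
  return 0;
}
#endif /* DEMO_COMBA_SKETCH */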
/* init an mp_int for a given size */
static int mp_init_size (mp_int * a, int size)
{
  int x;

  /* pad size so there are always extra digits */
  size += (MP_PREC * 2) - (size % MP_PREC);

  /* alloc mem */
  a->dp = OPT_CAST(mp_digit) XMALLOC (sizeof (mp_digit) * size);
  if (a->dp == NULL) {
    return MP_MEM;
  }

  /* set the members */
  a->used  = 0;
  a->alloc = size;
  a->sign  = MP_ZPOS;

  /* zero the digits */
  for (x = 0; x < size; x++) {
    a->dp[x] = 0;
  }

  return MP_OKAY;
}

/* low level squaring, b = a*a, HAC pp.596-597, Algorithm 14.16 */
static int s_mp_sqr (mp_int * a, mp_int * b)
{
  mp_int   t;
  int      res, ix, iy, pa;
  mp_word  r;
  mp_digit u, tmpx, *tmpt;

  pa = a->used;
  if ((res = mp_init_size (&t, 2*pa + 1)) != MP_OKAY) {
    return res;
  }

  /* default used is maximum possible size */
  t.used = 2*pa + 1;

  for (ix = 0; ix < pa; ix++) {
    /* first calculate the digit at 2*ix */
    /* calculate double precision result */
    r = ((mp_word) t.dp[2*ix]) +
        ((mp_word)a->dp[ix])*((mp_word)a->dp[ix]);

    /* store lower part in result */
    t.dp[ix+ix] = (mp_digit) (r & ((mp_word) MP_MASK));

    /* get the carry */
    u           = (mp_digit)(r >> ((mp_word) DIGIT_BIT));

    /* left hand side of A[ix] * A[iy] */
    tmpx        = a->dp[ix];

    /* alias for where to store the results */
    tmpt        = t.dp + (2*ix + 1);

    for (iy = ix + 1; iy < pa; iy++) {
      /* first calculate the product */
      r       = ((mp_word)tmpx) * ((mp_word)a->dp[iy]);

      /* now calculate the double precision result, note we use
       * addition instead of *2 since it's easier to optimize
       */
      r       = ((mp_word) *tmpt) + r + r + ((mp_word) u);

      /* store lower part */
      *tmpt++ = (mp_digit) (r & ((mp_word) MP_MASK));

      /* get carry */
      u       = (mp_digit)(r >> ((mp_word) DIGIT_BIT));
    }
    /* propagate upwards */
    while (u != ((mp_digit) 0)) {
      r       = ((mp_word) *tmpt) + ((mp_word) u);
      *tmpt++ = (mp_digit) (r & ((mp_word) MP_MASK));
      u       = (mp_digit)(r >> ((mp_word) DIGIT_BIT));
    }
  }

  mp_clamp (&t);
  mp_exch (&t, b);
  mp_clear (&t);
  return MP_OKAY;
}

/* multiplies |a| * |b| and does not compute the lower digs digits
 * [meant to get the higher part of the product]
 */
static int s_mp_mul_high_digs (mp_int * a, mp_int * b, mp_int * c, int digs)
{
  mp_int   t;
  int      res, pa, pb, ix, iy;
  mp_digit u;
  mp_word  r;
  mp_digit tmpx, *tmpt, *tmpy;

  /* can we use the fast multiplier? */
#ifdef BN_FAST_S_MP_MUL_HIGH_DIGS_C
  if (((a->used + b->used + 1) < MP_WARRAY)
      && MIN (a->used, b->used) <
      (1 << ((CHAR_BIT * sizeof (mp_word)) - (2 * DIGIT_BIT)))) {
    return fast_s_mp_mul_high_digs (a, b, c, digs);
  }
#endif

  if ((res = mp_init_size (&t, a->used + b->used + 1)) != MP_OKAY) {
    return res;
  }
  t.used = a->used + b->used + 1;

  pa = a->used;
  pb = b->used;
  for (ix = 0; ix < pa; ix++) {
    /* clear the carry */
    u = 0;

    /* left hand side of A[ix] * B[iy] */
    tmpx = a->dp[ix];

    /* alias to the address of where the digits will be stored */
    tmpt = &(t.dp[digs]);

    /* alias for where to read the right hand side from */
    tmpy = b->dp + (digs - ix);

    for (iy = digs - ix; iy < pb; iy++) {
      /* calculate the double precision result */
      r       = ((mp_word)*tmpt) +
                ((mp_word)tmpx) * ((mp_word)*tmpy++) +
                ((mp_word) u);

      /* get the lower part */
      *tmpt++ = (mp_digit) (r & ((mp_word) MP_MASK));

      /* carry the carry */
      u       = (mp_digit) (r >> ((mp_word) DIGIT_BIT));
    }
    *tmpt = u;
  }
  mp_clamp (&t);
  mp_exch (&t, c);
  mp_clear (&t);
  return MP_OKAY;
}

#ifdef BN_MP_MONTGOMERY_SETUP_C
/* sets up the montgomery reduction stuff */
static int
mp_montgomery_setup (mp_int * n, mp_digit * rho)
{
  mp_digit x, b;

/* fast inversion mod 2**k
 *
 * Based on the fact that
 *
 * XA = 1 (mod 2**n)  =>  (X(2-XA)) A = 1 (mod 2**2n)
 *                    =>  2*X*A - X*X*A*A = 1
 *                    =>  2*(1) - (1)     = 1
 */
  b = n->dp[0];

  if ((b & 1) == 0) {
    return MP_VAL;
  }

  x = (((b + 2) & 4) << 1) + b; /* here x*a==1 mod 2**4 */
  x *= 2 - b * x;               /* here x*a==1 mod 2**8 */
#if !defined(MP_8BIT)
  x *= 2 - b * x;               /* here x*a==1 mod 2**16 */
#endif
#if defined(MP_64BIT) || !(defined(MP_8BIT) || defined(MP_16BIT))
  x *= 2 - b * x;               /* here x*a==1 mod 2**32 */
#endif
#ifdef MP_64BIT
  x *= 2 - b * x;               /* here x*a==1 mod 2**64 */
#endif

  /* rho = -1/m mod b */
  *rho = (unsigned long)(((mp_word)1 << ((mp_word) DIGIT_BIT)) - x) & MP_MASK;

  return MP_OKAY;
}
#endif
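/* A minimal standalone sketch (not part of libtommath.c) of the Newton /
 * Hensel-lifting trick used in mp_montgomery_setup above: starting from an
 * inverse of an odd b that is correct mod 2**4, each step x *= 2 - b*x
 * doubles the number of correct low bits.  Working in plain uint32_t, so
 * everything is implicitly reduced mod 2**32, is an assumption made for this
 * example, as is the guard macro.
 */
#ifdef DEMO_MONT_RHO_SKETCH
#include <stdint.h>
#include <stdio.h>

int main(void)
{
  uint32_t b = 0x10001u;                   /* any odd value works           */
  uint32_t x = (((b + 2) & 4) << 1) + b;   /* x*b == 1 (mod 2**4)           */

  x *= 2 - b * x;                          /* x*b == 1 (mod 2**8)           */
  x *= 2 - b * x;                          /* x*b == 1 (mod 2**16)          */
  x *= 2 - b * x;                          /* x*b == 1 (mod 2**32)          */

  /* rho = -1/b mod 2**32, the per-digit constant Montgomery reduction uses */
  printf("b*x mod 2**32 = %u (expect 1)\n", (unsigned)(b * x));
  printf("rho           = 0x%08x\n", (unsigned)(0u - x));
  return 0;
}
#endif /* DEMO_MONT_RHO_SKETCH */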
#ifdef BN_FAST_MP_MONTGOMERY_REDUCE_C
/* computes xR**-1 == x (mod N) via Montgomery Reduction
 *
 * This is an optimized implementation of montgomery_reduce
 * which uses the comba method to quickly calculate the columns of the
 * reduction.
 *
 * Based on Algorithm 14.32 on pp.601 of HAC.
 */
int fast_mp_montgomery_reduce (mp_int * x, mp_int * n, mp_digit rho)
{
  int     ix, res, olduse;
  mp_word W[MP_WARRAY];

  /* get old used count */
  olduse = x->used;

  /* grow x as required */
  if (x->alloc < n->used + 1) {
    if ((res = mp_grow (x, n->used + 1)) != MP_OKAY) {
      return res;
    }
  }

  /* first we have to get the digits of the input into
   * an array of double precision words W[...]
   */
  {
    register mp_word *_W;
    register mp_digit *tmpx;

    /* alias for the W[] array */
    _W   = W;

    /* alias for the digits of x */
    tmpx = x->dp;

    /* copy the digits of a into W[0..a->used-1] */
    for (ix = 0; ix < x->used; ix++) {
      *_W++ = *tmpx++;
    }

    /* zero the high words of W[a->used..m->used*2] */
    for (; ix < n->used * 2 + 1; ix++) {
      *_W++ = 0;
    }
  }

  /* now we proceed to zero successive digits
   * from the least significant upwards
   */
  for (ix = 0; ix < n->used; ix++) {
    /* mu = ai * m' mod b
     *
     * We avoid a double precision multiplication (which isn't required)
     * by casting the value down to a mp_digit.  Note this requires
     * that W[ix-1] have the carry cleared (see after the inner loop)
     */
    register mp_digit mu;
    mu = (mp_digit) (((W[ix] & MP_MASK) * rho) & MP_MASK);

    /* a = a + mu * m * b**i
     *
     * This is computed in place and on the fly.  The multiplication
     * by b**i is handled by offsetting which columns the results
     * are added to.
     *
     * Note the comba method normally doesn't handle carries in the
     * inner loop.  In this case we fix the carry from the previous
     * column since the Montgomery reduction requires digits of the
     * result (so far) [see above] to work.  This is
     * handled by fixing up one carry after the inner loop.  The
     * carry fixups are done in order so after these loops the
     * first m->used words of W[] have the carries fixed
     */
    {
      register int iy;
      register mp_digit *tmpn;
      register mp_word *_W;

      /* alias for the digits of the modulus */
      tmpn = n->dp;

      /* Alias for the columns set by an offset of ix */
      _W = W + ix;

      /* inner loop */
      for (iy = 0; iy < n->used; iy++) {
          *_W++ += ((mp_word)mu) * ((mp_word)*tmpn++);
      }
    }

    /* now fix carry for next digit, W[ix+1] */
    W[ix + 1] += W[ix] >> ((mp_word) DIGIT_BIT);
  }

  /* now we have to propagate the carries and
   * shift the words downward [all those least
   * significant digits we zeroed].
   */
  {
    register mp_digit *tmpx;
    register mp_word *_W, *_W1;

    /* now fix rest of carries */

    /* alias for current word */
    _W1 = W + ix;

    /* alias for next word, where the carry goes */
    _W = W + ++ix;

    for (; ix <= n->used * 2 + 1; ix++) {
      *_W++ += *_W1++ >> ((mp_word) DIGIT_BIT);
    }

    /* copy out, A = A/b**n
     *
     * The result is A/b**n but instead of converting from an
     * array of mp_word to mp_digit then calling mp_rshd
     * we just copy them in the right order
     */

    /* alias for destination word */
    tmpx = x->dp;

    /* alias for shifted double precision result */
    _W = W + n->used;

    for (ix = 0; ix < n->used + 1; ix++) {
      *tmpx++ = (mp_digit)(*_W++ & ((mp_word) MP_MASK));
    }

    /* zero oldused digits, if the input a was larger than
     * m->used+1 we'll have to clear the digits
     */
    for (; ix < olduse; ix++) {
      *tmpx++ = 0;
    }
  }

  /* set the max used and clamp */
  x->used = n->used + 1;
  mp_clamp (x);

  /* if A >= m then A = A - m */
  if (mp_cmp_mag (x, n) != MP_LT) {
    return s_mp_sub (x, n, x);
  }
  return MP_OKAY;
}
#endif

#ifdef BN_MP_MUL_2_C
/* b = a*2 */
static int mp_mul_2(mp_int * a, mp_int * b)
{
  int x, res, oldused;

  /* grow to accommodate result */
  if (b->alloc < a->used + 1) {
    if ((res = mp_grow (b, a->used + 1)) != MP_OKAY) {
      return res;
    }
  }

  oldused = b->used;
  b->used = a->used;

  {
    register mp_digit r, rr, *tmpa, *tmpb;

    /* alias for source */
    tmpa = a->dp;

    /* alias for dest */
    tmpb = b->dp;

    /* carry */
    r = 0;
    for (x = 0; x < a->used; x++) {

      /* get what will be the *next* carry bit from the
       * MSB of the current digit
       */
      rr = *tmpa >> ((mp_digit)(DIGIT_BIT - 1));

      /* now shift up this digit, add in the carry [from the previous] */
      *tmpb++ = ((*tmpa++ << ((mp_digit)1)) | r) & MP_MASK;

      /* copy the carry that would be from the source
       * digit into the next iteration
       */
      r = rr;
    }

    /* new leading digit? */
    if (r != 0) {
      /* add a MSB which is always 1 at this point */
      *tmpb = 1;
      ++(b->used);
    }

    /* now zero any excess digits on the destination
     * that we didn't write to
     */
    tmpb = b->dp + b->used;
    for (x = b->used; x < oldused; x++) {
      *tmpb++ = 0;
    }
  }
  b->sign = a->sign;
  return MP_OKAY;
}
#endif
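/* A minimal standalone sketch (not part of libtommath.c) of how the
 * Montgomery routines above are typically driven: set up rho, map the
 * operands into Montgomery form, multiply, then reduce twice to strip both
 * factors of R.  It assumes the public LibTomMath API from <tommath.h>
 * (mp_montgomery_setup, mp_montgomery_calc_normalization,
 * mp_montgomery_reduce, mp_mulmod); in this embedded copy the corresponding
 * helpers are mostly static, so the sketch is illustrative rather than
 * something to link against this file.  Error checks are omitted for brevity.
 */
#ifdef DEMO_MONTGOMERY_USAGE_SKETCH
#include <stdio.h>
#include <tommath.h>

int main(void)
{
  mp_int a, b, n, R, aR, bR, t, check;
  mp_digit rho;

  mp_init_multi(&a, &b, &n, &R, &aR, &bR, &t, &check, NULL);
  mp_read_radix(&a, "123456789", 10);
  mp_read_radix(&b, "987654321", 10);
  mp_read_radix(&n, "1000000007", 10);          /* modulus must be odd     */

  mp_montgomery_setup(&n, &rho);                /* rho = -1/n mod base     */
  mp_montgomery_calc_normalization(&R, &n);     /* R mod n                 */

  /* map the operands into Montgomery form: aR, bR (mod n) */
  mp_mulmod(&a, &R, &n, &aR);
  mp_mulmod(&b, &R, &n, &bR);

  /* (aR)(bR) = a*b*R^2, so two reductions strip both factors of R */
  mp_mul(&aR, &bR, &t);
  mp_montgomery_reduce(&t, &n, rho);            /* now t == a*b*R mod n    */
  mp_montgomery_reduce(&t, &n, rho);            /* now t == a*b   mod n    */

  mp_mulmod(&a, &b, &n, &check);
  printf("montgomery path %s plain mulmod\n",
         (mp_cmp(&t, &check) == MP_EQ) ? "matches" : "differs from");

  mp_clear_multi(&a, &b, &n, &R, &aR, &bR, &t, &check, NULL);
  return 0;
}
#endif /* DEMO_MONTGOMERY_USAGE_SKETCH */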