📄 libtommath.c
字号:
if ((err = redux (&res, P, &mu)) != MP_OKAY) { goto LBL_RES; } } /* then multiply */ if ((err = mp_mul (&res, &M[bitbuf], &res)) != MP_OKAY) { goto LBL_RES; } if ((err = redux (&res, P, &mu)) != MP_OKAY) { goto LBL_RES; } /* empty window and reset */ bitcpy = 0; bitbuf = 0; mode = 1; } } /* if bits remain then square/multiply */ if (mode == 2 && bitcpy > 0) { /* square then multiply if the bit is set */ for (x = 0; x < bitcpy; x++) { if ((err = mp_sqr (&res, &res)) != MP_OKAY) { goto LBL_RES; } if ((err = redux (&res, P, &mu)) != MP_OKAY) { goto LBL_RES; } bitbuf <<= 1; if ((bitbuf & (1 << winsize)) != 0) { /* then multiply */ if ((err = mp_mul (&res, &M[1], &res)) != MP_OKAY) { goto LBL_RES; } if ((err = redux (&res, P, &mu)) != MP_OKAY) { goto LBL_RES; } } } } mp_exch (&res, Y); err = MP_OKAY;LBL_RES:mp_clear (&res);LBL_MU:mp_clear (&mu);LBL_M: mp_clear(&M[1]); for (x = 1<<(winsize-1); x < (1 << winsize); x++) { mp_clear (&M[x]); } return err;}/* computes b = a*a */static int mp_sqr (mp_int * a, mp_int * b){ int res;#ifdef BN_MP_TOOM_SQR_C /* use Toom-Cook? */ if (a->used >= TOOM_SQR_CUTOFF) { res = mp_toom_sqr(a, b); /* Karatsuba? */ } else #endif#ifdef BN_MP_KARATSUBA_SQR_Cif (a->used >= KARATSUBA_SQR_CUTOFF) { res = mp_karatsuba_sqr (a, b); } else #endif {#ifdef BN_FAST_S_MP_SQR_C /* can we use the fast comba multiplier? */ if ((a->used * 2 + 1) < MP_WARRAY && a->used < (1 << (sizeof(mp_word) * CHAR_BIT - 2*DIGIT_BIT - 1))) { res = fast_s_mp_sqr (a, b); } else#endif#ifdef BN_S_MP_SQR_C res = s_mp_sqr (a, b);#else#error mp_sqr could fail res = MP_VAL;#endif } b->sign = MP_ZPOS; return res;}/* reduces a modulo n where n is of the form 2**p - d This differs from reduce_2k since "d" can be larger than a single digit.*/static int mp_reduce_2k_l(mp_int *a, mp_int *n, mp_int *d){ mp_int q; int p, res; if ((res = mp_init(&q)) != MP_OKAY) { return res; } p = mp_count_bits(n); top: /* q = a/2**p, a = a mod 2**p */ if ((res = mp_div_2d(a, p, &q, a)) != MP_OKAY) { goto ERR; } /* q = q * d */ if ((res = mp_mul(&q, d, &q)) != MP_OKAY) { goto ERR; } /* a = a + q */ if ((res = s_mp_add(a, &q, a)) != MP_OKAY) { goto ERR; } if (mp_cmp_mag(a, n) != MP_LT) { s_mp_sub(a, n, a); goto top; } ERR: mp_clear(&q); return res;}/* determines the setup value */static int mp_reduce_2k_setup_l(mp_int *a, mp_int *d){ int res; mp_int tmp; if ((res = mp_init(&tmp)) != MP_OKAY) { return res; } if ((res = mp_2expt(&tmp, mp_count_bits(a))) != MP_OKAY) { goto ERR; } if ((res = s_mp_sub(&tmp, a, d)) != MP_OKAY) { goto ERR; } ERR: mp_clear(&tmp); return res;}/* computes a = 2**b * * Simple algorithm which zeroes the int, grows it then just sets one bit * as required. */static int mp_2expt (mp_int * a, int b){ int res; /* zero a as per default */ mp_zero (a); /* grow a to accomodate the single bit */ if ((res = mp_grow (a, b / DIGIT_BIT + 1)) != MP_OKAY) { return res; } /* set the used count of where the bit will go */ a->used = b / DIGIT_BIT + 1; /* put the single bit in its place */ a->dp[b / DIGIT_BIT] = ((mp_digit)1) << (b % DIGIT_BIT); return MP_OKAY;}/* pre-calculate the value required for Barrett reduction * For a given modulus "b" it calulates the value required in "a" */static int mp_reduce_setup (mp_int * a, mp_int * b){ int res; if ((res = mp_2expt (a, b->used * 2 * DIGIT_BIT)) != MP_OKAY) { return res; } return mp_div (a, b, a, NULL);}/* reduces x mod m, assumes 0 < x < m**2, mu is * precomputed via mp_reduce_setup. * From HAC pp.604 Algorithm 14.42 */static int mp_reduce (mp_int * x, mp_int * m, mp_int * mu){ mp_int q; int res, um = m->used; /* q = x */ if ((res = mp_init_copy (&q, x)) != MP_OKAY) { return res; } /* q1 = x / b**(k-1) */ mp_rshd (&q, um - 1); /* according to HAC this optimization is ok */ if (((unsigned long) um) > (((mp_digit)1) << (DIGIT_BIT - 1))) { if ((res = mp_mul (&q, mu, &q)) != MP_OKAY) { goto CLEANUP; } } else {#ifdef BN_S_MP_MUL_HIGH_DIGS_C if ((res = s_mp_mul_high_digs (&q, mu, &q, um)) != MP_OKAY) { goto CLEANUP; }#elif defined(BN_FAST_S_MP_MUL_HIGH_DIGS_C) if ((res = fast_s_mp_mul_high_digs (&q, mu, &q, um)) != MP_OKAY) { goto CLEANUP; }#else { #error mp_reduce would always fail res = MP_VAL; goto CLEANUP; }#endif } /* q3 = q2 / b**(k+1) */ mp_rshd (&q, um + 1); /* x = x mod b**(k+1), quick (no division) */ if ((res = mp_mod_2d (x, DIGIT_BIT * (um + 1), x)) != MP_OKAY) { goto CLEANUP; } /* q = q * m mod b**(k+1), quick (no division) */ if ((res = s_mp_mul_digs (&q, m, &q, um + 1)) != MP_OKAY) { goto CLEANUP; } /* x = x - q */ if ((res = mp_sub (x, &q, x)) != MP_OKAY) { goto CLEANUP; } /* If x < 0, add b**(k+1) to it */ if (mp_cmp_d (x, 0) == MP_LT) { mp_set (&q, 1); if ((res = mp_lshd (&q, um + 1)) != MP_OKAY) { goto CLEANUP; } if ((res = mp_add (x, &q, x)) != MP_OKAY) { goto CLEANUP; } } /* Back off if it's too big */ while (mp_cmp (x, m) != MP_LT) { if ((res = s_mp_sub (x, m, x)) != MP_OKAY) { goto CLEANUP; } } CLEANUP: mp_clear (&q); return res;}/* multiplies |a| * |b| and only computes upto digs digits of result * HAC pp. 595, Algorithm 14.12 Modified so you can control how * many digits of output are created. */static int s_mp_mul_digs (mp_int * a, mp_int * b, mp_int * c, int digs){ mp_int t; int res, pa, pb, ix, iy; mp_digit u; mp_word r; mp_digit tmpx, *tmpt, *tmpy; /* can we use the fast multiplier? */ if (((digs) < MP_WARRAY) && MIN (a->used, b->used) < (1 << ((CHAR_BIT * sizeof (mp_word)) - (2 * DIGIT_BIT)))) { return fast_s_mp_mul_digs (a, b, c, digs); } if ((res = mp_init_size (&t, digs)) != MP_OKAY) { return res; } t.used = digs; /* compute the digits of the product directly */ pa = a->used; for (ix = 0; ix < pa; ix++) { /* set the carry to zero */ u = 0; /* limit ourselves to making digs digits of output */ pb = MIN (b->used, digs - ix); /* setup some aliases */ /* copy of the digit from a used within the nested loop */ tmpx = a->dp[ix]; /* an alias for the destination shifted ix places */ tmpt = t.dp + ix; /* an alias for the digits of b */ tmpy = b->dp; /* compute the columns of the output and propagate the carry */ for (iy = 0; iy < pb; iy++) { /* compute the column as a mp_word */ r = ((mp_word)*tmpt) + ((mp_word)tmpx) * ((mp_word)*tmpy++) + ((mp_word) u); /* the new column is the lower part of the result */ *tmpt++ = (mp_digit) (r & ((mp_word) MP_MASK)); /* get the carry word from the result */ u = (mp_digit) (r >> ((mp_word) DIGIT_BIT)); } /* set carry if it is placed below digs */ if (ix + iy < digs) { *tmpt = u; } } mp_clamp (&t); mp_exch (&t, c); mp_clear (&t); return MP_OKAY;}/* Fast (comba) multiplier * * This is the fast column-array [comba] multiplier. It is * designed to compute the columns of the product first * then handle the carries afterwards. This has the effect * of making the nested loops that compute the columns very * simple and schedulable on super-scalar processors. * * This has been modified to produce a variable number of * digits of output so if say only a half-product is required * you don't have to compute the upper half (a feature * required for fast Barrett reduction). * * Based on Algorithm 14.12 on pp.595 of HAC. * */static int fast_s_mp_mul_digs (mp_int * a, mp_int * b, mp_int * c, int digs){ int olduse, res, pa, ix, iz; mp_digit W[MP_WARRAY]; register mp_word _W; /* grow the destination as required */ if (c->alloc < digs) { if ((res = mp_grow (c, digs)) != MP_OKAY) { return res; } } /* number of output digits to produce */ pa = MIN(digs, a->used + b->used); /* clear the carry */ _W = 0; for (ix = 0; ix < pa; ix++) { int tx, ty; int iy; mp_digit *tmpx, *tmpy; /* get offsets into the two bignums */ ty = MIN(b->used-1, ix); tx = ix - ty; /* setup temp aliases */ tmpx = a->dp + tx; tmpy = b->dp + ty; /* this is the number of times the loop will iterrate, essentially while (tx++ < a->used && ty-- >= 0) { ... } */ iy = MIN(a->used-tx, ty+1); /* execute loop */ for (iz = 0; iz < iy; ++iz) { _W += ((mp_word)*tmpx++)*((mp_word)*tmpy--); } /* store term */ W[ix] = ((mp_digit)_W) & MP_MASK; /* make next carry */ _W = _W >> ((mp_word)DIGIT_BIT); } /* setup dest */ olduse = c->used; c->used = pa; { register mp_digit *tmpc; tmpc = c->dp; for (ix = 0; ix < pa+1; ix++) { /* now extract the previous digit [below the carry] */ *tmpc++ = W[ix]; } /* clear unused digits [that existed in the old copy of c] */ for (; ix < olduse; ix++) { *tmpc++ = 0; } } mp_clamp (c); return MP_OKAY;}/* init an mp_init for a given size */static int mp_init_size (mp_int * a, int size){ int x; /* pad size so there are always extra digits */ size += (MP_PREC * 2) - (size % MP_PREC); /* alloc mem */ a->dp = OPT_CAST(mp_digit) XMALLOC (sizeof (mp_digit) * size); if (a->dp == NULL) { return MP_MEM; } /* set the members */ a->used = 0; a->alloc = size; a->sign = MP_ZPOS; /* zero the digits */ for (x = 0; x < size; x++) { a->dp[x] = 0; } return MP_OKAY;}/* low level squaring, b = a*a, HAC pp.596-597, Algorithm 14.16 */static int s_mp_sqr (mp_int * a, mp_int * b){ mp_int t; int res, ix, iy, pa; mp_word r; mp_digit u, tmpx, *tmpt; pa = a->used; if ((res = mp_init_size (&t, 2*pa + 1)) != MP_OKAY) { return res; } /* default used is maximum possible size */ t.used = 2*pa + 1; for (ix = 0; ix < pa; ix++) { /* first calculate the digit at 2*ix */ /* calculate double precision result */ r = ((mp_word) t.dp[2*ix]) + ((mp_word)a->dp[ix])*((mp_word)a->dp[ix]); /* store lower part in result */ t.dp[ix+ix] = (mp_digit) (r & ((mp_word) MP_MASK)); /* get the carry */ u = (mp_digit)(r >> ((mp_word) DIGIT_BIT)); /* left hand side of A[ix] * A[iy] */ tmpx = a->dp[ix]; /* alias for where to store the results */ tmpt = t.dp + (2*ix + 1); for (iy = ix + 1; iy < pa; iy++) { /* first calculate the product */ r = ((mp_word)tmpx) * ((mp_word)a->dp[iy]); /* now calculate the double precision result, note we use * addition instead of *2 since it's easier to optimize */ r = ((mp_word) *tmpt) + r + r + ((mp_word) u); /* store lower part */ *tmpt++ = (mp_digit) (r & ((mp_word) MP_MASK)); /* get carry */ u = (mp_digit)(r >> ((mp_word) DIGIT_BIT)); } /* propagate upwards */ while (u != ((mp_digit) 0)) { r = ((mp_word) *tmpt) + ((mp_word) u); *tmpt++ = (mp_digit) (r & ((mp_word) MP_MASK)); u = (mp_digit)(r >> ((mp_word) DIGIT_BIT)); } } mp_clamp (&t); mp_exch (&t, b); mp_clear (&t); return MP_OKAY;}/* multiplies |a| * |b| and does not compute the lower digs digits * [meant to get the higher part of the product] */static int s_mp_mul_high_digs (mp_int * a, mp_int * b, mp_int * c, int digs){ mp_int t; int res, pa, pb, ix, iy; mp_digit u; mp_word r; mp_digit tmpx, *tmpt, *tmpy; /* can we use the fast multiplier? */#ifdef BN_FAST_S_MP_MUL_HIGH_DIGS_C if (((a->used + b->used + 1) < MP_WARRAY) && MIN (a->used, b->used) < (1 << ((CHAR_BIT * sizeof (mp_word)) - (2 * DIGIT_BIT)))) { return fast_s_mp_mul_high_digs (a, b, c, digs); }#endif if ((res = mp_init_size (&t, a->used + b->used + 1)) != MP_OKAY) { return res; } t.used = a->used + b->used + 1; pa = a->used; pb = b->used; for (ix = 0; ix < pa; ix++) { /* clear the carry */ u = 0; /* left hand side of A[ix] * B[iy] */ tmpx = a->dp[ix]; /* alias to the address of where the digits will be stored */ tmpt = &(t.dp[digs]); /* alias for where to read the right hand side from */ tmpy = b->dp + (digs - ix); for (iy = digs - ix; iy < pb; iy++) { /* calculate the double precision result */ r = ((mp_word)*tmpt) + ((mp_word)tmpx) * ((mp_word)*tmpy++) + ((mp_word) u); /* get the lower part */ *tmpt++ = (mp_digit) (r & ((mp_word) MP_MASK)); /* carry the carry */ u = (mp_digit) (r >> ((mp_word) DIGIT_BIT)); } *tmpt = u; } mp_clamp (&t); mp_exch (&t, c); mp_clear (&t); return MP_OKAY;}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -