📄 mpilib.cpp
字号:
Also refined by Zhahai Stewart to reduce the number of subtracts.
Modified by Raymond Brand to reduce the number of SLOP_BITS by 1.
It performs a multiply combined with a modulo operation, without
going into "double precision". It is faster than the Russian peasant
method, and still works well on machines that lack a fast hardware
multiply instruction.
*/
/* The following support functions, macros, and data structures
are used only by Merritt's modmult algorithm... */
/* Shift r1 1 whole unit to the left. Used only by modmult function. */
static void mp_lshift_unit(register unitptr r1)
{
register short precision;
register unitptr r2;
precision = global_precision;
make_msbptr(r1, precision);
r2 = r1;
while (--precision)
*post_lowerunit(r1) = *pre_lowerunit(r2);
*r1 = 0;
} /* mp_lshift_unit */
/* moduli_buf contains shifted images of the modulus, set by stage_modulus */
static unit moduli_buf[UNITSIZE - 1][MAX_UNIT_PRECISION] =
{0};
static unitptr moduli[UNITSIZE] = /* contains pointers into moduli_buf */
{0, moduli_buf[0], moduli_buf[1], moduli_buf[2],
moduli_buf[3], moduli_buf[4], moduli_buf[5], moduli_buf[6]
#ifndef UNIT8
,moduli_buf[7], moduli_buf[8], moduli_buf[9], moduli_buf[10],
moduli_buf[11], moduli_buf[12], moduli_buf[13], moduli_buf[14]
#ifndef UNIT16 /* and not UNIT8 */
,moduli_buf[15], moduli_buf[16], moduli_buf[17], moduli_buf[18],
moduli_buf[19], moduli_buf[20], moduli_buf[21], moduli_buf[22],
moduli_buf[23], moduli_buf[24], moduli_buf[25], moduli_buf[26],
moduli_buf[27], moduli_buf[28], moduli_buf[29], moduli_buf[30]
#endif /* UNIT16 and UNIT8 not defined */
#endif /* UNIT8 not defined */
};
/*
* To optimize msubs, need following 2 unit arrays, each filled
* with the most significant unit and next-to-most significant unit
* of the preshifted images of the modulus.
*/
static unit msu_moduli[UNITSIZE] =
{0}; /* most signif. unit */
static unit nmsu_moduli[UNITSIZE] =
{0}; /* next-most signif. unit */
/*
* mpdbuf contains preshifted images of the multiplicand, mod n.
* It is used only by mp_modmult. It could be staticly declared
* inside of mp_modmult, but we put it outside mp_modmult so that
* it can be wiped clean by modmult_burn(), which is called at the
* end of mp_modexp. This is so that no sensitive data is left in
* memory after the program exits.
*/
static unit mpdbuf[UNITSIZE - 1][MAX_UNIT_PRECISION] =
{0};
/*
* Computes UNITSIZE images of r, each shifted left 1 more bit.
* Used only by modmult function.
*/
static void stage_mp_images(unitptr images[UNITSIZE], unitptr r)
{
short int i;
images[0] = r; /* no need to move the first image, just copy ptr */
for (i = 1; i < UNITSIZE; i++) {
mp_move(images[i], images[i - 1]);
mp_shift_left(images[i]);
msub(images[i], moduli[0]);
}
} /* stage_mp_images */
/*
* Computes UNITSIZE images of modulus, each shifted left 1 more bit.
* Before calling mp_modmult, you must first stage the moduli images by
* calling stage_modulus. n is the pointer to the modulus.
* Assumes that global_precision has already been adjusted to the
* size of the modulus, plus SLOP_BITS.
*/
int stage_merritt_modulus(const unit * n)
{
short int i;
unitptr msu; /* ptr to most significant unit, for faster msubs */
moduli[0] = n; /* no need to move the first image, just copy ptr */
/* used by optimized msubs macro... */
msu = msbptr(n, global_precision); /* needed by msubs */
msu_moduli[0] = *post_lowerunit(msu);
nmsu_moduli[0] = *msu;
for (i = 1; i < UNITSIZE; i++) {
mp_move(moduli[i], moduli[i - 1]);
mp_shift_left(moduli[i]);
/* used by optimized msubs macro... */
msu = msbptr(moduli[i], global_precision); /* needed by msubs */
msu_moduli[i] = *post_lowerunit(msu); /* for faster msubs */
nmsu_moduli[i] = *msu;
}
return 0; /* normal return */
} /* stage_merritt_modulus */
/* The following macros, sniffadd and msubs, are used by modmult... */
#define sniffadd(i) if (*multiplier & power_of_2(i)) mp_add(prod,mpd[i])
/* Unoptimized msubs macro (msubs0) follows... */
/* #define msubs0(i) msub(prod,moduli[i])
*/
/*
* To optimize msubs, compare the most significant units of the
* product and the shifted modulus before deciding to call the full
* mp_compare routine. Better still, compare the upper two units
* before deciding to call mp_compare.
* Optimization of msubs relies heavily on standard C left-to-right
* parsimonious evaluation of logical expressions.
*/
/* Partially-optimized msubs macro (msubs1) follows... */
/* #define msubs1(i) if ( \
((p_m = (*msu_prod-msu_moduli[i])) >= 0) && \
(p_m || (mp_compare(prod,moduli[i]) >= 0) ) \
) mp_sub(prod,moduli[i])
*/
/* Fully-optimized msubs macro (msubs2) follows... */
#define msubs(i) if (((p_m = *msu_prod-msu_moduli[i])>0) || ( \
(p_m==0) && ( (*nmsu_prod>nmsu_moduli[i]) || ( \
(*nmsu_prod==nmsu_moduli[i]) && ((mp_compare(prod,moduli[i]) >= 0)) ))) ) \
mp_sub(prod,moduli[i])
/*
* Performs combined multiply/modulo operation.
* Computes: prod = (multiplicand*multiplier) mod modulus
* WARNING: All the arguments must be less than the modulus!
* Assumes the modulus has been predefined by first calling
* stage_modulus.
*/
int merritt_modmult(register unitptr prod,
unitptr multiplicand, register unitptr multiplier)
{
/* p_m, msu_prod, and nmsu_prod are used by the optimized msubs macro... */
register signedunit p_m;
register unitptr msu_prod; /* ptr to most significant unit of product */
register unitptr nmsu_prod; /* next-most signif. unit of product */
short mprec; /* precision of multiplier, in units */
/* Array mpd contains a list of pointers to preshifted images of
the multiplicand: */
static unitptr mpd[UNITSIZE] =
{
0, mpdbuf[0], mpdbuf[1], mpdbuf[2],
mpdbuf[3], mpdbuf[4], mpdbuf[5], mpdbuf[6]
#ifndef UNIT8
,mpdbuf[7], mpdbuf[8], mpdbuf[9], mpdbuf[10],
mpdbuf[11], mpdbuf[12], mpdbuf[13], mpdbuf[14]
#ifndef UNIT16 /* and not UNIT8 */
,mpdbuf[15], mpdbuf[16], mpdbuf[17], mpdbuf[18],
mpdbuf[19], mpdbuf[20], mpdbuf[21], mpdbuf[22],
mpdbuf[23], mpdbuf[24], mpdbuf[25], mpdbuf[26],
mpdbuf[27], mpdbuf[28], mpdbuf[29], mpdbuf[30]
#endif /* UNIT16 and UNIT8 not defined */
#endif /* UNIT8 not defined */
};
/* Compute preshifted images of multiplicand, mod n: */
stage_mp_images(mpd, multiplicand);
/* To optimize msubs, set up msu_prod and nmsu_prod: */
msu_prod = msbptr(prod, global_precision); /* Get ptr to MSU of prod */
nmsu_prod = msu_prod;
post_lowerunit(nmsu_prod); /* Get next-MSU of prod */
/*
* To understand this algorithm, it would be helpful to first
* study the conventional Russian peasant modmult algorithm.
* This one does about the same thing as Russian peasant, but
* is organized differently to save some steps. It loops
* through the multiplier a word (unit) at a time, instead of
* a bit at a time. It word-shifts the product instead of
* bit-shifting it, so it should be faster. It also does about
* half as many subtracts as Russian peasant.
*/
mp_init(prod, 0); /* Initialize product to 0. */
/*
* The way mp_modmult is actually used in cryptographic
* applications, there will NEVER be a zero multiplier or
* multiplicand. So there is no need to optimize for that
* condition.
*/
/* if (testeq(multiplicand,0))
return 0; *//* zero multiplicand means zero product */
/* Normalize and compute number of units in multiplier first: */
normalize(multiplier, mprec);
if (mprec == 0) /* if precision of multiplier is 0 */
return 0; /* zero multiplier means zero product */
make_msbptr(multiplier, mprec); /* start at MSU of multiplier */
while (mprec--) { /* Loop for the number of units in the multiplier */
/*
* Shift the product one whole unit to the left.
* This is part of the multiply phase of modmult.
*/
mp_lshift_unit(prod);
/*
* The product may have grown by as many as UNITSIZE
* bits. That's why we have global_precision set to the
* size of the modulus plus UNITSIZE slop bits.
* Now reduce the product back down by conditionally
* subtracting the preshifted images of the modulus.
* This is part of the modulo reduction phase of modmult.
* The following loop is unrolled for speed, using macros...
for (i=UNITSIZE-1; i>=LOG_UNITSIZE; i--)
if (mp_compare(prod,moduli[i]) >= 0)
mp_sub(prod,moduli[i]);
*/
#ifndef UNIT8
#ifndef UNIT16 /* and not UNIT8 */
msubs(31);
msubs(30);
msubs(29);
msubs(28);
msubs(27);
msubs(26);
msubs(25);
msubs(24);
msubs(23);
msubs(22);
msubs(21);
msubs(20);
msubs(19);
msubs(18);
msubs(17);
msubs(16);
#endif /* not UNIT16 and not UNIT8 */
msubs(15);
msubs(14);
msubs(13);
msubs(12);
msubs(11);
msubs(10);
msubs(9);
msubs(8);
#endif /* not UNIT8 */
msubs(7);
msubs(6);
msubs(5);
#ifndef UNIT32
msubs(4);
#ifndef UNIT16
msubs(3);
#endif
#endif
/*
* Sniff each bit in the current unit of the multiplier,
* and conditionally add the corresponding preshifted
* image of the multiplicand to the product.
* This is also part of the multiply phase of modmult.
*
* The following loop is unrolled for speed, using macros...
for (i=UNITSIZE-1; i>=0; i--)
if (*multiplier & power_of_2(i))
mp_add(prod,mpd[i]);
*/
#ifndef UNIT8
#ifndef UNIT16 /* and not UNIT8 */
sniffadd(31);
sniffadd(30);
sniffadd(29);
sniffadd(28);
sniffadd(27);
sniffadd(26);
sniffadd(25);
sniffadd(24);
sniffadd(23);
sniffadd(22);
sniffadd(21);
sniffadd(20);
sniffadd(19);
sniffadd(18);
sniffadd(17);
sniffadd(16);
#endif /* not UNIT16 and not UNIT8 */
sniffadd(15);
sniffadd(14);
sniffadd(13);
sniffadd(12);
sniffadd(11);
sniffadd(10);
sniffadd(9);
sniffadd(8);
#endif /* not UNIT8 */
sniffadd(7);
sniffadd(6);
sniffadd(5);
sniffadd(4);
sniffadd(3);
sniffadd(2);
sniffadd(1);
sniffadd(0);
/*
* The product may have grown by as many as LOG_UNITSIZE+1
* bits.
* Now reduce the product back down by conditionally
* subtracting LOG_UNITSIZE+1 preshifted images of the
* modulus. This is the modulo reduction phase of modmult.
*
* The following loop is unrolled for speed, using macros...
for (i=LOG_UNITSIZE; i>=0; i--)
if (mp_compare(prod,moduli[i]) >= 0)
mp_sub(prod,moduli[i]);
*/
#ifndef UNIT8
#ifndef UNIT16
msubs(5);
#endif
msubs(4);
#endif
msubs(3);
msubs(2);
msubs(1);
msubs(0);
/* Bump pointer to next lower unit of multiplier: */
post_lowerunit(multiplier);
} /* Loop for the number of units in the
multiplier */
return 0; /* normal return */
} /* merritt_modmult */
#undef msubs
#undef sniffadd
/*
* Merritt's mp_modmult function leaves some internal tables in memory,
* so we have to call modmult_burn() at the end of mp_modexp.
* This is so that no cryptographically sensitive data is left in memory
* after the program exits.
*/
/* Alias for modmult_burn, merritt_burn() is called only by mp_modexp. */
void merritt_burn(void)
{
unitfill0(&(mpdbuf[0][0]), (UNITSIZE - 1) * MAX_UNIT_PRECISION);
unitfill0(&(moduli_buf[0][0]), (UNITSIZE - 1) * MAX_UNIT_PRECISION);
unitfill0(msu_moduli, UNITSIZE);
unitfill0(nmsu_moduli, UNITSIZE);
} /* merritt_burn() */
/******* end of Merritt's MODMULT stuff. *******/
/*=========================================================================*/
#endif /* MERRITT */
#ifdef UPTON_OR_SMITH /* Used by Upton's and Smith's
modmult algorithms */
/*
* Jimmy Upton's multiprecision modmult algorithm in C.
* Performs a multiply combined with a modulo operation.
*
* The following support functions and data structures
* are used only by Upton's modmult algorithm...
*/
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -