📄 mpilib.cpp
字号:
short munit_prec; /* global_precision expressed in MULTUNITs */
/*
* Note that since the SPARC CPU has no hardware integer multiply
* instruction, there is not that much advantage in having an
* assembly version of mp_smul on that machine. It might be faster
* to use Merritt's modmult instead of Upton's modmult on the SPARC.
*/
/*
* Multiply the single-word multiplier times the multiprecision integer
* in multiplicand, accumulating result in prod. The resulting
* multiprecision prod will be 1 word longer than the multiplicand.
* multiplicand is munit_prec words long. We add into prod, so caller
* should zero it out first. For best results, this time-critical
* function should be implemented in assembly.
* NOTE: Unlike other functions in the multiprecision arithmetic
* library, both multiplicand and prod are pointing at the LSB,
* regardless of byte order of the machine. On an 80x86, this makes
* no difference. But if this assembly function is implemented
* on a 680x0, it becomes important.
*/
/*
* Note that this has been modified from the previous version to allow
* better support for Smith's modmult:
* The final carry bit is added to the existing product
* array, rather than simply stored.
*/
#ifndef mp_smul
void mp_smul(MULTUNIT * prod, MULTUNIT * multiplicand, MULTUNIT multiplier)
{
short i;
unsigned long p, carry;
carry = 0;
for (i = 0; i < munit_prec; ++i) {
p = (unsigned long) multiplier **post_higherunit(multiplicand);
p += *prod + carry;
*post_higherunit(prod) = (MULTUNIT) p;
carry = p >> MULTUNITSIZE;
}
/* Add carry to the next higher word of product / dividend */
*prod += (MULTUNIT) carry;
} /* mp_smul */
#endif
/*
* mp_dmul is a double-precision multiply multiplicand times multiplier,
* result into prod. prod must be pointing at a "double precision"
* buffer. E.g. If global_precision is 10 words, prod must be
* pointing at a 20-word buffer.
*/
#ifndef mp_dmul
void mp_dmul(unitptr prod, const unit * multiplicand, const unit * multiplier)
{
register int i;
register MULTUNIT *p_multiplicand, *p_multiplier;
register MULTUNIT *prodp;
unitfill0(prod, global_precision * 2); /* Pre-zero prod */
/* Calculate precision in units of MULTUNIT */
munit_prec = global_precision * UNITSIZE / MULTUNITSIZE;
p_multiplicand = (MULTUNIT *) multiplicand;
p_multiplier = (MULTUNIT *) multiplier;
prodp = (MULTUNIT *) prod;
make_lsbptr(p_multiplicand, munit_prec);
make_lsbptr(p_multiplier, munit_prec);
make_lsbptr(prodp, munit_prec * 2);
/* Multiply multiplicand by each word in multiplier, accumulating prod: */
for (i = 0; i < munit_prec; ++i)
mp_smul(post_higherunit(prodp),
p_multiplicand, *post_higherunit(p_multiplier));
} /* mp_dmul */
#endif /* mp_dmul */
static unit ALIGN modulus[MAX_UNIT_PRECISION];
static short nbits; /* number of modulus significant bits */
#endif /* UPTON_OR_SMITH */
#ifdef UPTON
/*
* These scratchpad arrays are used only by upton_modmult (mp_modmult).
* Some of them could be staticly declared inside of mp_modmult, but we
* put them outside mp_modmult so that they can be wiped clean by
* modmult_burn(), which is called at the end of mp_modexp. This is
* so that no sensitive data is left in memory after the program exits.
*/
static unit ALIGN reciprocal[MAX_UNIT_PRECISION];
static unit ALIGN dhi[MAX_UNIT_PRECISION];
static unit ALIGN d_data[MAX_UNIT_PRECISION * 2]; /* double-wide
register */
static unit ALIGN e_data[MAX_UNIT_PRECISION * 2]; /* double-wide
register */
static unit ALIGN f_data[MAX_UNIT_PRECISION * 2]; /* double-wide
register */
static short nbitsDivUNITSIZE;
static short nbitsModUNITSIZE;
/*
* stage_upton_modulus() is aliased to stage_modulus().
* Prepare for an Upton modmult. Calculate the reciprocal of modulus,
* and save both. Note that reciprocal will have one more bit than
* modulus.
* Assumes that global_precision has already been adjusted to the
* size of the modulus, plus SLOP_BITS.
*/
int stage_upton_modulus(const unit * n)
{
mp_move(modulus, n);
mp_recip(reciprocal, modulus);
nbits = countbits(modulus);
nbitsDivUNITSIZE = nbits / UNITSIZE;
nbitsModUNITSIZE = nbits % UNITSIZE;
return 0; /* normal return */
} /* stage_upton_modulus */
/*
* Upton's algorithm performs a multiply combined with a modulo operation.
* Computes: prod = (multiplicand*multiplier) mod modulus
* WARNING: All the arguments must be less than the modulus!
* References global unitptr modulus and reciprocal.
* The reciprocal of modulus is 1 bit longer than the modulus.
* upton_modmult() is aliased to mp_modmult().
*/
int upton_modmult(unitptr prod, const unit * multiplicand, const unit * multiplier)
{
unitptr d = d_data;
unitptr d1 = d_data;
unitptr e = e_data;
unitptr f = f_data;
short orig_precision;
orig_precision = global_precision;
mp_dmul(d, multiplicand, multiplier);
/* Throw off low nbits of d */
#ifndef HIGHFIRST
d1 = d + nbitsDivUNITSIZE;
#else
d1 = d + orig_precision - nbitsDivUNITSIZE;
#endif
mp_move(dhi, d1); /* Don't screw up d, we need it later */
mp_shift_right_bits(dhi, nbitsModUNITSIZE);
mp_dmul(e, dhi, reciprocal); /* Note - reciprocal has nbits+1 bits */
#ifndef HIGHFIRST
e += nbitsDivUNITSIZE;
#else
e += orig_precision - nbitsDivUNITSIZE;
#endif
mp_shift_right_bits(e, nbitsModUNITSIZE);
mp_dmul(f, e, modulus);
/* Now for the only double-precision call to mpilib: */
set_precision(orig_precision * 2);
mp_sub(d, f);
/* d's precision should be <= orig_precision */
rescale(d, orig_precision * 2, orig_precision);
set_precision(orig_precision);
/* Should never have to do this final subtract more than twice: */
while (mp_compare(d, modulus) > 0)
mp_sub(d, modulus);
mp_move(prod, d);
return 0; /* normal return */
} /* upton_modmult */
/* Upton's mp_modmult function leaves some internal arrays in memory,
so we have to call modmult_burn() at the end of mp_modexp.
This is so that no cryptographically sensitive data is left in memory
after the program exits.
upton_burn() is aliased to modmult_burn().
*/
void upton_burn(void)
{
unitfill0(modulus, MAX_UNIT_PRECISION);
unitfill0(reciprocal, MAX_UNIT_PRECISION);
unitfill0(dhi, MAX_UNIT_PRECISION);
unitfill0(d_data, MAX_UNIT_PRECISION * 2);
unitfill0(e_data, MAX_UNIT_PRECISION * 2);
unitfill0(f_data, MAX_UNIT_PRECISION * 2);
nbits = nbitsDivUNITSIZE = nbitsModUNITSIZE = 0;
} /* upton_burn */
/******* end of Upton's MODMULT stuff. *******/
/*=========================================================================*/
#endif /* UPTON */
#ifdef SMITH /* using Thad Smith's modmult algorithm */
/*
* Thad Smith's implementation of multiprecision modmult algorithm in C.
* Performs a multiply combined with a modulo operation.
* The multiplication is done with mp_dmul, the same as for Upton's
* modmult. The modulus reduction is done by long division, in
* which a trial quotient "digit" is determined, then the product of
* that digit and the divisor are subtracted from the dividend.
*
* In this case, the digit is MULTUNIT in size and the subtraction
* is done by adding the product to the one's complement of the
* dividend, which allows use of the existing mp_smul routine.
*
* The following support functions and data structures
* are used only by Smith's modmult algorithm...
*/
/*
* These scratchpad arrays are used only by smith_modmult (mp_modmult).
* Some of them could be statically declared inside of mp_modmult, but we
* put them outside mp_modmult so that they can be wiped clean by
* modmult_burn(), which is called at the end of mp_modexp. This is
* so that no sensitive data is left in memory after the program exits.
*/
static unit ALIGN ds_data[MAX_UNIT_PRECISION * 2 + 2];
static unit mod_quotient[4];
static unit mod_divisor[4]; /* 2 most signif. MULTUNITs of modulus */
static MULTUNIT *modmpl; /* ptr to modulus least significant
** MULTUNIT */
static int mshift; /* number of bits for
** recip scaling */
static MULTUNIT reciph; /* MSunit of scaled recip */
static MULTUNIT recipl; /* LSunit of scaled recip */
static short modlenMULTUNITS; /* length of modulus in MULTUNITs */
static MULTUNIT mutemp; /* temporary */
/*
* The routines mp_smul and mp_dmul are the same as for UPTON and
* should be coded in assembly. Note, however, that the previous
* Upton's mp_smul version has been modified to compatible with
* Smith's modmult. The new version also still works for Upton's
* modmult.
*/
#ifndef mp_set_recip
#define mp_set_recip(rh,rl,m) /* null */
#else
/* setup routine for external mp_quo_digit */
void mp_set_recip(MULTUNIT rh, MULTUNIT rl, int m);
#endif
MULTUNIT mp_quo_digit(MULTUNIT * dividend);
#ifdef MULTUNIT_SIZE_SAME
#define mp_musubb mp_subb /* use existing routine */
#else /* ! MULTUNIT_SIZE_SAME */
/*
* This performs the same function as mp_subb, but with MULTUNITs.
* Note: Processors without alignment requirements may be able to use
* mp_subb, even though MULTUNITs are smaller than units. In that case,
* use mp_subb, since it would be faster if coded in assembly. Note that
* this implementation won't work for MULTUNITs which are long -- use
* mp_subb in that case.
*/
#ifndef mp_musubb
/*
* multiprecision subtract of MULTUNITs with borrow, r2 from r1, result in r1
* borrow is incoming borrow flag-- value should be 0 or 1
*/
boolean mp_musubb
(register MULTUNIT * r1, register MULTUNIT * r2, register boolean borrow)
{
register ulint x; /* won't work if sizeof(MULTUNIT)==
sizeof(long) */
short precision; /* number of MULTUNITs to subtract */
precision = global_precision * MULTUNITs_perunit;
make_lsbptr(r1, precision);
make_lsbptr(r2, precision);
while (precision--) {
x = (ulint) * r1 - (ulint) * post_higherunit(r2) - (ulint) borrow;
*post_higherunit(r1) = x;
borrow = (((1L << MULTUNITSIZE) & x) != 0L);
}
return (borrow);
} /* mp_musubb */
#endif /* mp_musubb */
#endif /* MULTUNIT_SIZE_SAME */
/*
* The function mp_quo_digit is the heart of Smith's modulo reduction,
* which uses a form of long division. It computes a trial quotient
* "digit" (MULTUNIT-sized digit) by multiplying the three most
* significant MULTUNITs of the dividend by the two most significant
* MULTUNITs of the reciprocal of the modulus. Note that this function
* requires that MULTUNITSIZE * 2 <= sizeof(unsigned long).
*
* An important part of this technique is that the quotient never be
* too small, although it may occasionally be too large. This was
* done to eliminate the need to check and correct for a remainder
* exceeding the divisor. It is easier to check for a negative
* remainder. The following technique rarely needs correction for
* MULTUNITs of at least 16 bits.
*
* The following routine has two implementations:
*
* ASM_PROTOTYPE defined: written to be an executable prototype for
* an efficient assembly language implementation. Note that several
* of the following masks and shifts can be done by directly
* manipulating single precision registers on some architectures.
*
* ASM_PROTOTYPE undefined: a slightly more efficient implementation
* in C. Although this version returns a result larger than the
* optimum (which is corrected in smith_modmult) more often than the
* prototype version, the faster execution in C more than makes up
* for the difference.
*
* Parameter: dividend - points to the most significant MULTUNIT
* of the dividend. Note that dividend actually contains the
* one's complement of the actual dividend value (see comments for
* smith_modmult).
*
* Return: the trial quotient digit resulting from dividing the first
* three MULTUNITs at dividend by the upper two MULTUNITs of the
* modulus.
*/
/* #define ASM_PROTOTYPE *//* undefined: use C-optimized version */
#ifndef mp_quo_digit
MULTUNIT mp_quo_digit(MULTUNIT * dividend)
{
unsigned long q, q0, q1, q2;
unsigned short lsb_factor;
/*
* Compute the least significant product group.
* The last terms of q1 and q2 perform upward rounding, which is
* needed to guarantee that the result not be too small.
*/
q1 = (dividend[tohigher(-2)] ^ MULTUNIT_mask) * (unsigned long) reciph
+ reciph;
q2 = (dividend[tohigher(-1)] ^ MULTUNIT_mask) * (unsigned long) recipl
+ (1L << MULTUNITSIZE);
#ifdef ASM_PROTOTYPE
lsb_factor = 1 & (q1 >> MULTUNITSIZE) & (q2 >> MULTUNITSIZE);
q = q1 + q2;
/* The following statement is equivalent to shifting the sum right
one bit while shifting in the carry bit.
*/
q0 = (q1 > ~q2 ? DMULTUNIT_msb : 0) | (q >> 1);
#else /* optimized C version */
q0 = (q1 >> 1) + (q2 >> 1) + 1;
#endif
/* Compute the middle significant product group. */
q1 = (dividend[tohigher(-1)] ^ MULTUNIT_mask) * (unsigned long) reciph;
q2 = (dividend[0] ^ MULTUNIT_mask) * (unsigned long) recipl;
#ifdef ASM_PROTOTYPE
q = q1 + q2;
q = (q1 > ~q2 ? DMULTUNIT_msb : 0) | (q >> 1);
/* Add in the most significant word of the first group.
The last term takes care of the carry from adding the lsb's
that were shifted out prior to addition.
*/
q = (q0 >> MULTUNITSIZE) + q + (lsb_factor & (q1 ^ q2));
#else /* optimized C version */
q = (q0 >> MULTUNITSIZE) + (q1 >> 1) + (q2 >> 1) + 1;
#endif
/* Compute the most significant term and add in the others */
q = (q >> (MULTUNITSIZE - 2)) +
(((dividend[0] ^ MULTUNIT_mask) * (unsigned long) reciph) << 1);
q >>= mshift;
/* Prevent overflow and then wipe out the intermediate results. */
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -