📄 mpilib.cpp

📁 通訊保密編碼library project code.完整library project code!
💻 CPP
📖 第 1 页 / 共 5 页
字号:
short munit_prec;		/* global_precision expressed in MULTUNITs */

/*
 * Note that since the SPARC CPU has no hardware integer multiply
 * instruction, there is not that much advantage in having an
 * assembly version of mp_smul on that machine.  It might be faster
 * to use Merritt's modmult instead of Upton's modmult on the SPARC.
 */

/*
 * Multiply the single-word multiplier times the multiprecision integer
 * in multiplicand, accumulating result in prod.  The resulting
 * multiprecision prod will be 1 word longer than the multiplicand.
 * multiplicand is munit_prec words long.  We add into prod, so caller
 * should zero it out first.  For best results, this time-critical
 * function should be implemented in assembly.
 * NOTE:  Unlike other functions in the multiprecision arithmetic
 * library, both multiplicand and prod are pointing at the LSB,
 * regardless of byte order of the machine.  On an 80x86, this makes
 * no difference.  But if this assembly function is implemented
 * on a 680x0, it becomes important.
 */

/*
 * Note that this has been modified from the previous version to allow
 * better support for Smith's modmult:
 *      The final carry bit is added to the existing product
 *      array, rather than simply stored.
 */

#ifndef mp_smul
void mp_smul(MULTUNIT * prod, MULTUNIT * multiplicand, MULTUNIT multiplier)
{
    short i;
    unsigned long p, carry;

    carry = 0;
    for (i = 0; i < munit_prec; ++i) {
	p = (unsigned long) multiplier **post_higherunit(multiplicand);
	p += *prod + carry;
	*post_higherunit(prod) = (MULTUNIT) p;
	carry = p >> MULTUNITSIZE;
    }
    /* Add carry to the next higher word of product / dividend */
    *prod += (MULTUNIT) carry;
}				/* mp_smul */

#endif

/*
 * mp_dmul is a double-precision multiply multiplicand times multiplier,
 * result into prod.  prod must be pointing at a "double precision"
 * buffer.  E.g. If global_precision is 10 words, prod must be
 * pointing at a 20-word buffer.
 */
#ifndef mp_dmul
void mp_dmul(unitptr prod, const unit * multiplicand, const unit * multiplier)
{
    register int i;
    register MULTUNIT *p_multiplicand, *p_multiplier;
    register MULTUNIT *prodp;


    unitfill0(prod, global_precision * 2);	/* Pre-zero prod */
    /* Calculate precision in units of MULTUNIT */
    munit_prec = global_precision * UNITSIZE / MULTUNITSIZE;
    p_multiplicand = (MULTUNIT *) multiplicand;
    p_multiplier = (MULTUNIT *) multiplier;
    prodp = (MULTUNIT *) prod;
    make_lsbptr(p_multiplicand, munit_prec);
    make_lsbptr(p_multiplier, munit_prec);
    make_lsbptr(prodp, munit_prec * 2);
    /* Multiply multiplicand by each word in multiplier, accumulating prod: */
    for (i = 0; i < munit_prec; ++i)
	mp_smul(post_higherunit(prodp),
		p_multiplicand, *post_higherunit(p_multiplier));
}				/* mp_dmul */
#endif				/* mp_dmul */

static unit ALIGN modulus[MAX_UNIT_PRECISION];
static short nbits;		/* number of modulus significant bits */
#endif				/* UPTON_OR_SMITH */

#ifdef UPTON

/*
 * These scratchpad arrays are used only by upton_modmult (mp_modmult).
 * Some of them could be staticly declared inside of mp_modmult, but we
 * put them outside mp_modmult so that they can be wiped clean by
 * modmult_burn(), which is called at the end of mp_modexp.  This is
 * so that no sensitive data is left in memory after the program exits.
 */

static unit ALIGN reciprocal[MAX_UNIT_PRECISION];
static unit ALIGN dhi[MAX_UNIT_PRECISION];
static unit ALIGN d_data[MAX_UNIT_PRECISION * 2];	/* double-wide
							   register */
static unit ALIGN e_data[MAX_UNIT_PRECISION * 2];	/* double-wide
							   register */
static unit ALIGN f_data[MAX_UNIT_PRECISION * 2];	/* double-wide
							   register */

static short nbitsDivUNITSIZE;
static short nbitsModUNITSIZE;

/*
 * stage_upton_modulus() is aliased to stage_modulus().
 * Prepare for an Upton modmult.  Calculate the reciprocal of modulus,
 * and save both.  Note that reciprocal will have one more bit than
 * modulus.
 * Assumes that global_precision has already been adjusted to the
 * size of the modulus, plus SLOP_BITS.
 */
int stage_upton_modulus(const unit * n)
{
    mp_move(modulus, n);
    mp_recip(reciprocal, modulus);
    nbits = countbits(modulus);
    nbitsDivUNITSIZE = nbits / UNITSIZE;
    nbitsModUNITSIZE = nbits % UNITSIZE;
    return 0;			/* normal return */
}				/* stage_upton_modulus */


/*
 * Upton's algorithm performs a multiply combined with a modulo operation.
 * Computes:  prod = (multiplicand*multiplier) mod modulus
 * WARNING: All the arguments must be less than the modulus!
 * References global unitptr modulus and reciprocal.
 * The reciprocal of modulus is 1 bit longer than the modulus.
 * upton_modmult() is aliased to mp_modmult().
 */
int upton_modmult(unitptr prod, const unit * multiplicand, const unit * multiplier)
{
    unitptr d = d_data;
    unitptr d1 = d_data;
    unitptr e = e_data;
    unitptr f = f_data;
    short orig_precision;

    orig_precision = global_precision;
    mp_dmul(d, multiplicand, multiplier);

    /* Throw off low nbits of d */
#ifndef HIGHFIRST
    d1 = d + nbitsDivUNITSIZE;
#else
    d1 = d + orig_precision - nbitsDivUNITSIZE;
#endif
    mp_move(dhi, d1);		/* Don't screw up d, we need it later */
    mp_shift_right_bits(dhi, nbitsModUNITSIZE);

    mp_dmul(e, dhi, reciprocal); /* Note - reciprocal has nbits+1 bits */

#ifndef HIGHFIRST
    e += nbitsDivUNITSIZE;
#else
    e += orig_precision - nbitsDivUNITSIZE;
#endif
    mp_shift_right_bits(e, nbitsModUNITSIZE);

    mp_dmul(f, e, modulus);

    /* Now for the only double-precision call to mpilib: */
    set_precision(orig_precision * 2);
    mp_sub(d, f);

    /* d's precision should be <= orig_precision */
    rescale(d, orig_precision * 2, orig_precision);
    set_precision(orig_precision);

    /* Should never have to do this final subtract more than twice: */
    while (mp_compare(d, modulus) > 0)
	mp_sub(d, modulus);

    mp_move(prod, d);
    return 0;			/* normal return */
}				/* upton_modmult */

/* Upton's mp_modmult function leaves some internal arrays in memory,
   so we have to call modmult_burn() at the end of mp_modexp.
   This is so that no cryptographically sensitive data is left in memory
   after the program exits.
   upton_burn() is aliased to modmult_burn().
 */
void upton_burn(void)
{
    unitfill0(modulus, MAX_UNIT_PRECISION);
    unitfill0(reciprocal, MAX_UNIT_PRECISION);
    unitfill0(dhi, MAX_UNIT_PRECISION);
    unitfill0(d_data, MAX_UNIT_PRECISION * 2);
    unitfill0(e_data, MAX_UNIT_PRECISION * 2);
    unitfill0(f_data, MAX_UNIT_PRECISION * 2);
    nbits = nbitsDivUNITSIZE = nbitsModUNITSIZE = 0;
}				/* upton_burn */

/******* end of Upton's MODMULT stuff. *******/
/*=========================================================================*/
#endif				/* UPTON */

#ifdef SMITH			/* using Thad Smith's modmult algorithm */

/*
 * Thad Smith's implementation of multiprecision modmult algorithm in C.
 * Performs a multiply combined with a modulo operation.
 * The multiplication is done with mp_dmul, the same as for Upton's
 * modmult.  The modulus reduction is done by long division, in
 * which a trial quotient "digit" is determined, then the product of
 * that digit and the divisor are subtracted from the dividend.
 *
 * In this case, the digit is MULTUNIT in size and the subtraction
 * is done by adding the product to the one's complement of the
 * dividend, which allows use of the existing mp_smul routine.
 *
 * The following support functions and data structures
 * are used only by Smith's modmult algorithm...
 */

/*
 * These scratchpad arrays are used only by smith_modmult (mp_modmult).
 * Some of them could be statically declared inside of mp_modmult, but we
 * put them outside mp_modmult so that they can be wiped clean by
 * modmult_burn(), which is called at the end of mp_modexp.  This is
 * so that no sensitive data is left in memory after the program exits.
 */

static unit ALIGN ds_data[MAX_UNIT_PRECISION * 2 + 2];

static unit mod_quotient[4];
static unit mod_divisor[4];	/* 2 most signif. MULTUNITs of modulus */

static MULTUNIT *modmpl;	/* ptr to modulus least significant
				   ** MULTUNIT                      */
static int mshift;		/* number of bits for
				   ** recip scaling       */
static MULTUNIT reciph;		/* MSunit of scaled recip */
static MULTUNIT recipl;		/* LSunit of scaled recip */

static short modlenMULTUNITS;	/* length of modulus in MULTUNITs */
static MULTUNIT mutemp;		/* temporary */

/*
 * The routines mp_smul and mp_dmul are the same as for UPTON and
 * should be coded in assembly.  Note, however, that the previous
 * Upton's mp_smul version has been modified to compatible with
 * Smith's modmult.  The new version also still works for Upton's
 * modmult.
 */

#ifndef mp_set_recip
#define mp_set_recip(rh,rl,m)	/* null */
#else
/* setup routine for external mp_quo_digit */
void mp_set_recip(MULTUNIT rh, MULTUNIT rl, int m);
#endif
MULTUNIT mp_quo_digit(MULTUNIT * dividend);

#ifdef	MULTUNIT_SIZE_SAME
#define mp_musubb mp_subb	/* use existing routine */
#else				/* ! MULTUNIT_SIZE_SAME */

/*
 * This performs the same function as mp_subb, but with MULTUNITs.
 * Note: Processors without alignment requirements may be able to use
 * mp_subb, even though MULTUNITs are smaller than units.  In that case,
 * use mp_subb, since it would be faster if coded in assembly.  Note that
 * this implementation won't work for MULTUNITs which are long -- use
 * mp_subb in that case.
 */
#ifndef mp_musubb

/*
 * multiprecision subtract of MULTUNITs with borrow, r2 from r1, result in r1
 * borrow is incoming borrow flag-- value should be 0 or 1
 */
boolean mp_musubb
 (register MULTUNIT * r1, register MULTUNIT * r2, register boolean borrow)
{
    register ulint x;		/* won't work if sizeof(MULTUNIT)==
				   sizeof(long)     */
    short precision;		/* number of MULTUNITs to subtract */
    precision = global_precision * MULTUNITs_perunit;
    make_lsbptr(r1, precision);
    make_lsbptr(r2, precision);
    while (precision--) {
	x = (ulint) * r1 - (ulint) * post_higherunit(r2) - (ulint) borrow;
	*post_higherunit(r1) = x;
	borrow = (((1L << MULTUNITSIZE) & x) != 0L);
    }
    return (borrow);
}				/* mp_musubb */
#endif				/* mp_musubb */
#endif				/* MULTUNIT_SIZE_SAME */

/*
 * The function mp_quo_digit is the heart of Smith's modulo reduction,
 * which uses a form of long division.  It computes a trial quotient
 * "digit" (MULTUNIT-sized digit) by multiplying the three most
 * significant MULTUNITs of the dividend by the two most significant
 * MULTUNITs of the reciprocal of the modulus.  Note that this function
 * requires that MULTUNITSIZE * 2 <= sizeof(unsigned long).
 *
 * An important part of this technique is that the quotient never be
 * too small, although it may occasionally be too large.  This was
 * done to eliminate the need to check and correct for a remainder
 * exceeding the divisor.       It is easier to check for a negative
 * remainder.  The following technique rarely needs correction for
 * MULTUNITs of at least 16 bits.
 *
 * The following routine has two implementations:
 *
 * ASM_PROTOTYPE defined: written to be an executable prototype for
 * an efficient assembly language implementation.  Note that several
 * of the following masks and shifts can be done by directly
 * manipulating single precision registers on some architectures.
 *
 * ASM_PROTOTYPE undefined: a slightly more efficient implementation
 * in C.  Although this version returns a result larger than the
 * optimum (which is corrected in smith_modmult) more often than the
 * prototype version, the faster execution in C more than makes up
 * for the difference.
 * 
 * Parameter: dividend - points to the most significant MULTUNIT
 *      of the dividend.  Note that dividend actually contains the 
 *      one's complement of the actual dividend value (see comments for 
 *      smith_modmult).
 * 
 *  Return: the trial quotient digit resulting from dividing the first
 *      three MULTUNITs at dividend by the upper two MULTUNITs of the 
 *      modulus.
 */

/* #define ASM_PROTOTYPE *//* undefined: use C-optimized version */

#ifndef mp_quo_digit
MULTUNIT mp_quo_digit(MULTUNIT * dividend)
{
    unsigned long q, q0, q1, q2;
    unsigned short lsb_factor;

/*
 * Compute the least significant product group.
 * The last terms of q1 and q2 perform upward rounding, which is
 * needed to guarantee that the result not be too small.
 */
    q1 = (dividend[tohigher(-2)] ^ MULTUNIT_mask) * (unsigned long) reciph
	+ reciph;
    q2 = (dividend[tohigher(-1)] ^ MULTUNIT_mask) * (unsigned long) recipl
	+ (1L << MULTUNITSIZE);
#ifdef ASM_PROTOTYPE
    lsb_factor = 1 & (q1 >> MULTUNITSIZE) & (q2 >> MULTUNITSIZE);
    q = q1 + q2;

    /* The following statement is equivalent to shifting the sum right
       one bit while shifting in the carry bit.
     */
    q0 = (q1 > ~q2 ? DMULTUNIT_msb : 0) | (q >> 1);
#else				/* optimized C version */
    q0 = (q1 >> 1) + (q2 >> 1) + 1;
#endif

/*      Compute the middle significant product group.   */

    q1 = (dividend[tohigher(-1)] ^ MULTUNIT_mask) * (unsigned long) reciph;
    q2 = (dividend[0] ^ MULTUNIT_mask) * (unsigned long) recipl;
#ifdef ASM_PROTOTYPE
    q = q1 + q2;
    q = (q1 > ~q2 ? DMULTUNIT_msb : 0) | (q >> 1);

/*      Add in the most significant word of the first group.
   The last term takes care of the carry from adding the lsb's
   that were shifted out prior to addition.
 */
    q = (q0 >> MULTUNITSIZE) + q + (lsb_factor & (q1 ^ q2));
#else				/* optimized C version */
    q = (q0 >> MULTUNITSIZE) + (q1 >> 1) + (q2 >> 1) + 1;
#endif

/*      Compute the most significant term and add in the others */

    q = (q >> (MULTUNITSIZE - 2)) +
	(((dividend[0] ^ MULTUNIT_mask) * (unsigned long) reciph) << 1);
    q >>= mshift;

/*      Prevent overflow and then wipe out the intermediate results. */
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -