/* mpilib.cpp */
stuff_bit(quotient, bitmask);
}
bump_bitsniffer(quotient, bitmask);
}
mp_init0(remainder); /* burn sensitive data left on stack */
return 0;
} /* mp_recip */
#endif /* UPTON_OR_SMITH */
/* Signed divide, either or both operands may be negative. */
int mp_div(register unitptr remainder, register unitptr quotient,
register unitptr dividend, register unitptr divisor)
{
boolean dvdsign, dsign;
int status;
dvdsign = (boolean) (mp_tstminus(dividend) != 0);
dsign = (boolean) (mp_tstminus(divisor) != 0);
if (dvdsign)
mp_neg(dividend);
if (dsign)
mp_neg(divisor);
status = mp_udiv(remainder, quotient, dividend, divisor);
if (dvdsign)
mp_neg(dividend); /* repair caller's dividend */
if (dsign)
mp_neg(divisor); /* repair caller's divisor */
if (status < 0)
return status; /* divide error? */
if (dvdsign)
mp_neg(remainder);
if (dvdsign ^ dsign)
mp_neg(quotient);
return status;
} /* mp_div */
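/*
 * Aside: the sign handling above matches C's truncating division. The
 * remainder takes the sign of the dividend, and the quotient is negative
 * exactly when the operand signs differ (the dvdsign ^ dsign test).
 * A minimal sketch of the same rule on plain ints; signed_div_sketch is
 * an illustrative helper, not part of this library.
 */
static void signed_div_sketch(int dividend, int divisor, int *quot, int *rem)
{
	int dvdsign = dividend < 0, dsign = divisor < 0;
	unsigned ud = dvdsign ? 0u - (unsigned) dividend : (unsigned) dividend;
	unsigned uv = dsign ? 0u - (unsigned) divisor : (unsigned) divisor;
	unsigned uq = ud / uv, ur = ud % uv;	/* unsigned core divide */
	*rem = dvdsign ? -(int) ur : (int) ur;	/* remainder follows dividend */
	*quot = (dvdsign ^ dsign) ? -(int) uq : (int) uq;	/* XOR of signs */
}
/* e.g. signed_div_sketch(-7, 2, &q, &r) gives q == -3, r == -1. */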
/*
* This function does a fast divide and mod on a multiprecision dividend
* using a short integer divisor, returning a short integer remainder.
* This is an unsigned divide. It treats both operands as positive.
* It is used mainly for faster printing of large numbers in base 10.
*/
word16 mp_shortdiv(register unitptr quotient,
register unitptr dividend, register word16 divisor)
{
int bits;
short dprec;
register unit bitmask;
register word16 remainder;
if (!divisor) /* if divisor == 0 */
return -1; /* zero divisor means divide error (yields 0xFFFF, since word16 is unsigned) */
remainder = 0;
mp_init0(quotient);
/* normalize and compute number of bits in dividend first */
init_bitsniffer(dividend, bitmask, dprec, bits);
/* rescale quotient to same precision (dprec) as dividend */
rescale(quotient, global_precision, dprec);
make_msbptr(quotient, dprec);
while (bits--) {
remainder <<= 1;
if (sniff_bit(dividend, bitmask))
remainder++;
if (remainder >= divisor) {
remainder -= divisor;
stuff_bit(quotient, bitmask);
}
bump_2bitsniffers(dividend, quotient, bitmask);
}
return remainder;
} /* mp_shortdiv */
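/*
 * Aside: a hedged sketch of the base-10 printing use mentioned above.
 * Repeated mp_shortdiv calls by 10 peel off decimal digits, least
 * significant first. The buffer handling and the name
 * sketch_print_decimal are illustrative assumptions; mp_move and testeq
 * are used as elsewhere in this file. Destroys n; buflen must be at
 * least 2.
 */
static char *sketch_print_decimal(unitptr n, char *buf, int buflen)
{
	unit q[MAX_UNIT_PRECISION];	/* scratch quotient */
	int i = buflen;
	buf[--i] = '\0';
	do {
		word16 digit = mp_shortdiv(q, n, 10);	/* q = n/10, digit = n%10 */
		buf[--i] = (char) ('0' + digit);
		mp_move(n, q);		/* continue with the quotient */
	} while (!testeq(n, 0) && i > 0);
	return buf + i;			/* decimal string, most significant digit first */
}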
/* Unsigned divide, treats both operands as positive. */
int mp_mod(register unitptr remainder,
register unitptr dividend, register unitptr divisor)
{
int bits;
short dprec;
register unit bitmask;
if (testeq(divisor, 0))
return -1; /* zero divisor means divide error */
mp_init0(remainder);
/* normalize and compute number of bits in dividend first */
init_bitsniffer(dividend, bitmask, dprec, bits);
while (bits--) {
mp_rotate_left(remainder,
(boolean) (sniff_bit(dividend, bitmask) != 0));
msub(remainder, divisor);
bump_bitsniffer(dividend, bitmask);
}
return 0;
} /* mp_mod */
/*
* This function does a fast mod operation on a multiprecision dividend
* using a short integer modulus, returning a short integer remainder.
* This is an unsigned divide. It treats both operands as positive.
* It is used mainly for fast sieve searches for large primes.
*/
word16 mp_shortmod(register unitptr dividend, register word16 divisor)
{
int bits;
short dprec;
register unit bitmask;
register word16 remainder;
if (!divisor) /* if divisor == 0 */
return -1; /* zero divisor means divide error (yields 0xFFFF, since word16 is unsigned) */
remainder = 0;
/* normalize and compute number of bits in dividend first */
init_bitsniffer(dividend, bitmask, dprec, bits);
while (bits--) {
remainder <<= 1;
if (sniff_bit(dividend, bitmask))
remainder++;
if (remainder >= divisor)
remainder -= divisor;
bump_bitsniffer(dividend, bitmask);
}
return remainder;
} /* mp_shortmod */
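/*
 * Aside: a hedged sketch of the sieve use mentioned above. Testing a
 * large prime candidate against a table of small primes with
 * mp_shortmod is far cheaper than a full mp_mod. The table contents and
 * the name sketch_has_small_factor are illustrative assumptions.
 */
static int sketch_has_small_factor(unitptr candidate)
{
	static const word16 smallprimes[] =
	{3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37, 41, 43, 47};
	int i;
	for (i = 0; i < (int) (sizeof smallprimes / sizeof smallprimes[0]); i++)
		if (mp_shortmod(candidate, smallprimes[i]) == 0)
			return 1;	/* small factor found; composite */
	return 0;			/* survived the sieve; try a costlier test */
}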
#ifdef COMB_MULT /* use faster "comb" multiply algorithm */
/* We are skipping this code because it has a bug... */
/*
* Computes multiprecision prod = multiplicand * multiplier
*
* Uses interleaved comb multiply algorithm.
* This improved multiply is more than twice as fast as a Russian
* peasant multiply, because it does a lot fewer shifts.
* Must have global_precision set to the size of the multiplicand
* plus UNITSIZE-1 SLOP_BITS. Produces a product whose length is
* the sum of the lengths of the multiplier and multiplicand.
*
* BUG ALERT: Unfortunately, this code has a bug. It fails for
* some numbers. One such example:
* x= 59DE 60CE 2345 8091 A02B 2A1C DBC3 8BE5
* x*x= 59DE 60CE 2345 26B3 993B 67A5 2499 0B7D
* 52C8 CDC7 AFB3 61C8 243C 741B
* --which is obviously wrong. The answer should be:
* x*x= 1F8C 607B 5EA6 C061 2714 04A9 A0C6 A17A
* C9AB 6095 C62F 3756 3843 E4D0 3950 7AD9
* We'll have to fix this some day. In the meantime, we'll
* just have the compiler skip over this code.
*
* BUG NOTE: Possibly fixed. Needs testing.
*/
int mp_mult(register unitptr prod,
register unitptr multiplicand, register unitptr multiplier)
{
int bits;
register unit bitmask;
unitptr product, mplier, temp;
short mprec, mprec2;
unit mplicand[MAX_UNIT_PRECISION];
/* better clear full width--double precision */
mp_init(prod + tohigher(global_precision), 0);
if (testeq(multiplicand, 0))
return 0; /* zero multiplicand means zero product */
mp_move(mplicand, multiplicand); /* save it from damage */
normalize(multiplier, mprec);
if (!mprec)
return 0;
make_lsbptr(multiplier, mprec);
bitmask = 1; /* start scan at LSB of multiplier */
do { /* UNITSIZE times */
/* do for bits 0-15 */
product = prod;
mplier = multiplier;
mprec2 = mprec;
while (mprec2--) { /* do for each word in multiplier */
if (sniff_bit(mplier, bitmask)) {
if (mp_addc(product, multiplicand, 0)) { /* ripple carry */
/* After 1st time thru, this is rarely encountered. */
temp = msbptr(product, global_precision);
pre_higherunit(temp);
/* temp now points to LSU of carry region. */
unmake_lsbptr(temp, global_precision);
mp_inc(temp);
} /* ripple carry */
}
pre_higherunit(mplier);
pre_higherunit(product);
}
if (!(bitmask <<= 1))
break;
mp_shift_left(multiplicand);
} while (TRUE);
mp_move(multiplicand, mplicand); /* recover */
return 0; /* normal return */
} /* mp_mult */
#endif /* COMB_MULT */
/* Because the "comb" multiply has a bug, we will use the slower
Russian peasant multiply instead. Fortunately, the mp_mult
function is not called from any time-critical code.
*/
/*
* Computes multiprecision prod = multiplicand * multiplier
* Uses "Russian peasant" multiply algorithm.
*/
int mp_mult(register unitptr prod,
register unitptr multiplicand, register unitptr multiplier)
{
int bits;
register unit bitmask;
short mprec;
mp_init(prod, 0); /* prod = 0 */
if (testeq(multiplicand, 0)) /* multiplicand == 0? */
return 0; /* zero multiplicand means zero product */
/* normalize and compute number of bits in multiplier first */
init_bitsniffer(multiplier, bitmask, mprec, bits);
while (bits--) {
mp_shift_left(prod);
if (sniff_bit(multiplier, bitmask))
mp_add(prod, multiplicand);
bump_bitsniffer(multiplier, bitmask);
}
return 0;
} /* mp_mult */
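/*
 * Aside: the same loop structure on ordinary unsigned ints, as a hedged
 * illustration of the Russian peasant method used above: scan the
 * multiplier MSB-first, doubling the product at each step and adding
 * the multiplicand wherever a multiplier bit is set. The name
 * peasant_mult_sketch and the 32-bit assumption are illustrative only.
 */
static unsigned peasant_mult_sketch(unsigned multiplicand, unsigned multiplier)
{
	unsigned prod = 0;
	unsigned bitmask = 0x80000000u;		/* assumes 32-bit unsigned */
	while (bitmask) {
		prod <<= 1;			/* mp_shift_left(prod) */
		if (multiplier & bitmask)
			prod += multiplicand;	/* mp_add(prod, multiplicand) */
		bitmask >>= 1;			/* bump_bitsniffer */
	}
	return prod;				/* product, mod 2^32 on overflow */
}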
/*
* mp_modmult computes a multiprecision multiply combined with a
* modulo operation. This is the most time-critical function in
* this multiprecision arithmetic library for performing modulo
* exponentiation. We experimented with different versions of modmult,
* depending on the machine architecture and performance requirements.
* We will either use a Russian Peasant modmult (peasant_modmult),
* Charlie Merritt's modmult (merritt_modmult), Jimmy Upton's
* modmult (upton_modmult), or Thad Smith's modmult (smith_modmult).
* On machines with a hardware atomic multiply instruction,
* Smith's modmult is fastest. It can utilize assembly subroutines to
* speed up the hardware multiply logic and trial quotient calculation.
* If the machine lacks a fast hardware multiply, Merritt's modmult,
* which doesn't call any assembly multiply routine, is preferred.
* We use the alias names mp_modmult, stage_modulus, and modmult_burn
* for the corresponding true names, which depend on what flavor of
* modmult we are using.
*
* Before making the first call to mp_modmult, you must set up the
* modulus-dependent precomputed tables by calling stage_modulus.
* After making all the calls to mp_modmult, you call modmult_burn to
* erase the tables created by stage_modulus that were left in memory.
*/
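/*
 * Aside: a hedged sketch of the calling protocol just described, using
 * the alias names from the paragraph above (which resolve to the
 * selected modmult flavor). The wrapper name modmult_usage_sketch and
 * the assumption that n, a, b, p are already-initialized unitptr
 * buffers with a, b < n are illustrative.
 */
static void modmult_usage_sketch(unitptr p, unitptr a, unitptr b, unitptr n)
{
	stage_modulus(n);	/* once: precompute modulus-dependent tables */
	mp_modmult(p, a, b);	/* p = (a*b) mod n; repeat as needed */
	modmult_burn();		/* finally: erase the tables from memory */
}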
#ifdef COUNTMULTS
/* "number of modmults" counters, used for performance studies. */
static unsigned int tally_modmults = 0;
static unsigned int tally_modsquares = 0;
#endif /* COUNTMULTS */
#ifdef PEASANT
/* Conventional Russian peasant multiply with modulo algorithm. */
static unitptr pmodulus = 0; /* used only by mp_modmult */
/*
* Must pass modulus to stage_modulus before calling modmult.
* Assumes that global_precision has already been adjusted to the
* size of the modulus, plus SLOP_BITS.
*/
int stage_peasant_modulus(unitptr n)
{ /* For this simple version of modmult, just copy unit pointer. */
pmodulus = n;
return 0; /* normal return */
} /* stage_peasant_modulus */
/* "Russian peasant" multiply algorithm, combined with a modulo
* operation. This is a simple naive replacement for Merritt's
* faster modmult algorithm. References global unitptr "modulus".
* Computes: prod = (multiplicand*multiplier) mod modulus
* WARNING: All the arguments must be less than the modulus!
*/
int peasant_modmult(register unitptr prod,
unitptr multiplicand, register unitptr multiplier)
{
int bits;
register unit bitmask;
short mprec;
mp_init(prod, 0);
/* if (testeq(multiplicand, 0))
       return 0; */ /* zero multiplicand means zero product */
/* normalize and compute number of bits in multiplier first */
init_bitsniffer(multiplier, bitmask, mprec, bits);
while (bits--) {
mp_shift_left(prod);
msub(prod, pmodulus); /* turns mult into modmult */
if (sniff_bit(multiplier, bitmask)) {
mp_add(prod, multiplicand);
msub(prod, pmodulus); /* turns mult into modmult */
}
bump_bitsniffer(multiplier, bitmask);
}
return 0;
} /* peasant_modmult */
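/*
 * Aside: a hedged illustration of the interleaved reduction above on
 * ordinary unsigned ints. Because both inputs are already less than the
 * modulus, one conditional subtract (the msub) after each doubling and
 * after each add keeps prod below the modulus throughout. Assumes
 * 32-bit unsigned and m < 2^31 so the doubling cannot overflow; the
 * name peasant_modmult_sketch is illustrative only.
 */
static unsigned peasant_modmult_sketch(unsigned a, unsigned b, unsigned m)
{
	unsigned prod = 0;
	unsigned bitmask = 0x80000000u;
	while (bitmask) {
		prod <<= 1;			/* mp_shift_left(prod) */
		if (prod >= m)
			prod -= m;		/* msub(prod, pmodulus) */
		if (b & bitmask) {
			prod += a;		/* mp_add(prod, multiplicand) */
			if (prod >= m)
				prod -= m;	/* msub again after the add */
		}
		bitmask >>= 1;			/* bump_bitsniffer */
	}
	return prod;				/* (a*b) mod m */
}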
/*
* If we are using a version of mp_modmult that uses internal tables
* in memory, we have to call modmult_burn() at the end of mp_modexp.
* This is so that no sensitive data is left in memory after the program
* exits. The Russian peasant method doesn't use any such tables.
*/
/*
* Alias for modmult_burn, called only from mp_modexp(). Destroys
* internal modmult tables. This version does nothing because no
* tables are used by the Russian peasant modmult.
*/
void peasant_burn(void)
{
} /* peasant_burn */
#endif /* PEASANT */
#ifdef MERRITT
/*=========================================================================*/
/*
This is Charlie Merritt's MODMULT algorithm, implemented in C by PRZ.
Also refined by Zhahai Stewart to reduce the number of subtracts.
Modified by Raymond Brand to reduce the number of SLOP_BITS by 1.
It performs a multiply combined with a modulo operation, without
going into "double precision". It is faster than the Russian peasant
method, and still works well on machines that lack a fast hardware
multiply instruction.
*/
/* The following support functions, macros, and data structures
are used only by Merritt's modmult algorithm... */
/* Shift r1 1 whole unit to the left. Used only by modmult function. */
static void mp_lshift_unit(register unitptr r1)
{
register short precision;
register unitptr r2;
precision = global_precision;
make_msbptr(r1, precision);
r2 = r1;
while (--precision)
*post_lowerunit(r1) = *pre_lowerunit(r2);
*r1 = 0;
} /* mp_lshift_unit */
/* moduli_buf contains shifted images of the modulus, set by stage_modulus */
static unit moduli_buf[UNITSIZE - 1][MAX_UNIT_PRECISION] =
{0};
static unitptr moduli[UNITSIZE] = /* contains pointers into moduli_buf */
{0, moduli_buf[0], moduli_buf[1], moduli_buf[2],
moduli_buf[3], moduli_buf[4], moduli_buf[5], moduli_buf[6]
#ifndef UNIT8
,moduli_buf[7], moduli_buf[8], moduli_buf[9], moduli_buf[10],
moduli_buf[11], moduli_buf[12], moduli_buf[13], moduli_buf[14]
#ifndef UNIT16 /* and not UNIT8 */
,moduli_buf[15], moduli_buf[16], moduli_buf[17], moduli_buf[18],
moduli_buf[19], moduli_buf[20], moduli_buf[21], moduli_buf[22],
moduli_buf[23], moduli_buf[24], moduli_buf[25], moduli_buf[26],
moduli_buf[27], moduli_buf[28], moduli_buf[29], moduli_buf[30]
#endif /* UNIT16 and UNIT8 not defined */
#endif /* UNIT8 not defined */
};
/*
* To optimize msubs, need following 2 unit arrays, each filled
* with the most significant unit and next-to-most significant unit
* of the preshifted images of the modulus.
*/
static unit msu_moduli[UNITSIZE] = {0};		/* most significant units */
static unit nmsu_moduli[UNITSIZE] = {0};	/* next-most significant units */