bench.cpp
来自「开放源码的编译器open watcom 1.6.0版的源代码」· C++ 代码 · 共 557 行 · 第 1/2 页
CPP
557 行
mp_digit_tc ar3l = NN_LCHUNKU(ar3[i]);
quadh = mul2h*ar2h + mul3h*ar3h;
quadl = mul2l*ar2l + mul3l*ar3l;
quadm = quadh - (mul2h-mul2l)*(ar2h-ar2l)
- (mul3h-mul3l)*(ar3h-ar3l) + quadl;
quadm += NN_HCHUNKU(carry);
quadl += NN_LCHUNKU(carry);
quadh += NN_HCHUNKU(quadm);
quadl += NN_LCHUNKU(quadm) << LG2MPB_CHUNK;
tmparr[i-1] = quadl & MP_MASK;
carry = quadh + (quadl >> LG2MPB);
} while (++i < lng);
tmparr[lng-1] = carry;
j = 1;
do {
mul2h = NN_HCHUNKU(par1[j]);
mul2l = NN_LCHUNKU(par1[j]);
quadh = mul2h*ar20h;
quadm = mul2h*ar20l + mul2l*ar20h;
quadl = mul2l*ar20l + tmparr[0];
quadll = NN_LCHUNKU(quadl);
quadlh = NN_LCHUNKU(quadm + NN_HCHUNKU(quadl));
mul3l = quadll*inv3l;
mul3h = quadll*inv3h + quadlh*inv3l;
mul3h = NN_LCHUNKU(mul3h + NN_HCHUNKU(mul3l));
mul3l = NN_LCHUNKU(mul3l);
quadh += mul3h*ar30h;
quadm += mul3h*ar30l + mul3l*ar30h;
quadl += mul3l*ar30l;
quadm += NN_HCHUNKU(quadl);
carry = quadh + NN_HCHUNKU(quadm);
i = 1;
do {
mp_digit_tc ar2h = NN_HCHUNKU(ar2[i]);
mp_digit_tc ar2l = NN_LCHUNKU(ar2[i]);
mp_digit_tc ar3h = NN_HCHUNKU(ar3[i]);
mp_digit_tc ar3l = NN_LCHUNKU(ar3[i]);
quadh = mul2h*ar2h + mul3h*ar3h;
quadl = mul2l*ar2l + mul3l*ar3l;
quadm = quadh + quadl
- (mul2h-mul2l)*(ar2h-ar2l)
- (mul3h-mul3l)*(ar3h-ar3l);
quadm += NN_HCHUNKU(tmparr[i] + carry);
quadl += NN_LCHUNKU(tmparr[i] + carry);
quadh += NN_HCHUNKU(quadm);
quadl += NN_LCHUNKU(quadm) << LG2MPB_CHUNK;
tmparr[i-1] = quadl & MP_MASK;
carry = quadh + (quadl >> LG2MPB);
} while (++i < lng);
tmparr[lng-1] = carry;
} while (++j < lng);
} /* lng > 1 */
}
break; /* MULT_HALFINT */
#undef NN_HCHUNKU
#undef NN_LCHUNKU
case MULT_DOUBLE_PRECISION: /* Use double precision floating */
#define DBLTOP(dblarg) ((double)((smp_digit_t)((dblarg)*MP_BASE_DINV)))
/* Upper half of double precision floating arg */
#define DBLBOT(dblarg) ((dblarg) - MP_BASE_D*DBLTOP(dblarg))
/* Lower half of double precision floating arg */
#define MPDVAL(dblarg) ((smp_digit_t)(dblarg))
/* Convert floating to integer */
#define DBLVAL(mpdarg) ((double)((smp_digit_t)(mpdarg)))
/* Convert floating to integer */
{
const double dinv3 = DBLVAL(inv3);
register double mul2 = DBLVAL(par1[0]);
register double quad = mul2*DBLVAL(par2[0]);
register double mul3 = DBLBOT(DBLBOT(quad)*dinv3);
register double carry = (quad + mul3*DBLVAL(par3[0]))*MP_BASE_DINV;
if (lng == 1) {
tmparr[0] = MPDVAL(carry);
} else {
i = 1;
do {
register double q;
quad = mul2*DBLVAL(par2[i]) + mul3*DBLVAL(par3[i]) + carry;
q = DBLTOP(quad);
tmparr[i-1] = MPDVAL(quad - MP_BASE_D*q);
carry = q;
} while (++i < lng);
tmparr[lng-1] = MPDVAL(carry);
j = 1;
do {
mul2 = DBLVAL(par1[j]);
quad = mul2*DBLVAL(par2[0]) + DBLVAL(tmparr[0]);
mul3 = DBLBOT(DBLBOT(quad)*dinv3);
carry = (quad + mul3*DBLVAL(par3[0]))*MP_BASE_DINV;
i = 1;
do {
register double q;
quad = mul2*DBLVAL(par2[i]) + mul3*DBLVAL(par3[i])
+ DBLVAL(tmparr[i]) + carry;
q = DBLTOP(quad);
tmparr[i-1] = MPDVAL(quad - MP_BASE_D*q);
carry = q;
} while (++i < lng);
tmparr[lng-1] = MPDVAL(carry);
} while (++j < lng);
} /* lng > 1 */
}
#undef DBLTOP
#undef DBLBOT
#undef DBLVAL
#undef MPDVAL
break; /* MULT_DOUBLE_PRECISION */
} /* switch */
/* Common code (independent of multiplication algorithm) */
{
smp_digit_t diff;
j = lng - 1;
do {
diff = (smp_digit_t)tmparr[j] - (smp_digit_t)par3[j];
j--;
} while (diff == 0 && j >= 0);
if (diff < 0) {
(void)memcpy(ar4, tmparr, lng*sizeof(mp_digit_t));
} else { /* Multiple precision subtract */
register smp_digit_t c1 = 1; /* carry + 1 */
for (j = 0; j < lng; j++) {
c1 += tmparr[j] - par3[j] + (MP_BASE-1);
ar4[j] = c1 & MP_MASK;
c1 >>= LG2MPB;
}
assert (c1 == 1);
}
}
}
double CP_TIME(void) /* Return CP time used so far */
{
return clock() / (double)CLK_TCK;
}
mp_digit_t DIFFER(CONSTP(mp_digit_tc) ar1,
CONSTP(mp_digit_tc) ar2,
mp_lng_tc lng)
/* Return 0 if ar1 == ar2, nonzero if ar1 != ar2 */
{
mp_digit_t or_xors = 0;
mp_lng_t i;
for (i = 0; i < lng; i++) {or_xors |= (ar1[i] ^ ar2[i]);}
return or_xors;
}
mp_digit_t NR_INV(mp_digit_tc lowdig)
/* Find neg_recip such that lowdig*neg_recip == -1 mod MP_BASE */
/* Use Newton's method (all modulo a power of 2) */
{
mp_digit_t neg_recip = -lowdig; /* 3 bit approximation */
mp_lng_t prec = 3;
while (prec < LG2MPB) {
neg_recip *= (2 + neg_recip*lowdig);
prec *= 2;
}
neg_recip &= MP_MASK;
assert(((neg_recip*lowdig + 1) & MP_MASK) == 0);
return neg_recip;
}
static unsigned long int next = 1;
int my_rand() {
next = next * 1103515245 + 12345;
return (unsigned)(next/65536) % 32768;
}
mp_digit_t RAND_DIGIT()
{
mp_digit_tc n1 = my_rand(); /* At least 15-bits */
mp_digit_tc n2 = my_rand(); /* At least 15-bits */
mp_digit_tc n3 = n1 ^ (n2 >> LG2MPB_CHUNK) ^ (n2 << LG2MPB_CHUNK);
return (n3 & MP_MASK);
}
void MP_PRINT(CONSTP(char ) name,
CONSTP(mp_digit_tc) ar1,
mp_lng_tc lng) /* Print number in internal form */
{
mp_lng_t i;
printf("%s =", name);
for (i = 0; i < lng; i++) {
printf(" %lx", ar1[i]);
}
printf("\n");
}
#define NTRIAL 10000
int main (const int argc, CONSTP(const char) argv[])
{
mp_lng_t lng;
for (lng = 1; lng <= LNGMAX; lng++) {
mp_lng_t i, ialg;
mp_digit_t modlus[LNGMAX], n1[LNGMAX], n2[LNGMAX];
mp_digit_t invmod;
mp_digit_t prods[1+MAX_MULT_ALG][LNGMAX];
mp_digit_t dis_agree = 0;
for (i = 0; i < lng; i++) {
modlus[i] = RAND_DIGIT();
n1[i] = RAND_DIGIT();
n2[i] = RAND_DIGIT();
}
/*
Ensure modlus is odd, first digit positive,
with 0 <= n1, n2 < modlus.
*/
modlus[0] |= 1;
invmod = NR_INV(modlus[0]);
if (modlus[lng-1] == 0) modlus[lng-1] = 1;
n1[lng-1] %= modlus[lng-1];
n2[lng-1] %= modlus[lng-1];
MP_PRINT("modulus", modlus, lng);
MP_PRINT("n1", n1, lng);
MP_PRINT("n2", n2, lng);
for (ialg = 0; ialg <= MAX_MULT_ALG; ialg++) {
NR_MUL(n1, n2, modlus, prods[ialg], invmod, lng, ialg);
if (ialg > 0) {
dis_agree |= DIFFER(prods[0], prods[ialg], lng);
}
}
MP_PRINT("product", prods[0], lng);
if (dis_agree != 0) {
printf("Disagreement for lng = %d:\n", lng);
for (ialg = 0; ialg <= MAX_MULT_ALG; ialg++) {
MP_PRINT(desc[ialg], prods[ialg], lng);
}
} else { /* disagreement */
/*
Print average time per multiplication,
by each algorithm.
*/
mp_lng_t itrial;
double times[1 + MAX_MULT_ALG];
for (ialg = 0; ialg <= MAX_MULT_ALG; ialg++) {
const double tbeg = CP_TIME();
for (itrial = 0; itrial < NTRIAL; itrial++) {
NR_MUL(n1, n2, modlus, prods[ialg], invmod, lng, ialg);
}
times[ialg] = CP_TIME() - tbeg;
}
printf(" lng = %2d", lng);
#if 0
for (ialg = 0; ialg <= MAX_MULT_ALG; ialg++) {
printf(", %s %7.0f",
desc[ialg], 1.0e6*times[ialg]/(double)NTRIAL);
}
#endif
printf("\n");
} /* timing */
} /* for lng */
return(0);
}
#endif
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?