bench.cpp

来自「开放源码的编译器open watcom 1.6.0版的源代码」· C++ 代码 · 共 557 行 · 第 1/2 页

CPP
557
字号
                mp_digit_tc ar3l  = NN_LCHUNKU(ar3[i]);

                quadh = mul2h*ar2h + mul3h*ar3h;
                quadl = mul2l*ar2l + mul3l*ar3l;
                quadm = quadh - (mul2h-mul2l)*(ar2h-ar2l)
                              - (mul3h-mul3l)*(ar3h-ar3l) + quadl;

                quadm += NN_HCHUNKU(carry);
                quadl += NN_LCHUNKU(carry);

                quadh += NN_HCHUNKU(quadm);
                quadl += NN_LCHUNKU(quadm) << LG2MPB_CHUNK;
                tmparr[i-1] = quadl & MP_MASK;
                carry = quadh + (quadl >> LG2MPB);
            } while (++i < lng);
            tmparr[lng-1] = carry;

            j = 1;
            do {
                mul2h  = NN_HCHUNKU(par1[j]);
                mul2l  = NN_LCHUNKU(par1[j]);
                quadh  = mul2h*ar20h;
                quadm  = mul2h*ar20l + mul2l*ar20h;
                quadl  = mul2l*ar20l + tmparr[0];
                quadll = NN_LCHUNKU(quadl);
                quadlh = NN_LCHUNKU(quadm + NN_HCHUNKU(quadl));
                mul3l  = quadll*inv3l;
                mul3h  = quadll*inv3h + quadlh*inv3l;
                mul3h  = NN_LCHUNKU(mul3h + NN_HCHUNKU(mul3l));
                mul3l  = NN_LCHUNKU(mul3l);
                quadh += mul3h*ar30h;
                quadm += mul3h*ar30l + mul3l*ar30h;
                quadl += mul3l*ar30l;
                quadm += NN_HCHUNKU(quadl);
                carry  = quadh + NN_HCHUNKU(quadm);

                i = 1;
                do {
                    mp_digit_tc ar2h = NN_HCHUNKU(ar2[i]);
                    mp_digit_tc ar2l = NN_LCHUNKU(ar2[i]);
                    mp_digit_tc ar3h = NN_HCHUNKU(ar3[i]);
                    mp_digit_tc ar3l = NN_LCHUNKU(ar3[i]);

                    quadh = mul2h*ar2h + mul3h*ar3h;
                    quadl = mul2l*ar2l + mul3l*ar3l;
                    quadm = quadh + quadl
                       - (mul2h-mul2l)*(ar2h-ar2l)
                       - (mul3h-mul3l)*(ar3h-ar3l);

                    quadm += NN_HCHUNKU(tmparr[i] + carry);
                    quadl += NN_LCHUNKU(tmparr[i] + carry);

                    quadh += NN_HCHUNKU(quadm);
                    quadl += NN_LCHUNKU(quadm) << LG2MPB_CHUNK;

                    tmparr[i-1] = quadl & MP_MASK;
                    carry = quadh + (quadl >> LG2MPB);
                } while (++i < lng);
                tmparr[lng-1] = carry;
            } while (++j < lng);
        }       /* lng > 1 */
    }
    break;      /* MULT_HALFINT */
#undef NN_HCHUNKU
#undef NN_LCHUNKU

case MULT_DOUBLE_PRECISION:     /* Use double precision floating */

#define DBLTOP(dblarg) ((double)((smp_digit_t)((dblarg)*MP_BASE_DINV)))
                        /* Upper half of double precision floating arg */
#define DBLBOT(dblarg) ((dblarg) - MP_BASE_D*DBLTOP(dblarg))
                        /* Lower half of double precision floating arg */
#define MPDVAL(dblarg) ((smp_digit_t)(dblarg))
                        /* Convert floating to integer */
#define DBLVAL(mpdarg) ((double)((smp_digit_t)(mpdarg)))
                        /* Convert floating to integer */
    {
           const double dinv3 = DBLVAL(inv3);
        register double mul2  = DBLVAL(par1[0]);
        register double quad  = mul2*DBLVAL(par2[0]);
        register double mul3  = DBLBOT(DBLBOT(quad)*dinv3);
        register double carry = (quad + mul3*DBLVAL(par3[0]))*MP_BASE_DINV;

        if (lng == 1) {
            tmparr[0] = MPDVAL(carry);
        } else  {
            i = 1;
            do {
                register double q;

                quad = mul2*DBLVAL(par2[i]) + mul3*DBLVAL(par3[i]) + carry;
                q = DBLTOP(quad);
                tmparr[i-1] = MPDVAL(quad - MP_BASE_D*q);
                carry = q;
            } while (++i < lng);
            tmparr[lng-1] = MPDVAL(carry);

            j = 1;
            do {
                mul2  = DBLVAL(par1[j]);
                quad  = mul2*DBLVAL(par2[0]) + DBLVAL(tmparr[0]);
                mul3  = DBLBOT(DBLBOT(quad)*dinv3);
                carry = (quad + mul3*DBLVAL(par3[0]))*MP_BASE_DINV;

                i = 1;
                do {
                    register double q;

                    quad =    mul2*DBLVAL(par2[i]) + mul3*DBLVAL(par3[i])
                            + DBLVAL(tmparr[i]) + carry;
                    q = DBLTOP(quad);
                    tmparr[i-1] = MPDVAL(quad - MP_BASE_D*q);
                    carry = q;
                } while (++i < lng);
                tmparr[lng-1] = MPDVAL(carry);
            } while (++j < lng);
        }       /* lng > 1 */
    }
#undef DBLTOP
#undef DBLBOT
#undef DBLVAL
#undef MPDVAL
    break;      /* MULT_DOUBLE_PRECISION */
    }   /* switch */

                /* Common code (independent of multiplication algorithm) */

    {
        smp_digit_t diff;

        j = lng - 1;
        do {
            diff = (smp_digit_t)tmparr[j] - (smp_digit_t)par3[j];
            j--;
        } while (diff == 0 && j >= 0);

        if (diff < 0) {
            (void)memcpy(ar4, tmparr, lng*sizeof(mp_digit_t));
        } else {                /* Multiple precision subtract */
            register smp_digit_t c1 = 1;        /* carry + 1 */
            for (j = 0; j < lng; j++) {
                c1 += tmparr[j] - par3[j] + (MP_BASE-1);
                ar4[j] = c1 & MP_MASK;
                c1 >>= LG2MPB;
            }
            assert (c1 == 1);
        }
    }
}

double CP_TIME(void)            /* Return CP time used so far */
{
    return clock() / (double)CLK_TCK;
}

mp_digit_t DIFFER(CONSTP(mp_digit_tc) ar1,
                  CONSTP(mp_digit_tc) ar2,
                  mp_lng_tc lng)
        /* Return 0 if ar1 == ar2, nonzero if ar1 != ar2 */
{
    mp_digit_t or_xors = 0;
    mp_lng_t i;
    for (i = 0; i < lng; i++) {or_xors |= (ar1[i] ^ ar2[i]);}
    return or_xors;
}

mp_digit_t NR_INV(mp_digit_tc lowdig)
        /* Find neg_recip such that lowdig*neg_recip == -1 mod MP_BASE */
        /* Use Newton's method  (all modulo a power of 2) */
{
    mp_digit_t neg_recip = -lowdig;     /* 3 bit approximation */
    mp_lng_t prec = 3;
    while (prec < LG2MPB) {
        neg_recip *= (2 + neg_recip*lowdig);
        prec *= 2;
    }
    neg_recip &= MP_MASK;
    assert(((neg_recip*lowdig + 1) & MP_MASK) == 0);
    return neg_recip;
}

static unsigned long int next = 1;
int my_rand() {
    next = next * 1103515245 + 12345;
    return (unsigned)(next/65536) % 32768;
}

mp_digit_t RAND_DIGIT()
{
    mp_digit_tc n1 = my_rand(); /* At least 15-bits */
    mp_digit_tc n2 = my_rand(); /* At least 15-bits */
    mp_digit_tc n3 = n1 ^ (n2 >> LG2MPB_CHUNK) ^ (n2 << LG2MPB_CHUNK);
    return (n3 & MP_MASK);
}

void MP_PRINT(CONSTP(char       ) name,
              CONSTP(mp_digit_tc) ar1,
                     mp_lng_tc    lng)  /* Print number in internal form */
{
    mp_lng_t i;
    printf("%s =", name);
    for (i = 0; i < lng; i++) {
        printf(" %lx", ar1[i]);
    }
    printf("\n");


}

#define NTRIAL 10000
int main (const int argc, CONSTP(const char) argv[])
{
    mp_lng_t lng;
    for (lng = 1; lng <= LNGMAX; lng++) {
        mp_lng_t i, ialg;
        mp_digit_t modlus[LNGMAX], n1[LNGMAX], n2[LNGMAX];
        mp_digit_t invmod;
        mp_digit_t prods[1+MAX_MULT_ALG][LNGMAX];
        mp_digit_t dis_agree = 0;

        for (i = 0; i < lng; i++) {
            modlus[i] = RAND_DIGIT();
            n1[i] = RAND_DIGIT();
            n2[i] = RAND_DIGIT();
        }
/*
                Ensure modlus is odd, first digit positive,
                with 0 <= n1, n2 < modlus.
*/

        modlus[0] |= 1;
        invmod = NR_INV(modlus[0]);
        if (modlus[lng-1] == 0) modlus[lng-1] = 1;
        n1[lng-1] %= modlus[lng-1];
        n2[lng-1] %= modlus[lng-1];
        MP_PRINT("modulus", modlus, lng);
        MP_PRINT("n1", n1, lng);
        MP_PRINT("n2", n2, lng);
        for (ialg = 0; ialg <= MAX_MULT_ALG; ialg++)  {
            NR_MUL(n1, n2, modlus, prods[ialg], invmod, lng, ialg);
            if (ialg > 0) {
                dis_agree |= DIFFER(prods[0], prods[ialg], lng);
            }
        }
        MP_PRINT("product", prods[0], lng);
        if (dis_agree != 0) {
            printf("Disagreement for lng = %d:\n", lng);
            for (ialg = 0; ialg <= MAX_MULT_ALG; ialg++) {
                MP_PRINT(desc[ialg], prods[ialg], lng);
            }
        } else { /* disagreement */
/*
                Print average time per multiplication,
                by each algorithm.
*/
            mp_lng_t itrial;
            double times[1 + MAX_MULT_ALG];
            for (ialg = 0; ialg <= MAX_MULT_ALG; ialg++) {
                const double tbeg = CP_TIME();
                for (itrial = 0; itrial < NTRIAL; itrial++) {
                   NR_MUL(n1, n2, modlus, prods[ialg], invmod, lng, ialg);
                }
                times[ialg] = CP_TIME() - tbeg;
            }
            printf(" lng = %2d", lng);
#if 0
            for (ialg = 0; ialg <= MAX_MULT_ALG; ialg++) {
                printf(", %s %7.0f",
                        desc[ialg], 1.0e6*times[ialg]/(double)NTRIAL);
            }
#endif
            printf("\n");
        }       /* timing */
    } /* for lng */
    return(0);
}
#endif

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?