📄 pi.c
字号:
** -----------** 30 13 62 67**** We need only 3 multiplications, plus a few additions. And of course,** additions are a lot simpler and faster than multiplications, so we** end up ahead.**** The way it is written requires that the size to be a power of two.** That's why the program itself requires the size to be a power of two.** There is no actual requirement in the method, it's just how I did it** because I would be working close to powers of two anyway, and it was** easier.*/void FastMul(Short *prod, Short *a, Short *b, Indexer Len){ Indexer x, HLen; int SignA, SignB; Short *offset; Short *NextProd; /* ** This is the threshold where it's faster to do it the ordinary way ** On my computer, it's 16. Yours may be different. */ if (Len <= 16 ) { Mul(prod, a, b, Len); return; } HLen = Len/2; NextProd = prod + 2*Len; SignA = Sub(prod, a, a + HLen, HLen); if (SignA) { SignA = 1; Negate(prod, HLen); } SignB = Sub(prod + HLen, b + HLen, b, HLen); if (SignB) { SignB = 1; Negate(prod + HLen, HLen); } FastMul(NextProd, prod, prod + HLen, HLen); for (x = 0; x < 2 * Len; x++) prod[x] = 0; offset = prod + HLen; if (SignA == SignB) DoCarries(prod, offset - 1, Add(offset, offset, NextProd, Len)); else DoBorrows(prod, offset - 1, Sub(offset, offset, NextProd, Len)); FastMul(NextProd, a, b, HLen); offset = prod + HLen; DoCarries(prod, offset - 1, Add(offset, offset, NextProd, Len)); Add(prod, prod, NextProd, Len); FastMul(NextProd, a + HLen, b + HLen, HLen); offset = prod + HLen; DoCarries(prod, offset - 1, Add(offset, offset, NextProd, Len)); offset = prod + Len; DoCarries(prod, offset - 1, Add(offset, offset, NextProd, Len));}/*** Compute the reciprocal of 'a'.** x[k+1] = x[k]*(2-a*x[k])*/void Reciprocal(Short *r, Short *n, Indexer Len){ Indexer x, SubLen; int iter; double t; /* Estimate our first reciprocal */ for (x = 0; x < Len; x++) r[x] = 0; t = n[0] + n[1]*1.0e-4 + n[2]*1.0e-8; if (t == 0.0) t = 1; t = 1.0/t; r[0] = t; t = (t - floor(t))*10000.0; r[1] = t; t = (t - floor(t))*10000.0; r[2] = t; iter = log(SIZE)/log(2.0) + 1; SubLen = 1; while (iter--) { SubLen *= 2; if (SubLen > SIZE) SubLen = SIZE; FastMul(MulWork, n, r, SubLen); Round2x(Work1, MulWork, SubLen); MulWork[0] = 2; for (x = 1; x < Len; x++) MulWork[x] = 0; Sub(Work1, MulWork, Work1, SubLen); FastMul(MulWork, r, Work1, SubLen); Round2x(r, MulWork, SubLen); }}/*** Computes the reciprocal of the square root of 'a'** y[k+1] = y[k]*(3-a*y[k]^2)/2****** We can save quite a bit of time by using part of our previous** results! Since the number is converging onto a specific value,** at least part of our previous result will be correct, so we** can skip a bit of work.*/void RSqrt(Short *r, Short *n, Indexer Len, Indexer SubLen){ Indexer x; int iter; double t; /* Estimate our first reciprocal square root */ if (SubLen < 2 ) { for (x = 0; x < Len; x++) r[x] = 0; t = n[0] + n[1]*1.0e-4 + n[2]*1.0e-8; if (t == 0.0) t = 1; t = 1.0/sqrt(t); r[0] = t; t = (t - floor(t))*10000.0; r[1] = t; t = (t - floor(t))*10000.0; r[2] = t; } /* ** PrintShort2("\n ~R: ", r, SIZE); */ if (SubLen > SIZE) SubLen = SIZE; iter = SubLen; while (iter <= SIZE*2) { FastMul(MulWork, r, r, SubLen); Round2x(Work1, MulWork, SubLen); FastMul(MulWork, n, Work1, SubLen); Round2x(Work1, MulWork, SubLen); MulWork[0] = 3; for (x = 1; x < SubLen; x++) MulWork[x] = 0; Sub(Work1, MulWork, Work1, SubLen); FastMul(MulWork, r, Work1, SubLen); Round2x(r, MulWork, SubLen); DivBy2(r, SubLen); /* printf("%3d", SubLen); PrintShort2("R: ", r, SubLen); printf("%3d", SubLen); PrintShort2("R: ", r, Len); */ SubLen *= 2; if (SubLen > SIZE) SubLen = SIZE; iter *= 2; }}/*** d = a/b by computing the reciprocal of b and then multiplying** that by a. It's faster this way because the normal digit by** digit method has N^2 growth, and this method will have the same** growth as our multiplication method.*/void Divide(Short *d, Short *a, Short *b, Indexer Len){ Reciprocal(Work2, b, Len); FastMul(MulWork, a, Work2, Len); Round2x(d, MulWork, Len);}/*** r = sqrt(n) by computing the reciprocal of the square root of** n and then multiplying that by n*/void Sqrt(Short *r, Short *n, Indexer Len, Indexer SubLen){ RSqrt(Work2, n, Len, SubLen); FastMul(MulWork, n, Work2, Len); Round2x(r, MulWork, Len);}void usage(void){ puts("This program requires the number of digits/4 to calculate"); puts("This number must be a power of two."); exit(-1);}int main(int argc,char *argv[]){ Indexer x; Indexer SubLen; double Pow2, tempd, carryd; int AGMIter; int NeededIter; clock_t T0, T1, T2, T3; if (argc < 2) usage(); SIZE = atoi(argv[1]); if (!ispow2(SIZE)) usage(); a = (Short *)malloc(sizeof(Short)*SIZE); b = (Short *)malloc(sizeof(Short)*SIZE); c = (Short *)malloc(sizeof(Short)*SIZE); Work1 = (Short *)malloc(sizeof(Short)*SIZE); Work2 = (Short *)malloc(sizeof(Short)*SIZE); Work3 = (Short *)malloc(sizeof(Short)*SIZE); MulWork = (Short *)malloc(sizeof(Short)*SIZE*4); if ((a ==NULL) || (b == NULL) || (c == NULL) || (Work1 == NULL) || (Work2 == NULL) || (MulWork == NULL)) { printf("Unable to allocate memory\n");exit(1); } NeededIter = log(SIZE)/log(2.0) + 1; Pow2 = 4.0; AGMIter = NeededIter + 1; SubLen = 1; T0 = clock(); for (x = 0; x < SIZE; x++) a[x] = b[x] = c[x] = Work3[x] = 0; a[0] = 1; Work3[0] = 2; RSqrt(b, Work3, SIZE, 1); c[0] = 1; T1 = clock(); /* printf("AGMIter %d\n", AGMIter); PrintShort2("a AGM: ", a, SIZE); PrintShort2("b AGM: ", b, SIZE); PrintShort2("C sum: ", c, SIZE); */ while (AGMIter--) { Sub(Work1, a, b, SIZE); /* w = (a-b)/2 */ DivBy2(Work1, SIZE); FastMul(MulWork, Work1, Work1, SIZE); /* m = w*w */ Round2x(Work1, MulWork, SIZE); carryd = 0.0; /* m = m* w^(J+1) */ for (x = SIZE - 1; x >= 0; x--) { tempd = Pow2*Work1[x] + carryd; carryd = floor(tempd / 10000.0); Work1[x] = tempd - carryd*10000.0; } Pow2 *= 2.0; Sub(c, c, Work1, SIZE); /* c = c - m */ /* Save some time on last iter */ if (AGMIter != 0) FastMul(MulWork, a, b, SIZE); /* m = a*b */ Add(a, a, b, SIZE); /* a = (a+b)/2 */ DivBy2(a, SIZE); if (AGMIter != 0) { Round2x(Work3, MulWork, SIZE); Sqrt(b, Work3, SIZE, SubLen); /* b = sqrt(a*b) */ } /* printf("AGMIter %d\n", AGMIter); PrintShort2("a AGM: ", a, SIZE); PrintShort2("b AGM: ", b, SIZE); PrintShort2("C sum: ", c, SIZE); */ SubLen *= 2;if (SubLen > SIZE) SubLen = SIZE; } T2 = clock(); FastMul(MulWork, a, a, SIZE); Round2x(a, MulWork, SIZE); carryd = 0.0; for (x = SIZE - 1; x >= 0; x--) { tempd = 4.0*a[x] + carryd; carryd = floor(tempd / 10000.0); a[x] = tempd - carryd*10000.0; } Divide(b, a, c, SIZE); T3 = clock(); PrintShort("\nPI = ", b, SIZE); printf("\nTotal Execution time: %12.4lf\n", (double)(T3 - T0) / CLOCKS_PER_SEC); printf("Setup time : %12.4lf\n", (double)(T1 - T0) / CLOCKS_PER_SEC); printf("AGM Calculation time: %12.4lf\n", (double)(T2 - T1) / CLOCKS_PER_SEC); printf("Post calc time : %12.4lf\n", (double)(T3 - T2) / CLOCKS_PER_SEC); return 0;}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -