📄 pi_agm.c
字号:
** -----------
** 30 13 62 67
**
** We need only 3 multiplications, plus a few additions. And of course,
** additions are a lot simpler and faster than multiplications, so we
** end up ahead.
**
** The way it is written requires that the size to be a power of two.
** That's why the program itself requires the size to be a power of two.
** There is no actual requirement in the method, it's just how I did it
** because I would be working close to powers of two anyway, and it was
** easier.
*/
void FastMul(Short *prod, Short *a, Short *b, Indexer Len)
{
Indexer x, HLen;
int SignA, SignB;
Short *offset;
Short *NextProd;
/*
** This is the threshold where it's faster to do it the ordinary way
** On my computer, it's 16. Yours may be different.
*/
if (Len <= 16 )
{
Mul(prod, a, b, Len);
return;
}
HLen = Len/2;
NextProd = prod + 2*Len;
SignA = Sub(prod, a, a + HLen, HLen);
if (SignA)
{
SignA = 1;
Negate(prod, HLen);
}
SignB = Sub(prod + HLen, b + HLen, b, HLen);
if (SignB)
{
SignB = 1;
Negate(prod + HLen, HLen);
}
FastMul(NextProd, prod, prod + HLen, HLen);
for (x = 0; x < 2 * Len; x++)
prod[x] = 0;
offset = prod + HLen;
if (SignA == SignB)
DoCarries(prod, offset - 1, Add(offset, offset, NextProd, Len));
else DoBorrows(prod, offset - 1, Sub(offset, offset, NextProd, Len));
FastMul(NextProd, a, b, HLen);
offset = prod + HLen;
DoCarries(prod, offset - 1, Add(offset, offset, NextProd, Len));
Add(prod, prod, NextProd, Len);
FastMul(NextProd, a + HLen, b + HLen, HLen);
offset = prod + HLen;
DoCarries(prod, offset - 1, Add(offset, offset, NextProd, Len));
offset = prod + Len;
DoCarries(prod, offset - 1, Add(offset, offset, NextProd, Len));
}
/*
** Compute the reciprocal of 'a'.
** x[k+1] = x[k]*(2-a*x[k])
*/
void Reciprocal(Short *r, Short *n, Indexer Len)
{
Indexer x, SubLen;
int iter;
double t;
/* Estimate our first reciprocal */
for (x = 0; x < Len; x++)
r[x] = 0;
t = n[0] + n[1]*1.0e-4 + n[2]*1.0e-8;
if (t == 0.0)
t = 1;
t = 1.0/t;
r[0] = t;
t = (t - floor(t))*10000.0;
r[1] = t;
t = (t - floor(t))*10000.0;
r[2] = t;
iter = log(SIZE)/log(2.0) + 1;
SubLen = 1;
while (iter--)
{
SubLen *= 2;
if (SubLen > SIZE)
SubLen = SIZE;
FastMul(MulWork, n, r, SubLen);
Round2x(Work1, MulWork, SubLen);
MulWork[0] = 2;
for (x = 1; x < Len; x++)
MulWork[x] = 0;
Sub(Work1, MulWork, Work1, SubLen);
FastMul(MulWork, r, Work1, SubLen);
Round2x(r, MulWork, SubLen);
}
}
/*
** Computes the reciprocal of the square root of 'a'
** y[k+1] = y[k]*(3-a*y[k]^2)/2
**
**
** We can save quite a bit of time by using part of our previous
** results! Since the number is converging onto a specific value,
** at least part of our previous result will be correct, so we
** can skip a bit of work.
*/
void RSqrt(Short *r, Short *n, Indexer Len, Indexer SubLen)
{
Indexer x;
int iter;
double t;
/* Estimate our first reciprocal square root */
if (SubLen < 2 )
{
for (x = 0; x < Len; x++)
r[x] = 0;
t = n[0] + n[1]*1.0e-4 + n[2]*1.0e-8;
if (t == 0.0)
t = 1;
t = 1.0/sqrt(t);
r[0] = t;
t = (t - floor(t))*10000.0;
r[1] = t;
t = (t - floor(t))*10000.0;
r[2] = t;
}
/*
** PrintShort2("\n ~R: ", r, SIZE);
*/
if (SubLen > SIZE)
SubLen = SIZE;
iter = SubLen;
while (iter <= SIZE*2)
{
FastMul(MulWork, r, r, SubLen);
Round2x(Work1, MulWork, SubLen);
FastMul(MulWork, n, Work1, SubLen);
Round2x(Work1, MulWork, SubLen);
MulWork[0] = 3;
for (x = 1; x < SubLen; x++)
MulWork[x] = 0;
Sub(Work1, MulWork, Work1, SubLen);
FastMul(MulWork, r, Work1, SubLen);
Round2x(r, MulWork, SubLen);
DivBy2(r, SubLen);
/*
printf("%3d", SubLen);
PrintShort2("R: ", r, SubLen);
printf("%3d", SubLen);
PrintShort2("R: ", r, Len);
*/
SubLen *= 2;
if (SubLen > SIZE)
SubLen = SIZE;
iter *= 2;
}
}
/*
** d = a/b by computing the reciprocal of b and then multiplying
** that by a. It's faster this way because the normal digit by
** digit method has N^2 growth, and this method will have the same
** growth as our multiplication method.
*/
void Divide(Short *d, Short *a, Short *b, Indexer Len)
{
Reciprocal(Work2, b, Len);
FastMul(MulWork, a, Work2, Len);
Round2x(d, MulWork, Len);
}
/*
** r = sqrt(n) by computing the reciprocal of the square root of
** n and then multiplying that by n
*/
void Sqrt(Short *r, Short *n, Indexer Len, Indexer SubLen)
{
RSqrt(Work2, n, Len, SubLen);
FastMul(MulWork, n, Work2, Len);
Round2x(r, MulWork, Len);
}
void usage(void)
{
puts("This program requires the number of digits/4 to calculate");
puts("This number must be a power of two.");
exit(-1);
}
int main(int argc,char *argv[])
{
Indexer x;
Indexer SubLen;
double Pow2, tempd, carryd;
int AGMIter;
int NeededIter;
clock_t T0, T1, T2, T3;
if (argc < 2)
usage();
SIZE = atoi(argv[1]);
if (!ispow2(SIZE))
usage();
a = (Short *)malloc(sizeof(Short)*SIZE);
b = (Short *)malloc(sizeof(Short)*SIZE);
c = (Short *)malloc(sizeof(Short)*SIZE);
Work1 = (Short *)malloc(sizeof(Short)*SIZE);
Work2 = (Short *)malloc(sizeof(Short)*SIZE);
Work3 = (Short *)malloc(sizeof(Short)*SIZE);
MulWork = (Short *)malloc(sizeof(Short)*SIZE*4);
if ((a ==NULL) || (b == NULL) || (c == NULL) || (Work1 == NULL) ||
(Work2 == NULL) || (MulWork == NULL))
{
printf("Unable to allocate memory\n");exit(1);
}
NeededIter = log(SIZE)/log(2.0) + 1;
Pow2 = 4.0;
AGMIter = NeededIter + 1;
SubLen = 1;
T0 = clock();
for (x = 0; x < SIZE; x++)
a[x] = b[x] = c[x] = Work3[x] = 0;
a[0] = 1;
Work3[0] = 2;
RSqrt(b, Work3, SIZE, 1);
c[0] = 1;
T1 = clock();
/*
printf("AGMIter %d\n", AGMIter);
PrintShort2("a AGM: ", a, SIZE);
PrintShort2("b AGM: ", b, SIZE);
PrintShort2("C sum: ", c, SIZE);
*/
while (AGMIter--)
{
Sub(Work1, a, b, SIZE); /* w = (a-b)/2 */
DivBy2(Work1, SIZE);
FastMul(MulWork, Work1, Work1, SIZE); /* m = w*w */
Round2x(Work1, MulWork, SIZE);
carryd = 0.0; /* m = m* w^(J+1) */
for (x = SIZE - 1; x >= 0; x--)
{
tempd = Pow2*Work1[x] + carryd;
carryd = floor(tempd / 10000.0);
Work1[x] = tempd - carryd*10000.0;
}
Pow2 *= 2.0;
Sub(c, c, Work1, SIZE); /* c = c - m */
/* Save some time on last iter */
if (AGMIter != 0)
FastMul(MulWork, a, b, SIZE); /* m = a*b */
Add(a, a, b, SIZE); /* a = (a+b)/2 */
DivBy2(a, SIZE);
if (AGMIter != 0)
{
Round2x(Work3, MulWork, SIZE);
Sqrt(b, Work3, SIZE, SubLen); /* b = sqrt(a*b) */
}
/*
printf("AGMIter %d\n", AGMIter);
PrintShort2("a AGM: ", a, SIZE);
PrintShort2("b AGM: ", b, SIZE);
PrintShort2("C sum: ", c, SIZE);
*/
SubLen *= 2;if (SubLen > SIZE) SubLen = SIZE;
}
T2 = clock();
FastMul(MulWork, a, a, SIZE);
Round2x(a, MulWork, SIZE);
carryd = 0.0;
for (x = SIZE - 1; x >= 0; x--)
{
tempd = 4.0*a[x] + carryd;
carryd = floor(tempd / 10000.0);
a[x] = tempd - carryd*10000.0;
}
Divide(b, a, c, SIZE);
T3 = clock();
PrintShort("\nPI = ", b, SIZE);
printf("\nTotal Execution time: %12.4lf\n",
(double)(T3 - T0) / CLOCKS_PER_SEC);
printf("Setup time : %12.4lf\n",
(double)(T1 - T0) / CLOCKS_PER_SEC);
printf("AGM Calculation time: %12.4lf\n",
(double)(T2 - T1) / CLOCKS_PER_SEC);
printf("Post calc time : %12.4lf\n",
(double)(T3 - T2) / CLOCKS_PER_SEC);
return 0;
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -