📄 zmul.c
字号:
/* * Copyright (c) 1994 David I. Bell * Permission is granted to use, distribute, or modify this source, * provided that this copyright notice remains intact. * * Faster than usual multiplying and squaring routines. * The algorithm used is the reasonably simple one from Knuth, volume 2, * section 4.3.3. These recursive routines are of speed O(N^1.585) * instead of O(N^2). The usual multiplication and (almost usual) squaring * algorithms are used for small numbers. On a 386 with its compiler, the * two algorithms are equal in speed at about 100 decimal digits. */#include "zmath.h"LEN _mul2_ = MUL_ALG2; /* size of number to use multiply algorithm 2 */LEN _sq2_ = SQ_ALG2; /* size of number to use square algorithm 2 */static HALF *tempbuf; /* temporary buffer for multiply and square */static LEN domul MATH_PROTO((HALF *v1, LEN size1, HALF *v2, LEN size2, HALF *ans));static LEN dosquare MATH_PROTO((HALF *vp, LEN size, HALF *ans));/* * Multiply two numbers using the following formula recursively: * (A*S+B)*(C*S+D) = (S^2+S)*A*C + S*(A-B)*(D-C) + (S+1)*B*D * where S is a power of 2^16, and so multiplies by it are shifts, and * A,B,C,D are the left and right halfs of the numbers to be multiplied. */voidzmul(z1, z2, res) ZVALUE z1, z2; /* numbers to multiply */ ZVALUE *res; /* result of multiplication */{ LEN len; /* size of array */ if (ziszero(z1) || ziszero(z2)) { *res = _zero_; return; } if (zisunit(z1)) { zcopy(z2, res); res->sign = (z1.sign != z2.sign); return; } if (zisunit(z2)) { zcopy(z1, res); res->sign = (z1.sign != z2.sign); return; } /* * Allocate a temporary buffer for the recursion levels to use. * An array needs to be allocated large enough for all of the * temporary results to fit in. This size is about twice the size * of the largest original number, since each recursion level uses * the size of its given number, and whose size is 1/2 the size of * the previous level. The sum of the infinite series is 2. * Add some extra words because of rounding when dividing by 2 * and also because of the extra word that each multiply needs. */ len = z1.len; if (len < z2.len) len = z2.len; len = 2 * len + 64; tempbuf = zalloctemp(len); res->sign = (z1.sign != z2.sign); res->v = alloc(z1.len + z2.len + 1); res->len = domul(z1.v, z1.len, z2.v, z2.len, res->v);}/* * Recursive routine to multiply two numbers by splitting them up into * two numbers of half the size, and using the results of multiplying the * subpieces. The result is placed in the indicated array, which must be * large enough for the result plus one extra word (size1 + size2 + 1). * Returns the actual size of the result with leading zeroes stripped. * This also uses a temporary array which must be twice as large as * one more than the size of the number at the top level recursive call. */static LENdomul(v1, size1, v2, size2, ans) HALF *v1; /* first number */ LEN size1; /* size of first number */ HALF *v2; /* second number */ LEN size2; /* size of second number */ HALF *ans; /* location for result */{ LEN shift; /* amount numbers are shifted by */ LEN sizeA; /* size of left half of first number */ LEN sizeB; /* size of right half of first number */ LEN sizeC; /* size of left half of second number */ LEN sizeD; /* size of right half of second number */ LEN sizeAB; /* size of subtraction of A and B */ LEN sizeDC; /* size of subtraction of D and C */ LEN sizeABDC; /* size of product of above two results */ LEN subsize; /* size of difference of halfs */ LEN copysize; /* size of number left to copy */ LEN sizetotal; /* total size of product */ LEN len; /* temporary length */ HALF *baseA; /* base of left half of first number */ HALF *baseB; /* base of right half of first number */ HALF *baseC; /* base of left half of second number */ HALF *baseD; /* base of right half of second number */ HALF *baseAB; /* base of result of subtraction of A and B */ HALF *baseDC; /* base of result of subtraction of D and C */ HALF *baseABDC; /* base of product of above two results */ HALF *baseAC; /* base of product of A and C */ HALF *baseBD; /* base of product of B and D */ FULL carry; /* carry digit for small multiplications */ FULL carryACBD; /* carry from addition of A*C and B*D */ FULL digit; /* single digit multiplying by */ HALF *temp; /* base for temporary calculations */ BOOL neg; /* whether imtermediate term is negative */ register HALF *hd, *h1=NULL, *h2=NULL; /* for inner loops */ SIUNION sival; /* for addition of digits */ /* * Trim the numbers of leading zeroes and initialize the * estimated size of the result. */ hd = &v1[size1 - 1]; while ((*hd == 0) && (size1 > 1)) { hd--; size1--; } hd = &v2[size2 - 1]; while ((*hd == 0) && (size2 > 1)) { hd--; size2--; } sizetotal = size1 + size2; /* * First check for zero answer. */ if (((size1 == 1) && (*v1 == 0)) || ((size2 == 1) && (*v2 == 0))) { *ans = 0; return 1; } /* * Exchange the two numbers if necessary to make the number of * digits of the first number be greater than or equal to the * second number. */ if (size1 < size2) { len = size1; size1 = size2; size2 = len; hd = v1; v1 = v2; v2 = hd; } /* * If the smaller number has only a few digits, then calculate * the result in the normal manner in order to avoid the overhead * of the recursion for small numbers. The number of digits where * the algorithm changes is settable from 2 to maxint. */ if (size2 < _mul2_) { /* * First clear the top part of the result, and then multiply * by the lowest digit to get the first partial sum. Later * products will then add into this result. */ hd = &ans[size1]; len = size2; while (len--) *hd++ = 0; digit = *v2++; h1 = v1; hd = ans; carry = 0; len = size1; while (len >= 4) { /* expand the loop some */ len -= 4; sival.ivalue = ((FULL) *h1++) * digit + carry; *hd++ = sival.silow; carry = sival.sihigh; sival.ivalue = ((FULL) *h1++) * digit + carry; *hd++ = sival.silow; carry = sival.sihigh; sival.ivalue = ((FULL) *h1++) * digit + carry; *hd++ = sival.silow; carry = sival.sihigh; sival.ivalue = ((FULL) *h1++) * digit + carry; *hd++ = sival.silow; carry = sival.sihigh; } while (len--) { sival.ivalue = ((FULL) *h1++) * digit + carry; *hd++ = sival.silow; carry = sival.sihigh; } *hd = (HALF)carry; /* * Now multiply by the remaining digits of the second number, * adding each product into the final result. */ h2 = ans; while (--size2 > 0) { digit = *v2++; h1 = v1; hd = ++h2; if (digit == 0) continue; carry = 0; len = size1; while (len >= 4) { /* expand the loop some */ len -= 4; sival.ivalue = ((FULL) *h1++) * digit + ((FULL) *hd) + carry; *hd++ = sival.silow; carry = sival.sihigh; sival.ivalue = ((FULL) *h1++) * digit + ((FULL) *hd) + carry; *hd++ = sival.silow; carry = sival.sihigh; sival.ivalue = ((FULL) *h1++) * digit + ((FULL) *hd) + carry; *hd++ = sival.silow; carry = sival.sihigh; sival.ivalue = ((FULL) *h1++) * digit + ((FULL) *hd) + carry; *hd++ = sival.silow; carry = sival.sihigh; } while (len--) { sival.ivalue = ((FULL) *h1++) * digit + ((FULL) *hd) + carry; *hd++ = sival.silow; carry = sival.sihigh; } while (carry) { sival.ivalue = ((FULL) *hd) + carry; *hd++ = sival.silow; carry = sival.sihigh; } } /* * Now return the true size of the number. */ len = sizetotal; hd = &ans[len - 1]; while ((*hd == 0) && (len > 1)) { hd--; len--; } return len; } /* * Need to multiply by a large number. * Allocate temporary space for calculations, and calculate the * value for the shift. The shift value is 1/2 the size of the * larger (first) number (rounded up). The amount of temporary * space needed is twice the size of the shift, plus one more word * for the multiply to use. */ shift = (size1 + 1) / 2; temp = tempbuf; tempbuf += (2 * shift) + 1; /* * Determine the sizes and locations of all the numbers. * The value of sizeC can be negative, and this is checked later. * The value of sizeD is limited by the full size of the number. */ baseA = v1 + shift; baseB = v1; /* * XXX - Saber-C Version 3.1 says: * * W#26, Storing a bad pointer into auto variable dmul`baseC. * * This warning is issued during the regression test #026 * (read cryrand). * * Saver-C claims that v2+shift is past the end of allocated * memory for v2. When this warning is first issued, shift * has the value 51. * * The call stack is: * * domul(0x165ca0, 101, 0x160998, 40, 0x16d0a8) at "zmul.c":315 * zmul(0x2ddf88, 0x2ddf8c, 0x2ddc98) at "zmul.c":73 * qsqrt(0x38defc, 0x38ea94) at "qfunc.c":248 * qln(0x38defc, 0x38de70) at "qtrans.c":589 * qpower(0x38eacc, 0x38a398, 0x38a018) at "qtrans.c":657 * powervalue(0x1740f8,0x174118,0x195234,0x19523c) at "value.c":1009 * f_power(2, 0x533278) at "func.c":1188 * builtinfunc(117, 2, 0x174118) at "func.c":354 * o_call(0x5328d8, 117, 2) at "opcodes.c":2094 * calculate(0x5328d8, 1) at "opcodes.c":288 * o_usercall(0x5328d8, 54, 1) at "opcodes.c":2082 * calculate(0x48ffa8, 4) at "opcodes.c":288 * o_usercall(0x48ffa8, 53, 1) at "opcodes.c":2082 * calculate(0x1652a0, 1) at "opcodes.c":288 * o_usercall(0x1652a0, 57, 1) at "opcodes.c":2082 * calculate(0x16cca8, 0) at "opcodes.c":288 * evaluate(0) at "codegen.c":167 * getcommands(0) at "codegen.c":106 * getcommands(0) at "codegen.c":76 * getcommands(1) at "codegen.c":76 * main(-1, 0x181f8c) at "calc.c":155 * * Purify does not complain about this code. Who is right? */ baseC = v2 + shift; baseD = v2; baseAB = ans; baseDC = ans + shift; baseAC = ans + shift * 2; baseBD = ans; sizeA = size1 - shift; sizeC = size2 - shift; sizeB = shift; hd = &baseB[shift - 1]; while ((*hd == 0) && (sizeB > 1)) { hd--; sizeB--; } sizeD = shift; if (sizeD > size2) sizeD = size2; hd = &baseD[sizeD - 1]; while ((*hd == 0) && (sizeD > 1)) { hd--; sizeD--; } /* * If the smaller number has a high half of zero, then calculate * the result by breaking up the first number into two numbers * and combining the results using the obvious formula: * (A*S+B) * D = (A*D)*S + B*D */ if (sizeC <= 0) { len = domul(baseB, sizeB, baseD, sizeD, ans); hd = &ans[len]; len = sizetotal - len; while (len--) *hd++ = 0; /* * Add the second number into the first number, shifted * over at the correct position. */ len = domul(baseA, sizeA, baseD, sizeD, temp); h1 = temp; hd = ans + shift; carry = 0; while (len--) { sival.ivalue = ((FULL) *h1++) + ((FULL) *hd) + carry; *hd++ = sival.silow; carry = sival.sihigh; } while (carry) { sival.ivalue = ((FULL) *hd) + carry; *hd++ = sival.silow; carry = sival.sihigh; } /* * Determine the final size of the number and return it. */ len = sizetotal; hd = &ans[len - 1]; while ((*hd == 0) && (len > 1)) { hd--; len--; } tempbuf = temp; return len; } /* * Now we know that the high halfs of the numbers are nonzero, * so we can use the complete formula. * (A*S+B)*(C*S+D) = (S^2+S)*A*C + S*(A-B)*(D-C) + (S+1)*B*D. * The steps are done in the following order: * A-B * D-C * (A-B)*(D-C) * S^2*A*C + B*D * (S^2+S)*A*C + (S+1)*B*D (*) * (S^2+S)*A*C + S*(A-B)*(D-C) + (S+1)*B*D * * Note: step (*) above can produce a result which is larger than * the final product will be, and this is where the extra word * needed in the product comes from. After the final subtraction is * done, the result fits in the expected size. Using the extra word * is easier than suppressing the carries and borrows everywhere. * * Begin by forming the product (A-B)*(D-C) into a temporary * location that we save until the final step. Do each subtraction * at positions 0 and S. Be very careful about the relative sizes * of the numbers since this result can be negative. For the first * step calculate the absolute difference of A and B into a temporary * location at position 0 of the result. Negate the sign if A is * smaller than B. */ neg = FALSE; if (sizeA == sizeB) { len = sizeA; h1 = &baseA[len - 1]; h2 = &baseB[len - 1]; while ((len > 1) && (*h1 == *h2)) { len--; h1--; h2--; } } if ((sizeA > sizeB) || ((sizeA == sizeB) && h1 && h2 && (*h1 > *h2))) { h1 = baseA; h2 = baseB; sizeAB = sizeA; subsize = sizeB; } else { neg = !neg; h1 = baseB; h2 = baseA; sizeAB = sizeB; subsize = sizeA; } copysize = sizeAB - subsize; hd = baseAB; carry = 0; while (subsize--) { sival.ivalue = BASE1 - ((FULL) *h1++) + ((FULL) *h2++) + carry; *hd++ = BASE1 - sival.silow; carry = sival.sihigh; } while (copysize--) { sival.ivalue = (BASE1 - ((FULL) *h1++)) + carry; *hd++ = BASE1 - sival.silow; carry = sival.sihigh; } hd = &baseAB[sizeAB - 1]; while ((*hd == 0) && (sizeAB > 1)) { hd--; sizeAB--; } /* * This completes the calculation of abs(A-B). For the next step * calculate the absolute difference of D and C into a temporary * location at position S of the result. Negate the sign if C is * larger than D. */ if (sizeC == sizeD) { len = sizeC; h1 = &baseC[len - 1]; h2 = &baseD[len - 1]; while ((len > 1) && (*h1 == *h2)) { len--; h1--; h2--; } } if ((sizeC > sizeD) || ((sizeC == sizeD) && (*h1 > *h2))) { neg = !neg; h1 = baseC; h2 = baseD; sizeDC = sizeC; subsize = sizeD; } else { h1 = baseD; h2 = baseC; sizeDC = sizeD; subsize = sizeC; } copysize = sizeDC - subsize; hd = baseDC; carry = 0; while (subsize--) { sival.ivalue = BASE1 - ((FULL) *h1++) + ((FULL) *h2++) + carry; *hd++ = BASE1 - sival.silow; carry = sival.sihigh; } while (copysize--) { sival.ivalue = (BASE1 - ((FULL) *h1++)) + carry; *hd++ = BASE1 - sival.silow; carry = sival.sihigh; } hd = &baseDC[sizeDC - 1]; while ((*hd == 0) && (sizeDC > 1)) { hd--; sizeDC--; } /* * This completes the calculation of abs(D-C). Now multiply * together abs(A-B) and abs(D-C) into a temporary location, * which is preserved until the final steps. */ baseABDC = temp; sizeABDC = domul(baseAB, sizeAB, baseDC, sizeDC, baseABDC); /* * Now calculate B*D and A*C into one of their two final locations. * Make sure the high order digits of the products are zeroed since * this initializes the final result. Be careful about this zeroing * since the size of the high order words might be smaller than * the shift size. Do B*D first since the multiplies use one more * word than the size of the product. Also zero the final extra * word in the result for possible carries to use. */ len = domul(baseB, sizeB, baseD, sizeD, baseBD); hd = &baseBD[len]; len = shift * 2 - len; while (len--) *hd++ = 0; len = domul(baseA, sizeA, baseC, sizeC, baseAC); hd = &baseAC[len]; len = sizetotal - shift * 2 - len + 1; while (len--) *hd++ = 0; /* * Now add in A*C and B*D into themselves at the other shifted * position that they need. This addition is tricky in order to * make sure that the two additions cannot interfere with each other. * Therefore we first add in the top half of B*D and the lower half * of A*C. The sources and destinations of these two additions * overlap, and so the same answer results from the two additions, * thus only two pointers suffice for both additions. Keep the * final carry from these additions for later use since we cannot * afford to change the top half of A*C yet. */ h1 = baseBD + shift; h2 = baseAC; carryACBD = 0; len = shift; while (len--) { sival.ivalue = ((FULL) *h1) + ((FULL) *h2) + carryACBD; *h1++ = sival.silow; *h2++ = sival.silow; carryACBD = sival.sihigh;
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -