📄 zmul.c
字号:
/* * zmul - faster than usual multiplying and squaring routines * * Copyright (C) 1999 David I. Bell * * Calc is open software; you can redistribute it and/or modify it under * the terms of the version 2.1 of the GNU Lesser General Public License * as published by the Free Software Foundation. * * Calc is distributed in the hope that it will be useful, but WITHOUT * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General * Public License for more details. * * A copy of version 2.1 of the GNU Lesser General Public License is * distributed with calc under the filename COPYING-LGPL. You should have * received a copy with calc; if not, write to Free Software Foundation, Inc. * 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA. * * @(#) $Revision: 29.2 $ * @(#) $Id: zmul.c,v 29.2 2000/06/07 14:02:13 chongo Exp $ * @(#) $Source: /usr/local/src/cmd/calc/RCS/zmul.c,v $ * * Under source code control: 1991/01/09 20:01:31 * File existed as early as: 1991 * * Share and enjoy! :-) http://www.isthe.com/chongo/tech/comp/calc/ *//* * Faster than usual multiplying and squaring routines. * The algorithm used is the reasonably simple one from Knuth, volume 2, * section 4.3.3. These recursive routines are of speed O(N^1.585) * instead of O(N^2). The usual multiplication and (almost usual) squaring * algorithms are used for small numbers. On a 386 with its compiler, the * two algorithms are equal in speed at about 100 decimal digits. */#include "config.h"#include "zmath.h"static HALF *tempbuf; /* temporary buffer for multiply and square */static LEN domul(HALF *v1, LEN size1, HALF *v2, LEN size2, HALF *ans);static LEN dosquare(HALF *vp, LEN size, HALF *ans);/* * Multiply two numbers using the following formula recursively: * (A*S+B)*(C*S+D) = (S^2+S)*A*C + S*(A-B)*(D-C) + (S+1)*B*D * where S is a power of 2^16, and so multiplies by it are shifts, and * A,B,C,D are the left and right halfs of the numbers to be multiplied. * * given: * z1 numbers to multiply * z2 numbers to multiply * res result of multiplication */voidzmul(ZVALUE z1, ZVALUE z2, ZVALUE *res){ LEN len; /* size of array */ if (ziszero(z1) || ziszero(z2)) { *res = _zero_; return; } if (zisunit(z1)) { zcopy(z2, res); res->sign = (z1.sign != z2.sign); return; } if (zisunit(z2)) { zcopy(z1, res); res->sign = (z1.sign != z2.sign); return; } /* * Allocate a temporary buffer for the recursion levels to use. * An array needs to be allocated large enough for all of the * temporary results to fit in. This size is about twice the size * of the largest original number, since each recursion level uses * the size of its given number, and whose size is 1/2 the size of * the previous level. The sum of the infinite series is 2. * Add some extra words because of rounding when dividing by 2 * and also because of the extra word that each multiply needs. */ len = z1.len; if (len < z2.len) len = z2.len; len = 2 * len + 64; tempbuf = zalloctemp(len); res->sign = (z1.sign != z2.sign); res->v = alloc(z1.len + z2.len + 2); res->len = domul(z1.v, z1.len, z2.v, z2.len, res->v);}/* * Recursive routine to multiply two numbers by splitting them up into * two numbers of half the size, and using the results of multiplying the * subpieces. The result is placed in the indicated array, which must be * large enough for the result plus one extra word (size1 + size2 + 1). * Returns the actual size of the result with leading zeroes stripped. * This also uses a temporary array which must be twice as large as * one more than the size of the number at the top level recursive call. * * given: * v1 first number * size1 size of first number * v2 second number * size2 size of second number * ans location for result */static LENdomul(HALF *v1, LEN size1, HALF *v2, LEN size2, HALF *ans){ LEN shift; /* amount numbers are shifted by */ LEN sizeA; /* size of left half of first number */ LEN sizeB; /* size of right half of first number */ LEN sizeC; /* size of left half of second number */ LEN sizeD; /* size of right half of second number */ LEN sizeAB; /* size of subtraction of A and B */ LEN sizeDC; /* size of subtraction of D and C */ LEN sizeABDC; /* size of product of above two results */ LEN subsize; /* size of difference of halfs */ LEN copysize; /* size of number left to copy */ LEN sizetotal; /* total size of product */ LEN len; /* temporary length */ HALF *baseA; /* base of left half of first number */ HALF *baseB; /* base of right half of first number */ HALF *baseC; /* base of left half of second number */ HALF *baseD; /* base of right half of second number */ HALF *baseAB; /* base of result of subtraction of A and B */ HALF *baseDC; /* base of result of subtraction of D and C */ HALF *baseABDC; /* base of product of above two results */ HALF *baseAC; /* base of product of A and C */ HALF *baseBD; /* base of product of B and D */ FULL carry; /* carry digit for small multiplications */ FULL carryACBD; /* carry from addition of A*C and B*D */ FULL digit; /* single digit multiplying by */ HALF *temp; /* base for temporary calculations */ BOOL neg; /* whether imtermediate term is negative */ register HALF *hd, *h1=NULL, *h2=NULL; /* for inner loops */ SIUNION sival; /* for addition of digits */ /* * Trim the numbers of leading zeroes and initialize the * estimated size of the result. */ hd = &v1[size1 - 1]; while ((*hd == 0) && (size1 > 1)) { hd--; size1--; } hd = &v2[size2 - 1]; while ((*hd == 0) && (size2 > 1)) { hd--; size2--; } sizetotal = size1 + size2; /* * First check for zero answer. */ if (((size1 == 1) && (*v1 == 0)) || ((size2 == 1) && (*v2 == 0))) { *ans = 0; return 1; } /* * Exchange the two numbers if necessary to make the number of * digits of the first number be greater than or equal to the * second number. */ if (size1 < size2) { len = size1; size1 = size2; size2 = len; hd = v1; v1 = v2; v2 = hd; } /* * If the smaller number has only a few digits, then calculate * the result in the normal manner in order to avoid the overhead * of the recursion for small numbers. The number of digits where * the algorithm changes is settable from 2 to maxint. */ if (size2 < conf->mul2) { /* * First clear the top part of the result, and then multiply * by the lowest digit to get the first partial sum. Later * products will then add into this result. */ hd = &ans[size1]; len = size2; while (len--) *hd++ = 0; digit = *v2++; h1 = v1; hd = ans; carry = 0; len = size1; while (len >= 4) { /* expand the loop some */ len -= 4; sival.ivalue = ((FULL) *h1++) * digit + carry; /* ignore Saber-C warning #112 - get ushort from uint */ /* ok to ignore on name domul`sival */ *hd++ = sival.silow; carry = sival.sihigh; sival.ivalue = ((FULL) *h1++) * digit + carry; *hd++ = sival.silow; carry = sival.sihigh; sival.ivalue = ((FULL) *h1++) * digit + carry; *hd++ = sival.silow; carry = sival.sihigh; sival.ivalue = ((FULL) *h1++) * digit + carry; *hd++ = sival.silow; carry = sival.sihigh; } while (len--) { sival.ivalue = ((FULL) *h1++) * digit + carry; *hd++ = sival.silow; carry = sival.sihigh; } *hd = (HALF)carry; /* * Now multiply by the remaining digits of the second number, * adding each product into the final result. */ h2 = ans; while (--size2 > 0) { digit = *v2++; h1 = v1; hd = ++h2; if (digit == 0) continue; carry = 0; len = size1; while (len >= 4) { /* expand the loop some */ len -= 4; sival.ivalue = ((FULL) *h1++) * digit + ((FULL) *hd) + carry; *hd++ = sival.silow; carry = sival.sihigh; sival.ivalue = ((FULL) *h1++) * digit + ((FULL) *hd) + carry; *hd++ = sival.silow; carry = sival.sihigh; sival.ivalue = ((FULL) *h1++) * digit + ((FULL) *hd) + carry; *hd++ = sival.silow; carry = sival.sihigh; sival.ivalue = ((FULL) *h1++) * digit + ((FULL) *hd) + carry; *hd++ = sival.silow; carry = sival.sihigh; } while (len--) { sival.ivalue = ((FULL) *h1++) * digit + ((FULL) *hd) + carry; *hd++ = sival.silow; carry = sival.sihigh; } while (carry) { sival.ivalue = ((FULL) *hd) + carry; *hd++ = sival.silow; carry = sival.sihigh; } } /* * Now return the true size of the number. */ len = sizetotal; hd = &ans[len - 1]; while ((*hd == 0) && (len > 1)) { hd--; len--; } return len; } /* * Need to multiply by a large number. * Allocate temporary space for calculations, and calculate the * value for the shift. The shift value is 1/2 the size of the * larger (first) number (rounded up). The amount of temporary * space needed is twice the size of the shift, plus one more word * for the multiply to use. */ shift = (size1 + 1) / 2; temp = tempbuf; tempbuf += (2 * shift) + 1; /* * Determine the sizes and locations of all the numbers. * The value of sizeC can be negative, and this is checked later. * The value of sizeD is limited by the full size of the number. */ baseA = v1 + shift; baseB = v1; baseC = v2 + ((shift <= size2) ? shift : size2); baseD = v2; baseAB = ans; baseDC = ans + shift; baseAC = ans + shift * 2; baseBD = ans; sizeA = size1 - shift; sizeC = size2 - shift; sizeB = shift; hd = &baseB[shift - 1]; while ((*hd == 0) && (sizeB > 1)) { hd--; sizeB--; } sizeD = shift; if (sizeD > size2) sizeD = size2; hd = &baseD[sizeD - 1]; while ((*hd == 0) && (sizeD > 1)) { hd--; sizeD--; } /* * If the smaller number has a high half of zero, then calculate * the result by breaking up the first number into two numbers * and combining the results using the obvious formula: * (A*S+B) * D = (A*D)*S + B*D */ if (sizeC <= 0) { len = domul(baseB, sizeB, baseD, sizeD, ans); hd = &ans[len]; len = sizetotal - len; while (len--) *hd++ = 0; /* * Add the second number into the first number, shifted * over at the correct position. */ len = domul(baseA, sizeA, baseD, sizeD, temp); h1 = temp; hd = ans + shift; carry = 0; while (len--) { sival.ivalue = ((FULL) *h1++) + ((FULL) *hd) + carry; *hd++ = sival.silow; carry = sival.sihigh; } while (carry) { sival.ivalue = ((FULL) *hd) + carry; *hd++ = sival.silow; carry = sival.sihigh; } /* * Determine the final size of the number and return it. */ len = sizetotal; hd = &ans[len - 1]; while ((*hd == 0) && (len > 1)) { hd--; len--; } tempbuf = temp; return len; } /* * Now we know that the high halfs of the numbers are nonzero, * so we can use the complete formula. * (A*S+B)*(C*S+D) = (S^2+S)*A*C + S*(A-B)*(D-C) + (S+1)*B*D. * The steps are done in the following order: * A-B * D-C * (A-B)*(D-C) * S^2*A*C + B*D * (S^2+S)*A*C + (S+1)*B*D (*) * (S^2+S)*A*C + S*(A-B)*(D-C) + (S+1)*B*D * * Note: step (*) above can produce a result which is larger than * the final product will be, and this is where the extra word * needed in the product comes from. After the final subtraction is * done, the result fits in the expected size. Using the extra word * is easier than suppressing the carries and borrows everywhere. * * Begin by forming the product (A-B)*(D-C) into a temporary * location that we save until the final step. Do each subtraction * at positions 0 and S. Be very careful about the relative sizes * of the numbers since this result can be negative. For the first * step calculate the absolute difference of A and B into a temporary * location at position 0 of the result. Negate the sign if A is * smaller than B. */ neg = FALSE; if (sizeA == sizeB) { len = sizeA; h1 = &baseA[len - 1]; h2 = &baseB[len - 1]; while ((len > 1) && (*h1 == *h2)) { len--; h1--; h2--; } } if ((sizeA > sizeB) || ((sizeA == sizeB) && h1 && h2 && (*h1 > *h2))) { h1 = baseA; h2 = baseB; sizeAB = sizeA; subsize = sizeB; } else { neg = !neg; h1 = baseB; h2 = baseA; sizeAB = sizeB; subsize = sizeA; } copysize = sizeAB - subsize; hd = baseAB; carry = 0; while (subsize--) { sival.ivalue = BASE1 - ((FULL) *h1++) + ((FULL) *h2++) + carry; *hd++ = (HALF)(BASE1 - sival.silow); carry = sival.sihigh; } while (copysize--) { sival.ivalue = (BASE1 - ((FULL) *h1++)) + carry; *hd++ = (HALF)(BASE1 - sival.silow); carry = sival.sihigh; } hd = &baseAB[sizeAB - 1]; while ((*hd == 0) && (sizeAB > 1)) { hd--; sizeAB--; } /* * This completes the calculation of abs(A-B). For the next step * calculate the absolute difference of D and C into a temporary * location at position S of the result. Negate the sign if C is * larger than D. */ if (sizeC == sizeD) { len = sizeC; h1 = &baseC[len - 1]; h2 = &baseD[len - 1]; while ((len > 1) && (*h1 == *h2)) { len--; h1--; h2--; } } if ((sizeC > sizeD) || ((sizeC == sizeD) && (*h1 > *h2))) { neg = !neg; h1 = baseC; h2 = baseD; sizeDC = sizeC; subsize = sizeD; } else { h1 = baseD; h2 = baseC; sizeDC = sizeD; subsize = sizeC; } copysize = sizeDC - subsize; hd = baseDC; carry = 0; while (subsize--) { sival.ivalue = BASE1 - ((FULL) *h1++) + ((FULL) *h2++) + carry; *hd++ = (HALF)(BASE1 - sival.silow); carry = sival.sihigh; } while (copysize--) { sival.ivalue = (BASE1 - ((FULL) *h1++)) + carry; *hd++ = (HALF)(BASE1 - sival.silow); carry = sival.sihigh; } hd = &baseDC[sizeDC - 1]; while ((*hd == 0) && (sizeDC > 1)) { hd--; sizeDC--; } /* * This completes the calculation of abs(D-C). Now multiply * together abs(A-B) and abs(D-C) into a temporary location, * which is preserved until the final steps. */ baseABDC = temp; sizeABDC = domul(baseAB, sizeAB, baseDC, sizeDC, baseABDC); /* * Now calculate B*D and A*C into one of their two final locations. * Make sure the high order digits of the products are zeroed since * this initializes the final result. Be careful about this zeroing * since the size of the high order words might be smaller than * the shift size. Do B*D first since the multiplies use one more * word than the size of the product. Also zero the final extra * word in the result for possible carries to use. */ len = domul(baseB, sizeB, baseD, sizeD, baseBD); hd = &baseBD[len]; len = shift * 2 - len; while (len--) *hd++ = 0; len = domul(baseA, sizeA, baseC, sizeC, baseAC); hd = &baseAC[len]; len = sizetotal - shift * 2 - len + 1; while (len--) *hd++ = 0; /* * Now add in A*C and B*D into themselves at the other shifted * position that they need. This addition is tricky in order to * make sure that the two additions cannot interfere with each other. * Therefore we first add in the top half of B*D and the lower half * of A*C. The sources and destinations of these two additions * overlap, and so the same answer results from the two additions, * thus only two pointers suffice for both additions. Keep the * final carry from these additions for later use since we cannot * afford to change the top half of A*C yet. */ h1 = baseBD + shift; h2 = baseAC; carryACBD = 0; len = shift; while (len--) { sival.ivalue = ((FULL) *h1) + ((FULL) *h2) + carryACBD;
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -