📄 zmul.c

📁 早期freebsd实现
💻 C
📖 第 1 页 / 共 2 页
字号:
12 下一页
/* * Copyright (c) 1994 David I. Bell * Permission is granted to use, distribute, or modify this source, * provided that this copyright notice remains intact. * * Faster than usual multiplying and squaring routines. * The algorithm used is the reasonably simple one from Knuth, volume 2, * section 4.3.3.  These recursive routines are of speed O(N^1.585) * instead of O(N^2).  The usual multiplication and (almost usual) squaring * algorithms are used for small numbers.  On a 386 with its compiler, the * two algorithms are equal in speed at about 100 decimal digits. */#include "zmath.h"LEN _mul2_ = MUL_ALG2;		/* size of number to use multiply algorithm 2 */LEN _sq2_ = SQ_ALG2;		/* size of number to use square algorithm 2 */static HALF *tempbuf;		/* temporary buffer for multiply and square */static LEN domul MATH_PROTO((HALF *v1, LEN size1, HALF *v2, LEN size2, HALF *ans));static LEN dosquare MATH_PROTO((HALF *vp, LEN size, HALF *ans));/* * Multiply two numbers using the following formula recursively: *	(A*S+B)*(C*S+D) = (S^2+S)*A*C + S*(A-B)*(D-C) + (S+1)*B*D * where S is a power of 2^16, and so multiplies by it are shifts, and * A,B,C,D are the left and right halfs of the numbers to be multiplied. */voidzmul(z1, z2, res)	ZVALUE z1, z2;		/* numbers to multiply */	ZVALUE *res;		/* result of multiplication */{	LEN len;		/* size of array */	if (ziszero(z1) || ziszero(z2)) {		*res = _zero_;		return;	}	if (zisunit(z1)) {		zcopy(z2, res);		res->sign = (z1.sign != z2.sign);		return;	}	if (zisunit(z2)) {		zcopy(z1, res);		res->sign = (z1.sign != z2.sign);		return;	}	/*	 * Allocate a temporary buffer for the recursion levels to use.	 * An array needs to be allocated large enough for all of the	 * temporary results to fit in.  This size is about twice the size	 * of the largest original number, since each recursion level uses	 * the size of its given number, and whose size is 1/2 the size of	 * the previous level.  The sum of the infinite series is 2.	 * Add some extra words because of rounding when dividing by 2	 * and also because of the extra word that each multiply needs.	 */	len = z1.len;	if (len < z2.len)		len = z2.len;	len = 2 * len + 64;	tempbuf = zalloctemp(len);	res->sign = (z1.sign != z2.sign);	res->v = alloc(z1.len + z2.len + 1);	res->len = domul(z1.v, z1.len, z2.v, z2.len, res->v);}/* * Recursive routine to multiply two numbers by splitting them up into * two numbers of half the size, and using the results of multiplying the * subpieces.  The result is placed in the indicated array, which must be * large enough for the result plus one extra word (size1 + size2 + 1). * Returns the actual size of the result with leading zeroes stripped. * This also uses a temporary array which must be twice as large as * one more than the size of the number at the top level recursive call. */static LENdomul(v1, size1, v2, size2, ans)	HALF *v1;		/* first number */	LEN size1;		/* size of first number */	HALF *v2;		/* second number */	LEN size2;		/* size of second number */	HALF *ans;		/* location for result */{	LEN shift;		/* amount numbers are shifted by */	LEN sizeA;		/* size of left half of first number */	LEN sizeB;		/* size of right half of first number */	LEN sizeC;		/* size of left half of second number */	LEN sizeD;		/* size of right half of second number */	LEN sizeAB;		/* size of subtraction of A and B */	LEN sizeDC;		/* size of subtraction of D and C */	LEN sizeABDC;		/* size of product of above two results */	LEN subsize;		/* size of difference of halfs */	LEN copysize;		/* size of number left to copy */	LEN sizetotal;		/* total size of product */	LEN len;		/* temporary length */	HALF *baseA;		/* base of left half of first number */	HALF *baseB;		/* base of right half of first number */	HALF *baseC;		/* base of left half of second number */	HALF *baseD;		/* base of right half of second number */	HALF *baseAB;		/* base of result of subtraction of A and B */	HALF *baseDC;		/* base of result of subtraction of D and C */	HALF *baseABDC;		/* base of product of above two results */	HALF *baseAC;		/* base of product of A and C */	HALF *baseBD;		/* base of product of B and D */	FULL carry;		/* carry digit for small multiplications */	FULL carryACBD;		/* carry from addition of A*C and B*D */	FULL digit;		/* single digit multiplying by */	HALF *temp;		/* base for temporary calculations */	BOOL neg;		/* whether imtermediate term is negative */	register HALF *hd, *h1=NULL, *h2=NULL;	/* for inner loops */	SIUNION sival;		/* for addition of digits */	/*	 * Trim the numbers of leading zeroes and initialize the	 * estimated size of the result.	 */	hd = &v1[size1 - 1];	while ((*hd == 0) && (size1 > 1)) {		hd--;		size1--;	}	hd = &v2[size2 - 1];	while ((*hd == 0) && (size2 > 1)) {		hd--;		size2--;	}	sizetotal = size1 + size2;	/*	 * First check for zero answer.	 */	if (((size1 == 1) && (*v1 == 0)) || ((size2 == 1) && (*v2 == 0))) {		*ans = 0;		return 1;	}	/*	 * Exchange the two numbers if necessary to make the number of	 * digits of the first number be greater than or equal to the	 * second number.	 */	if (size1 < size2) {		len = size1; size1 = size2; size2 = len;		hd = v1; v1 = v2; v2 = hd;	}	/*	 * If the smaller number has only a few digits, then calculate	 * the result in the normal manner in order to avoid the overhead	 * of the recursion for small numbers.  The number of digits where	 * the algorithm changes is settable from 2 to maxint.	 */	if (size2 < _mul2_) {		/*		 * First clear the top part of the result, and then multiply		 * by the lowest digit to get the first partial sum.  Later		 * products will then add into this result.		 */		hd = &ans[size1];		len = size2;		while (len--)			*hd++ = 0;		digit = *v2++;		h1 = v1;		hd = ans;		carry = 0;		len = size1;		while (len >= 4) {	/* expand the loop some */			len -= 4;			sival.ivalue = ((FULL) *h1++) * digit + carry;			*hd++ = sival.silow;			carry = sival.sihigh;			sival.ivalue = ((FULL) *h1++) * digit + carry;			*hd++ = sival.silow;			carry = sival.sihigh;			sival.ivalue = ((FULL) *h1++) * digit + carry;			*hd++ = sival.silow;			carry = sival.sihigh;			sival.ivalue = ((FULL) *h1++) * digit + carry;			*hd++ = sival.silow;			carry = sival.sihigh;		}		while (len--) {			sival.ivalue = ((FULL) *h1++) * digit + carry;			*hd++ = sival.silow;			carry = sival.sihigh;		}		*hd = (HALF)carry;		/*		 * Now multiply by the remaining digits of the second number,		 * adding each product into the final result.		 */		h2 = ans;		while (--size2 > 0) {			digit = *v2++;			h1 = v1;			hd = ++h2;			if (digit == 0)				continue;			carry = 0;			len = size1;			while (len >= 4) {	/* expand the loop some */				len -= 4;				sival.ivalue = ((FULL) *h1++) * digit					+ ((FULL) *hd) + carry;				*hd++ = sival.silow;				carry = sival.sihigh;				sival.ivalue = ((FULL) *h1++) * digit					+ ((FULL) *hd) + carry;				*hd++ = sival.silow;				carry = sival.sihigh;				sival.ivalue = ((FULL) *h1++) * digit					+ ((FULL) *hd) + carry;				*hd++ = sival.silow;				carry = sival.sihigh;				sival.ivalue = ((FULL) *h1++) * digit					+ ((FULL) *hd) + carry;				*hd++ = sival.silow;				carry = sival.sihigh;			}			while (len--) {				sival.ivalue = ((FULL) *h1++) * digit					+ ((FULL) *hd) + carry;				*hd++ = sival.silow;				carry = sival.sihigh;			}			while (carry) {				sival.ivalue = ((FULL) *hd) + carry;				*hd++ = sival.silow;				carry = sival.sihigh;			}		}		/*		 * Now return the true size of the number.		 */		len = sizetotal;		hd = &ans[len - 1];		while ((*hd == 0) && (len > 1)) {			hd--;			len--;		}		return len;	}	/*	 * Need to multiply by a large number.	 * Allocate temporary space for calculations, and calculate the	 * value for the shift.  The shift value is 1/2 the size of the	 * larger (first) number (rounded up).  The amount of temporary	 * space needed is twice the size of the shift, plus one more word	 * for the multiply to use.	 */	shift = (size1 + 1) / 2;	temp = tempbuf;	tempbuf += (2 * shift) + 1;	/*	 * Determine the sizes and locations of all the numbers.	 * The value of sizeC can be negative, and this is checked later.	 * The value of sizeD is limited by the full size of the number.	 */	baseA = v1 + shift;	baseB = v1;	/* 	 * XXX - Saber-C Version 3.1 says:	 *	 *    W#26, Storing a bad pointer into auto variable dmul`baseC.	 *	 * This warning is issued during the regression test #026	 * (read cryrand).	 *	 * Saver-C claims that v2+shift is past the end of allocated	 * memory for v2.  When this warning is first issued, shift	 * has the value 51.	 *	 * The call stack is:	 *	 *   domul(0x165ca0, 101, 0x160998, 40, 0x16d0a8) at "zmul.c":315 	 *   zmul(0x2ddf88, 0x2ddf8c, 0x2ddc98) at "zmul.c":73 	 *   qsqrt(0x38defc, 0x38ea94) at "qfunc.c":248 	 *   qln(0x38defc, 0x38de70) at "qtrans.c":589 	 *   qpower(0x38eacc, 0x38a398, 0x38a018) at "qtrans.c":657 	 *   powervalue(0x1740f8,0x174118,0x195234,0x19523c) at "value.c":1009 	 *   f_power(2, 0x533278) at "func.c":1188 	 *   builtinfunc(117, 2, 0x174118) at "func.c":354 	 *   o_call(0x5328d8, 117, 2) at "opcodes.c":2094 	 *   calculate(0x5328d8, 1) at "opcodes.c":288 	 *   o_usercall(0x5328d8, 54, 1) at "opcodes.c":2082 	 *   calculate(0x48ffa8, 4) at "opcodes.c":288 	 *   o_usercall(0x48ffa8, 53, 1) at "opcodes.c":2082 	 *   calculate(0x1652a0, 1) at "opcodes.c":288 	 *   o_usercall(0x1652a0, 57, 1) at "opcodes.c":2082 	 *   calculate(0x16cca8, 0) at "opcodes.c":288 	 *   evaluate(0) at "codegen.c":167 	 *   getcommands(0) at "codegen.c":106 	 *   getcommands(0) at "codegen.c":76 	 *   getcommands(1) at "codegen.c":76 	 *   main(-1, 0x181f8c) at "calc.c":155 	 *	 * Purify does not complain about this code.  Who is right?	 */	baseC = v2 + shift;	baseD = v2;	baseAB = ans;	baseDC = ans + shift;	baseAC = ans + shift * 2;	baseBD = ans;	sizeA = size1 - shift;	sizeC = size2 - shift;	sizeB = shift;	hd = &baseB[shift - 1];	while ((*hd == 0) && (sizeB > 1)) {		hd--;		sizeB--;	}	sizeD = shift;	if (sizeD > size2)		sizeD = size2;	hd = &baseD[sizeD - 1];	while ((*hd == 0) && (sizeD > 1)) {		hd--;		sizeD--;	}	/*	 * If the smaller number has a high half of zero, then calculate	 * the result by breaking up the first number into two numbers	 * and combining the results using the obvious formula:	 *	(A*S+B) * D = (A*D)*S + B*D	 */	if (sizeC <= 0) {		len = domul(baseB, sizeB, baseD, sizeD, ans);		hd = &ans[len];		len = sizetotal - len;		while (len--)			*hd++ = 0;		/*		 * Add the second number into the first number, shifted		 * over at the correct position.		 */		len = domul(baseA, sizeA, baseD, sizeD, temp);		h1 = temp;		hd = ans + shift;		carry = 0;		while (len--) {			sival.ivalue = ((FULL) *h1++) + ((FULL) *hd) + carry;			*hd++ = sival.silow;			carry = sival.sihigh;		}		while (carry) {			sival.ivalue = ((FULL) *hd) + carry;			*hd++ = sival.silow;			carry = sival.sihigh;		}		/*		 * Determine the final size of the number and return it.		 */		len = sizetotal;		hd = &ans[len - 1];		while ((*hd == 0) && (len > 1)) {			hd--;			len--;		}		tempbuf = temp;		return len;	}	/*	 * Now we know that the high halfs of the numbers are nonzero,	 * so we can use the complete formula.	 *	(A*S+B)*(C*S+D) = (S^2+S)*A*C + S*(A-B)*(D-C) + (S+1)*B*D.	 * The steps are done in the following order:	 *	A-B	 *	D-C	 *	(A-B)*(D-C)	 *	S^2*A*C + B*D	 *	(S^2+S)*A*C + (S+1)*B*D				(*)	 *	(S^2+S)*A*C + S*(A-B)*(D-C) + (S+1)*B*D	 *	 * Note: step (*) above can produce a result which is larger than	 * the final product will be, and this is where the extra word	 * needed in the product comes from.  After the final subtraction is	 * done, the result fits in the expected size.  Using the extra word	 * is easier than suppressing the carries and borrows everywhere.	 *	 * Begin by forming the product (A-B)*(D-C) into a temporary	 * location that we save until the final step.  Do each subtraction	 * at positions 0 and S.  Be very careful about the relative sizes	 * of the numbers since this result can be negative.  For the first	 * step calculate the absolute difference of A and B into a temporary	 * location at position 0 of the result.  Negate the sign if A is	 * smaller than B.	 */	neg = FALSE;	if (sizeA == sizeB) {		len = sizeA;		h1 = &baseA[len - 1];		h2 = &baseB[len - 1];		while ((len > 1) && (*h1 == *h2)) {			len--;			h1--;			h2--;		}	}	if ((sizeA > sizeB) || ((sizeA == sizeB) && h1 && h2 && (*h1 > *h2))) {		h1 = baseA;		h2 = baseB;		sizeAB = sizeA;		subsize = sizeB;	} else {		neg = !neg;		h1 = baseB;		h2 = baseA;		sizeAB = sizeB;		subsize = sizeA;	}	copysize = sizeAB - subsize;	hd = baseAB;	carry = 0;	while (subsize--) {		sival.ivalue = BASE1 - ((FULL) *h1++) + ((FULL) *h2++) + carry;		*hd++ = BASE1 - sival.silow;		carry = sival.sihigh;	}	while (copysize--) {		sival.ivalue = (BASE1 - ((FULL) *h1++)) + carry;		*hd++ = BASE1 - sival.silow;		carry = sival.sihigh;	}	hd = &baseAB[sizeAB - 1];	while ((*hd == 0) && (sizeAB > 1)) {		hd--;		sizeAB--;	}	/*	 * This completes the calculation of abs(A-B).  For the next step	 * calculate the absolute difference of D and C into a temporary	 * location at position S of the result.  Negate the sign if C is	 * larger than D.	 */	if (sizeC == sizeD) {		len = sizeC;		h1 = &baseC[len - 1];		h2 = &baseD[len - 1];		while ((len > 1) && (*h1 == *h2)) {			len--;			h1--;			h2--;		}	}	if ((sizeC > sizeD) || ((sizeC == sizeD) && (*h1 > *h2)))	{		neg = !neg;		h1 = baseC;		h2 = baseD;		sizeDC = sizeC;		subsize = sizeD;	} else {		h1 = baseD;		h2 = baseC;		sizeDC = sizeD;		subsize = sizeC;	}	copysize = sizeDC - subsize;	hd = baseDC;	carry = 0;	while (subsize--) {		sival.ivalue = BASE1 - ((FULL) *h1++) + ((FULL) *h2++) + carry;		*hd++ = BASE1 - sival.silow;		carry = sival.sihigh;	}	while (copysize--) {		sival.ivalue = (BASE1 - ((FULL) *h1++)) + carry;		*hd++ = BASE1 - sival.silow;		carry = sival.sihigh;	}	hd = &baseDC[sizeDC - 1];	while ((*hd == 0) && (sizeDC > 1)) {		hd--;		sizeDC--;	}	/*	 * This completes the calculation of abs(D-C).  Now multiply	 * together abs(A-B) and abs(D-C) into a temporary location,	 * which is preserved until the final steps.	 */	baseABDC = temp;	sizeABDC = domul(baseAB, sizeAB, baseDC, sizeDC, baseABDC);	/*	 * Now calculate B*D and A*C into one of their two final locations.	 * Make sure the high order digits of the products are zeroed since	 * this initializes the final result.  Be careful about this zeroing	 * since the size of the high order words might be smaller than	 * the shift size.  Do B*D first since the multiplies use one more	 * word than the size of the product.  Also zero the final extra	 * word in the result for possible carries to use.	 */	len = domul(baseB, sizeB, baseD, sizeD, baseBD);	hd = &baseBD[len];	len = shift * 2 - len;	while (len--)		*hd++ = 0;	len = domul(baseA, sizeA, baseC, sizeC, baseAC);	hd = &baseAC[len];	len = sizetotal - shift * 2 - len + 1;	while (len--)		*hd++ = 0;	/*	 * Now add in A*C and B*D into themselves at the other shifted	 * position that they need.  This addition is tricky in order to	 * make sure that the two additions cannot interfere with each other.	 * Therefore we first add in the top half of B*D and the lower half	 * of A*C.  The sources and destinations of these two additions	 * overlap, and so the same answer results from the two additions,	 * thus only two pointers suffice for both additions.  Keep the	 * final carry from these additions for later use since we cannot	 * afford to change the top half of A*C yet.	 */	h1 = baseBD + shift;	h2 = baseAC;	carryACBD = 0;	len = shift;	while (len--) {		sival.ivalue = ((FULL) *h1) + ((FULL) *h2) + carryACBD;		*h1++ = sival.silow;		*h2++ = sival.silow;		carryACBD = sival.sihigh;
12 下一页
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -