📄 mapmfmul.c
字号:
/* * M_APM - mapmfmul.c * * Copyright (C) 1999 Michael C. Ring * * Permission to use, copy, and distribute this software and its * documentation for any purpose with or without fee is hereby granted, * provided that the above copyright notice appear in all copies and * that both that copyright notice and this permission notice appear * in supporting documentation. * * Permission to modify the software is granted, but not the right to * distribute the modified code. Modifications are to be distributed * as patches to released version. * * This software is provided "as is" without express or implied warranty. *//* * $Id: mapmfmul.c,v 1.8 1999/09/19 21:13:44 mike Exp $ * * This file contains the FAST MULTIPLICATION function as well * as its support functions. * * $Log: mapmfmul.c,v $ * Revision 1.8 1999/09/19 21:13:44 mike * eliminate unneeded local int in _split * * Revision 1.7 1999/08/12 22:36:23 mike * move the 3 'simple' function to the top of file * so GCC can in-line the code. * * Revision 1.6 1999/08/12 22:01:14 mike * more minor optimizations * * Revision 1.5 1999/08/12 02:02:06 mike * minor optimization * * Revision 1.4 1999/08/10 22:51:59 mike * minor tweak * * Revision 1.3 1999/08/10 00:45:47 mike * added more comments and a few minor tweaks * * Revision 1.2 1999/08/09 02:50:02 mike * add some comments * * Revision 1.1 1999/08/08 18:27:57 mike * Initial revision */#include "m_apm_lc.h"static int firsttimef = TRUE;/* * the following stack sizes are sized to meet the * worst case expected assuming we are multiplying * numbers with 2.14E+9 (2 ^ 31) digits. * * For sizeof(int) == 4 (32 bits) there may be up to 32 recursive * calls. Each call requires 7 stack variables so we need a * stack depth of 32 * 7 + PAD. (we use 240) * * For 'exp_stack', 3 integers also are required to be saved * for each recursive call so we need a stack depth of * 32 * 3 + PAD. (we use 100) */#define M_STACK_SIZE 240static UCHAR *mul_stack_data[M_STACK_SIZE];static int mul_stack_data_size[M_STACK_SIZE];static int M_mul_stack_ptr;static int exp_stack[100];static int exp_stack_ptr;static UCHAR *fmul_a1, *fmul_a0, *fmul_a9, *fmul_b1, *fmul_b0, *fmul_b9, *fmul_t0;static int stmp, itmp, mii;static UCHAR numdiv, numrem;extern void M_fast_multiply(M_APM, M_APM, M_APM);extern void M_fmul_2(UCHAR *, UCHAR *, UCHAR *, int);extern void M_fmul_3(UCHAR *, UCHAR *, UCHAR *, int);extern void M_fmul_add(UCHAR *, UCHAR *, int, int);extern int M_fmul_subtract(UCHAR *, UCHAR *, UCHAR *, int);extern int M_next_power_of_2(int);extern int M_get_stack_ptr(int);extern void M_mapm_split(UCHAR *, UCHAR *, UCHAR *, int);extern void M_push_mul_int(int);extern int M_pop_mul_int(void);/* * the following algorithm is used in the fast multiply routine * (sometimes called the divide-and-conquer technique.) * * assume we have 2 numbers (a & b) with 2N digits. * * let : a = (2^N) * A1 + A0 , b = (2^N) * B1 + B0 * * where 'A1' is the 'most significant half' of 'a' and * 'A0' is the 'least significant half' of 'a'. Same for * B1 and B0. * * Now use the identity : * * 2N N N N * ab = (2 + 2 ) A1B1 + 2 (A1-A0)(B0-B1) + (2 + 1)A0B0 * * * The original problem of multiplying 2 (2N) digit numbers has * been reduced to 3 multiplications of N digit numbers plus some * additions, subtractions, and shifts. * * The fast multiplication algorithm used here uses the above * identity in a recursive process. Knuth in "The Art of Computer * Programming" shows that this algorithm has a significantly * faster run time when N is large. * * If N = 10,000, the traditional multiplication algorithm will * require N^2 multiplications = 100,000,000. * * The new algorithm is proportional to N ^ (log(3) / log(2)) * or approx N ^ 1.585. * * So if N = 10,000, the new algorithm will only require * 2,187,006 multiplications. *//****************************************************************************/void M_push_mul_int(val)int val;{exp_stack[++exp_stack_ptr] = val;}/****************************************************************************/int M_pop_mul_int(){return(exp_stack[exp_stack_ptr--]);}/****************************************************************************/void M_mapm_split(x1, x0, xin, nbytes)UCHAR *x1, *x0, *xin;int nbytes;{memcpy(x1, xin, nbytes);memcpy(x0, (xin + nbytes), nbytes);}/****************************************************************************/void M_fast_multiply(rr,aa,bb)M_APM rr, aa, bb;{M_APM ain, bin;void *vp;int ii, k, nexp, sign;if (firsttimef) { firsttimef = FALSE; for (k=0; k < M_STACK_SIZE; k++) mul_stack_data_size[k] = 0; }exp_stack_ptr = -1;M_mul_stack_ptr = -1; ain = M_get_stack_var();bin = M_get_stack_var();m_apm_copy(ain, aa);m_apm_copy(bin, bb);sign = ain->m_apm_sign * bin->m_apm_sign;nexp = ain->m_apm_exponent + bin->m_apm_exponent;if (ain->m_apm_datalength >= bin->m_apm_datalength) ii = ain->m_apm_datalength;else ii = bin->m_apm_datalength;ii = (ii + 1) >> 1;ii = M_next_power_of_2(ii);k = 2 * ii; /* required size of result, in bytes */M_apm_pad(ain, k); /* fill out the data so the number of */M_apm_pad(bin, k); /* bytes is an exact power of 2 */if (k >= rr->m_apm_malloclength) { if ((vp = realloc(rr->m_apm_data,(k + 256))) == NULL) { fprintf(stderr,"\'M_fast_multiply\', Out of memory\n"); exit(16); } rr->m_apm_malloclength = k + 252; rr->m_apm_data = (UCHAR *)vp; }M_fmul_2(rr->m_apm_data, ain->m_apm_data, bin->m_apm_data, ii);rr->m_apm_sign = sign;rr->m_apm_exponent = nexp;rr->m_apm_datalength = 4 * ii;M_apm_normalize(rr);M_restore_stack(2);}/****************************************************************************/void M_fmul_2(rr, aa, bb, sz)UCHAR *rr, *aa, *bb;int sz;{if (sz == 1) { itmp = (int)(*aa) * (int)(*bb); M_get_div_rem(itmp,&numdiv,&numrem); rr[0] = numdiv; rr[1] = numrem; return; }memset(rr, 0, (2 * sz)); /* zero out the result */mii = sz >> 1;itmp = M_get_stack_ptr(mii);M_push_mul_int(itmp);fmul_a1 = mul_stack_data[itmp];itmp = M_get_stack_ptr(mii);fmul_a0 = mul_stack_data[itmp];itmp = M_get_stack_ptr(2 * sz);fmul_a9 = mul_stack_data[itmp];itmp = M_get_stack_ptr(mii);fmul_b1 = mul_stack_data[itmp];itmp = M_get_stack_ptr(mii);fmul_b0 = mul_stack_data[itmp];itmp = M_get_stack_ptr(2 * sz);fmul_b9 = mul_stack_data[itmp];itmp = M_get_stack_ptr(2 * sz);fmul_t0 = mul_stack_data[itmp];M_mapm_split(fmul_a1, fmul_a0, aa, mii);M_mapm_split(fmul_b1, fmul_b0, bb, mii);stmp = M_fmul_subtract(fmul_a9, fmul_a1, fmul_a0, mii);stmp *= M_fmul_subtract(fmul_b9, fmul_b0, fmul_b1, mii);M_push_mul_int(stmp);M_push_mul_int(mii);M_fmul_3(fmul_t0, fmul_a0, fmul_b0, mii);mii = M_pop_mul_int();stmp = M_pop_mul_int();itmp = M_pop_mul_int();M_push_mul_int(itmp);M_push_mul_int(stmp);M_push_mul_int(mii);/* to restore all stack variables ...fmul_a1 = mul_stack_data[itmp];fmul_a0 = mul_stack_data[itmp+1];fmul_a9 = mul_stack_data[itmp+2];fmul_b1 = mul_stack_data[itmp+3];fmul_b0 = mul_stack_data[itmp+4];fmul_b9 = mul_stack_data[itmp+5];fmul_t0 = mul_stack_data[itmp+6];*/fmul_a1 = mul_stack_data[itmp];fmul_b1 = mul_stack_data[itmp+3];fmul_t0 = mul_stack_data[itmp+6];memcpy((rr + sz), fmul_t0, sz); /* first 'add', result is now zero */ /* so we just copy in the bytes */M_fmul_add(rr, fmul_t0, mii, sz);M_fmul_3(fmul_t0, fmul_a1, fmul_b1, mii);mii = M_pop_mul_int();stmp = M_pop_mul_int();itmp = M_pop_mul_int();M_push_mul_int(itmp);M_push_mul_int(stmp);M_push_mul_int(mii);fmul_a9 = mul_stack_data[itmp+2];fmul_b9 = mul_stack_data[itmp+5];fmul_t0 = mul_stack_data[itmp+6];M_fmul_add(rr, fmul_t0, 0, sz);M_fmul_add(rr, fmul_t0, mii, sz);if (stmp != 0) M_fmul_3(fmul_t0, fmul_a9, fmul_b9, mii);mii = M_pop_mul_int();stmp = M_pop_mul_int();itmp = M_pop_mul_int();fmul_t0 = mul_stack_data[itmp+6];/* * if the sign of (A1 - A0)(B0 - B1) is positive, ADD to * the result. if it is negative, SUBTRACT from the result. */if (stmp < 0) { fmul_a9 = mul_stack_data[itmp+2]; fmul_b9 = mul_stack_data[itmp+5]; memset(fmul_b9, 0, (2 * sz)); memcpy((fmul_b9 + mii), fmul_t0, sz); M_fmul_subtract(fmul_a9, rr, fmul_b9, (2 * sz)); memcpy(rr, fmul_a9, (2 * sz)); }if (stmp > 0) M_fmul_add(rr, fmul_t0, mii, sz);M_mul_stack_ptr -= 7;}/****************************************************************************/void M_fmul_3(rr, aa, bb, sz)UCHAR *rr, *aa, *bb;int sz;{if (sz == 1) { itmp = (int)(*aa) * (int)(*bb); M_get_div_rem(itmp,&numdiv,&numrem); rr[0] = numdiv; rr[1] = numrem; return; }memset(rr, 0, (2 * sz)); /* zero out the result */mii = sz >> 1;itmp = M_get_stack_ptr(mii);M_push_mul_int(itmp);fmul_a1 = mul_stack_data[itmp];itmp = M_get_stack_ptr(mii);fmul_a0 = mul_stack_data[itmp];itmp = M_get_stack_ptr(2 * sz);fmul_a9 = mul_stack_data[itmp];itmp = M_get_stack_ptr(mii);fmul_b1 = mul_stack_data[itmp];itmp = M_get_stack_ptr(mii);fmul_b0 = mul_stack_data[itmp];itmp = M_get_stack_ptr(2 * sz);fmul_b9 = mul_stack_data[itmp];itmp = M_get_stack_ptr(2 * sz);fmul_t0 = mul_stack_data[itmp];M_mapm_split(fmul_a1, fmul_a0, aa, mii);M_mapm_split(fmul_b1, fmul_b0, bb, mii);stmp = M_fmul_subtract(fmul_a9, fmul_a1, fmul_a0, mii);stmp *= M_fmul_subtract(fmul_b9, fmul_b0, fmul_b1, mii);M_push_mul_int(stmp);M_push_mul_int(mii);M_fmul_2(fmul_t0, fmul_a0, fmul_b0, mii);mii = M_pop_mul_int();stmp = M_pop_mul_int();itmp = M_pop_mul_int();M_push_mul_int(itmp);M_push_mul_int(stmp);M_push_mul_int(mii);/* to restore all stack variables ...fmul_a1 = mul_stack_data[itmp];fmul_a0 = mul_stack_data[itmp+1];fmul_a9 = mul_stack_data[itmp+2];fmul_b1 = mul_stack_data[itmp+3];fmul_b0 = mul_stack_data[itmp+4];fmul_b9 = mul_stack_data[itmp+5];fmul_t0 = mul_stack_data[itmp+6];*/fmul_a1 = mul_stack_data[itmp];fmul_b1 = mul_stack_data[itmp+3];fmul_t0 = mul_stack_data[itmp+6];memcpy((rr + sz), fmul_t0, sz); /* first 'add', result is now zero */ /* so we just copy in the bytes */M_fmul_add(rr, fmul_t0, mii, sz);M_fmul_2(fmul_t0, fmul_a1, fmul_b1, mii);mii = M_pop_mul_int();stmp = M_pop_mul_int();itmp = M_pop_mul_int();M_push_mul_int(itmp);M_push_mul_int(stmp);M_push_mul_int(mii);fmul_a9 = mul_stack_data[itmp+2];fmul_b9 = mul_stack_data[itmp+5];fmul_t0 = mul_stack_data[itmp+6];M_fmul_add(rr, fmul_t0, 0, sz);M_fmul_add(rr, fmul_t0, mii, sz);if (stmp != 0) M_fmul_2(fmul_t0, fmul_a9, fmul_b9, mii);mii = M_pop_mul_int();stmp = M_pop_mul_int();itmp = M_pop_mul_int();fmul_t0 = mul_stack_data[itmp+6];/* * if the sign of (A1 - A0)(B0 - B1) is positive, ADD to * the result. if it is negative, SUBTRACT from the result. */if (stmp < 0) { fmul_a9 = mul_stack_data[itmp+2]; fmul_b9 = mul_stack_data[itmp+5]; memset(fmul_b9, 0, (2 * sz)); memcpy((fmul_b9 + mii), fmul_t0, sz); M_fmul_subtract(fmul_a9, rr, fmul_b9, (2 * sz)); memcpy(rr, fmul_a9, (2 * sz)); }if (stmp > 0) M_fmul_add(rr, fmul_t0, mii, sz);M_mul_stack_ptr -= 7;}/****************************************************************************//* * special addition function for use with the fast multiply operation */void M_fmul_add(r, a, offset, sz)UCHAR *r, *a;int offset, sz;{int i, j;UCHAR carry;carry = 0;j = offset + sz;i = sz;while (TRUE) { r[--j] += carry + a[--i]; if (r[j] >= 100) { r[j] -= 100; carry = 1; } else carry = 0; if (i == 0) break; }if (carry) { while (TRUE) { r[--j] += 1; if (r[j] < 100) break; r[j] -= 100; } }}/****************************************************************************//* * special subtraction function for use with the fast multiply operation */int M_fmul_subtract(r, a, b, sz)UCHAR *r, *a, *b;int sz;{int k, jtmp, sflag, nb, borrow;nb = sz;sflag = 0; /* sign flag: assume the numbers are equal *//* * find if a > b (so we perform a-b) * or a < b (so we perform b-a) */for (k=0; k < nb; k++) { if (a[k] < b[k]) { sflag = -1; break; } if (a[k] > b[k]) { sflag = 1; break; } }if (sflag == 0) { memset(r, 0, nb); /* zero out the result */ }else { k = nb; borrow = 0; while (TRUE) { k--; if (sflag == 1) jtmp = (int)a[k] - (int)b[k] - borrow; else jtmp = (int)b[k] - (int)a[k] - borrow; if (jtmp >= 0) { r[k] = (UCHAR)jtmp; borrow = 0; } else { r[k] = (UCHAR)(100 + jtmp); borrow = 1; } if (k == 0) break; } }return(sflag);}/****************************************************************************/int M_next_power_of_2(n)int n;{int ct, k;if (n <= 2) return(n);k = 2;ct = 0;while (TRUE) { if (k >= n) break; k = k << 1; if (++ct == 33) { fprintf(stderr, "\'M_next_power_of_2\', ERROR :sizeof(int) too small ??\n"); exit(4); } }return(k);}/****************************************************************************/int M_get_stack_ptr(sz)int sz;{int i, k;UCHAR *cp;k = ++M_mul_stack_ptr;/* if size is 0, just need to malloc and return */if (mul_stack_data_size[k] == 0) { if ((i = sz) < 16) i = 16; if ((cp = (UCHAR *)malloc(i + 4)) == NULL) { fprintf(stderr,"\'M_get_stack_ptr\', Out of memory\n"); exit(16); } mul_stack_data[k] = cp; mul_stack_data_size[k] = i; }else /* it has been malloc'ed, see if it's big enough */ { if (sz > mul_stack_data_size[k]) { cp = mul_stack_data[k]; if ((cp = (UCHAR *)realloc(cp, (sz + 4))) == NULL) { fprintf(stderr,"\'M_get_stack_ptr\', Out of memory\n"); exit(16); } mul_stack_data[k] = cp; mul_stack_data_size[k] = sz; } }return(k);}/****************************************************************************/
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -