⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 mapmfmul.c

📁 任意精度的数学库
💻 C
字号:
/*  *  M_APM  -  mapmfmul.c * *  Copyright (C) 1999   Michael C. Ring * *  Permission to use, copy, and distribute this software and its *  documentation for any purpose with or without fee is hereby granted,  *  provided that the above copyright notice appear in all copies and  *  that both that copyright notice and this permission notice appear  *  in supporting documentation. * *  Permission to modify the software is granted, but not the right to *  distribute the modified code.  Modifications are to be distributed  *  as patches to released version. *   *  This software is provided "as is" without express or implied warranty. *//* *      $Id: mapmfmul.c,v 1.8 1999/09/19 21:13:44 mike Exp $ * *      This file contains the FAST MULTIPLICATION function as well  *	as its support functions. * *      $Log: mapmfmul.c,v $ *      Revision 1.8  1999/09/19 21:13:44  mike *      eliminate unneeded local int in _split * *      Revision 1.7  1999/08/12 22:36:23  mike *      move the 3 'simple' function to the top of file *      so GCC can in-line the code. * *      Revision 1.6  1999/08/12 22:01:14  mike *      more minor optimizations * *      Revision 1.5  1999/08/12 02:02:06  mike *      minor optimization * *      Revision 1.4  1999/08/10 22:51:59  mike *      minor tweak * *      Revision 1.3  1999/08/10 00:45:47  mike *      added more comments and a few minor tweaks * *      Revision 1.2  1999/08/09 02:50:02  mike *      add some comments * *      Revision 1.1  1999/08/08 18:27:57  mike *      Initial revision */#include "m_apm_lc.h"static int firsttimef = TRUE;/* *     the following stack sizes are sized to meet the  *     worst case expected assuming we are multiplying  *     numbers with 2.14E+9 (2 ^ 31) digits.  * *     For sizeof(int) == 4 (32 bits) there may be up to 32 recursive *     calls. Each call requires 7 stack variables so we need a *     stack depth of 32 * 7 + PAD.  (we use 240) * *     For 'exp_stack', 3 integers also are required to be saved  *     for each recursive call so we need a stack depth of  *     32 * 3 + PAD. (we use 100) */#define M_STACK_SIZE 240static UCHAR  *mul_stack_data[M_STACK_SIZE];static int    mul_stack_data_size[M_STACK_SIZE];static int    M_mul_stack_ptr;static int    exp_stack[100];static int    exp_stack_ptr;static UCHAR  *fmul_a1, *fmul_a0, *fmul_a9, *fmul_b1, *fmul_b0, 	      *fmul_b9, *fmul_t0;static int    stmp, itmp, mii;static UCHAR  numdiv, numrem;extern void   M_fast_multiply(M_APM, M_APM, M_APM);extern void   M_fmul_2(UCHAR *, UCHAR *, UCHAR *, int);extern void   M_fmul_3(UCHAR *, UCHAR *, UCHAR *, int);extern void   M_fmul_add(UCHAR *, UCHAR *, int, int);extern int    M_fmul_subtract(UCHAR *, UCHAR *, UCHAR *, int);extern int    M_next_power_of_2(int);extern int    M_get_stack_ptr(int);extern void   M_mapm_split(UCHAR *, UCHAR *, UCHAR *, int);extern void   M_push_mul_int(int);extern int    M_pop_mul_int(void);/* *      the following algorithm is used in the fast multiply routine *	(sometimes called the divide-and-conquer technique.) * *	assume we have 2 numbers (a & b) with 2N digits.  * *      let : a = (2^N) * A1 + A0 , b = (2^N) * B1 + B0       * *	where 'A1' is the 'most significant half' of 'a' and  *      'A0' is the 'least significant half' of 'a'. Same for  *	B1 and B0. * *	Now use the identity : * *               2N   N            N                    N *	ab  =  (2  + 2 ) A1B1  +  2 (A1-A0)(B0-B1)  + (2 + 1)A0B0 * * *      The original problem of multiplying 2 (2N) digit numbers has *	been reduced to 3 multiplications of N digit numbers plus some *	additions, subtractions, and shifts. * *	The fast multiplication algorithm used here uses the above  *	identity in a recursive process. Knuth in "The Art of Computer *	Programming" shows that this algorithm has a significantly  *	faster run time when N is large. * *      If N = 10,000, the traditional multiplication algorithm will  *	require N^2 multiplications = 100,000,000. * *      The new algorithm is proportional to N ^ (log(3) / log(2)) *	or approx N ^ 1.585. * *	So if N = 10,000, the new algorithm will only require  *      2,187,006 multiplications. *//****************************************************************************/void	M_push_mul_int(val)int	val;{exp_stack[++exp_stack_ptr] = val;}/****************************************************************************/int	M_pop_mul_int(){return(exp_stack[exp_stack_ptr--]);}/****************************************************************************/void   	M_mapm_split(x1, x0, xin, nbytes)UCHAR   *x1, *x0, *xin;int	nbytes;{memcpy(x1, xin, nbytes);memcpy(x0, (xin + nbytes), nbytes);}/****************************************************************************/void	M_fast_multiply(rr,aa,bb)M_APM	rr, aa, bb;{M_APM   ain, bin;void	*vp;int	ii, k, nexp, sign;if (firsttimef)  {   firsttimef = FALSE;   for (k=0; k < M_STACK_SIZE; k++)     mul_stack_data_size[k] = 0;  }exp_stack_ptr   = -1;M_mul_stack_ptr = -1;  ain = M_get_stack_var();bin = M_get_stack_var();m_apm_copy(ain, aa);m_apm_copy(bin, bb);sign = ain->m_apm_sign * bin->m_apm_sign;nexp = ain->m_apm_exponent + bin->m_apm_exponent;if (ain->m_apm_datalength >= bin->m_apm_datalength)  ii = ain->m_apm_datalength;else  ii = bin->m_apm_datalength;ii = (ii + 1) >> 1;ii = M_next_power_of_2(ii);k = 2 * ii;                   /* required size of result, in bytes  */M_apm_pad(ain, k);            /* fill out the data so the number of */M_apm_pad(bin, k);            /* bytes is an exact power of 2       */if (k >= rr->m_apm_malloclength)  {   if ((vp = realloc(rr->m_apm_data,(k + 256))) == NULL)     {      fprintf(stderr,"\'M_fast_multiply\', Out of memory\n");      exit(16);     }     rr->m_apm_malloclength = k + 252;   rr->m_apm_data = (UCHAR *)vp;  }M_fmul_2(rr->m_apm_data, ain->m_apm_data, bin->m_apm_data, ii);rr->m_apm_sign       = sign;rr->m_apm_exponent   = nexp;rr->m_apm_datalength = 4 * ii;M_apm_normalize(rr);M_restore_stack(2);}/****************************************************************************/void	M_fmul_2(rr, aa, bb, sz)UCHAR	*rr, *aa, *bb;int	sz;{if (sz == 1)  {   itmp = (int)(*aa) * (int)(*bb);   M_get_div_rem(itmp,&numdiv,&numrem);   rr[0] = numdiv;   rr[1] = numrem;   return;  }memset(rr, 0, (2 * sz));    /* zero out the result */mii = sz >> 1;itmp = M_get_stack_ptr(mii);M_push_mul_int(itmp);fmul_a1 = mul_stack_data[itmp];itmp    = M_get_stack_ptr(mii);fmul_a0 = mul_stack_data[itmp];itmp    = M_get_stack_ptr(2 * sz);fmul_a9 = mul_stack_data[itmp];itmp    = M_get_stack_ptr(mii);fmul_b1 = mul_stack_data[itmp];itmp    = M_get_stack_ptr(mii);fmul_b0 = mul_stack_data[itmp];itmp    = M_get_stack_ptr(2 * sz);fmul_b9 = mul_stack_data[itmp];itmp    = M_get_stack_ptr(2 * sz);fmul_t0 = mul_stack_data[itmp];M_mapm_split(fmul_a1, fmul_a0, aa, mii);M_mapm_split(fmul_b1, fmul_b0, bb, mii);stmp  = M_fmul_subtract(fmul_a9, fmul_a1, fmul_a0, mii);stmp *= M_fmul_subtract(fmul_b9, fmul_b0, fmul_b1, mii);M_push_mul_int(stmp);M_push_mul_int(mii);M_fmul_3(fmul_t0, fmul_a0, fmul_b0, mii);mii  = M_pop_mul_int();stmp = M_pop_mul_int();itmp = M_pop_mul_int();M_push_mul_int(itmp);M_push_mul_int(stmp);M_push_mul_int(mii);/*   to restore all stack variables ...fmul_a1 = mul_stack_data[itmp];fmul_a0 = mul_stack_data[itmp+1];fmul_a9 = mul_stack_data[itmp+2];fmul_b1 = mul_stack_data[itmp+3];fmul_b0 = mul_stack_data[itmp+4];fmul_b9 = mul_stack_data[itmp+5];fmul_t0 = mul_stack_data[itmp+6];*/fmul_a1 = mul_stack_data[itmp];fmul_b1 = mul_stack_data[itmp+3];fmul_t0 = mul_stack_data[itmp+6];memcpy((rr + sz), fmul_t0, sz);    /* first 'add', result is now zero */				   /* so we just copy in the bytes    */M_fmul_add(rr, fmul_t0, mii, sz);M_fmul_3(fmul_t0, fmul_a1, fmul_b1, mii);mii  = M_pop_mul_int();stmp = M_pop_mul_int();itmp = M_pop_mul_int();M_push_mul_int(itmp);M_push_mul_int(stmp);M_push_mul_int(mii);fmul_a9 = mul_stack_data[itmp+2];fmul_b9 = mul_stack_data[itmp+5];fmul_t0 = mul_stack_data[itmp+6];M_fmul_add(rr, fmul_t0, 0, sz);M_fmul_add(rr, fmul_t0, mii, sz);if (stmp != 0)  M_fmul_3(fmul_t0, fmul_a9, fmul_b9, mii);mii  = M_pop_mul_int();stmp = M_pop_mul_int();itmp = M_pop_mul_int();fmul_t0 = mul_stack_data[itmp+6];/* *  if the sign of (A1 - A0)(B0 - B1) is positive, ADD to *  the result. if it is negative, SUBTRACT from the result. */if (stmp < 0)  {   fmul_a9 = mul_stack_data[itmp+2];   fmul_b9 = mul_stack_data[itmp+5];   memset(fmul_b9, 0, (2 * sz));    memcpy((fmul_b9 + mii), fmul_t0, sz);    M_fmul_subtract(fmul_a9, rr, fmul_b9, (2 * sz));   memcpy(rr, fmul_a9, (2 * sz));  }if (stmp > 0)  M_fmul_add(rr, fmul_t0, mii, sz);M_mul_stack_ptr -= 7;}/****************************************************************************/void	M_fmul_3(rr, aa, bb, sz)UCHAR	*rr, *aa, *bb;int	sz;{if (sz == 1)  {   itmp = (int)(*aa) * (int)(*bb);   M_get_div_rem(itmp,&numdiv,&numrem);   rr[0] = numdiv;   rr[1] = numrem;   return;  }memset(rr, 0, (2 * sz));    /* zero out the result */mii = sz >> 1;itmp = M_get_stack_ptr(mii);M_push_mul_int(itmp);fmul_a1 = mul_stack_data[itmp];itmp    = M_get_stack_ptr(mii);fmul_a0 = mul_stack_data[itmp];itmp    = M_get_stack_ptr(2 * sz);fmul_a9 = mul_stack_data[itmp];itmp    = M_get_stack_ptr(mii);fmul_b1 = mul_stack_data[itmp];itmp    = M_get_stack_ptr(mii);fmul_b0 = mul_stack_data[itmp];itmp    = M_get_stack_ptr(2 * sz);fmul_b9 = mul_stack_data[itmp];itmp    = M_get_stack_ptr(2 * sz);fmul_t0 = mul_stack_data[itmp];M_mapm_split(fmul_a1, fmul_a0, aa, mii);M_mapm_split(fmul_b1, fmul_b0, bb, mii);stmp  = M_fmul_subtract(fmul_a9, fmul_a1, fmul_a0, mii);stmp *= M_fmul_subtract(fmul_b9, fmul_b0, fmul_b1, mii);M_push_mul_int(stmp);M_push_mul_int(mii);M_fmul_2(fmul_t0, fmul_a0, fmul_b0, mii);mii  = M_pop_mul_int();stmp = M_pop_mul_int();itmp = M_pop_mul_int();M_push_mul_int(itmp);M_push_mul_int(stmp);M_push_mul_int(mii);/*   to restore all stack variables ...fmul_a1 = mul_stack_data[itmp];fmul_a0 = mul_stack_data[itmp+1];fmul_a9 = mul_stack_data[itmp+2];fmul_b1 = mul_stack_data[itmp+3];fmul_b0 = mul_stack_data[itmp+4];fmul_b9 = mul_stack_data[itmp+5];fmul_t0 = mul_stack_data[itmp+6];*/fmul_a1 = mul_stack_data[itmp];fmul_b1 = mul_stack_data[itmp+3];fmul_t0 = mul_stack_data[itmp+6];memcpy((rr + sz), fmul_t0, sz);    /* first 'add', result is now zero */				   /* so we just copy in the bytes    */M_fmul_add(rr, fmul_t0, mii, sz);M_fmul_2(fmul_t0, fmul_a1, fmul_b1, mii);mii  = M_pop_mul_int();stmp = M_pop_mul_int();itmp = M_pop_mul_int();M_push_mul_int(itmp);M_push_mul_int(stmp);M_push_mul_int(mii);fmul_a9 = mul_stack_data[itmp+2];fmul_b9 = mul_stack_data[itmp+5];fmul_t0 = mul_stack_data[itmp+6];M_fmul_add(rr, fmul_t0, 0, sz);M_fmul_add(rr, fmul_t0, mii, sz);if (stmp != 0)  M_fmul_2(fmul_t0, fmul_a9, fmul_b9, mii);mii  = M_pop_mul_int();stmp = M_pop_mul_int();itmp = M_pop_mul_int();fmul_t0 = mul_stack_data[itmp+6];/* *  if the sign of (A1 - A0)(B0 - B1) is positive, ADD to *  the result. if it is negative, SUBTRACT from the result. */if (stmp < 0)  {   fmul_a9 = mul_stack_data[itmp+2];   fmul_b9 = mul_stack_data[itmp+5];   memset(fmul_b9, 0, (2 * sz));    memcpy((fmul_b9 + mii), fmul_t0, sz);    M_fmul_subtract(fmul_a9, rr, fmul_b9, (2 * sz));   memcpy(rr, fmul_a9, (2 * sz));  }if (stmp > 0)  M_fmul_add(rr, fmul_t0, mii, sz);M_mul_stack_ptr -= 7;}/****************************************************************************//* *	special addition function for use with the fast multiply operation */void    M_fmul_add(r, a, offset, sz)UCHAR   *r, *a;int	offset, sz;{int	i, j;UCHAR   carry;carry = 0;j = offset + sz;i = sz;while (TRUE)  {   r[--j] += carry + a[--i];   if (r[j] >= 100)     {      r[j] -= 100;      carry = 1;     }   else      carry = 0;   if (i == 0)     break;  }if (carry)  {   while (TRUE)     {      r[--j] += 1;         if (r[j] < 100)        break;      r[j] -= 100;     }  }}/****************************************************************************//* *	special subtraction function for use with the fast multiply operation */int     M_fmul_subtract(r, a, b, sz)UCHAR   *r, *a, *b;int     sz;{int	k, jtmp, sflag, nb, borrow;nb    = sz;sflag = 0;      /* sign flag: assume the numbers are equal *//* *   find if a > b (so we perform a-b) *   or      a < b (so we perform b-a) */for (k=0; k < nb; k++)  {   if (a[k] < b[k])     {      sflag = -1;      break;     }   if (a[k] > b[k])     {      sflag = 1;      break;     }  }if (sflag == 0)  {   memset(r, 0, nb);            /* zero out the result */  }else  {   k = nb;   borrow = 0;   while (TRUE)     {      k--;      if (sflag == 1)        jtmp = (int)a[k] - (int)b[k] - borrow;      else        jtmp = (int)b[k] - (int)a[k] - borrow;      if (jtmp >= 0)        {         r[k] = (UCHAR)jtmp;         borrow = 0;        }      else        {         r[k] = (UCHAR)(100 + jtmp);         borrow = 1;        }      if (k == 0)        break;     }  }return(sflag);}/****************************************************************************/int     M_next_power_of_2(n)int	n;{int     ct, k;if (n <= 2)  return(n);k  = 2;ct = 0;while (TRUE)  {   if (k >= n)     break;   k = k << 1;   if (++ct == 33)     {      fprintf(stderr,         "\'M_next_power_of_2\', ERROR :sizeof(int) too small ??\n");      exit(4);     }  }return(k);}/****************************************************************************/int	M_get_stack_ptr(sz)int	sz;{int	i, k;UCHAR   *cp;k = ++M_mul_stack_ptr;/* if size is 0, just need to malloc and return */if (mul_stack_data_size[k] == 0)  {   if ((i = sz) < 16)     i = 16;   if ((cp = (UCHAR *)malloc(i + 4)) == NULL)     {      fprintf(stderr,"\'M_get_stack_ptr\', Out of memory\n");      exit(16);     }   mul_stack_data[k]      = cp;   mul_stack_data_size[k] = i;  }else        /* it has been malloc'ed, see if it's big enough */  {   if (sz > mul_stack_data_size[k])     {      cp = mul_stack_data[k];      if ((cp = (UCHAR *)realloc(cp, (sz + 4))) == NULL)        {         fprintf(stderr,"\'M_get_stack_ptr\', Out of memory\n");         exit(16);        }         mul_stack_data[k]      = cp;      mul_stack_data_size[k] = sz;     }  }return(k);}/****************************************************************************/

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -