📄 mul_n.c

📁 a very popular packet of cryptography tools,it encloses the most common used algorithm and protocols
💻 C
📖 第 1 页 / 共 2 页
字号:
12 下一页
/* mpn_mul_n and helper function -- Multiply/square natural numbers.   THE HELPER FUNCTIONS IN THIS FILE (meaning everything except mpn_mul_n)   ARE INTERNAL FUNCTIONS WITH MUTABLE INTERFACES.  IT IS ONLY SAFE TO REACH   THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST GUARANTEED   THAT THEY'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.Copyright 1991, 1993, 1994, 1996, 1997, 1998, 1999, 2000, 2001, 2002 FreeSoftware Foundation, Inc.This file is part of the GNU MP Library.The GNU MP Library is free software; you can redistribute it and/or modifyit under the terms of the GNU Lesser General Public License as published bythe Free Software Foundation; either version 2.1 of the License, or (at youroption) any later version.The GNU MP Library is distributed in the hope that it will be useful, butWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITYor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General PublicLicense for more details.You should have received a copy of the GNU Lesser General Public Licensealong with the GNU MP Library; see the file COPYING.LIB.  If not, write tothe Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,MA 02111-1307, USA. */#include "gmp.h"#include "gmp-impl.h"#include "longlong.h"#if GMP_NAIL_BITS != 0/* The open-coded interpolate3 stuff has not been generalized for nails.  */#define USE_MORE_MPN 1#endif#ifndef USE_MORE_MPN#if !defined (__alpha) && !defined (__mips)/* For all other machines, we want to call mpn functions for the compund   operations instead of open-coding them.  */#define USE_MORE_MPN 1#endif#endif/*== Function declarations =================================================*/static void evaluate3 _PROTO ((mp_ptr, mp_ptr, mp_ptr,			       mp_ptr, mp_ptr, mp_ptr,			       mp_srcptr, mp_srcptr, mp_srcptr,			       mp_size_t, mp_size_t));static void interpolate3 _PROTO ((mp_srcptr,				  mp_ptr, mp_ptr, mp_ptr,				  mp_srcptr,				  mp_ptr, mp_ptr, mp_ptr,				  mp_size_t, mp_size_t));static mp_limb_t add2Times _PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t));/*-- mpn_kara_mul_n ---------------------------------------------------------------*//* Multiplies using 3 half-sized mults and so on recursively. * p[0..2*n-1] := product of a[0..n-1] and b[0..n-1]. * No overlap of p[...] with a[...] or b[...]. * ws is workspace. */voidmpn_kara_mul_n (mp_ptr p, mp_srcptr a, mp_srcptr b, mp_size_t n, mp_ptr ws){  mp_limb_t w, w0, w1;  mp_size_t n2;  mp_srcptr x, y;  mp_size_t i;  int sign;  n2 = n >> 1;  ASSERT (n2 > 0);  if ((n & 1) != 0)    {      /* Odd length. */      mp_size_t n1, n3, nm1;      n3 = n - n2;      sign = 0;      w = a[n2];      if (w != 0)	w -= mpn_sub_n (p, a, a + n3, n2);      else	{	  i = n2;	  do	    {	      --i;	      w0 = a[i];	      w1 = a[n3 + i];	    }	  while (w0 == w1 && i != 0);	  if (w0 < w1)	    {	      x = a + n3;	      y = a;	      sign = ~0;	    }	  else	    {	      x = a;	      y = a + n3;	    }	  mpn_sub_n (p, x, y, n2);	}      p[n2] = w;      w = b[n2];      if (w != 0)	w -= mpn_sub_n (p + n3, b, b + n3, n2);      else	{	  i = n2;	  do	    {	      --i;	      w0 = b[i];	      w1 = b[n3 + i];	    }	  while (w0 == w1 && i != 0);	  if (w0 < w1)	    {	      x = b + n3;	      y = b;	      sign = ~sign;	    }	  else	    {	      x = b;	      y = b + n3;	    }	  mpn_sub_n (p + n3, x, y, n2);	}      p[n] = w;      n1 = n + 1;      if (n2 < MUL_KARATSUBA_THRESHOLD)	{	  if (n3 < MUL_KARATSUBA_THRESHOLD)	    {	      mpn_mul_basecase (ws, p, n3, p + n3, n3);	      mpn_mul_basecase (p, a, n3, b, n3);	    }	  else	    {	      mpn_kara_mul_n (ws, p, p + n3, n3, ws + n1);	      mpn_kara_mul_n (p, a, b, n3, ws + n1);	    }	  mpn_mul_basecase (p + n1, a + n3, n2, b + n3, n2);	}      else	{	  mpn_kara_mul_n (ws, p, p + n3, n3, ws + n1);	  mpn_kara_mul_n (p, a, b, n3, ws + n1);	  mpn_kara_mul_n (p + n1, a + n3, b + n3, n2, ws + n1);	}      if (sign)	mpn_add_n (ws, p, ws, n1);      else	mpn_sub_n (ws, p, ws, n1);      nm1 = n - 1;      if (mpn_add_n (ws, p + n1, ws, nm1))	{	  mp_limb_t x = (ws[nm1] + 1) & GMP_NUMB_MASK;	  ws[nm1] = x;	  if (x == 0)	    ws[n] = (ws[n] + 1) & GMP_NUMB_MASK;	}      if (mpn_add_n (p + n3, p + n3, ws, n1))	{	  mpn_incr_u (p + n1 + n3, 1);	}    }  else    {      /* Even length. */      i = n2;      do	{	  --i;	  w0 = a[i];	  w1 = a[n2 + i];	}      while (w0 == w1 && i != 0);      sign = 0;      if (w0 < w1)	{	  x = a + n2;	  y = a;	  sign = ~0;	}      else	{	  x = a;	  y = a + n2;	}      mpn_sub_n (p, x, y, n2);      i = n2;      do	{	  --i;	  w0 = b[i];	  w1 = b[n2 + i];	}      while (w0 == w1 && i != 0);      if (w0 < w1)	{	  x = b + n2;	  y = b;	  sign = ~sign;	}      else	{	  x = b;	  y = b + n2;	}      mpn_sub_n (p + n2, x, y, n2);      /* Pointwise products. */      if (n2 < MUL_KARATSUBA_THRESHOLD)	{	  mpn_mul_basecase (ws, p, n2, p + n2, n2);	  mpn_mul_basecase (p, a, n2, b, n2);	  mpn_mul_basecase (p + n, a + n2, n2, b + n2, n2);	}      else	{	  mpn_kara_mul_n (ws, p, p + n2, n2, ws + n);	  mpn_kara_mul_n (p, a, b, n2, ws + n);	  mpn_kara_mul_n (p + n, a + n2, b + n2, n2, ws + n);	}      /* Interpolate. */      if (sign)	w = mpn_add_n (ws, p, ws, n);      else	w = -mpn_sub_n (ws, p, ws, n);      w += mpn_add_n (ws, p + n, ws, n);      w += mpn_add_n (p + n2, p + n2, ws, n);      MPN_INCR_U (p + n2 + n, 2 * n - (n2 + n), w);    }}voidmpn_kara_sqr_n (mp_ptr p, mp_srcptr a, mp_size_t n, mp_ptr ws){  mp_limb_t w, w0, w1;  mp_size_t n2;  mp_srcptr x, y;  mp_size_t i;  n2 = n >> 1;  ASSERT (n2 > 0);  if ((n & 1) != 0)    {      /* Odd length. */      mp_size_t n1, n3, nm1;      n3 = n - n2;      w = a[n2];      if (w != 0)	w -= mpn_sub_n (p, a, a + n3, n2);      else	{	  i = n2;	  do	    {	      --i;	      w0 = a[i];	      w1 = a[n3 + i];	    }	  while (w0 == w1 && i != 0);	  if (w0 < w1)	    {	      x = a + n3;	      y = a;	    }	  else	    {	      x = a;	      y = a + n3;	    }	  mpn_sub_n (p, x, y, n2);	}      p[n2] = w;      n1 = n + 1;      /* n2 is always either n3 or n3-1 so maybe the two sets of tests here	 could be combined.  But that's not important, since the tests will	 take a miniscule amount of time compared to the function calls.  */      if (BELOW_THRESHOLD (n3, SQR_BASECASE_THRESHOLD))	{	  mpn_mul_basecase (ws, p, n3, p, n3);	  mpn_mul_basecase (p,  a, n3, a, n3);	}      else if (BELOW_THRESHOLD (n3, SQR_KARATSUBA_THRESHOLD))	{	  mpn_sqr_basecase (ws, p, n3);	  mpn_sqr_basecase (p,  a, n3);	}      else	{	  mpn_kara_sqr_n   (ws, p, n3, ws + n1);	 /* (x-y)^2 */	  mpn_kara_sqr_n   (p,  a, n3, ws + n1);	 /* x^2	    */	}      if (BELOW_THRESHOLD (n2, SQR_BASECASE_THRESHOLD))	mpn_mul_basecase (p + n1, a + n3, n2, a + n3, n2);      else if (BELOW_THRESHOLD (n2, SQR_KARATSUBA_THRESHOLD))	mpn_sqr_basecase (p + n1, a + n3, n2);      else	mpn_kara_sqr_n   (p + n1, a + n3, n2, ws + n1);	 /* y^2	    */      /* Since x^2+y^2-(x-y)^2 = 2xy >= 0 there's no need to track the	 borrow from mpn_sub_n.	 If it occurs then it'll be cancelled by a	 carry from ws[n].  Further, since 2xy fits in n1 limbs there won't	 be any carry out of ws[n] other than cancelling that borrow. */      mpn_sub_n (ws, p, ws, n1);	     /* x^2-(x-y)^2 */      nm1 = n - 1;      if (mpn_add_n (ws, p + n1, ws, nm1))   /* x^2+y^2-(x-y)^2 = 2xy */	{	  mp_limb_t x = (ws[nm1] + 1) & GMP_NUMB_MASK;	  ws[nm1] = x;	  if (x == 0)	    ws[n] = (ws[n] + 1) & GMP_NUMB_MASK;	}      if (mpn_add_n (p + n3, p + n3, ws, n1))	{	  mpn_incr_u (p + n1 + n3, 1);	}    }  else    {      /* Even length. */      i = n2;      do	{	  --i;	  w0 = a[i];	  w1 = a[n2 + i];	}      while (w0 == w1 && i != 0);      if (w0 < w1)	{	  x = a + n2;	  y = a;	}      else	{	  x = a;	  y = a + n2;	}      mpn_sub_n (p, x, y, n2);      /* Pointwise products. */      if (BELOW_THRESHOLD (n2, SQR_BASECASE_THRESHOLD))	{	  mpn_mul_basecase (ws,    p,      n2, p,      n2);	  mpn_mul_basecase (p,     a,      n2, a,      n2);	  mpn_mul_basecase (p + n, a + n2, n2, a + n2, n2);	}      else if (BELOW_THRESHOLD (n2, SQR_KARATSUBA_THRESHOLD))	{	  mpn_sqr_basecase (ws,    p,      n2);	  mpn_sqr_basecase (p,     a,      n2);	  mpn_sqr_basecase (p + n, a + n2, n2);	}      else	{	  mpn_kara_sqr_n (ws,    p,      n2, ws + n);	  mpn_kara_sqr_n (p,     a,      n2, ws + n);	  mpn_kara_sqr_n (p + n, a + n2, n2, ws + n);	}      /* Interpolate. */      w = -mpn_sub_n (ws, p, ws, n);      w += mpn_add_n (ws, p + n, ws, n);      w += mpn_add_n (p + n2, p + n2, ws, n);      MPN_INCR_U (p + n2 + n, 2 * n - (n2 + n), w);    }}/*-- add2Times -------------------------------------------------------------*//* z[] = x[] + 2 * y[]   Note that z and x might point to the same vectors.   FIXME: gcc won't inline this because it uses alloca. */#if USE_MORE_MPNstatic inline mp_limb_tadd2Times (mp_ptr z, mp_srcptr x, mp_srcptr y, mp_size_t n){  mp_ptr t;  mp_limb_t c;  TMP_DECL (marker);  TMP_MARK (marker);  t = (mp_ptr) TMP_ALLOC (n * BYTES_PER_MP_LIMB);  c = mpn_lshift (t, y, n, 1);  c += mpn_add_n (z, x, t, n);  TMP_FREE (marker);  return c;}#elsestatic mp_limb_tadd2Times (mp_ptr z, mp_srcptr x, mp_srcptr y, mp_size_t n){  mp_limb_t c, v, w;  ASSERT (n > 0);  v = *x;  w = *y;  c = w >> (BITS_PER_MP_LIMB - 1);  w <<= 1;  v += w;  c += v < w;  *z = v;  ++x; ++y; ++z;  while (--n)    {      v = *x;      w = *y;      v += c;      c = v < c;      c += w >> (BITS_PER_MP_LIMB - 1);      w <<= 1;      v += w;      c += v < w;      *z = v;      ++x; ++y; ++z;    }  return c;}#endif/*-- evaluate3 -------------------------------------------------------------*//* Evaluates: *   ph := 4*A+2*B+C *   p1 := A+B+C *   p2 := A+2*B+4*C * where: *   ph[], p1[], p2[], A[] and B[] all have length len, *   C[] has length len2 with len-len2 = 0, 1 or 2. * Returns top words (overflow) at pth, pt1 and pt2 respectively. */#if USE_MORE_MPNstatic voidevaluate3 (mp_ptr ph, mp_ptr p1, mp_ptr p2, mp_ptr pth, mp_ptr pt1, mp_ptr pt2,	   mp_srcptr A, mp_srcptr B, mp_srcptr C, mp_size_t len,mp_size_t len2){  mp_limb_t c, d, e;  ASSERT (len - len2 <= 2);  e = mpn_lshift (p1, B, len, 1);  c = mpn_lshift (ph, A, len, 2);  c += e + mpn_add_n (ph, ph, p1, len);  d = mpn_add_n (ph, ph, C, len2);  if (len2 == len)    c += d;  else    c += mpn_add_1 (ph + len2, ph + len2, len-len2, d);  ASSERT (c < 7);  *pth = c;  c = mpn_lshift (p2, C, len2, 2);#if 1  if (len2 != len)    {      p2[len-1] = 0;      p2[len2] = c;      c = 0;    }  c += e + mpn_add_n (p2, p2, p1, len);#else  d = mpn_add_n (p2, p2, p1, len2);  c += d;  if (len2 != len)    c = mpn_add_1 (p2+len2, p1+len2, len-len2, c);  c += e;#endif  c += mpn_add_n (p2, p2, A, len);  ASSERT (c < 7);  *pt2 = c;  c = mpn_add_n (p1, A, B, len);  d = mpn_add_n (p1, p1, C, len2);  if (len2 == len)    c += d;  else    c += mpn_add_1 (p1+len2, p1+len2, len-len2, d);  ASSERT (c < 3);  *pt1 = c;}#elsestatic voidevaluate3 (mp_ptr ph, mp_ptr p1, mp_ptr p2, mp_ptr pth, mp_ptr pt1, mp_ptr pt2,	   mp_srcptr A, mp_srcptr B, mp_srcptr C, mp_size_t l, mp_size_t ls){  mp_limb_t a,b,c, i, t, th,t1,t2, vh,v1,v2;  ASSERT (l - ls <= 2);  th = t1 = t2 = 0;  for (i = 0; i < l; ++i)    {      a = *A;      b = *B;      c = i < ls ? *C : 0;      /* TO DO: choose one of the following alternatives. */#if 0      t = a << 2;      vh = th + t;      th = vh < t;      th += a >> (BITS_PER_MP_LIMB - 2);      t = b << 1;      vh += t;      th += vh < t;      th += b >> (BITS_PER_MP_LIMB - 1);      vh += c;      th += vh < c;#else      vh = th + c;      th = vh < c;      t = b << 1;      vh += t;      th += vh < t;      th += b >> (BITS_PER_MP_LIMB - 1);      t = a << 2;      vh += t;      th += vh < t;      th += a >> (BITS_PER_MP_LIMB - 2);#endif      v1 = t1 + a;      t1 = v1 < a;      v1 += b;      t1 += v1 < b;      v1 += c;      t1 += v1 < c;      v2 = t2 + a;      t2 = v2 < a;      t = b << 1;      v2 += t;      t2 += v2 < t;      t2 += b >> (BITS_PER_MP_LIMB - 1);      t = c << 2;      v2 += t;      t2 += v2 < t;      t2 += c >> (BITS_PER_MP_LIMB - 2);      *ph = vh;      *p1 = v1;      *p2 = v2;      ++A; ++B; ++C;      ++ph; ++p1; ++p2;    }  ASSERT (th < 7);  ASSERT (t1 < 3);
12 下一页
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -