⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 mul_n.c

📁 a very popular packet of cryptography tools,it encloses the most common used algorithm and protocols
💻 C
📖 第 1 页 / 共 2 页
字号:
  ASSERT (t2 < 7);  *pth = th;  *pt1 = t1;  *pt2 = t2;}#endif/*-- interpolate3 ----------------------------------------------------------*//* Interpolates B, C, D (in-place) from: *   16*A+8*B+4*C+2*D+E *   A+B+C+D+E *   A+2*B+4*C+8*D+16*E * where: *   A[], B[], C[] and D[] all have length l, *   E[] has length ls with l-ls = 0, 2 or 4. * * Reads top words (from earlier overflow) from ptb, ptc and ptd, * and returns new top words there. */#if USE_MORE_MPNstatic voidinterpolate3 (mp_srcptr A, mp_ptr B, mp_ptr C, mp_ptr D, mp_srcptr E,	      mp_ptr ptb, mp_ptr ptc, mp_ptr ptd, mp_size_t len,mp_size_t len2){  mp_ptr ws;  mp_limb_t t, tb,tc,td;  TMP_DECL (marker);  TMP_MARK (marker);  ASSERT (len - len2 == 0 || len - len2 == 2 || len - len2 == 4);  /* Let x1, x2, x3 be the values to interpolate.  We have:   *	     b = 16*a + 8*x1 + 4*x2 + 2*x3 +	e   *	     c =    a +	  x1 +	 x2 +	x3 +	e   *	     d =    a + 2*x1 + 4*x2 + 8*x3 + 16*e   */  ws = (mp_ptr) TMP_ALLOC (len * BYTES_PER_MP_LIMB);  tb = *ptb; tc = *ptc; td = *ptd;  /* b := b - 16*a -	e   * c := c -	 a -	e   * d := d -	 a - 16*e   */  t = mpn_lshift (ws, A, len, 4);  tb -= t + mpn_sub_n (B, B, ws, len);  t = mpn_sub_n (B, B, E, len2);  if (len2 == len)    tb -= t;  else    tb -= mpn_sub_1 (B+len2, B+len2, len-len2, t);  tc -= mpn_sub_n (C, C, A, len);  t = mpn_sub_n (C, C, E, len2);  if (len2 == len)    tc -= t;  else    tc -= mpn_sub_1 (C+len2, C+len2, len-len2, t);  t = mpn_lshift (ws, E, len2, 4);  t += mpn_add_n (ws, ws, A, len2);#if 1  if (len2 != len)    t = mpn_add_1 (ws+len2, A+len2, len-len2, t);  td -= t + mpn_sub_n (D, D, ws, len);#else  t += mpn_sub_n (D, D, ws, len2);  if (len2 != len)    {      t = mpn_sub_1 (D+len2, D+len2, len-len2, t);      t += mpn_sub_n (D+len2, D+len2, A+len2, len-len2);    }  td -= t;#endif  /* b, d := b + d, b - d */#ifdef HAVE_MPN_ADD_SUB_N  /* #error TO DO ... */#else  t = tb + td + mpn_add_n (ws, B, D, len);  td = (tb - td - mpn_sub_n (D, B, D, len)) & GMP_NUMB_MASK;  tb = t;  MPN_COPY (B, ws, len);#endif  /* b := b-8*c */  t = 8 * tc + mpn_lshift (ws, C, len, 3);  tb -= t + mpn_sub_n (B, B, ws, len);  /* c := 2*c - b */  tc = 2 * tc + mpn_lshift (C, C, len, 1);  tc -= tb + mpn_sub_n (C, C, B, len);  /* d := d/3 */  td = ((td - mpn_divexact_by3 (D, D, len)) * MODLIMB_INVERSE_3) & GMP_NUMB_MASK;  /* b, d := b + d, b - d */#ifdef HAVE_MPN_ADD_SUB_N  /* #error TO DO ... */#else  t = (tb + td + mpn_add_n (ws, B, D, len)) & GMP_NUMB_MASK;  td = (tb - td - mpn_sub_n (D, B, D, len)) & GMP_NUMB_MASK;  tb = t;  MPN_COPY (B, ws, len);#endif      /* Now:       *	 b = 4*x1       *	 c = 2*x2       *	 d = 4*x3       */  ASSERT(!(*B & 3));  mpn_rshift (B, B, len, 2);  B[len-1] |= (tb << (GMP_NUMB_BITS - 2)) & GMP_NUMB_MASK;  ASSERT((mp_limb_signed_t)tb >= 0);  tb >>= 2;  ASSERT(!(*C & 1));  mpn_rshift (C, C, len, 1);  C[len-1] |= (tc << (GMP_NUMB_BITS - 1)) & GMP_NUMB_MASK;  ASSERT((mp_limb_signed_t)tc >= 0);  tc >>= 1;  ASSERT(!(*D & 3));  mpn_rshift (D, D, len, 2);  D[len-1] |= (td << (GMP_NUMB_BITS - 2)) & GMP_NUMB_MASK;  ASSERT((mp_limb_signed_t)td >= 0);  td >>= 2;#if WANT_ASSERT  ASSERT (tb < 2);  if (len == len2)    {      ASSERT (tc < 3);      ASSERT (td < 2);    }  else    {      ASSERT (tc < 2);      ASSERT (!td);    }#endif  *ptb = tb;  *ptc = tc;  *ptd = td;  TMP_FREE (marker);}#elsestatic voidinterpolate3 (mp_srcptr A, mp_ptr B, mp_ptr C, mp_ptr D, mp_srcptr E,	      mp_ptr ptb, mp_ptr ptc, mp_ptr ptd, mp_size_t l, mp_size_t ls){  mp_limb_t a,b,c,d,e,t, i, sb,sc,sd, ob,oc,od;  const mp_limb_t maskOffHalf = (~(mp_limb_t) 0) << (BITS_PER_MP_LIMB >> 1);#if WANT_ASSERT  t = l - ls;  ASSERT (t == 0 || t == 2 || t == 4);#endif  sb = sc = sd = 0;  for (i = 0; i < l; ++i)    {      mp_limb_t tb, tc, td, tt;      a = *A;      b = *B;      c = *C;      d = *D;      e = i < ls ? *E : 0;      /* Let x1, x2, x3 be the values to interpolate.  We have:       *	 b = 16*a + 8*x1 + 4*x2 + 2*x3 +    e       *	 c =	a +   x1 +   x2 +   x3 +    e       *	 d =	a + 2*x1 + 4*x2 + 8*x3 + 16*e       */      /* b := b - 16*a -    e       * c := c -    a -    e       * d := d -    a - 16*e       */      t = a << 4;      tb = -(a >> (BITS_PER_MP_LIMB - 4)) - (b < t);      b -= t;      tb -= b < e;      b -= e;      tc = -(c < a);      c -= a;      tc -= c < e;      c -= e;      td = -(d < a);      d -= a;      t = e << 4;      td = td - (e >> (BITS_PER_MP_LIMB - 4)) - (d < t);      d -= t;      /* b, d := b + d, b - d */      t = b + d;      tt = tb + td + (t < b);      td = tb - td - (b < d);      d = b - d;      b = t;      tb = tt;      /* b := b-8*c */      t = c << 3;      tb = tb - (tc << 3) - (c >> (BITS_PER_MP_LIMB - 3)) - (b < t);      b -= t;      /* c := 2*c - b */      t = c << 1;      tc = (tc << 1) + (c >> (BITS_PER_MP_LIMB - 1)) - tb - (t < b);      c = t - b;      /* d := d/3 */      d *= MODLIMB_INVERSE_3;      td = td - (d >> (BITS_PER_MP_LIMB - 1)) - (d*3 < d);      td *= MODLIMB_INVERSE_3;      /* b, d := b + d, b - d */      t = b + d;      tt = tb + td + (t < b);      td = tb - td - (b < d);      d = b - d;      b = t;      tb = tt;      /* Now:       *	 b = 4*x1       *	 c = 2*x2       *	 d = 4*x3       */      /* sb has period 2. */      b += sb;      tb += b < sb;      sb &= maskOffHalf;      sb |= sb >> (BITS_PER_MP_LIMB >> 1);      sb += tb;      /* sc has period 1. */      c += sc;      tc += c < sc;      /* TO DO: choose one of the following alternatives. */#if 1      sc = (mp_limb_signed_t) sc >> (BITS_PER_MP_LIMB - 1);      sc += tc;#else      sc = tc - ((mp_limb_signed_t) sc < 0L);#endif      /* sd has period 2. */      d += sd;      td += d < sd;      sd &= maskOffHalf;      sd |= sd >> (BITS_PER_MP_LIMB >> 1);      sd += td;      if (i != 0)	{	  B[-1] = ob | b << (BITS_PER_MP_LIMB - 2);	  C[-1] = oc | c << (BITS_PER_MP_LIMB - 1);	  D[-1] = od | d << (BITS_PER_MP_LIMB - 2);	}      ob = b >> 2;      oc = c >> 1;      od = d >> 2;      ++A; ++B; ++C; ++D; ++E;    }  /* Handle top words. */  b = *ptb;  c = *ptc;  d = *ptd;  t = b + d;  d = b - d;  b = t;  b -= c << 3;  c = (c << 1) - b;  d *= MODLIMB_INVERSE_3;  t = b + d;  d = b - d;  b = t;  b += sb;  c += sc;  d += sd;  B[-1] = ob | b << (BITS_PER_MP_LIMB - 2);  C[-1] = oc | c << (BITS_PER_MP_LIMB - 1);  D[-1] = od | d << (BITS_PER_MP_LIMB - 2);  b >>= 2;  c >>= 1;  d >>= 2;#if WANT_ASSERT  ASSERT (b < 2);  if (l == ls)    {      ASSERT (c < 3);      ASSERT (d < 2);    }  else    {      ASSERT (c < 2);      ASSERT (!d);    }#endif  *ptb = b;  *ptc = c;  *ptd = d;}#endif/*-- mpn_toom3_mul_n --------------------------------------------------------------*//* Multiplies using 5 mults of one third size and so on recursively. * p[0..2*n-1] := product of a[0..n-1] and b[0..n-1]. * No overlap of p[...] with a[...] or b[...]. * ws is workspace. *//* TO DO: If MUL_TOOM3_THRESHOLD is much bigger than MUL_KARATSUBA_THRESHOLD then the *	  recursion in mpn_toom3_mul_n() will always bottom out with mpn_kara_mul_n() *	  because the "n < MUL_KARATSUBA_THRESHOLD" test here will always be false. */#define TOOM3_MUL_REC(p, a, b, n, ws) \  do {								\    if (n < MUL_KARATSUBA_THRESHOLD)				\      mpn_mul_basecase (p, a, n, b, n);				\    else if (n < MUL_TOOM3_THRESHOLD)				\      mpn_kara_mul_n (p, a, b, n, ws);				\    else							\      mpn_toom3_mul_n (p, a, b, n, ws);				\  } while (0)voidmpn_toom3_mul_n (mp_ptr p, mp_srcptr a, mp_srcptr b, mp_size_t n, mp_ptr ws){  mp_limb_t cB,cC,cD, dB,dC,dD, tB,tC,tD;  mp_limb_t *A,*B,*C,*D,*E, *W;  mp_size_t l,l2,l3,l4,l5,ls;  /* Break n words into chunks of size l, l and ls.   * n = 3*k   => l = k,   ls = k   * n = 3*k+1 => l = k+1, ls = k-1   * n = 3*k+2 => l = k+1, ls = k   */  {    mp_limb_t m;    /* this is probably unnecessarily strict */    ASSERT (n >= MUL_TOOM3_THRESHOLD);    l = ls = n / 3;    m = n - l * 3;    if (m != 0)      ++l;    if (m == 1)      --ls;    l2 = l * 2;    l3 = l * 3;    l4 = l * 4;    l5 = l * 5;    A = p;    B = ws;    C = p + l2;    D = ws + l2;    E = p + l4;    W = ws + l4;  }  ASSERT (l >= 1);  ASSERT (ls >= 1);  /** First stage: evaluation at points 0, 1/2, 1, 2, oo. **/  evaluate3 (A, B, C, &cB, &cC, &cD, a, a + l, a + l2, l, ls);  evaluate3 (A + l, B + l, C + l, &dB, &dC, &dD, b, b + l, b + l2, l, ls);  /** Second stage: pointwise multiplies. **/  TOOM3_MUL_REC(D, C, C + l, l, W);  tD = cD*dD;  if (cD) tD += mpn_addmul_1 (D + l, C + l, l, cD);  if (dD) tD += mpn_addmul_1 (D + l, C, l, dD);  ASSERT (tD < 49);  TOOM3_MUL_REC(C, B, B + l, l, W);  tC = cC*dC;  /* TO DO: choose one of the following alternatives. */#if 0  if (cC) tC += mpn_addmul_1 (C + l, B + l, l, cC);  if (dC) tC += mpn_addmul_1 (C + l, B, l, dC);#else  if (cC)    {      if (cC == 1) tC += mpn_add_n (C + l, C + l, B + l, l);      else tC += add2Times (C + l, C + l, B + l, l);    }  if (dC)    {      if (dC == 1) tC += mpn_add_n (C + l, C + l, B, l);      else tC += add2Times (C + l, C + l, B, l);    }#endif  ASSERT (tC < 9);  TOOM3_MUL_REC(B, A, A + l, l, W);  tB = cB*dB;  if (cB) tB += mpn_addmul_1 (B + l, A + l, l, cB);  if (dB) tB += mpn_addmul_1 (B + l, A, l, dB);  ASSERT (tB < 49);  TOOM3_MUL_REC(A, a, b, l, W);  TOOM3_MUL_REC(E, a + l2, b + l2, ls, W);  /** Third stage: interpolation. **/  interpolate3 (A, B, C, D, E, &tB, &tC, &tD, l2, ls << 1);  /** Final stage: add up the coefficients. **/  tB += mpn_add_n (p + l, p + l, B, l2);  tD += mpn_add_n (p + l3, p + l3, D, l2);  MPN_INCR_U (p + l3, 2 * n - l3, tB);  MPN_INCR_U (p + l4, 2 * n - l4, tC);  MPN_INCR_U (p + l5, 2 * n - l5, tD);}/*-- mpn_toom3_sqr_n --------------------------------------------------------------*//* Like previous function but for squaring *//* FIXME: If SQR_TOOM3_THRESHOLD is big enough it might never get into the   basecase range.  Try to arrange those conditonals go dead.  */#define TOOM3_SQR_REC(p, a, n, ws)                              \  do {                                                          \    if (BELOW_THRESHOLD (n, SQR_BASECASE_THRESHOLD))            \      mpn_mul_basecase (p, a, n, a, n);                         \    else if (BELOW_THRESHOLD (n, SQR_KARATSUBA_THRESHOLD))      \      mpn_sqr_basecase (p, a, n);                               \    else if (BELOW_THRESHOLD (n, SQR_TOOM3_THRESHOLD))          \      mpn_kara_sqr_n (p, a, n, ws);                             \    else                                                        \      mpn_toom3_sqr_n (p, a, n, ws);                            \  } while (0)voidmpn_toom3_sqr_n (mp_ptr p, mp_srcptr a, mp_size_t n, mp_ptr ws){  mp_limb_t cB,cC,cD, tB,tC,tD;  mp_limb_t *A,*B,*C,*D,*E, *W;  mp_size_t l,l2,l3,l4,l5,ls;  /* Break n words into chunks of size l, l and ls.   * n = 3*k   => l = k,   ls = k   * n = 3*k+1 => l = k+1, ls = k-1   * n = 3*k+2 => l = k+1, ls = k   */  {    mp_limb_t m;    /* this is probably unnecessarily strict */    ASSERT (n >= SQR_TOOM3_THRESHOLD);    l = ls = n / 3;    m = n - l * 3;    if (m != 0)      ++l;    if (m == 1)      --ls;    l2 = l * 2;    l3 = l * 3;    l4 = l * 4;    l5 = l * 5;    A = p;    B = ws;    C = p + l2;    D = ws + l2;    E = p + l4;    W = ws + l4;  }  ASSERT (l >= 1);  ASSERT (ls >= 1);  /** First stage: evaluation at points 0, 1/2, 1, 2, oo. **/  evaluate3 (A, B, C, &cB, &cC, &cD, a, a + l, a + l2, l, ls);  /** Second stage: pointwise multiplies. **/  TOOM3_SQR_REC(D, C, l, W);  tD = cD*cD;  if (cD) tD += mpn_addmul_1 (D + l, C, l, 2*cD);  ASSERT (tD < 49);  TOOM3_SQR_REC(C, B, l, W);  tC = cC*cC;  /* TO DO: choose one of the following alternatives. */#if 0  if (cC) tC += mpn_addmul_1 (C + l, B, l, 2*cC);#else  if (cC >= 1)    {      tC += add2Times (C + l, C + l, B, l);      if (cC == 2)	tC += add2Times (C + l, C + l, B, l);    }#endif  ASSERT (tC < 9);  TOOM3_SQR_REC(B, A, l, W);  tB = cB*cB;  if (cB) tB += mpn_addmul_1 (B + l, A, l, 2*cB);  ASSERT (tB < 49);  TOOM3_SQR_REC(A, a, l, W);  TOOM3_SQR_REC(E, a + l2, ls, W);  /** Third stage: interpolation. **/  interpolate3 (A, B, C, D, E, &tB, &tC, &tD, l2, ls << 1);  /** Final stage: add up the coefficients. **/  tB += mpn_add_n (p + l, p + l, B, l2);  tD += mpn_add_n (p + l3, p + l3, D, l2);  MPN_INCR_U (p + l3, 2 * n - l3, tB);  MPN_INCR_U (p + l4, 2 * n - l4, tC);  MPN_INCR_U (p + l5, 2 * n - l5, tD);}voidmpn_mul_n (mp_ptr p, mp_srcptr a, mp_srcptr b, mp_size_t n){  ASSERT (n >= 1);  ASSERT (! MPN_OVERLAP_P (p, 2 * n, a, n));  ASSERT (! MPN_OVERLAP_P (p, 2 * n, b, n));  if (n < MUL_KARATSUBA_THRESHOLD)    mpn_mul_basecase (p, a, n, b, n);  else if (n < MUL_TOOM3_THRESHOLD)    {      /* Allocate workspace of fixed size on stack: fast! */#if TUNE_PROGRAM_BUILD      mp_limb_t ws[MPN_KARA_MUL_N_TSIZE (MUL_TOOM3_THRESHOLD_LIMIT-1)];#else      mp_limb_t ws[MPN_KARA_MUL_N_TSIZE (MUL_TOOM3_THRESHOLD-1)];#endif      mpn_kara_mul_n (p, a, b, n, ws);    }#if WANT_FFT || TUNE_PROGRAM_BUILD  else if (n < MUL_FFT_THRESHOLD)#else  else#endif    {      /* Use workspace of unknown size in heap, as stack space may       * be limited.  Since n is at least MUL_TOOM3_THRESHOLD, the       * multiplication will take much longer than malloc()/free().  */      mp_limb_t wsLen, *ws;      wsLen = MPN_TOOM3_MUL_N_TSIZE (n);      ws = __GMP_ALLOCATE_FUNC_LIMBS ((size_t) wsLen);      mpn_toom3_mul_n (p, a, b, n, ws);      __GMP_FREE_FUNC_LIMBS (ws, (size_t) wsLen);    }#if WANT_FFT || TUNE_PROGRAM_BUILD  else    {      mpn_mul_fft_full (p, a, n, b, n);    }#endif}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -