📄 mul_n.c
字号:
/* mpn_mul_n and helper function -- Multiply/square natural numbers. THE HELPER FUNCTIONS IN THIS FILE (meaning everything except mpn_mul_n) ARE INTERNAL FUNCTIONS WITH MUTABLE INTERFACES. IT IS ONLY SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST GUARANTEED THAT THEY'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.Copyright 1991, 1993, 1994, 1996, 1997, 1998, 1999, 2000, 2001, 2002 FreeSoftware Foundation, Inc.This file is part of the GNU MP Library.The GNU MP Library is free software; you can redistribute it and/or modifyit under the terms of the GNU Lesser General Public License as published bythe Free Software Foundation; either version 2.1 of the License, or (at youroption) any later version.The GNU MP Library is distributed in the hope that it will be useful, butWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITYor FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General PublicLicense for more details.You should have received a copy of the GNU Lesser General Public Licensealong with the GNU MP Library; see the file COPYING.LIB. If not, write tothe Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,MA 02111-1307, USA. */#include "gmp.h"#include "gmp-impl.h"#include "longlong.h"#if GMP_NAIL_BITS != 0/* The open-coded interpolate3 stuff has not been generalized for nails. */#define USE_MORE_MPN 1#endif#ifndef USE_MORE_MPN#if !defined (__alpha) && !defined (__mips)/* For all other machines, we want to call mpn functions for the compund operations instead of open-coding them. */#define USE_MORE_MPN 1#endif#endif/*== Function declarations =================================================*/static void evaluate3 _PROTO ((mp_ptr, mp_ptr, mp_ptr, mp_ptr, mp_ptr, mp_ptr, mp_srcptr, mp_srcptr, mp_srcptr, mp_size_t, mp_size_t));static void interpolate3 _PROTO ((mp_srcptr, mp_ptr, mp_ptr, mp_ptr, mp_srcptr, mp_ptr, mp_ptr, mp_ptr, mp_size_t, mp_size_t));static mp_limb_t add2Times _PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t));/*-- mpn_kara_mul_n ---------------------------------------------------------------*//* Multiplies using 3 half-sized mults and so on recursively. * p[0..2*n-1] := product of a[0..n-1] and b[0..n-1]. * No overlap of p[...] with a[...] or b[...]. * ws is workspace. */voidmpn_kara_mul_n (mp_ptr p, mp_srcptr a, mp_srcptr b, mp_size_t n, mp_ptr ws){ mp_limb_t w, w0, w1; mp_size_t n2; mp_srcptr x, y; mp_size_t i; int sign; n2 = n >> 1; ASSERT (n2 > 0); if ((n & 1) != 0) { /* Odd length. */ mp_size_t n1, n3, nm1; n3 = n - n2; sign = 0; w = a[n2]; if (w != 0) w -= mpn_sub_n (p, a, a + n3, n2); else { i = n2; do { --i; w0 = a[i]; w1 = a[n3 + i]; } while (w0 == w1 && i != 0); if (w0 < w1) { x = a + n3; y = a; sign = ~0; } else { x = a; y = a + n3; } mpn_sub_n (p, x, y, n2); } p[n2] = w; w = b[n2]; if (w != 0) w -= mpn_sub_n (p + n3, b, b + n3, n2); else { i = n2; do { --i; w0 = b[i]; w1 = b[n3 + i]; } while (w0 == w1 && i != 0); if (w0 < w1) { x = b + n3; y = b; sign = ~sign; } else { x = b; y = b + n3; } mpn_sub_n (p + n3, x, y, n2); } p[n] = w; n1 = n + 1; if (n2 < MUL_KARATSUBA_THRESHOLD) { if (n3 < MUL_KARATSUBA_THRESHOLD) { mpn_mul_basecase (ws, p, n3, p + n3, n3); mpn_mul_basecase (p, a, n3, b, n3); } else { mpn_kara_mul_n (ws, p, p + n3, n3, ws + n1); mpn_kara_mul_n (p, a, b, n3, ws + n1); } mpn_mul_basecase (p + n1, a + n3, n2, b + n3, n2); } else { mpn_kara_mul_n (ws, p, p + n3, n3, ws + n1); mpn_kara_mul_n (p, a, b, n3, ws + n1); mpn_kara_mul_n (p + n1, a + n3, b + n3, n2, ws + n1); } if (sign) mpn_add_n (ws, p, ws, n1); else mpn_sub_n (ws, p, ws, n1); nm1 = n - 1; if (mpn_add_n (ws, p + n1, ws, nm1)) { mp_limb_t x = (ws[nm1] + 1) & GMP_NUMB_MASK; ws[nm1] = x; if (x == 0) ws[n] = (ws[n] + 1) & GMP_NUMB_MASK; } if (mpn_add_n (p + n3, p + n3, ws, n1)) { mpn_incr_u (p + n1 + n3, 1); } } else { /* Even length. */ i = n2; do { --i; w0 = a[i]; w1 = a[n2 + i]; } while (w0 == w1 && i != 0); sign = 0; if (w0 < w1) { x = a + n2; y = a; sign = ~0; } else { x = a; y = a + n2; } mpn_sub_n (p, x, y, n2); i = n2; do { --i; w0 = b[i]; w1 = b[n2 + i]; } while (w0 == w1 && i != 0); if (w0 < w1) { x = b + n2; y = b; sign = ~sign; } else { x = b; y = b + n2; } mpn_sub_n (p + n2, x, y, n2); /* Pointwise products. */ if (n2 < MUL_KARATSUBA_THRESHOLD) { mpn_mul_basecase (ws, p, n2, p + n2, n2); mpn_mul_basecase (p, a, n2, b, n2); mpn_mul_basecase (p + n, a + n2, n2, b + n2, n2); } else { mpn_kara_mul_n (ws, p, p + n2, n2, ws + n); mpn_kara_mul_n (p, a, b, n2, ws + n); mpn_kara_mul_n (p + n, a + n2, b + n2, n2, ws + n); } /* Interpolate. */ if (sign) w = mpn_add_n (ws, p, ws, n); else w = -mpn_sub_n (ws, p, ws, n); w += mpn_add_n (ws, p + n, ws, n); w += mpn_add_n (p + n2, p + n2, ws, n); MPN_INCR_U (p + n2 + n, 2 * n - (n2 + n), w); }}voidmpn_kara_sqr_n (mp_ptr p, mp_srcptr a, mp_size_t n, mp_ptr ws){ mp_limb_t w, w0, w1; mp_size_t n2; mp_srcptr x, y; mp_size_t i; n2 = n >> 1; ASSERT (n2 > 0); if ((n & 1) != 0) { /* Odd length. */ mp_size_t n1, n3, nm1; n3 = n - n2; w = a[n2]; if (w != 0) w -= mpn_sub_n (p, a, a + n3, n2); else { i = n2; do { --i; w0 = a[i]; w1 = a[n3 + i]; } while (w0 == w1 && i != 0); if (w0 < w1) { x = a + n3; y = a; } else { x = a; y = a + n3; } mpn_sub_n (p, x, y, n2); } p[n2] = w; n1 = n + 1; /* n2 is always either n3 or n3-1 so maybe the two sets of tests here could be combined. But that's not important, since the tests will take a miniscule amount of time compared to the function calls. */ if (BELOW_THRESHOLD (n3, SQR_BASECASE_THRESHOLD)) { mpn_mul_basecase (ws, p, n3, p, n3); mpn_mul_basecase (p, a, n3, a, n3); } else if (BELOW_THRESHOLD (n3, SQR_KARATSUBA_THRESHOLD)) { mpn_sqr_basecase (ws, p, n3); mpn_sqr_basecase (p, a, n3); } else { mpn_kara_sqr_n (ws, p, n3, ws + n1); /* (x-y)^2 */ mpn_kara_sqr_n (p, a, n3, ws + n1); /* x^2 */ } if (BELOW_THRESHOLD (n2, SQR_BASECASE_THRESHOLD)) mpn_mul_basecase (p + n1, a + n3, n2, a + n3, n2); else if (BELOW_THRESHOLD (n2, SQR_KARATSUBA_THRESHOLD)) mpn_sqr_basecase (p + n1, a + n3, n2); else mpn_kara_sqr_n (p + n1, a + n3, n2, ws + n1); /* y^2 */ /* Since x^2+y^2-(x-y)^2 = 2xy >= 0 there's no need to track the borrow from mpn_sub_n. If it occurs then it'll be cancelled by a carry from ws[n]. Further, since 2xy fits in n1 limbs there won't be any carry out of ws[n] other than cancelling that borrow. */ mpn_sub_n (ws, p, ws, n1); /* x^2-(x-y)^2 */ nm1 = n - 1; if (mpn_add_n (ws, p + n1, ws, nm1)) /* x^2+y^2-(x-y)^2 = 2xy */ { mp_limb_t x = (ws[nm1] + 1) & GMP_NUMB_MASK; ws[nm1] = x; if (x == 0) ws[n] = (ws[n] + 1) & GMP_NUMB_MASK; } if (mpn_add_n (p + n3, p + n3, ws, n1)) { mpn_incr_u (p + n1 + n3, 1); } } else { /* Even length. */ i = n2; do { --i; w0 = a[i]; w1 = a[n2 + i]; } while (w0 == w1 && i != 0); if (w0 < w1) { x = a + n2; y = a; } else { x = a; y = a + n2; } mpn_sub_n (p, x, y, n2); /* Pointwise products. */ if (BELOW_THRESHOLD (n2, SQR_BASECASE_THRESHOLD)) { mpn_mul_basecase (ws, p, n2, p, n2); mpn_mul_basecase (p, a, n2, a, n2); mpn_mul_basecase (p + n, a + n2, n2, a + n2, n2); } else if (BELOW_THRESHOLD (n2, SQR_KARATSUBA_THRESHOLD)) { mpn_sqr_basecase (ws, p, n2); mpn_sqr_basecase (p, a, n2); mpn_sqr_basecase (p + n, a + n2, n2); } else { mpn_kara_sqr_n (ws, p, n2, ws + n); mpn_kara_sqr_n (p, a, n2, ws + n); mpn_kara_sqr_n (p + n, a + n2, n2, ws + n); } /* Interpolate. */ w = -mpn_sub_n (ws, p, ws, n); w += mpn_add_n (ws, p + n, ws, n); w += mpn_add_n (p + n2, p + n2, ws, n); MPN_INCR_U (p + n2 + n, 2 * n - (n2 + n), w); }}/*-- add2Times -------------------------------------------------------------*//* z[] = x[] + 2 * y[] Note that z and x might point to the same vectors. FIXME: gcc won't inline this because it uses alloca. */#if USE_MORE_MPNstatic inline mp_limb_tadd2Times (mp_ptr z, mp_srcptr x, mp_srcptr y, mp_size_t n){ mp_ptr t; mp_limb_t c; TMP_DECL (marker); TMP_MARK (marker); t = (mp_ptr) TMP_ALLOC (n * BYTES_PER_MP_LIMB); c = mpn_lshift (t, y, n, 1); c += mpn_add_n (z, x, t, n); TMP_FREE (marker); return c;}#elsestatic mp_limb_tadd2Times (mp_ptr z, mp_srcptr x, mp_srcptr y, mp_size_t n){ mp_limb_t c, v, w; ASSERT (n > 0); v = *x; w = *y; c = w >> (BITS_PER_MP_LIMB - 1); w <<= 1; v += w; c += v < w; *z = v; ++x; ++y; ++z; while (--n) { v = *x; w = *y; v += c; c = v < c; c += w >> (BITS_PER_MP_LIMB - 1); w <<= 1; v += w; c += v < w; *z = v; ++x; ++y; ++z; } return c;}#endif/*-- evaluate3 -------------------------------------------------------------*//* Evaluates: * ph := 4*A+2*B+C * p1 := A+B+C * p2 := A+2*B+4*C * where: * ph[], p1[], p2[], A[] and B[] all have length len, * C[] has length len2 with len-len2 = 0, 1 or 2. * Returns top words (overflow) at pth, pt1 and pt2 respectively. */#if USE_MORE_MPNstatic voidevaluate3 (mp_ptr ph, mp_ptr p1, mp_ptr p2, mp_ptr pth, mp_ptr pt1, mp_ptr pt2, mp_srcptr A, mp_srcptr B, mp_srcptr C, mp_size_t len,mp_size_t len2){ mp_limb_t c, d, e; ASSERT (len - len2 <= 2); e = mpn_lshift (p1, B, len, 1); c = mpn_lshift (ph, A, len, 2); c += e + mpn_add_n (ph, ph, p1, len); d = mpn_add_n (ph, ph, C, len2); if (len2 == len) c += d; else c += mpn_add_1 (ph + len2, ph + len2, len-len2, d); ASSERT (c < 7); *pth = c; c = mpn_lshift (p2, C, len2, 2);#if 1 if (len2 != len) { p2[len-1] = 0; p2[len2] = c; c = 0; } c += e + mpn_add_n (p2, p2, p1, len);#else d = mpn_add_n (p2, p2, p1, len2); c += d; if (len2 != len) c = mpn_add_1 (p2+len2, p1+len2, len-len2, c); c += e;#endif c += mpn_add_n (p2, p2, A, len); ASSERT (c < 7); *pt2 = c; c = mpn_add_n (p1, A, B, len); d = mpn_add_n (p1, p1, C, len2); if (len2 == len) c += d; else c += mpn_add_1 (p1+len2, p1+len2, len-len2, d); ASSERT (c < 3); *pt1 = c;}#elsestatic voidevaluate3 (mp_ptr ph, mp_ptr p1, mp_ptr p2, mp_ptr pth, mp_ptr pt1, mp_ptr pt2, mp_srcptr A, mp_srcptr B, mp_srcptr C, mp_size_t l, mp_size_t ls){ mp_limb_t a,b,c, i, t, th,t1,t2, vh,v1,v2; ASSERT (l - ls <= 2); th = t1 = t2 = 0; for (i = 0; i < l; ++i) { a = *A; b = *B; c = i < ls ? *C : 0; /* TO DO: choose one of the following alternatives. */#if 0 t = a << 2; vh = th + t; th = vh < t; th += a >> (BITS_PER_MP_LIMB - 2); t = b << 1; vh += t; th += vh < t; th += b >> (BITS_PER_MP_LIMB - 1); vh += c; th += vh < c;#else vh = th + c; th = vh < c; t = b << 1; vh += t; th += vh < t; th += b >> (BITS_PER_MP_LIMB - 1); t = a << 2; vh += t; th += vh < t; th += a >> (BITS_PER_MP_LIMB - 2);#endif v1 = t1 + a; t1 = v1 < a; v1 += b; t1 += v1 < b; v1 += c; t1 += v1 < c; v2 = t2 + a; t2 = v2 < a; t = b << 1; v2 += t; t2 += v2 < t; t2 += b >> (BITS_PER_MP_LIMB - 1); t = c << 2; v2 += t; t2 += v2 < t; t2 += c >> (BITS_PER_MP_LIMB - 2); *ph = vh; *p1 = v1; *p2 = v2; ++A; ++B; ++C; ++ph; ++p1; ++p2; } ASSERT (th < 7); ASSERT (t1 < 3);
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -