📄 gmp-impl.h
字号:
} \
} while (0)
#endif
/* On the x86s repe/scasl doesn't seem useful, since it takes many cycles to
   start up and would need to strip a lot of zeros before it'd be faster
   than a simple cmpl loop.  Here are some times in cycles for
   std/repe/scasl/cld and cld/repe/scasl (the latter would be for stripping
   low zeros).

                std   cld
           P5    18    16
           P6    46    38
           K6    36    13
           K7    21    20
*/

/* Strip high zero limbs from {DST,NLIMBS} by decrementing NLIMBS until
   DST[NLIMBS-1] is non-zero or NLIMBS has reached 0.

   NLIMBS must be a modifiable lvalue and is evaluated several times.  It
   is parenthesized at every use so that an argument such as *sizep
   decrements the count rather than the pointer.  */
#ifndef MPN_NORMALIZE
#define MPN_NORMALIZE(DST, NLIMBS) \
  do { \
    while ((NLIMBS) > 0) \
      { \
        if ((DST)[(NLIMBS) - 1] != 0) \
          break; \
        (NLIMBS)--; \
      } \
  } while (0)
#endif
/* Strip high zero limbs from {DST,NLIMBS} by decrementing NLIMBS until
   DST[NLIMBS-1] is non-zero.  Unlike a bounded normalize, the loop has no
   check against NLIMBS reaching 0, so the caller must guarantee that
   NLIMBS >= 1 (asserted) and that some limb in the region is non-zero.

   NLIMBS must be a modifiable lvalue and is evaluated several times.  It
   is parenthesized at every use so that an argument such as *sizep
   decrements the count rather than the pointer.  */
#ifndef MPN_NORMALIZE_NOT_ZERO
#define MPN_NORMALIZE_NOT_ZERO(DST, NLIMBS) \
  do { \
    ASSERT ((NLIMBS) >= 1); \
    while ((DST)[(NLIMBS) - 1] == 0) \
      (NLIMBS)--; \
  } while (0)
#endif
/* Drop zero limbs from the low end of {ptr,size}: each zero low limb
   advances ptr by one and reduces size by one.  On entry low must hold
   ptr[0]; on exit it holds the new ptr[0], which is non-zero.

   The operand must be non-zero, ie. size>=1 with a non-zero limb somewhere
   in it, since the scan has no other stopping condition.  ptr, size and low
   must all be modifiable lvalues.  */
#define MPN_STRIP_LOW_ZEROS_NOT_ZERO(ptr, size, low) \
  do { \
    ASSERT ((size) >= 1); \
    ASSERT ((low) == (ptr)[0]); \
 \
    for ( ; (low) == 0; (low) = *(ptr)) \
      { \
        --(size); \
        ASSERT ((size) >= 1); \
        ++(ptr); \
      } \
  } while (0)
/* Set up X, of type mpz_t, as a temporary with room for NLIMBS limbs
   (NLIMBS >= 1).  The limb space comes from TMP_ALLOC, so X should be a
   temporary variable; it will be automatically cleared out at function
   return.

   X is copied into the local mpz_ptr __x first, which lets the macro
   accept either an mpz_ptr or an mpz_t argument.  NLIMBS is evaluated more
   than once.  */
#define MPZ_TMP_INIT(X, NLIMBS) \
  do { \
    mpz_ptr __x = (X); \
    ASSERT ((NLIMBS) >= 1); \
    __x->_mp_d = (mp_ptr) TMP_ALLOC ((NLIMBS) * BYTES_PER_MP_LIMB); \
    __x->_mp_alloc = (NLIMBS); \
  } while (0)
/* Limb pointer for z with room for n limbs.  If the current allocation
   ALLOC(z) is smaller than n the result is that of _mpz_realloc(z,n),
   otherwise it is simply PTR(z).  Both arguments may be evaluated more
   than once.  */
#define MPZ_REALLOC(z,n)  (ALLOC (z) < (n) ? _mpz_realloc (z, n) : PTR (z))
/* Non-zero if z has SIZ(z) equal to 1 and a low limb equal to 1.  The limb
   is only read once the size test has passed.  */
#define MPZ_EQUAL_1_P(z)  (SIZ (z) == 1 && PTR (z)[0] == 1)
/* MPN_FIB2_SIZE(n) is the size in limbs required by mpn_fib2_ui for fp and
   f1p.

   From Knuth vol 1 section 1.2.8, F[n] = phi^n/sqrt(5) rounded to the
   nearest integer, where phi=(1+sqrt(5))/2 is the golden ratio.  So the
   number of bits required is n*log_2((1+sqrt(5))/2) = n*0.6942419.

   The multiplier used is 23/32=0.71875 for efficient calculation on CPUs
   without good floating point.  There's +2 for rounding up, and a further
   +2 since at the last step x limbs are doubled into a 2x+1 limb region
   whereas the actual F[2k] value might be only 2x-1 limbs.

   Note that a division is done first, since on a 32-bit system it's at
   least conceivable to go right up to n==ULONG_MAX.  (F[2^32-1] would be
   about 380Mbytes, plus temporary workspace of about 1.2Gbytes here and
   whatever a multiply of two 190Mbyte numbers takes.)  The order of the
   operations n/32, *23, /GMP_NUMB_BITS therefore matters and should not be
   rearranged.

   Enhancement: When GMP_NUMB_BITS is not a power of 2 the division could be
   worked into the multiplier.  */
#define MPN_FIB2_SIZE(n) \
((mp_size_t) ((n) / 32 * 23 / GMP_NUMB_BITS) + 4)
/* FIB_TABLE(n) returns the Fibonacci number F[n].  Must have n in the range
   -1 <= n <= FIB_TABLE_LIMIT.  The lookup is offset by one, so entry 0 of
   __gmp_fib_table is the one returned for n = -1.

   FIB_TABLE_LUCNUM_LIMIT is the largest n for which L[n] = F[n] + 2*F[n-1]
   fits in a limb.

   This data generated by code at the end of mpn/generic/fib2_ui.c.  */
extern const mp_limb_t __gmp_fib_table[];
#define FIB_TABLE(n) (__gmp_fib_table[(n)+1])
/* The limits are chosen by limb size, largest first.  Nothing is defined
   when GMP_NUMB_BITS is below 4.  NOTE(review): the ladder is nested
   #if/#else rather than #elif, presumably for the benefit of old
   preprocessors -- confirm before flattening.  */
#if GMP_NUMB_BITS >= 64
#define FIB_TABLE_LIMIT 93
#define FIB_TABLE_LUCNUM_LIMIT 92
#else
#if GMP_NUMB_BITS >= 32
#define FIB_TABLE_LIMIT 47
#define FIB_TABLE_LUCNUM_LIMIT 46
#else
#if GMP_NUMB_BITS >= 16
#define FIB_TABLE_LIMIT 24
#define FIB_TABLE_LUCNUM_LIMIT 23
#else
#if GMP_NUMB_BITS >= 8
#define FIB_TABLE_LIMIT 13
#define FIB_TABLE_LUCNUM_LIMIT 11
#else
#if GMP_NUMB_BITS >= 4
#define FIB_TABLE_LIMIT 7
#define FIB_TABLE_LUCNUM_LIMIT 5
#endif /* 4 */
#endif /* 8 */
#endif /* 16 */
#endif /* 32 */
#endif /* 64 */
/* Algorithm selection by size.  For a threshold between algorithms A and B,
   ABOVE_THRESHOLD is non-zero when size>=thresh, meaning B should be used;
   BELOW_THRESHOLD is its negation.

   Two threshold values are special: 0 means always use B, and
   MP_SIZE_T_MAX means always use A.  Those tests are on compile-time
   constants, so the compiler should be able to discard the code for the
   algorithm that can never be selected.  */
#define ABOVE_THRESHOLD(size,thresh) \
  ((thresh) == 0 ? 1 \
   : (thresh) == MP_SIZE_T_MAX ? 0 \
   : (size) >= (thresh))
#define BELOW_THRESHOLD(size,thresh)  (! ABOVE_THRESHOLD (size, thresh))
/* Fallback multiplication thresholds, used only when nothing earlier has
   defined them.  The values are ones that are good on most machines.  */
#if ! defined (MUL_KARATSUBA_THRESHOLD)
#define MUL_KARATSUBA_THRESHOLD 32
#endif

#if ! defined (MUL_TOOM3_THRESHOLD)
#define MUL_TOOM3_THRESHOLD 256
#endif
/* Fallback squaring thresholds, used only when not already defined.

   SQR_BASECASE_THRESHOLD is the size at which mpn_sqr_basecase should take
   over from mpn_mul_basecase in mpn_sqr_n.  The default of 0 means
   mpn_sqr_basecase is used always.

   If it turns out that mpn_kara_sqr_n becomes faster than mpn_mul_basecase
   before mpn_sqr_basecase does, then SQR_BASECASE_THRESHOLD is the
   karatsuba threshold and SQR_KARATSUBA_THRESHOLD is 0.  This oddity arises
   more or less because SQR_KARATSUBA_THRESHOLD represents the size up to
   which mpn_sqr_basecase should be used, and that may be never.

   The karatsuba and toom3 squaring defaults are twice the corresponding
   multiplication thresholds.  */
#if ! defined (SQR_BASECASE_THRESHOLD)
#define SQR_BASECASE_THRESHOLD 0
#endif

#if ! defined (SQR_KARATSUBA_THRESHOLD)
#define SQR_KARATSUBA_THRESHOLD (2*MUL_KARATSUBA_THRESHOLD)
#endif

#if ! defined (SQR_TOOM3_THRESHOLD)
#define SQR_TOOM3_THRESHOLD (2*MUL_TOOM3_THRESHOLD)
#endif
/* First k to use for an FFT modF multiply.  A modF FFT is an order
   log(2^k)/log(2^(k-1)) = k/(k-1) algorithm, so k=3 is merely 1.5, not
   much better than karatsuba at log(3)/log(2) = 1.585, whereas k=4 is 1.33
   which is faster than toom3 at log(5)/log(3) = 1.465.  */
#define FFT_FIRST_K 4

/* Sizes at which FFT should be used to do a modF NxN -> N multiply or
   square.  The defaults are three times the toom3 thresholds.  */
#if ! defined (MUL_FFT_MODF_THRESHOLD)
#define MUL_FFT_MODF_THRESHOLD (MUL_TOOM3_THRESHOLD * 3)
#endif

#if ! defined (SQR_FFT_MODF_THRESHOLD)
#define SQR_FFT_MODF_THRESHOLD (SQR_TOOM3_THRESHOLD * 3)
#endif

/* Sizes at which FFT should be used to do an NxN -> 2N multiply or square.
   This will be a size where FFT is using k=7 or k=8, since an FFT-k used
   for an NxN->2N multiply and not recursing into itself is an order
   log(2^k)/log(2^(k-2)) = k/(k-2) algorithm, so it'll be at least k=7 at
   1.40 which is the first better than toom3.  The defaults are ten times
   the modF thresholds.  */
#if ! defined (MUL_FFT_THRESHOLD)
#define MUL_FFT_THRESHOLD (MUL_FFT_MODF_THRESHOLD * 10)
#endif

#if ! defined (SQR_FFT_THRESHOLD)
#define SQR_FFT_THRESHOLD (SQR_FFT_MODF_THRESHOLD * 10)
#endif
/* Initializers for the tables of thresholds for successive modF FFT "k"s.
   The first entry is the size where FFT_FIRST_K+1 should be used, the
   second where FFT_FIRST_K+2 should be used, and so on; each list ends
   with a 0 entry.  See mpn_fft_best_k().

   The defaults are multiples of the toom3 thresholds.  FFT_TABLE_ATTRS
   gives the storage class and qualifiers for such a table, and
   MPN_FFT_TABLE_SIZE the number of entries to declare.  */
#if ! defined (MUL_FFT_TABLE)
#define MUL_FFT_TABLE \
  { MUL_TOOM3_THRESHOLD * 4,     /* k=5 */ \
    MUL_TOOM3_THRESHOLD * 8,     /* k=6 */ \
    MUL_TOOM3_THRESHOLD * 16,    /* k=7 */ \
    MUL_TOOM3_THRESHOLD * 32,    /* k=8 */ \
    MUL_TOOM3_THRESHOLD * 96,    /* k=9 */ \
    MUL_TOOM3_THRESHOLD * 288,   /* k=10 */ \
    0 }
#endif

#if ! defined (SQR_FFT_TABLE)
#define SQR_FFT_TABLE \
  { SQR_TOOM3_THRESHOLD * 4,     /* k=5 */ \
    SQR_TOOM3_THRESHOLD * 8,     /* k=6 */ \
    SQR_TOOM3_THRESHOLD * 16,    /* k=7 */ \
    SQR_TOOM3_THRESHOLD * 32,    /* k=8 */ \
    SQR_TOOM3_THRESHOLD * 96,    /* k=9 */ \
    SQR_TOOM3_THRESHOLD * 288,   /* k=10 */ \
    0 }
#endif

#if ! defined (FFT_TABLE_ATTRS)
#define FFT_TABLE_ATTRS static const
#endif

#define MPN_FFT_TABLE_SIZE 16
/* Default size at which divide-and-conquer division is used, when not
   already defined.

   mpn_dc_divrem_n(n) calls 2*mul(n/2)+2*div(n/2), thus to be faster than
   div(n) = 4*div(n/2), we need mul(n/2) to be faster than the classic way,
   i.e. n/2 >= MUL_KARATSUBA_THRESHOLD.

   Measured values are between 2 and 4 times MUL_KARATSUBA_THRESHOLD, so go
   for 3 as an average.  */
#if ! defined (DIV_DC_THRESHOLD)
#define DIV_DC_THRESHOLD (3 * MUL_KARATSUBA_THRESHOLD)
#endif
/* Non-zero if the regions {xp,xsize} and {yp,ysize} overlap.  They are
   disjoint exactly when one of them ends at or before the start of the
   other, ie. when xp+xsize<=yp or yp+ysize<=xp; anything else is an
   overlap.  */
#define MPN_OVERLAP_P(xp, xsize, yp, ysize) \
  (! ((xp) + (xsize) <= (yp) || (yp) + (ysize) <= (xp)))

/* The same test with both pointers converted to char *, so the sizes are
   counted in chars.  */
#define MEM_OVERLAP_P(xp, xsize, yp, ysize) \
  (! ((char *) (xp) + (xsize) <= (char *) (yp) \
      || (char *) (yp) + (ysize) <= (char *) (xp)))

/* Non-zero if xp and yp are equal, or if {xp,xsize} and {yp,ysize} don't
   overlap at all.  Zero if they start at different places and overlap.  */
#define MPN_SAME_OR_SEPARATE2_P(xp, xsize, yp, ysize) \
  ((xp) == (yp) || ! MPN_OVERLAP_P (xp, xsize, yp, ysize))
#define MPN_SAME_OR_SEPARATE_P(xp, yp, size) \
  MPN_SAME_OR_SEPARATE2_P(xp, size, yp, size)

/* Non-zero if dst,dsize and src,ssize are either identical or overlapping
   in a way suitable for an incrementing algorithm (dst<=src) or a
   decrementing algorithm (dst>=src) respectively, or don't overlap at all.
   Zero if they overlap in a fashion unsuitable for that direction.  */
#define MPN_SAME_OR_INCR2_P(dst, dsize, src, ssize) \
  ((dst) <= (src) || ! MPN_OVERLAP_P (dst, dsize, src, ssize))
#define MPN_SAME_OR_INCR_P(dst, src, size) \
  MPN_SAME_OR_INCR2_P(dst, size, src, size)

#define MPN_SAME_OR_DECR2_P(dst, dsize, src, ssize) \
  ((dst) >= (src) || ! MPN_OVERLAP_P (dst, dsize, src, ssize))
#define MPN_SAME_OR_DECR_P(dst, src, size) \
  MPN_SAME_OR_DECR2_P(dst, size, src, size)
/* ASSERT() is a private assertion checking scheme, similar to <assert.h>.
   ASSERT() does the check only if WANT_ASSERT is selected, ASSERT_ALWAYS()
   does it always.  Generally assertions are meant for development, but
   might help when looking for a problem later too.

   Note that strings shouldn't be used within the ASSERT expression,
   eg. ASSERT(strcmp(s,"notgood")!=0), since the quotes upset the "expr"
   used in the !HAVE_STRINGIZE case (ie. K&R).  */

/* Source position passed to the failure handler.  Placeholder values (-1
   and "") are used if the preprocessor doesn't provide __LINE__ or
   __FILE__.  */
#ifdef __LINE__
#define ASSERT_LINE __LINE__
#else
#define ASSERT_LINE -1
#endif
#ifdef __FILE__
#define ASSERT_FILE __FILE__
#else
#define ASSERT_FILE ""
#endif
/* Failure reporting routines, defined elsewhere.  __gmp_assert_fail is
   declared ATTRIBUTE_NORETURN.  */
void __gmp_assert_header _PROTO ((const char *filename, int linenum));
__GMP_DECLSPEC void __gmp_assert_fail _PROTO ((const char *filename, int linenum, const char *expr)) ATTRIBUTE_NORETURN;
/* ASSERT_FAIL(expr) reports a failed assertion, giving the file, line and
   the text of expr.  With HAVE_STRINGIZE the text comes from the #
   operator.  NOTE(review): the fallback "expr" presumably relies on
   K&R-style substitution of macro parameters inside string literals; an
   ISO preprocessor would pass the literal string "expr" -- confirm.  */
#if HAVE_STRINGIZE
#define ASSERT_FAIL(expr) __gmp_assert_fail (ASSERT_FILE, ASSERT_LINE, #expr)
#else
#define ASSERT_FAIL(expr) __gmp_assert_fail (ASSERT_FILE, ASSERT_LINE, "expr")
#endif
/* ASSERT_ALWAYS(expr) evaluates expr and invokes ASSERT_FAIL(expr) if it's
   zero, irrespective of WANT_ASSERT.  It expands to a single statement.  */
#define ASSERT_ALWAYS(expr) \
  do { \
    if (!(expr)) \
      { \
        ASSERT_FAIL (expr); \
      } \
  } while (0)

/* ASSERT(expr) is ASSERT_ALWAYS(expr) when WANT_ASSERT is selected.
   Otherwise it's an empty statement and expr is not evaluated at all.  */
#if ! WANT_ASSERT
#define ASSERT(expr) do {} while (0)
#else
#define ASSERT(expr) ASSERT_ALWAYS (expr)
#endif
/* ASSERT_CARRY checks that the expression is non-zero, ASSERT_NOCARRY that
   it's zero.  Unlike ASSERT, the expression is still evaluated when
   assertion checking is disabled; only the check on its value is dropped.

   These macros are meant for use with routines like mpn_add_n() where the
   return value represents a carry or whatever that should or shouldn't
   occur in some context.  For example,
   ASSERT_NOCARRY (mpn_add_n (rp, s1p, s2p, size));  */
#if ! WANT_ASSERT
#define ASSERT_CARRY(expr)   (expr)
#define ASSERT_NOCARRY(expr) (expr)
#else
#define ASSERT_CARRY(expr)   ASSERT_ALWAYS ((expr) != 0)
#define ASSERT_NOCARRY(expr) ASSERT_ALWAYS ((expr) == 0)
#endif
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -