📄 bnsieve.c
字号:
/*
* bnsieve.c - Trial division for prime finding.
*
* Written by Colin Plumb
*
* $Id: bnsieve.c,v 1.7 1997/05/13 00:23:11 lloyd Exp $
*
* Finding primes:
* - Sieve 1 to find the small primes for
* - Sieve 2 to find the candidate large primes, then
* - Pseudo-primality test.
*
* An important question is how much trial division by small primes
* should we do? The answer is a LOT. Even a heavily optimized
* Fermat test to the base 2 (the simplest pseudoprimality test)
* is much more expensive than a division.
*
* For an prime of n k-bit words, a Fermat test to the base 2 requires n*k
* modular squarings, each of which involves n*(n+1)/2 signle-word multiplies
* in the squaring and n*(n+1) multiplies in the modular reduction, plus
* some overhead to get into and out of Montgomery form. This is a total
* of 3/2 * k * n^2 * (n+1). Equivalently, if n*k = b bits, it's
* 3/2 * (b/k+1) * b^2 / k.
*
* A modulo operation requires n single-word divides. Let's assume that
* a divide is 4 times the cost of a multiply. That's 4*n multiplies.
* However, you only have to do the division once for your entire
* search. It can be amortized over 10-15 primes. So it's
* really more like n/3 multiplies. This is b/3k.
*
* Now, let's suppose you have a candidate prime t. Your options
* are to a) do trial division by a prime p, then do a Fermat test,
* or to do the Fermat test directly. Doing the trial division
* costs b/3k multiplies, but a certain fraction of the time (1/p), it
* saves you 3/2 b^3 / k^2 multiplies. Thus, it's worth it doing the
* division as long as b/3k < 3/2 * (b/k+1) * b^2 / k / p.
* I.e. p < 9/2 * (b/k + 1) * b = 9/2 * (b^2/k + b).
* E.g. for k=16 and b=256, p < 9/2 * 17 * 256 = 19584.
* Solving for k=16 and k=32 at a few interesting value of b:
*
* k=16, b=256: p < 19584 k=32, b=256: p < 10368
* k=16, b=384: p < 43200 k=32, b=384; p < 22464
* k=16, b=512: p < 76032 k=32, b=512: p < 39168
* k=16, b=640: p < 118080 k=32, b=640: p < 60480
*
* H'm... before using the highly-optimized Fermat test, I got much larger
* numbers (64K to 256K), and designed the sieve for that. Maybe it needs
* to be reduced. It *is* true that the desirable sieve size increases
* rapidly with increasing prime size, and it's the larger primes that are
* worrisome in any case. I'll leave it as is (64K) for now while I
* think about it.
*
* A bit of tweaking the division (we can compute a reciprocal and do
* multiplies instead, turning 4*n into 4 + 2*n) would increase all the
* numbers by a factor of 2 or so.
*
*
* Bit k in a sieve corresponds to the number a + k*b.
* For a given a and b, the sieve's job is to find the values of
* k for which a + k*b == 0 (mod p). Multiplying by b^-1 and
* isolating k, you get k == -a*b^-1 (mod p). So the values of
* k which should be worked on are k = (-a*b^-1 mod p) + i * p,
* for i = 0, 1, 2,...
*
* Note how this is still easy to use with very large b, if you need it.
* It just requires computing (b mod p) and then finding the multiplicative
* inverse of that.
*
*
* How large a space to search to ensure that one will hit a prime?
* The average density is known, but the primes behave oddly, and sometimes
* there are large gaps. It is conjectured by shanks that the first gap
* of size "delta" will occur at approximately exp(sqrt(delta)), so a delta
* of 65536 is conjectured to be to contain a prime up to e^256.
* Remembering the handy 2<->e conversion ratios:
* ln(2) = 0.693147 log2(e) = 1.442695
* This covers up to 369 bits. Damn, not enough! Still, it'll have to do.
*
* Cramer's conjecture (he proved it for "most" cases) is that in the limit,
* as p goes to infinity, the largest gap after a prime p tends to (ln(p))^2.
* So, for a 1024-bit p, the interval to the next prime is expected to be
* about 709.78^2, or 503791. We'd need to enlarge our space by a factor of
* 8 to be sure. It isn't worth the hassle.
*
* Note that a span of this size is expected to contain 92 primes even
* in the vicinity of 2^1024 (it's 369 at 256 bits and 492 at 192 bits).
* So the probability of failure is pretty low.
*/
#include "pgpConfig.h"
#include "pgpMem.h"
/*
* Some compilers complain about #if FOO if FOO isn't defined,
* so do the ANSI-mandated thing explicitly...
*/
#ifndef NO_ASSERT_H
#define NO_ASSERT_H 0
#endif
#ifndef NO_LIMITS_H
#define NO_LIMITS_H 0
#endif
#ifndef NO_STRING_H
#define NO_STRING_H 0
#endif
#ifndef HAVE_STRINGS_H
#define HAVE_STRINGS_H 0
#endif
#ifndef NEED_MEMORY_H
#define NEED_MEMORY_H 0
#endif
#if !NO_LIMITS_H
#include <limits.h> /* For UINT_MAX */
#endif /* If not avail, default value of 0 is safe */
#if !NO_STRING_H
#include <string.h> /* for memset() */
#elif HAVE_STRINGS_H
#include <strings.h>
#endif
#if NEED_MEMORY_H
#include <memory.h>
#endif
#include "bn.h"
#include "bnsieve.h"
#ifdef MSDOS
#include "bnimem.h"
#endif
#include "bnkludge.h"
#include "pgpDebug.h"
/*
* Each array stores potential primes as 1 bits in little-endian bytes.
* Bit k in an array represents a + k*b, for some parameters a and b
* of the sieve. Currently, b is hardcoded to 2.
*
* Various factors of 16 arise because these are all *byte* sizes, and
* skipping even numbers, 16 numbers fit into a byte's worth of bitmap.
*/
/*
* The first number in the small prime sieve. This could be raised to
* 3 if you want to squeeze bytes out aggressively for a smaller SMALL
* table, and doing so would let one more prime into the end of the array,
* but there is no sense making it larger if you're generating small
* primes up to the limit if 2^16, since it doesn't save any memory and
* would require extra code to ignore 65537 in the last byte, which is
* over the 16-bit limit.
*/
#define SMALLSTART 1
/*
* Size of sieve used to find large primes, in bytes. For compatibility
* with 16-bit-int systems, the largest prime that can appear in it,
* SMALL * 16 + SMALLSTART - 2, must be < 65536. Since 65537 is a prime,
* this is the absolute maximum table size.
*/
#define SMALL (65536/16)
/*
* Compute the multiplicative inverse of x, modulo mod, using the extended
* Euclidean algorithm. The classical EEA returns two results, traditionally
* named s and t, but only one (t) is needed or computed here.
* It is unrolled twice to avoid some variable-swapping, and because negating
* t every other round makes all the number positive and less than the
* modulus, which makes fixed-length arithmetic easier.
*
* If gcd(x, mod) != 1, then this will return 0.
*/
static unsigned
sieveModInvert(unsigned x, unsigned mod)
{
unsigned y;
unsigned t0, t1;
unsigned q;
if (x <= 1)
return x; /* 0 and 1 are self-inverse */
/*
* The first round is simplified based on the
* initial conditions t0 = 1 and t1 = 0.
*/
t1 = mod / x;
y = mod % x;
if (y <= 1)
return y ? mod - t1 : 0;
t0 = 1;
do {
q = x / y;
x = x % y;
t0 += q * t1;
if (x <= 1)
return x ? t0 : 0;
q = y / x;
y = y % x;
t1 += q * t0;
} while (y > 1);
return y ? mod - t1 : 0;
}
/*
* Perform a single sieving operation on an array. Clear bits "start",
* "start+step", "start+2*step", etc. from the array, up to the size
* limit (in BYTES) "size". All of the arguments must fit into 16 bits
* for portability.
*
* This is the core of the sieving operation. In addition to being
* called from the sieving functions, it is useful to call directly if,
* say, you want to exclude primes congruent to 1 mod 3, or whatever.
* (Although in that case, it would be better to change the sieving to
* use a step size of 6 and start == 5 (mod 6).)
*
* Originally, this was inlined in the code below (with various checks
* turned off where they could be inferred from the environment), but it
* turns out that all the sieving is so fast that it makes a negligible
* speed difference and smaller, cleaner code was preferred.
*
* Rather than increment a bit index through the array and clear
* the corresponding bit, this code takes advantage of the fact that
* every eighth increment must use the same bit position in a byte.
* I.e. start + k*step == start + (k+8)*step (mod 8). Thus, a bitmask
* can be computed only eight times and used for all multiples. Thus, the
* outer loop is over (k mod 8) while the inner loop is over (k div 8).
*
* The only further trickiness is that this code is designed to accept
* start, step, and size up to 65535 on 16-bit machines. On such a
* machine, the computation "start+step" can overflow, so we need to
* insert an extra check for that situation.
*/
void
sieveSingle(unsigned char *array, unsigned size, unsigned start, unsigned step)
{
unsigned bit;
unsigned char mask;
unsigned i;
#if UINT_MAX < 0x1ffff
/* Unsigned is small; add checks for wrap */
for (bit = 0; bit < 8; bit++) {
i = start/8;
if (i >= size)
break;
mask = ~(1 << (start & 7));
do {
array[i] &= mask;
i += step;
} while (i >= step && i < size);
start += step;
if (start < step) /* Overflow test */
break;
}
#else
/* Unsigned has the range - no overflow possible */
for (bit = 0; bit < 8; bit++) {
i = start/8;
if (i >= size)
break;
mask = ~(1 << (start & 7));
do {
array[i] &= mask;
i += step;
} while (i < size);
start += step;
}
#endif
}
/*
* Returns the index of the next bit set in the given array. The search
* begins after the specified bit, so if you care about bit 0, you need
* to check it explicitly yourself. This returns 0 if no bits are found.
*
* Note that the size is in bytes, and that it takes and returns BIT
* positions. If the array represents odd numbers only, as usual, the
* returned values must be doubled to turn them into offsets from the
* initial number.
*/
unsigned
sieveSearch(unsigned char const *array, unsigned size, unsigned start)
{
unsigned i; /* Loop index */
unsigned char t; /* Temp */
if (!++start)
return 0;
i = start/8;
if (i >= size)
return 0; /* Done! */
/* Deal with odd-bit beginnings => search the first byte */
if (start & 7) {
t = array[i++] >> (start & 7);
if (t) {
if (!(t & 15)) {
t >>= 4;
start += 4;
}
if (!(t & 3)) {
t >>= 2;
start += 2;
}
if (!(t & 1))
start += 1;
return start;
} else if (i == size) {
return 0; /* Done */
}
}
/* Now the main search loop */
do {
if ((t = array[i]) != 0) {
start = 8*i;
if (!(t & 15)) {
t >>= 4;
start += 4;
}
if (!(t & 3)) {
t >>= 2;
start += 2;
}
if (!(t & 1))
start += 1;
return start;
}
} while (++i < size);
/* Failed */
return 0;
}
/*
* Build a table of small primes for sieving larger primes with. This
* could be cached between calls to sieveBuild, but it's so fast that
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -