sieve.c 21 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679
  1. /*
  2. * Copyright (c) 1995 Colin Plumb. All rights reserved.
  3. * For licensing and other legal details, see the file legal.c.
  4. *
  5. * sieve.c - Trial division for prime finding.
  6. *
  7. * Finding primes:
  8. * - Sieve 1 to find the small primes for
  9. * - Sieve 2 to find the candidate large primes, then
  10. * - Pseudo-primality test.
  11. *
  12. * An important question is how much trial division by small primes
  13. * should we do? The answer is a LOT. Even a heavily optimized
  14. * Fermat test to the base 2 (the simplest pseudoprimality test)
  15. * is much more expensive than a division.
  16. *
  17. * For an prime of n k-bit words, a Fermat test to the base 2 requires n*k
  18. * modular squarings, each of which involves n*(n+1)/2 signle-word multiplies
  19. * in the squaring and n*(n+1) multiplies in the modular reduction, plus
  20. * some overhead to get into and out of Montgomery form. This is a total
  21. * of 3/2 * k * n^2 * (n+1). Equivalently, if n*k = b bits, it's
  22. * 3/2 * (b/k+1) * b^2 / k.
  23. *
  24. * A modulo operation requires n single-word divides. Let's assume that
  25. * a divide is 4 times the cost of a multiply. That's 4*n multiplies.
  26. * However, you only have to do the division once for your entire
  27. * search. It can be amortized over 10-15 primes. So it's
  28. * really more like n/3 multiplies. This is b/3k.
  29. *
  30. * Now, let's suppose you have a candidate prime t. Your options
  31. * are to a) do trial division by a prime p, then do a Fermat test,
  32. * or to do the Fermat test directly. Doing the trial division
  33. * costs b/3k multiplies, but a certain fraction of the time (1/p), it
  34. * saves you 3/2 b^3 / k^2 multiplies. Thus, it's worth it doing the
  35. * division as long as b/3k < 3/2 * (b/k+1) * b^2 / k / p.
  36. * I.e. p < 9/2 * (b/k + 1) * b = 9/2 * (b^2/k + b).
  37. * E.g. for k=16 and b=256, p < 9/2 * 17 * 256 = 19584.
  38. * Solving for k=16 and k=32 at a few interesting value of b:
  39. *
  40. * k=16, b=256: p < 19584 k=32, b=256: p < 10368
  41. * k=16, b=384: p < 43200 k=32, b=384; p < 22464
  42. * k=16, b=512: p < 76032 k=32, b=512: p < 39168
  43. * k=16, b=640: p < 118080 k=32, b=640: p < 60480
  44. *
  45. * H'm... before using the highly-optimized Fermat test, I got much larger
  46. * numbers (64K to 256K), and designed the sieve for that. Maybe it needs
  47. * to be reduced. It *is* true that the desirable sieve size increases
  48. * rapidly with increasing prime size, and it's the larger primes that are
  49. * worrisome in any case. I'll leave it as is (64K) for now while I
  50. * think about it.
  51. *
  52. * A bit of tweaking the division (we can compute a reciprocal and do
  53. * multiplies instead, turning 4*n into 4 + 2*n) would increase all the
  54. * numbers by a factor of 2 or so.
  55. *
  56. *
  57. * Bit k in a sieve corresponds to the number a + k*b.
  58. * For a given a and b, the sieve's job is to find the values of
  59. * k for which a + k*b == 0 (mod p). Multiplying by b^-1 and
  60. * isolating k, you get k == -a*b^-1 (mod p). So the values of
  61. * k which should be worked on are k = (-a*b^-1 mod p) + i * p,
  62. * for i = 0, 1, 2,...
  63. *
  64. * Note how this is still easy to use with very large b, if you need it.
  65. * It just requires computing (b mod p) and then finding the multiplicative
  66. * inverse of that.
  67. *
  68. *
  69. * How large a space to search to ensure that one will hit a prime?
  70. * The average density is known, but the primes behave oddly, and sometimes
  71. * there are large gaps. It is conjectured by shanks that the first gap
  72. * of size "delta" will occur at approximately exp(sqrt(delta)), so a delta
  73. * of 65536 is conjectured to be to contain a prime up to e^256.
  74. * Remembering the handy 2<->e conversion ratios:
  75. * ln(2) = 0.693147 log2(e) = 1.442695
  76. * This covers up to 369 bits. Damn, not enough! Still, it'll have to do.
  77. *
  78. * Cramer's conjecture (he proved it for "most" cases) is that in the limit,
  79. * as p goes to infinity, the largest gap after a prime p tends to (ln(p))^2.
  80. * So, for a 1024-bit p, the interval to the next prime is expected to be
  81. * about 709.78^2, or 503791. We'd need to enlarge our space by a factor of
  82. * 8 to be sure. It isn't worth the hassle.
  83. *
  84. * Note that a span of this size is expected to contain 92 primes even
  85. * in the vicinity of 2^1024 (it's 369 at 256 bits and 492 at 192 bits).
  86. * So the probability of failure is pretty low.
  87. */
  88. #ifndef HAVE_CONFIG_H
  89. #define HAVE_CONFIG_H 0
  90. #endif
  91. #if HAVE_CONFIG_H
  92. #include "bnconfig.h"
  93. #endif
  94. /*
  95. * Some compilers complain about #if FOO if FOO isn't defined,
  96. * so do the ANSI-mandated thing explicitly...
  97. */
  98. #ifndef NO_ASSERT_H
  99. #define NO_ASSERT_H 0
  100. #endif
  101. #ifndef NO_LIMITS_H
  102. #define NO_LIMITS_H 0
  103. #endif
  104. #ifndef NO_STRING_H
  105. #define NO_STRING_H 0
  106. #endif
  107. #ifndef HAVE_STRINGS_H
  108. #define HAVE_STRINGS_H 0
  109. #endif
  110. #if !NO_ASSERT_H
  111. #include <assert.h>
  112. #else
  113. #define assert(x) (void)0
  114. #endif
  115. #if !NO_LIMITS_H
  116. #include <limits.h> /* For UINT_MAX */
  117. #endif /* If not avail, default value of 0 is safe */
  118. #if !NO_STRING_H
  119. #include <string.h> /* for memset() */
  120. #elif HAVE_STRINGS_H
  121. #include <strings.h>
  122. #endif
  123. #include "bn.h"
  124. #include "sieve.h"
  125. #ifdef MSDOS
  126. #include "lbnmem.h"
  127. #endif
  128. #include "kludge.h"
  129. /*
  130. * Each array stores potential primes as 1 bits in little-endian bytes.
  131. * Bit k in an array represents a + k*b, for some parameters a and b
  132. * of the sieve. Currently, b is hardcoded to 2.
  133. *
  134. * Various factors of 16 arise because these are all *byte* sizes, and
  135. * skipping even numbers, 16 numbers fit into a byte's worth of bitmap.
  136. */
  137. /*
  138. * The first number in the small prime sieve. This could be raised to
  139. * 3 if you want to squeeze bytes out aggressively for a smaller SMALL
  140. * table, and doing so would let one more prime into the end of the array,
  141. * but there is no sense making it larger if you're generating small
  142. * primes up to the limit if 2^16, since it doesn't save any memory and
  143. * would require extra code to ignore 65537 in the last byte, which is
  144. * over the 16-bit limit.
  145. */
  146. #define SMALLSTART 1
  147. /*
  148. * Size of sieve used to find large primes, in bytes. For compatibility
  149. * with 16-bit-int systems, the largest prime that can appear in it,
  150. * SMALL * 16 + SMALLSTART - 2, must be < 65536. Since 65537 is a prime,
  151. * this is the absolute maximum table size.
  152. */
  153. #define SMALL (65536/16)
  154. /*
  155. * Compute the multiplicative inverse of x, modulo mod, using the extended
  156. * Euclidean algorithm. The classical EEA returns two results, traditionally
  157. * named s and t, but only one (t) is needed or computed here.
  158. * It is unrolled twice to avoid some variable-swapping, and because negating
  159. * t every other round makes all the number positive and less than the
  160. * modulus, which makes fixed-length arithmetic easier.
  161. *
  162. * If gcd(x, mod) != 1, then this will return 0.
  163. */
  164. static unsigned
  165. sieveModInvert(unsigned x, unsigned mod)
  166. {
  167. unsigned y;
  168. unsigned t0, t1;
  169. unsigned q;
  170. if (x <= 1)
  171. return x; /* 0 and 1 are self-inverse */
  172. /*
  173. * The first round is simplified based on the
  174. * initial conditions t0 = 1 and t1 = 0.
  175. */
  176. t1 = mod / x;
  177. y = mod % x;
  178. if (y <= 1)
  179. return y ? mod - t1 : 0;
  180. t0 = 1;
  181. do {
  182. q = x / y;
  183. x = x % y;
  184. t0 += q * t1;
  185. if (x <= 1)
  186. return x ? t0 : 0;
  187. q = y / x;
  188. y = y % x;
  189. t1 += q * t0;
  190. } while (y > 1);
  191. return y ? mod - t1 : 0;
  192. }
  193. /*
  194. * Perform a single sieving operation on an array. Clear bits "start",
  195. * "start+step", "start+2*step", etc. from the array, up to the size
  196. * limit (in BYTES) "size". All of the arguments must fit into 16 bits
  197. * for portability.
  198. *
  199. * This is the core of the sieving operation. In addition to being
  200. * called from the sieving functions, it is useful to call directly if,
  201. * say, you want to exclude primes congruent to 1 mod 3, or whatever.
  202. * (Although in that case, it would be better to change the sieving to
  203. * use a step size of 6 and start == 5 (mod 6).)
  204. *
  205. * Originally, this was inlined in the code below (with various checks
  206. * turned off where they could be inferred from the environment), but it
  207. * turns out that all the sieving is so fast that it makes a negligible
  208. * speed difference and smaller, cleaner code was preferred.
  209. *
  210. * Rather than increment a bit index through the array and clear
  211. * the corresponding bit, this code takes advantage of the fact that
  212. * every eighth increment must use the same bit position in a byte.
  213. * I.e. start + k*step == start + (k+8)*step (mod 8). Thus, a bitmask
  214. * can be computed only eight times and used for all multiples. Thus, the
  215. * outer loop is over (k mod 8) while the inner loop is over (k div 8).
  216. *
  217. * The only further trickiness is that this code is designed to accept
  218. * start, step, and size up to 65535 on 16-bit machines. On such a
  219. * machine, the computation "start+step" can overflow, so we need to
  220. * insert an extra check for that situation.
  221. */
  222. void
  223. sieveSingle(unsigned char *array, unsigned size, unsigned start, unsigned step)
  224. {
  225. unsigned bit;
  226. unsigned char mask;
  227. unsigned i;
  228. #if UINT_MAX < 0x1ffff
  229. /* Unsigned is small; add checks for wrap */
  230. for (bit = 0; bit < 8; bit++) {
  231. i = start/8;
  232. if (i >= size)
  233. break;
  234. mask = ~(1 << (start & 7));
  235. do {
  236. array[i] &= mask;
  237. i += step;
  238. } while (i >= step && i < size);
  239. start += step;
  240. if (start < step) /* Overflow test */
  241. break;
  242. }
  243. #else
  244. /* Unsigned has the range - no overflow possible */
  245. for (bit = 0; bit < 8; bit++) {
  246. i = start/8;
  247. if (i >= size)
  248. break;
  249. mask = ~(1 << (start & 7));
  250. do {
  251. array[i] &= mask;
  252. i += step;
  253. } while (i < size);
  254. start += step;
  255. }
  256. #endif
  257. }
  258. /*
  259. * Returns the index of the next bit set in the given array. The search
  260. * begins after the specified bit, so if you care about bit 0, you need
  261. * to check it explicitly yourself. This returns 0 if no bits are found.
  262. *
  263. * Note that the size is in bytes, and that it takes and returns BIT
  264. * positions. If the array represents odd numbers only, as usual, the
  265. * returned values must be doubled to turn them into offsets from the
  266. * initial number.
  267. */
  268. unsigned
  269. sieveSearch(unsigned char const *array, unsigned size, unsigned start)
  270. {
  271. unsigned i; /* Loop index */
  272. unsigned char t; /* Temp */
  273. if (!++start)
  274. return 0;
  275. i = start/8;
  276. if (i >= size)
  277. return 0; /* Done! */
  278. /* Deal with odd-bit beginnings => search the first byte */
  279. if (start & 7) {
  280. t = array[i++] >> (start & 7);
  281. if (t) {
  282. if (!(t & 15)) {
  283. t >>= 4;
  284. start += 4;
  285. }
  286. if (!(t & 3)) {
  287. t >>= 2;
  288. start += 2;
  289. }
  290. if (!(t & 1))
  291. start += 1;
  292. return start;
  293. } else if (i == size) {
  294. return 0; /* Done */
  295. }
  296. }
  297. /* Now the main search loop */
  298. do {
  299. if ((t = array[i]) != 0) {
  300. start = 8*i;
  301. if (!(t & 15)) {
  302. t >>= 4;
  303. start += 4;
  304. }
  305. if (!(t & 3)) {
  306. t >>= 2;
  307. start += 2;
  308. }
  309. if (!(t & 1))
  310. start += 1;
  311. return start;
  312. }
  313. } while (++i < size);
  314. /* Failed */
  315. return 0;
  316. }
  317. /*
  318. * Build a table of small primes for sieving larger primes with. This
  319. * could be cached between calls to sieveBuild, but it's so fast that
  320. * it's really not worth it. This code takes a few milliseconds to run.
  321. */
  322. static void
  323. sieveSmall(unsigned char *array, unsigned size)
  324. {
  325. unsigned i; /* Loop index */
  326. unsigned p; /* The current prime */
  327. /* Initialize to all 1s */
  328. memset(array, 0xFF, size);
  329. #if SMALLSTART == 1
  330. /* Mark 1 as NOT prime */
  331. array[0] = 0xfe;
  332. i = 1; /* Index of first prime */
  333. #else
  334. i = 0; /* Index of first prime */
  335. #endif
  336. /*
  337. * Okay, now sieve via the primes up to 256, obtained from the
  338. * table itself. We know the maximum possible table size is
  339. * 65536, and sieveSingle() can cope with out-of-range inputs
  340. * safely, and the time required is trivial, so it isn't adaptive
  341. * based on the array size.
  342. *
  343. * Convert each bit position into a prime, compute a starting
  344. * sieve position (the square of the prime), and remove multiples
  345. * from the table, using sieveSingle(). I used to have that
  346. * code in line here, but the speed difference was so small it
  347. * wasn't worth it. If a compiler really wants to waste memory,
  348. * it can inline it.
  349. */
  350. do {
  351. p = 2 * i + SMALLSTART;
  352. if (p > 256)
  353. break;
  354. /* Start at square of p */
  355. sieveSingle(array, size, (p*p-SMALLSTART)/2, p);
  356. /* And find the next prime */
  357. i = sieveSearch(array, 16, i);
  358. } while (i);
  359. }
  360. /*
  361. * This is the primary sieving function. It fills in the array with
  362. * a sieve (multiples of small primes removed) beginning at bn and
  363. * proceeding in steps of "step".
  364. *
  365. * It generates a small array to get the primes to sieve by. It's
  366. * generated on the fly - sieveSmall is fast enough to make that
  367. * perfectly acceptable.
  368. *
  369. * The caller should take the array, walk it with sieveSearch, and
  370. * apply a stronger primality test to the numbers that are returned.
  371. *
  372. * If the "dbl" flag non-zero (at least 1), this also sieves 2*bn+1, in
  373. * steps of 2*step. If dbl is 2 or more, this also sieve 4*bn+3,
  374. * in steps of 4*step, and so on for arbitrarily high values of "dbl".
  375. * This is convenient for finding primes such that (p-1)/2 is also prime.
  376. * This is particularly efficient because sieveSingle is controlled by the
  377. * parameter s = -n/step (mod p). (In fact, we find t = -1/step (mod p)
  378. * and multiply that by n (mod p).) If you have -n/step (mod p), then
  379. * finding -(2*n+1)/(2*step) (mod p), which is -n/step - 1/(2*step) (mod p),
  380. * reduces to finding -1/(2*step) (mod p), or t/2 (mod p), and adding that
  381. * to s = -n/step (mod p). Dividing by 2 modulo an odd p is easy -
  382. * if even, divide directly. Otherwise, add p (which produces an even
  383. * sum), and divide by 2. Very simple. And this produces s' and t'
  384. * for step' = 2*step. It can be repeated for step'' = 4*step and so on.
  385. *
  386. * Note that some of the math is complicated by the fact that 2*p might
  387. * not fit into an unsigned, so rather than if (odd(x)) x = (x+p)/2,
  388. * we do if (odd(x)) x = x/2 + p/2 + 1;
  389. *
  390. * TODO: Do the double-sieving by sieving the larger number, and then
  391. * just subtract one from the remainder to get the other parameter.
  392. * (bn-1)/2 is divisible by an odd p iff bn-1 is divisible, which is
  393. * true iff bn == 1 mod p. This requires using a step size of 4.
  394. */
  395. int
  396. sieveBuild(unsigned char *array, unsigned size, struct BigNum const *bn,
  397. unsigned step, unsigned dbl)
  398. {
  399. unsigned i, j; /* Loop index */
  400. unsigned p; /* Current small prime */
  401. unsigned s; /* Where to start operations in the big sieve */
  402. unsigned t; /* Step modulo p, the current prime */
  403. #ifdef MSDOS /* Use dynamic allocation rather than on the stack */
  404. unsigned char *small;
  405. #else
  406. unsigned char small[SMALL];
  407. #endif
  408. assert(array);
  409. #ifdef MSDOS
  410. small = lbnMemAlloc(SMALL); /* Which allocator? Not secure. */
  411. if (!small)
  412. return -1; /* Failed */
  413. #endif
  414. /*
  415. * An odd step is a special case, since we must sieve by 2,
  416. * which isn't in the small prime array and has a few other
  417. * special properties. These are:
  418. * - Since the numbers are stored in binary, we don't need to
  419. * use bnModQ to find the remainder.
  420. * - If step is odd, then t = step % 2 is 1, which allows
  421. * the elimination of a lot of math. Inverting and negating
  422. * t don't change it, and multiplying s by 1 is a no-op,
  423. * so t isn't actually mentioned.
  424. * - Since this is the first sieving, instead of calling
  425. * sieveSingle, we can just use memset to fill the array
  426. * with 0x55 or 0xAA. Since a 1 bit means possible prime
  427. * (i.e. NOT divisible by 2), and the least significant bit
  428. * is first, if bn % 2 == 0, we use 0xAA (bit 0 = bn is NOT
  429. * prime), while if bn % 2 == 1, use 0x55.
  430. * (If step is even, bn must be odd, so fill the array with 0xFF.)
  431. * - Any doublings need not be considered, since 2*bn+1 is odd, and
  432. * 2*step is even, so none of these numbers are divisible by 2.
  433. */
  434. if (step & 1) {
  435. s = bnLSWord(bn) & 1;
  436. memset(array, 0xAA >> s, size);
  437. } else {
  438. /* Initialize the array to all 1's */
  439. memset(array, 255, size);
  440. assert(bnLSWord(bn) & 1);
  441. }
  442. /*
  443. * This could be cached between calls to sieveBuild, but
  444. * it's really not worth it; sieveSmall is *very* fast.
  445. * sieveSmall returns a sieve of odd primes.
  446. */
  447. sieveSmall(small, SMALL);
  448. /*
  449. * Okay, now sieve via the primes up to ssize*16+SMALLSTART-1,
  450. * obtained from the small table.
  451. */
  452. i = (small[0] & 1) ? 0 : sieveSearch(small, SMALL, 0);
  453. do {
  454. p = 2 * i + SMALLSTART;
  455. /*
  456. * Modulo is usually very expensive, but step is usually
  457. * small, so this conditional is worth it.
  458. */
  459. t = (step < p) ? step : step % p;
  460. if (!t) {
  461. /*
  462. * Instead of assert failing, returning all zero
  463. * bits is the "correct" thing to do, but I think
  464. * that the caller should take care of that
  465. * themselves before starting.
  466. */
  467. assert(bnModQ(bn, p) != 0);
  468. continue;
  469. }
  470. /*
  471. * Get inverse of step mod p. 0 < t < p, and p is prime,
  472. * so it has an inverse and sieveModInvert can't return 0.
  473. */
  474. t = sieveModInvert(t, p);
  475. assert(t);
  476. /* Negate t, so now t == -1/step (mod p) */
  477. t = p - t;
  478. /* Now get the bignum modulo the prime. */
  479. s = bnModQ(bn, p);
  480. /* Multiply by t, the negative inverse of step size */
  481. #if UINT_MAX/0xffff < 0xffff
  482. s = (unsigned)(((unsigned long)s * t) % p);
  483. #else
  484. s = (s * t) % p;
  485. #endif
  486. /* s is now the starting bit position, so sieve */
  487. sieveSingle(array, size, s, p);
  488. /* Now do the double sieves as desired. */
  489. for (j = 0; j < dbl; j++) {
  490. /* Halve t modulo p */
  491. #if UINT_MAX < 0x1ffff
  492. t = (t & 1) ? p/2 + t/2 + 1 : t/2;
  493. /* Add t to s, modulo p with overflow checks. */
  494. s += t;
  495. if (s >= p || s < t)
  496. s -= p;
  497. #else
  498. if (t & 1)
  499. t += p;
  500. t /= 2;
  501. /* Add t to s, modulo p */
  502. s += t;
  503. if (s >= p)
  504. s -= p;
  505. #endif
  506. sieveSingle(array, size, s, p);
  507. }
  508. /* And find the next prime */
  509. } while ((i = sieveSearch(small, SMALL, i)) != 0);
  510. #ifdef MSDOS
  511. lbnMemFree(small, SMALL);
  512. #endif
  513. return 0; /* Success */
  514. }
  515. /*
  516. * Similar to the above, but use "step" (which must be even) as a step
  517. * size rather than a fixed value of 2. If "step" has any small divisors
  518. * other than 2, this will blow up.
  519. *
  520. * Returns -1 on out of memory (MSDOS only, actually), and -2
  521. * if step is found to be non-prime.
  522. */
  523. int
  524. sieveBuildBig(unsigned char *array, unsigned size, struct BigNum const *bn,
  525. struct BigNum const *step, unsigned dbl)
  526. {
  527. unsigned i, j; /* Loop index */
  528. unsigned p; /* Current small prime */
  529. unsigned s; /* Where to start operations in the big sieve */
  530. unsigned t; /* step modulo p, the current prime */
  531. #ifdef MSDOS /* Use dynamic allocation rather than on the stack */
  532. unsigned char *small;
  533. #else
  534. unsigned char small[SMALL];
  535. #endif
  536. assert(array);
  537. #ifdef MSDOS
  538. small = lbnMemAlloc(SMALL); /* Which allocator? Not secure. */
  539. if (!small)
  540. return -1; /* Failed */
  541. #endif
  542. /*
  543. * An odd step is a special case, since we must sieve by 2,
  544. * which isn't in the small prime array and has a few other
  545. * special properties. These are:
  546. * - Since the numbers are stored in binary, we don't need to
  547. * use bnModQ to find the remainder.
  548. * - If step is odd, then t = step % 2 is 1, which allows
  549. * the elimination of a lot of math. Inverting and negating
  550. * t don't change it, and multiplying s by 1 is a no-op,
  551. * so t isn't actually mentioned.
  552. * - Since this is the first sieving, instead of calling
  553. * sieveSingle, we can just use memset to fill the array
  554. * with 0x55 or 0xAA. Since a 1 bit means possible prime
  555. * (i.e. NOT divisible by 2), and the least significant bit
  556. * is first, if bn % 2 == 0, we use 0xAA (bit 0 = bn is NOT
  557. * prime), while if bn % 2 == 1, use 0x55.
  558. * (If step is even, bn must be odd, so fill the array with 0xFF.)
  559. * - Any doublings need not be considered, since 2*bn+1 is odd, and
  560. * 2*step is even, so none of these numbers are divisible by 2.
  561. */
  562. if (bnLSWord(step) & 1) {
  563. s = bnLSWord(bn) & 1;
  564. memset(array, 0xAA >> s, size);
  565. } else {
  566. /* Initialize the array to all 1's */
  567. memset(array, 255, size);
  568. assert(bnLSWord(bn) & 1);
  569. }
  570. /*
  571. * This could be cached between calls to sieveBuild, but
  572. * it's really not worth it; sieveSmall is *very* fast.
  573. * sieveSmall returns a sieve of the odd primes.
  574. */
  575. sieveSmall(small, SMALL);
  576. /*
  577. * Okay, now sieve via the primes up to ssize*16+SMALLSTART-1,
  578. * obtained from the small table.
  579. */
  580. i = (small[0] & 1) ? 0 : sieveSearch(small, SMALL, 0);
  581. do {
  582. p = 2 * i + SMALLSTART;
  583. t = bnModQ(step, p);
  584. if (!t) {
  585. assert(bnModQ(bn, p) != 0);
  586. continue;
  587. }
  588. /* Get negative inverse of step */
  589. t = sieveModInvert(bnModQ(step, p), p);
  590. assert(t);
  591. t = p-t;
  592. /* Okay, we have a prime - get the remainder */
  593. s = bnModQ(bn, p);
  594. /* Now multiply s by the negative inverse of step (mod p) */
  595. #if UINT_MAX/0xffff < 0xffff
  596. s = (unsigned)(((unsigned long)s * t) % p);
  597. #else
  598. s = (s * t) % p;
  599. #endif
  600. /* We now have the starting bit pos */
  601. sieveSingle(array, size, s, p);
  602. /* Now do the double sieves as desired. */
  603. for (j = 0; j < dbl; j++) {
  604. /* Halve t modulo p */
  605. #if UINT_MAX < 0x1ffff
  606. t = (t & 1) ? p/2 + t/2 + 1 : t/2;
  607. /* Add t to s, modulo p with overflow checks. */
  608. s += t;
  609. if (s >= p || s < t)
  610. s -= p;
  611. #else
  612. if (t & 1)
  613. t += p;
  614. t /= 2;
  615. /* Add t to s, modulo p */
  616. s += t;
  617. if (s >= p)
  618. s -= p;
  619. #endif
  620. sieveSingle(array, size, s, p);
  621. }
  622. /* And find the next prime */
  623. } while ((i = sieveSearch(small, SMALL, i)) != 0);
  624. #ifdef MSDOS
  625. lbnMemFree(small, SMALL);
  626. #endif
  627. return 0; /* Success */
  628. }