red floyd <
no.spam.here@its.invalid> writes:
So I'm a little late, but here's my effort to use the modulo 30
trick.
>
Using g++ 12.4.0, Cygwin under Windows 11 22631, Ryzen 5 5600x,
64GB RAM
>
g++ -O3 -std=c++17
5761455 primess less than 100 million in 0.182269s
50847534 primes less than 1 billion in 2.841167s
455052511 primes less than 10billion in 53.009133s
I appreciate both the effort and your posting of results.
[..program..]
There are several ways that the program is doing noticeably more
work than it needs to. I haven't done any measurements but I
believe this extra work accounts for the biggest part of its
relative slowness. Here is a short simple program to illustrate
some ways that your program could be sped up.
(The short simple program is the rest of this posting.)
#include <stdio.h>
#include <stdlib.h>
typedef unsigned long long Index, Count, Bits, NN, N235;
static const unsigned char mods[8] = { 1, 7, 11, 13, 17, 19, 23, 29, };
static unsigned char bytes[ 1ULL * 50 * 1000 * 1000 * 1000 ];
static void mod30_sieve( NN );
static NN product_NN( N235, N235 );
static _Bool is_marked_composite( N235 );
static void mark_composite( NN );
static Count unmarked_less_than( NN );
int
main( int argc, char *argv[] ){
NN ceiling = argc > 1 ? strtoull( argv[1], 0, 10 ) : 1000000000;
setbuf( stdout, 0 );
if( ceiling >= 30 * sizeof bytes ){
printf( " urk.. limit must be less than %zu\n", 30 * sizeof bytes );
exit( 0 );
}
printf( " determining primes less than %llu ...\n", ceiling );
mod30_sieve( ceiling );
printf( " ... found %llu primes.\n", unmarked_less_than( ceiling ) );
return 0;
}
void
mod30_sieve( NN ceiling ){
N235 i, j;
NN ijNN;
for( bytes[0] = 1, i = 0; product_NN( i, i ) < ceiling; i++ ){
if( is_marked_composite( i ) ) continue;
for( j = i; ijNN = product_NN( i, j ), ijNN < ceiling; j++ ){
mark_composite( ijNN );
}
}
}
NN
product_NN( N235 x, N235 y ){
NN xnn = (x>>3)*30 + mods[x&7];
NN ynn = (y>>3)*30 + mods[y&7];
return xnn * ynn;
}
_Bool
is_marked_composite( N235 t ){
return bytes[t>>3] & 1<<(t&7);
}
void
mark_composite( NN nn ){
static const unsigned char masks[] = {
[1]= 1, [7]= 2, [11]= 4, [13]= 8, [17]= 16, [19]= 32, [23]=64, [29]=128,
};
bytes[ nn/30 ] |= masks[ nn%30 ];
}
Count
unmarked_less_than( NN ceiling ){
Index i = ceiling/30;
NN extra = ceiling - i*30;
Count r = (ceiling > 2) + (ceiling > 3) + (ceiling > 5);
for( Index j = 0; j < 8 && mods[j] < extra; j++ ){
r += bytes[i]>>j &1 ^1;
}
while( i && i-- ){
for( Bits b = bytes[i] ^0xFF; b; b >>= 1 ) r += b&1;
}
return r;
}