Sujet : Re: constexpr is really very smart!
De : already5chosen (at) *nospam* yahoo.com (Michael S)
Groupes : comp.lang.c++Date : 19. Dec 2024, 22:45:49
Autres entêtes
Organisation : A noiseless patient Spider
Message-ID : <20241219234549.00001902@yahoo.com>
References : 1 2 3 4 5 6 7
User-Agent : Claws Mail 4.1.1 (GTK 3.24.34; x86_64-w64-mingw32)
On Thu, 19 Dec 2024 00:57:20 -0800
Tim Rentsch <
tr.17687@z991.linuxsc.com> wrote:
Michael S <already5chosen@yahoo.com> writes:
On Wed, 18 Dec 2024 09:45:38 -0800
Tim Rentsch <tr.17687@z991.linuxsc.com> wrote:
Michael S <already5chosen@yahoo.com> writes:
On Tue, 17 Dec 2024 12:54:19 -0800
Tim Rentsch <tr.17687@z991.linuxsc.com> wrote:
Michael S <already5chosen@yahoo.com> writes:
>
[...]
#include <stdio.h>
#include <stdlib.h>
#include <intrin.h>
>
static long long fib(long n)
{
if (fib <= 0)
return 0;
long long f0 = 0, f1 = 1;
for (long i = 1; i < n; ++i) {
long long f2 = f0 + f1;
f0 = f1;
f1 = f2;
}
return f1;
}
>
Here is my second fastest fibonacci calculation code (for
relatively small inputs):
>
typedef long unsigned long ULL;
>
ULL
fibonacci( unsigned n ){
ULL b = n&1, a = b^1;
>
if( n & 2 ) a += b, b += a;
if( n & 4 ) a += b, b += a, a += b, b += a;
if( n & 8 ){
ULL na = 13*a+21*b, nb = 21*a+34*b;
a = na, b = nb;
}
>
n >>= 4;
while( n-- ){
ULL na = 610*a + 987*b, nb = 987*a + 1597*b;
a = na, b = nb;
}
>
return b;
}
>
From BigO perspective this code looks like it's still O(n) so no
better than simple loop.
>
It is O(n) but it has a smaller constant.
According to my measurement gear, in range 0 to 92 there are few
points where it is faster than simple loop, but in majority of
cases it is slower.
>
I'm at a loss to understand how this could happen. In my own
measurements, the code shown above runs faster than a simple loop
in all cases above n > 12, more than twice as fast when n > 17,
more than three times as fast when n > 42, and going up from
there. What might account for these radically different results?
>
May be, number of repetitions?
I run the test only once. That gives relative advantage to smaller
code which is less sensitive to cold ICache and to cold branch
predictors.
That's an interesting idea. Can you also run a measurement where
the code is run inside loops? I think it would be instructive
to compare the results under the two approaches.
(The server I use seems not to have the necessary support to
measure very fast code segments that are run only once.)
Also, because I am running it only once, my test is not so good in
seeing differences between various "good" algorithms.
But I feel that running test once is more realistic.
I think an argument could be made that repeatedly running the code
in a loop gives a more relevant result. Any short code segment that
is run only a handful of times will never have a significant impact
on overall program performance. It's better to optimize under the
assumption that the code will be "hot": if it is hot then the
optimization will be worthwhile, and if it isn't hot then the loss
will be way down in the noise and not worth worrying about.
I feel that running fib(n) with the same n in loop too unrealistic.
So I decided to run, at least, with different values of n.
Ended up spending about a hour just to build a test bench.
The answer is - in a loop of more than dozen iterations your code is
indeed faster. Esp. so for hundred or more iterations.
Here is my test bench.
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <intrin.h>
long long fib(long n);
volatile long long dummy;
int main(int argz, char** argv)
{
if (argz < 2) {
printf("Go away!\n");
return 1;
}
char* endp;
long n = strtol(argv[1], &endp, 0);
if (endp == argv[1]) {
printf("%s is not a number. Go away!\n", argv[1]);
return 1;
}
if (argz == 2) {
unsigned long long t0 = _rdtsc();
long long f = fib(n);
unsigned long long t1 = _rdtsc();
printf("fib(%ld)=%lld. %lld clock cycles.\n", n, f, t1 - t0);
return 0;
}
// argz > 2
if (argv[2][0]=='t') {
long long f0 = 0, f1 = 1;
for (long i = 0; i <= n; ++i) {
long long res = fib(i);
if (res != f0) {
printf("Mistake at n = %ld. %lld instead of %lld\n"
, i, res, f0);
return 1;
}
long long f2 = f0 + f1;
f0 = f1;
f1 = f2;
}
printf("o.k.\n");
return 0;
}
long m = strtol(argv[2], &endp, 0);
if (endp == argv[2]) {
printf("%s is not a number. Go away!\n", argv[2]);
return 1;
}
if (n > m) {
long tmp = n; n = m; m = tmp;
}
unsigned long dn = m - n + 1;
if (dn > 1000000) {
printf(
"Range [%ld..%ld] is wider than 1M. Unsupported. I am sorry.\n"
, n, m);
return 1;
}
size_t n_iter = 10;
if (argz > 3) { // get non-default number of iterations
unsigned long long val = strtoull(argv[3], &endp, 0);
if (endp == argv[3]) {
printf("%s is not a number. Go away!\n", argv[3]);
return 1;
}
if (val < 1 || val > 10000000) {
printf(
"number of iterations %s is out of range "
"[1..10,000,000]. Go away!\n", argv[3]);
return 1;
}
n_iter = val;
}
long* an = malloc(n_iter*sizeof(long));
if (an) {
// prepare array of random arguments
srand(1);
for (size_t i = 0; i < n_iter; ++i)
an[i] = (long)((((uint64_t)(rand() & 0x7fff) * dn) >> 15) + n);
dummy = 0;
unsigned long long t0 = _rdtsc();
for (size_t i = 0; i < n_iter; ++i)
dummy += fib(an[i]);
unsigned long long t1 = _rdtsc();
dummy = 0;
printf(
"n in range [%ld..%ld]. %zu iterations."
" %llu clocks. %llu clocks/iter\n"
,n, m, n_iter, t1-t0, (t1-t0)/n_iter);
free(an);
} else {
perror("malloc()");
return 1;
}
return 0;
}