Sujet : Re: My PC's cores can synchronize to about 1.000 clock cycles accuracy
De : Bonita.Montero (at) *nospam* gmail.com (Bonita Montero)
Groupes : comp.lang.c comp.lang.c++
Suivi-à : comp.lang.c++
Date : 13. Sep 2024, 17:49:51
Autres entêtes
Organisation : A noiseless patient Spider
Message-ID : <vc1qf5$v5bi$2@raubtier-asyl.eternal-september.org>
References : 1
User-Agent : Mozilla Thunderbird
Sorry, wrong newsgroup.
Am 13.09.2024 um 18:45 schrieb Bonita Montero:
#include <atomic>
#include <barrier>
#include <cstdint>
#include <cstdlib>
#include <iostream>
#include <thread>
#include <vector>
#if defined(_WIN32)
#include <intrin.h>
#elif defined(__linux__)
#include <x86intrin.h>
#endif
using namespace std;
int main()
{
unsigned hc = thread::hardware_concurrency();
barrier bar( hc );
atomic_uint synch( hc );
atomic_uint64_t zero( 0 );
atomic_int64_t diffs( 0 );
auto thr = [&]()
{
int64_t sum = 0;
for( unsigned t = 1'000; t; --t )
{
bar.arrive_and_wait();
if( synch.fetch_sub( 1, memory_order_relaxed ) > 1 )
while( synch.load( memory_order_relaxed ) );
uint64_t tsc = __rdtsc(), expected = 0;
if( !zero.compare_exchange_weak( expected, tsc, memory_order_relaxed, memory_order_relaxed ) )
sum += abs( (int64_t)(expected - tsc) );
bar.arrive_and_wait();
synch.store( hc );
zero.store( 0, memory_order_relaxed );
}
diffs.fetch_add( sum, memory_order_relaxed );
};
vector<jthread> threads;
threads.reserve( hc - 1 );
for( unsigned t = hc - 1; t; --t )
threads.emplace_back( thr );
thr();
threads.resize( 0 );
cout << (double)diffs.load( memory_order_relaxed ) / (1'000.0 * hc) << endl;
}
My PC is an AMD 7950X 16-core system.