Sujet : Re: notifying from inside or outside
De : Bonita.Montero (at) *nospam* gmail.com (Bonita Montero)
Groupes : comp.lang.c++ de.comp.lang.c
Suivi-à : comp.lang.c++
Date : 09. May 2025, 14:06:41
Autres entêtes
Organisation : A noiseless patient Spider
Message-ID : <vvkukt$2qbbc$3@raubtier-asyl.eternal-september.org>
References : 1
User-Agent : Mozilla Thunderbird
Am 09.05.2025 um 15:05 schrieb Bonita Montero:
These are the results:
10000 rounds
inside:
notify_one:
2901.1 context switches per thread
notify_all:
2851.94 context switches per thread
outside:
notify_one:
10003.3 context switches per thread
notify_all:
7292.81 context switches per thread
notify_one is done n times, notify_all only once.
So with glibc it's better to notify while holding the mutex.
For Windows I've got only the CPU-times:
10000 rounds
inside:
one:
2.29688 seconds
all:
5.5 seconds
outside:
one:
6.10938 seconds
all:
7.39062 seconds
So for Windows it's best to notify individually while holding the
mutex.
All tests are with 31 threads waiting for a notification and one thread
which is notifying.
#if defined(_WIN32)
#include <Windows.h>
#endif
#include <iostream>
#include <thread>
#include <mutex>
#include <condition_variable>
#include <atomic>
#include <semaphore>
#include <vector>
#include <string_view>
#if defined(__unix__)
#include <sys/resource.h>
#endif
using namespace std;
// Option flags for a benchmark run, built from the command line.
// NOTE(review): only the constructor's declaration is visible here and nothing
// in the visible code uses this struct — confirm it is still needed.
struct params
{
	// Declared only; the definition is not part of the visible source.
	params( unsigned argc, char **argv );

	bool outside; // presumably: notify after releasing the mutex — TODO confirm
	bool add;     // meaning not derivable from the visible code — TODO confirm
	bool all;     // presumably: use notify_all instead of notify_one — TODO confirm
};
int main( int argc, char **argv )
{
constexpr size_t N = 10'000;
cout << N << " rounds" << endl;
int hc = thread::hardware_concurrency(), nClients = hc - 1;
for( unsigned outside = 0; outside <= 1; ++outside )
{
cout << (outside ? "outside:" : "inside:") << endl;
for( unsigned all = 0; all <= 1; ++all )
{
cout << (all ? "\tall:" : "\tone:") << endl;
mutex mtx;
int signalled = 0;
condition_variable cv;
atomic_int ai( 0 );
binary_semaphore bs( false );
vector<jthread> threads;
atomic_int64_t nVoluntary( 0 );
atomic_bool stop( false );
for( int c = nClients; c; --c )
threads.emplace_back( [&]
{
for( size_t r = N; r; --r )
{
unique_lock lock( mtx );
cv.wait( lock, [&] { return (bool)signalled; } );
--signalled;
lock.unlock();
if( ai.fetch_sub( 1, memory_order_relaxed ) == 1 )
bs.release( 1 );
}
#if defined(__unix__)
rusage ru;
getrusage( RUSAGE_THREAD, &ru );
nVoluntary.fetch_add( ru.ru_nvcsw, memory_order_relaxed );
#endif
} );
for( size_t r = N; r; --r )
{
auto notify = [&]
{
if( all )
cv.notify_all();
else
for( int c = nClients; c; cv.notify_one(), --c );
};
unique_lock lock( mtx );
signalled = nClients;
if( !outside )
notify();
ai.store( nClients, memory_order_relaxed );
lock.unlock();
if( outside )
notify();
bs.acquire();
}
stop.store( true, memory_order_relaxed );
threads.resize( 0 );
#if defined(_WIN32)
FILETIME ftDummy, ftKernel, ftUser;
GetProcessTimes( GetCurrentProcess(), &ftDummy, &ftDummy, &ftKernel, &ftUser );
auto ftToU64 = []( FILETIME const &ft ) { return (uint64_t)ft.dwHighDateTime << 32 | ft.dwLowDateTime; };
int64_t t = ftToU64( ftKernel ) + ftToU64( ftUser );
cout << "\t\t" << t / 1.0e7 << " seconds" << endl;
#elif defined(__unix__)
cout << "\t\t" << (double)nVoluntary.load( memory_order_relaxed ) / nClients << " context switches per thread" << endl;
#endif
}
}
}