Sujet : notifying from inside or outside De : Bonita.Montero (at) *nospam* gmail.com (Bonita Montero) Groupes :comp.lang.c Date : 09. May 2025, 14:05:48 Autres entêtes Organisation : A noiseless patient Spider Message-ID :<vvkuj8$2qbbc$1@raubtier-asyl.eternal-september.org> User-Agent : Mozilla Thunderbird
These are the results: 10000 rounds inside: notify_one: 2901.1 context switches per thread notify_all: 2851.94 context switches per thread outside: notify_one: 10003.3 context switches per thread notify_all: 7292.81 context switches per thread notify_one is done n times, notify_all only once. So with glibc it's better to notify while holding the mutex. For Windows I've got only the CPU-times: 10000 rounds inside: one: 2.29688 seconds all: 5.5 seconds outside: one: 6.10938 seconds all: 7.39062 seconds So for Windows it's the best to notify individually while holding the mutex. All tests are with 31 threads waiting for a notification and one thread which is notifying. #if defined(_WIN32) #include <Windows.h> #endif #include <iostream> #include <thread> #include <mutex> #include <condition_variable> #include <atomic> #include <semaphore> #include <vector> #include <string_view> #if defined(__unix__) #include <sys/resource.h> #endif using namespace std; struct params { params( unsigned argc, char **argv ); bool outside, add, all; }; int main( int argc, char **argv ) { constexpr size_t N = 10'000; cout << N << " rounds" << endl; int hc = thread::hardware_concurrency(), nClients = hc - 1; for( unsigned outside = 0; outside <= 1; ++outside ) { cout << (outside ? "outside:" : "inside:") << endl; for( unsigned all = 0; all <= 1; ++all ) { cout << (all ? "\tall:" : "\tone:") << endl; mutex mtx; int signalled = 0; condition_variable cv; atomic_int ai( 0 ); binary_semaphore bs( false ); vector<jthread> threads; atomic_int64_t nVoluntary( 0 ); atomic_bool stop( false ); for( int c = nClients; c; --c ) threads.emplace_back( [&] { for( size_t r = N; r; --r ) { unique_lock lock( mtx ); cv.wait( lock, [&] { return (bool)signalled; } ); --signalled; lock.unlock(); if( ai.fetch_sub( 1, memory_order_relaxed ) == 1 ) bs.release( 1 ); } #if defined(__unix__) rusage ru; getrusage( RUSAGE_THREAD, &ru ); nVoluntary.fetch_add( ru.ru_nvcsw, memory_order_relaxed ); #endif } ); for( size_t r = N; r; --r ) { auto notify = [&] { if( all ) cv.notify_all(); else for( int c = nClients; c; cv.notify_one(), --c ); }; unique_lock lock( mtx ); signalled = nClients; if( !outside ) notify(); ai.store( nClients, memory_order_relaxed ); lock.unlock(); if( outside ) notify(); bs.acquire(); } stop.store( true, memory_order_relaxed ); threads.resize( 0 ); #if defined(_WIN32) FILETIME ftDummy, ftKernel, ftUser; GetProcessTimes( GetCurrentProcess(), &ftDummy, &ftDummy, &ftKernel, &ftUser ); auto ftToU64 = []( FILETIME const &ft ) { return (uint64_t)ft.dwHighDateTime << 32 | ft.dwLowDateTime; }; int64_t t = ftToU64( ftKernel ) + ftToU64( ftUser ); cout << "\t\t" << t / 1.0e7 << " seconds" << endl; #elif defined(__unix__) cout << "\t\t" << (double)nVoluntary.load( memory_order_relaxed ) / nClients << " context switches per thread" << endl; #endif } } }