From 8c8bcc5d1737002a9d153105c16b262d2e201efa Mon Sep 17 00:00:00 2001 From: rpj Date: Tue, 19 Oct 2004 13:24:40 +0000 Subject: Semaphore speedups - alpha, but passes testsuite --- ChangeLog | 107 ++++++++++++++++++++++++++-------------------- Nmakefile.tests | 12 +++++- implement.h | 1 + ptw32_semwait.c | 13 +++++- sem_getvalue.c | 60 +++----------------------- sem_init.c | 3 +- sem_post.c | 3 +- sem_post_multiple.c | 6 ++- sem_timedwait.c | 10 ++++- sem_trywait.c | 3 +- sem_wait.c | 10 ++++- tests/ChangeLog | 12 ++++-- tests/GNUmakefile | 4 +- tests/Makefile | 8 ++-- tests/semaphore2.c | 2 +- tests/semaphore3.c | 120 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 16 files changed, 255 insertions(+), 119 deletions(-) create mode 100644 tests/semaphore3.c diff --git a/ChangeLog b/ChangeLog index c1fe46f..88756a2 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,40 +1,55 @@ -2004-10-15 Ross Johnson +2004-10-19 Ross Johnson - * implement.h (othread_mutex_t_): Use an event in place of - the POSIX semaphore. - * pthread_mutex_init.c: Create the event; remove semaphore init. - * pthread_mutex_destroy.c: Delete the event. - * pthread_mutex_lock.c: Replace the semaphore wait with the event wait. - * pthread_mutex_trylock.c: Likewise. - * pthread_mutex_timedlock.c: Likewise. - * pthread_mutex_unlock.c: Set the event. - + * sem_init.c (sem_init): New semaphore model based on the same idea + as mutexes, i.e. user space interlocked check to avoid + unnecessarily entering kernel space. Wraps the Win32 semaphore and + keeps it's own counter. + * sem_wait.c (sem_wait): Implemented user space check model. + * sem_post.c (sem_post): Likewise. + * sem_trywait.c (sem_trywait): Likewise. + * sem_timedwait.c (sem_timedwait): Likewise. + * sem_post_multiple.c (sem_post_multiple): Likewise. + * sem_getvalue.c (sem_getvalue): Likewise. + * ptw32_semwait.c (ptw32_semwait): Likewise. + * implement.h (sem_t_): Add counter element. + +2004-10-15 Ross Johnson + + * implement.h (othread_mutex_t_): Use an event in place of + the POSIX semaphore. + * pthread_mutex_init.c: Create the event; remove semaphore init. + * pthread_mutex_destroy.c: Delete the event. + * pthread_mutex_lock.c: Replace the semaphore wait with the event wait. + * pthread_mutex_trylock.c: Likewise. + * pthread_mutex_timedlock.c: Likewise. + * pthread_mutex_unlock.c: Set the event. + 2004-10-14 Ross Johnson - * pthread_mutex_lock.c (pthread_mutex_lock): New algorithm using - Terekhov's xchg based variation of Drepper's cmpxchg model. - Theoretically, xchg uses fewer clock cycles than cmpxchg (using IA-32 - as a reference), however, in my opinion bus locking dominates the - equation on smp systems, so the model with the least number of bus - lock operations in the execution path should win, which is Terekhov's - variant. On IA-32 uni-processor systems, it's faster to use the - CMPXCHG instruction without locking the bus than to use the XCHG - instruction, which always locks the bus. This makes the two variants - equal for the non-contended lock (fast lane) execution path on up - IA-32. Testing shows that the xchg variant is faster on up IA-32 as - well if the test forces higher lock contention frequency, even though - kernel calls should be dominating the times (on up IA-32, both - variants used CMPXCHG instructions and neither locked the bus). + * pthread_mutex_lock.c (pthread_mutex_lock): New algorithm using + Terekhov's xchg based variation of Drepper's cmpxchg model. + Theoretically, xchg uses fewer clock cycles than cmpxchg (using IA-32 + as a reference), however, in my opinion bus locking dominates the + equation on smp systems, so the model with the least number of bus + lock operations in the execution path should win, which is Terekhov's + variant. On IA-32 uni-processor systems, it's faster to use the + CMPXCHG instruction without locking the bus than to use the XCHG + instruction, which always locks the bus. This makes the two variants + equal for the non-contended lock (fast lane) execution path on up + IA-32. Testing shows that the xchg variant is faster on up IA-32 as + well if the test forces higher lock contention frequency, even though + kernel calls should be dominating the times (on up IA-32, both + variants used CMPXCHG instructions and neither locked the bus). * pthread_mutex_timedlock.c pthread_mutex_timedlock(): Similarly. * pthread_mutex_trylock.c (pthread_mutex_trylock): Similarly. * pthread_mutex_unlock.c (pthread_mutex_unlock): Similarly. - * ptw32_InterlockedCompareExchange.c (ptw32_InterlockExchange): New - function. - (PTW32_INTERLOCKED_EXCHANGE): Sets up macro to use inlined + * ptw32_InterlockedCompareExchange.c (ptw32_InterlockExchange): New + function. + (PTW32_INTERLOCKED_EXCHANGE): Sets up macro to use inlined ptw32_InterlockedExchange. - * implement.h (PTW32_INTERLOCKED_EXCHANGE): Set default to + * implement.h (PTW32_INTERLOCKED_EXCHANGE): Set default to InterlockedExchange(). - * Makefile: Building using /Ob2 so that asm sections within inline + * Makefile: Building using /Ob2 so that asm sections within inline functions are inlined. 2004-10-08 Ross Johnson @@ -42,16 +57,16 @@ * pthread_mutex_destroy.c (pthread_mutex_destroy): Critical Section element is no longer required. * pthread_mutex_init.c (pthread_mutex_init): Likewise. - * pthread_mutex_lock.c (pthread_mutex_lock): New algorithm following - Drepper's paper at http://people.redhat.com/drepper/futex.pdf, but - using the existing semaphore in place of the futex described in the + * pthread_mutex_lock.c (pthread_mutex_lock): New algorithm following + Drepper's paper at http://people.redhat.com/drepper/futex.pdf, but + using the existing semaphore in place of the futex described in the paper. Idea suggested by Alexander Terekhov - see: http://sources.redhat.com/ml/pthreads-win32/2003/msg00108.html * pthread_mutex_timedlock.c pthread_mutex_timedlock(): Similarly. * pthread_mutex_trylock.c (pthread_mutex_trylock): Similarly. * pthread_mutex_unlock.c (pthread_mutex_unlock): Similarly. - * pthread_barrier_wait.c (pthread_barrier_wait): Use inlined version - of InterlockedCompareExchange() if possible - determined at + * pthread_barrier_wait.c (pthread_barrier_wait): Use inlined version + of InterlockedCompareExchange() if possible - determined at build-time. * pthread_spin_destroy.c pthread_spin_destroy(): Likewise. * pthread_spin_lock.c pthread_spin_lock():Likewise. @@ -59,29 +74,29 @@ * pthread_spin_unlock.c (pthread_spin_unlock):Likewise. * ptw32_InterlockedCompareExchange.c: Sets up macro for inlined use. * implement.h (pthread_mutex_t_): Remove Critical Section element. - (PTW32_INTERLOCKED_COMPARE_EXCHANGE): Set to default non-inlined + (PTW32_INTERLOCKED_COMPARE_EXCHANGE): Set to default non-inlined version of InterlockedCompareExchange(). - * private.c: Include ptw32_InterlockedCompareExchange.c first for + * private.c: Include ptw32_InterlockedCompareExchange.c first for inlining. - * GNUmakefile: Add commandline option to use inlined + * GNUmakefile: Add commandline option to use inlined InterlockedCompareExchange(). * Makefile: Likewise. 2004-09-27 Ross Johnson - * pthread_mutex_lock.c (pthread_mutex_lock): Separate - PTHREAD_MUTEX_NORMAL logic since we do not need to keep or check some - state required by other mutex types; do not check mutex pointer arg - for validity - leave this to the system since we are only checking - for NULL pointers. This should improve speed of NORMAL mutexes and + * pthread_mutex_lock.c (pthread_mutex_lock): Separate + PTHREAD_MUTEX_NORMAL logic since we do not need to keep or check some + state required by other mutex types; do not check mutex pointer arg + for validity - leave this to the system since we are only checking + for NULL pointers. This should improve speed of NORMAL mutexes and marginally improve speed of other type. * pthread_mutex_trylock.c (pthread_mutex_trylock): Likewise. * pthread_mutex_unlock.c (pthread_mutex_unlock): Likewise; also avoid - entering the critical section for the no-waiters case, with approx. + entering the critical section for the no-waiters case, with approx. 30% reduction in lock/unlock overhead for this case. * pthread_mutex_timedlock.c (pthread_mutex_timedlock): Likewise; also - no longer keeps mutex if post-timeout second attempt succeeds - this - will assist applications that wish to impose strict lock deadlines, + no longer keeps mutex if post-timeout second attempt succeeds - this + will assist applications that wish to impose strict lock deadlines, rather than simply to escape from frozen locks. 2004-09-09 Tristan Savatier @@ -92,7 +107,7 @@ [Maintainer's note: the race condition is harmless on SPU systems and only a problem on MPU systems if concurrent access results in an exception (presumably generated by a hardware interrupt). There are - other instances of similar harmless race conditions that have not + other instances of similar harmless race conditions that have not been identified as issues.] 2004-09-09 Ross Johnson diff --git a/Nmakefile.tests b/Nmakefile.tests index 8f2f527..4647273 100644 --- a/Nmakefile.tests +++ b/Nmakefile.tests @@ -93,6 +93,7 @@ rwlock4:: rwlock4.c rwlock5:: rwlock5.c rwlock6:: rwlock6.c rwlock7:: rwlock7.c +rwlock8:: rwlock8.c rwlock2_t:: rwlock2_t.c rwlock3_t:: rwlock3_t.c rwlock4_t:: rwlock4_t.c @@ -101,6 +102,7 @@ rwlock6_t:: rwlock6_t.c rwlock6_t2:: rwlock6_t2.c semaphore1:: semaphore1.c semaphore2:: semaphore2.c +semaphore3:: semaphore3.c context1:: context1.c cancel3:: cancel3.c cancel4:: cancel4.c @@ -138,10 +140,14 @@ cancel9:: cancel9.c sizes: :test: sizes loadfree: :test: -semaphore1 :test: loadfree -semaphore2 :test: loadfree mutex5 :test: loadfree mutex1 :test: loadfree +mutex1n :test: loadfree +mutex1r :test: loadfree +mutex1e :test: loadfree +semaphore1 :test: loadfree +semaphore2 :test: loadfree +semaphore3 :test: loadfree mutex2 :test: loadfree mutex2r :test: loadfree mutex2e :test: loadfree @@ -209,6 +215,8 @@ rwlock3 :test: rwlock2 rwlock4 :test: rwlock3 rwlock5 :test: rwlock4 rwlock6 :test: rwlock5 +rwlock7 :test: rwlock6 +rwlock8 :test: rwlock7 rwlock2_t :test: rwlock2 rwlock3_t :test: rwlock2_t rwlock4_t :test: rwlock3_t diff --git a/implement.h b/implement.h index f71f506..9cb3cb3 100644 --- a/implement.h +++ b/implement.h @@ -172,6 +172,7 @@ struct sem_t_ CRITICAL_SECTION sem_lock_cs; HANDLE event; #else /* NEED_SEM */ + int value; HANDLE sem; #endif /* NEED_SEM */ }; diff --git a/ptw32_semwait.c b/ptw32_semwait.c index 38f31f5..2e9c883 100644 --- a/ptw32_semwait.c +++ b/ptw32_semwait.c @@ -83,7 +83,15 @@ ptw32_semwait (sem_t * sem) #else /* NEED_SEM */ - status = WaitForSingleObject ((*sem)->sem, INFINITE); + if (InterlockedDecrement((LPLONG) &(*sem)->value) < 0) + { + /* Must wait */ + status = WaitForSingleObject ((*sem)->sem, INFINITE); + } + else + { + return 0; + } #endif @@ -100,6 +108,9 @@ ptw32_semwait (sem_t * sem) } else { +#ifndef NEED_SEM + (void) InterlockedIncrement((LPLONG) &(*sem)->value); +#endif result = EINVAL; } } diff --git a/sem_getvalue.c b/sem_getvalue.c index c34739a..be80bea 100644 --- a/sem_getvalue.c +++ b/sem_getvalue.c @@ -77,15 +77,14 @@ sem_getvalue (sem_t * sem, int *sval) * pointed to by sem in the int pointed to by sval. */ { - int result = 0; - long value; - if (sem == NULL || *sem == NULL || sval == NULL) { - result = EINVAL; + errno = EINVAL; + return -1; } else { + long value; register sem_t s = *sem; #ifdef NEED_SEM @@ -96,59 +95,12 @@ sem_getvalue (sem_t * sem, int *sval) #else - /* - * There appears to be NO atomic means of determining the - * value of the semaphore. Using only ReleaseSemaphore() - * with either a zero or oversized count parameter has been - * suggested but this trick doesn't produce consistent results - * across Windows versions (the technique uses values that are - * knowingly illegal but hopes to extract the current value - * anyway - the zero parameter appears to work for Win9x but - * neither work reliably for WinNT). - * - * The intrusive method below will give false results - * at times but it at least errs on the side of - * caution. Competing threads might occasionally believe - * the semaphore has a count of one less than it actually - * would have and possibly block momentarily unecessarily, - * but they will never see a higher semaphore value than - * there should be. - * - * - * Multiple threads calling sem_getvalue() at the same time - * may not all return the same value (assuming no calls to - * other semaphore routines). They will always return the - * correct value or a lesser value. This problem could be fixed - * with a global or a per-semaphore critical section here. - * - * An equally approximate but IMO slightly riskier approach - * would be to keep a separate per-semaphore counter and - * decrement/increment it inside of sem_wait() and sem_post() - * etc using the Interlocked* functions. - */ - if (WaitForSingleObject (s->sem, 0) == WAIT_TIMEOUT) - { - /* Failed - must be zero */ - value = 0; - } - else - { - /* Decremented semaphore - release it and note the value */ - (void) ReleaseSemaphore (s->sem, 1L, &value); - value++; - } + value = s->value; #endif - } - - if (result != 0) - { - errno = result; - return -1; + *sval = value; + return 0; } - *sval = value; - return 0; - } /* sem_getvalue */ diff --git a/sem_init.c b/sem_init.c index eca0026..af0e285 100644 --- a/sem_init.c +++ b/sem_init.c @@ -127,8 +127,9 @@ sem_init (sem_t * sem, int pshared, unsigned int value) #else /* NEED_SEM */ + s->value = value; s->sem = CreateSemaphore (NULL, /* Always NULL */ - (long) value, /* Initial value */ + (long) 0, /* Force threads to wait */ (long) _POSIX_SEM_VALUE_MAX, /* Maximum value */ NULL); /* Name */ diff --git a/sem_post.c b/sem_post.c index 3281643..1d607e8 100644 --- a/sem_post.c +++ b/sem_post.c @@ -85,7 +85,8 @@ sem_post (sem_t * sem) #else /* NEED_SEM */ - else if (!ReleaseSemaphore ((*sem)->sem, 1, 0)) + else if (InterlockedExchangeAdd((LPLONG) &(*sem)->value, (LONG) 1) < 0 + && !ReleaseSemaphore((*sem)->sem, 1, NULL)) #endif /* NEED_SEM */ diff --git a/sem_post_multiple.c b/sem_post_multiple.c index 43c71a6..e30e01a 100644 --- a/sem_post_multiple.c +++ b/sem_post_multiple.c @@ -76,6 +76,9 @@ sem_post_multiple (sem_t * sem, int count) */ { int result = 0; +#ifndef NEED_SEM + long waiters; +#endif if (sem == NULL || *sem == NULL || count <= 0) { @@ -88,7 +91,8 @@ sem_post_multiple (sem_t * sem, int count) #else /* NEED_SEM */ - else if (!ReleaseSemaphore ((*sem)->sem, count, 0)) + else if ((waiters = -InterlockedExchangeAdd((LPLONG) &(*sem)->value, (LONG) count)) > 0 + && !ReleaseSemaphore((*sem)->sem, (waiters<=count)?waiters:count, 0)) #endif /* NEED_SEM */ diff --git a/sem_timedwait.c b/sem_timedwait.c index e374006..e7fa55c 100644 --- a/sem_timedwait.c +++ b/sem_timedwait.c @@ -189,7 +189,15 @@ sem_timedwait (sem_t * sem, const struct timespec *abstime) #else /* NEED_SEM */ - result = (pthreadCancelableTimedWait ((*sem)->sem, milliseconds)); + if (InterlockedDecrement((LPLONG) &(*sem)->value) < 0) + { + /* Must wait */ + result = pthreadCancelableTimedWait ((*sem)->sem, milliseconds); + if (result != 0) + { + (void) InterlockedIncrement((LPLONG) &(*sem)->value); + } + } #endif diff --git a/sem_trywait.c b/sem_trywait.c index 342ee06..253c10e 100644 --- a/sem_trywait.c +++ b/sem_trywait.c @@ -92,8 +92,9 @@ sem_trywait (sem_t * sem) { result = EINVAL; } - else if (WaitForSingleObject ((*sem)->sem, 0) == WAIT_TIMEOUT) + else if (InterlockedDecrement((LPLONG) &(*sem)->value) < 0) { + (void) InterlockedIncrement((LPLONG) &(*sem)->value); result = EAGAIN; } diff --git a/sem_wait.c b/sem_wait.c index e2e171c..0c2ed59 100644 --- a/sem_wait.c +++ b/sem_wait.c @@ -92,8 +92,14 @@ sem_wait (sem_t * sem) #else /* NEED_SEM */ - result = pthreadCancelableWait ((*sem)->sem); - + sem_t s = *sem; + + if (InterlockedDecrement((LPLONG) &s->value) < 0) + { + /* Must wait */ + result = pthreadCancelableWait (s->sem); + } + #endif /* NEED_SEM */ } diff --git a/tests/ChangeLog b/tests/ChangeLog index f55b2b6..65d52f1 100644 --- a/tests/ChangeLog +++ b/tests/ChangeLog @@ -1,8 +1,12 @@ -2004-10-14 Ross Johnson +2004-10-19 Ross Johnson - * rwlock7.c (main): Tidy up statistics reporting; randomise - update accesses. - * rwlock8.c: New test. + * semaphore3.c: New test. + +2004-10-14 Ross Johnson + + * rwlock7.c (main): Tidy up statistics reporting; randomise + update accesses. + * rwlock8.c: New test. 2004-09-08 Alexandre Girao diff --git a/tests/GNUmakefile b/tests/GNUmakefile index 0979fda..aecd718 100644 --- a/tests/GNUmakefile +++ b/tests/GNUmakefile @@ -70,7 +70,8 @@ COPYFILES = $(HDR) $(LIB) $(DLL) $(QAPC) # stop. TESTS = sizes loadfree \ - self1 mutex5 mutex1 mutex1e mutex1n mutex1r semaphore1 semaphore2 \ + self1 mutex5 mutex1 mutex1e mutex1n mutex1r \ + semaphore1 semaphore2 semaphore3 \ condvar1 condvar1_1 condvar1_2 condvar2 condvar2_1 exit1 \ create1 create2 reuse1 reuse2 equal1 \ kill1 valid1 valid2 \ @@ -251,6 +252,7 @@ self1.pass: self2.pass: create1.pass semaphore1.pass: semaphore2.pass: +semaphore3.pass: semaphore2.pass sizes.pass: spin1.pass: spin2.pass: spin1.pass diff --git a/tests/Makefile b/tests/Makefile index 535347f..71154b3 100644 --- a/tests/Makefile +++ b/tests/Makefile @@ -80,8 +80,10 @@ EHFLAGS = # stop. PASSES= sizes.pass loadfree.pass \ - semaphore1.pass semaphore2.pass self1.pass mutex5.pass \ - mutex1.pass mutex1n.pass mutex1e.pass mutex1r.pass mutex2.pass mutex3.pass \ + self1.pass mutex5.pass \ + mutex1.pass mutex1n.pass mutex1e.pass mutex1r.pass \ + semaphore1.pass semaphore2.pass semaphore3.pass \ + mutex2.pass mutex3.pass \ mutex2r.pass mutex2e.pass mutex3r.pass mutex3e.pass \ condvar1.pass condvar1_1.pass condvar1_2.pass condvar2.pass condvar2_1.pass \ exit1.pass create1.pass create2.pass reuse1.pass reuse2.pass equal1.pass \ @@ -100,7 +102,7 @@ PASSES= sizes.pass loadfree.pass \ condvar4.pass condvar5.pass condvar6.pass \ condvar7.pass condvar8.pass condvar9.pass \ errno1.pass \ - rwlock1.pass rwlock2.pass rwlock3.pass rwlock4.pass \ + rwlock1.pass rwlock2.pass rwlock3.pass rwlock4.pass \ rwlock5.pass rwlock6.pass rwlock7.pass rwlock8.pass \ rwlock2_t.pass rwlock3_t.pass rwlock4_t.pass rwlock5_t.pass rwlock6_t.pass rwlock6_t2.pass \ context1.pass \ diff --git a/tests/semaphore2.c b/tests/semaphore2.c index 8e264ea..e3663d9 100644 --- a/tests/semaphore2.c +++ b/tests/semaphore2.c @@ -73,7 +73,7 @@ #include "test.h" -#define MAX_COUNT 100000 +#define MAX_COUNT 100 int main() diff --git a/tests/semaphore3.c b/tests/semaphore3.c new file mode 100644 index 0000000..c6570f8 --- /dev/null +++ b/tests/semaphore3.c @@ -0,0 +1,120 @@ +/* + * File: semaphore3.c + * + * + * -------------------------------------------------------------------------- + * + * Pthreads-win32 - POSIX Threads Library for Win32 + * Copyright(C) 1998 John E. Bossom + * Copyright(C) 1999,2003 Pthreads-win32 contributors + * + * Contact Email: rpj@callisto.canberra.edu.au + * + * The current list of contributors is contained + * in the file CONTRIBUTORS included with the source + * code distribution. The list can also be seen at the + * following World Wide Web location: + * http://sources.redhat.com/pthreads-win32/contributors.html + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library in the file COPYING.LIB; + * if not, write to the Free Software Foundation, Inc., + * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA + * + * -------------------------------------------------------------------------- + * + * Test Synopsis: Verify sem_getvalue returns the correct number of waiters. + * - + * + * Test Method (Validation or Falsification): + * - Validation + * + * Requirements Tested: + * - + * + * Features Tested: + * - + * + * Cases Tested: + * - + * + * Description: + * - + * + * Environment: + * - + * + * Input: + * - None. + * + * Output: + * - File name, Line number, and failed expression on failure. + * - No output on success. + * + * Assumptions: + * - + * + * Pass Criteria: + * - Process returns zero exit status. + * + * Fail Criteria: + * - Process returns non-zero exit status. + */ + +#include "test.h" + +#define MAX_COUNT 100 + +sem_t s; + +void * +thr (void * arg) +{ + assert(pthread_detach(pthread_self()) == 0); + assert(sem_wait(&s) == 0); + + return NULL; +} + +int +main() +{ + int value = 0; + int i; + pthread_t t[MAX_COUNT]; + + assert(sem_init(&s, PTHREAD_PROCESS_PRIVATE, 0) == 0); + assert(sem_getvalue(&s, &value) == 0); + assert(value == 0); +// printf("Value = %ld\n", value); + + for (i = 1; i <= MAX_COUNT; i++) + { + assert(pthread_create(&t[i-1], NULL, thr, NULL) == 0); + sched_yield(); + assert(sem_getvalue(&s, &value) == 0); +// printf("Value = %ld\n", value); + assert(-value == i); + } + + for (i = MAX_COUNT - 1; i >= 0; i--) + { + assert(sem_post(&s) == 0); + assert(sem_getvalue(&s, &value) == 0); +// printf("Value = %ld\n", value); + assert(-value == i); + } + + return 0; +} + -- cgit v1.2.3