From 8c8bcc5d1737002a9d153105c16b262d2e201efa Mon Sep 17 00:00:00 2001
From: rpj <rpj>
Date: Tue, 19 Oct 2004 13:24:40 +0000
Subject: Semaphore speedups - alpha, but passes testsuite

---
 ChangeLog           | 107 ++++++++++++++++++++++++++--------------------
 Nmakefile.tests     |  12 +++++-
 implement.h         |   1 +
 ptw32_semwait.c     |  13 +++++-
 sem_getvalue.c      |  60 +++-----------------------
 sem_init.c          |   3 +-
 sem_post.c          |   3 +-
 sem_post_multiple.c |   6 ++-
 sem_timedwait.c     |  10 ++++-
 sem_trywait.c       |   3 +-
 sem_wait.c          |  10 ++++-
 tests/ChangeLog     |  12 ++++--
 tests/GNUmakefile   |   4 +-
 tests/Makefile      |   8 ++--
 tests/semaphore2.c  |   2 +-
 tests/semaphore3.c  | 120 ++++++++++++++++++++++++++++++++++++++++++++++++++++
 16 files changed, 255 insertions(+), 119 deletions(-)
 create mode 100644 tests/semaphore3.c
diff --git a/ChangeLog b/ChangeLog
index c1fe46f..88756a2 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,40 +1,55 @@
-2004-10-15  Ross Johnson  <rpj at callisto.canberra.edu.au>
+2004-10-19  Ross Johnson  <rpj at callisto.canberra.edu.au>
 
-	* implement.h (othread_mutex_t_): Use an event in place of
-	the POSIX semaphore.
-	* pthread_mutex_init.c: Create the event; remove semaphore init.
-	* pthread_mutex_destroy.c: Delete the event.
-	* pthread_mutex_lock.c: Replace the semaphore wait with the event wait.
-	* pthread_mutex_trylock.c: Likewise.
-	* pthread_mutex_timedlock.c: Likewise.
-	* pthread_mutex_unlock.c: Set the event.
-	
+	* sem_init.c (sem_init): New semaphore model based on the same idea
+	as mutexes, i.e. user space interlocked check to avoid 
+	unnecessarily entering kernel space. Wraps the Win32 semaphore and
+	keeps it's own counter.
+	* sem_wait.c (sem_wait): Implemented user space check model.
+	* sem_post.c (sem_post): Likewise.
+	* sem_trywait.c (sem_trywait): Likewise.
+	* sem_timedwait.c (sem_timedwait): Likewise.
+	* sem_post_multiple.c (sem_post_multiple): Likewise.
+	* sem_getvalue.c (sem_getvalue): Likewise.
+	* ptw32_semwait.c (ptw32_semwait): Likewise.
+	* implement.h (sem_t_): Add counter element.
+
+2004-10-15  Ross Johnson  <rpj at callisto.canberra.edu.au>
+
+	* implement.h (othread_mutex_t_): Use an event in place of
+	the POSIX semaphore.
+	* pthread_mutex_init.c: Create the event; remove semaphore init.
+	* pthread_mutex_destroy.c: Delete the event.
+	* pthread_mutex_lock.c: Replace the semaphore wait with the event wait.
+	* pthread_mutex_trylock.c: Likewise.
+	* pthread_mutex_timedlock.c: Likewise.
+	* pthread_mutex_unlock.c: Set the event.
+	
 2004-10-14  Ross Johnson  <rpj at callisto.canberra.edu.au>
 
-	* pthread_mutex_lock.c (pthread_mutex_lock): New algorithm using
-	Terekhov's xchg based variation of Drepper's cmpxchg model.
-	Theoretically, xchg uses fewer clock cycles than cmpxchg (using IA-32
-	as a reference), however, in my opinion bus locking dominates the
-	equation on smp systems, so the model with the least number of bus
-	lock operations in the execution path should win, which is Terekhov's
-	variant. On IA-32 uni-processor systems, it's faster to use the
-	CMPXCHG instruction without locking the bus than to use the XCHG
-	instruction, which always locks the bus. This makes the two variants
-	equal for the non-contended lock (fast lane) execution path on up
-	IA-32. Testing shows that the xchg variant is faster on up IA-32 as
-	well if the test forces higher lock contention frequency, even though
-	kernel calls should be dominating the times (on up IA-32, both
-	variants used CMPXCHG instructions and neither locked the bus).
+	* pthread_mutex_lock.c (pthread_mutex_lock): New algorithm using
+	Terekhov's xchg based variation of Drepper's cmpxchg model.
+	Theoretically, xchg uses fewer clock cycles than cmpxchg (using IA-32
+	as a reference), however, in my opinion bus locking dominates the
+	equation on smp systems, so the model with the least number of bus
+	lock operations in the execution path should win, which is Terekhov's
+	variant. On IA-32 uni-processor systems, it's faster to use the
+	CMPXCHG instruction without locking the bus than to use the XCHG
+	instruction, which always locks the bus. This makes the two variants
+	equal for the non-contended lock (fast lane) execution path on up
+	IA-32. Testing shows that the xchg variant is faster on up IA-32 as
+	well if the test forces higher lock contention frequency, even though
+	kernel calls should be dominating the times (on up IA-32, both
+	variants used CMPXCHG instructions and neither locked the bus).
 	* pthread_mutex_timedlock.c pthread_mutex_timedlock(): Similarly.
 	* pthread_mutex_trylock.c (pthread_mutex_trylock): Similarly.
 	* pthread_mutex_unlock.c (pthread_mutex_unlock): Similarly.
-	* ptw32_InterlockedCompareExchange.c (ptw32_InterlockExchange): New
-	function.
-	(PTW32_INTERLOCKED_EXCHANGE): Sets up macro to use inlined
+	* ptw32_InterlockedCompareExchange.c (ptw32_InterlockExchange): New
+	function.
+	(PTW32_INTERLOCKED_EXCHANGE): Sets up macro to use inlined
 	ptw32_InterlockedExchange.
-	* implement.h (PTW32_INTERLOCKED_EXCHANGE): Set default to
+	* implement.h (PTW32_INTERLOCKED_EXCHANGE): Set default to
 	InterlockedExchange().
-	* Makefile: Building using /Ob2 so that asm sections within inline
+	* Makefile: Building using /Ob2 so that asm sections within inline
 	functions are inlined.
 
 2004-10-08  Ross Johnson  <rpj at callisto.canberra.edu.au>
@@ -42,16 +57,16 @@
 	* pthread_mutex_destroy.c (pthread_mutex_destroy): Critical Section
 	element is no longer required.
 	* pthread_mutex_init.c (pthread_mutex_init): Likewise.
-	* pthread_mutex_lock.c (pthread_mutex_lock): New algorithm following
-	Drepper's paper at http://people.redhat.com/drepper/futex.pdf, but
-	using the existing semaphore in place of the futex described in the
+	* pthread_mutex_lock.c (pthread_mutex_lock): New algorithm following
+	Drepper's paper at http://people.redhat.com/drepper/futex.pdf, but
+	using the existing semaphore in place of the futex described in the
 	paper. Idea suggested by Alexander Terekhov - see:
 	http://sources.redhat.com/ml/pthreads-win32/2003/msg00108.html
 	* pthread_mutex_timedlock.c pthread_mutex_timedlock(): Similarly.
 	* pthread_mutex_trylock.c (pthread_mutex_trylock): Similarly.
 	* pthread_mutex_unlock.c (pthread_mutex_unlock): Similarly.
-	* pthread_barrier_wait.c (pthread_barrier_wait): Use inlined version
-	of InterlockedCompareExchange() if possible - determined at
+	* pthread_barrier_wait.c (pthread_barrier_wait): Use inlined version
+	of InterlockedCompareExchange() if possible - determined at
 	build-time.
 	* pthread_spin_destroy.c pthread_spin_destroy(): Likewise.
 	* pthread_spin_lock.c pthread_spin_lock():Likewise.
@@ -59,29 +74,29 @@
 	* pthread_spin_unlock.c (pthread_spin_unlock):Likewise.
 	* ptw32_InterlockedCompareExchange.c: Sets up macro for inlined use.
 	* implement.h (pthread_mutex_t_): Remove Critical Section element.
-	(PTW32_INTERLOCKED_COMPARE_EXCHANGE): Set to default non-inlined
+	(PTW32_INTERLOCKED_COMPARE_EXCHANGE): Set to default non-inlined
 	version of InterlockedCompareExchange().
-	* private.c: Include ptw32_InterlockedCompareExchange.c first for
+	* private.c: Include ptw32_InterlockedCompareExchange.c first for
 	inlining.
-	* GNUmakefile: Add commandline option to use inlined
+	* GNUmakefile: Add commandline option to use inlined
 	InterlockedCompareExchange().
 	* Makefile: Likewise.
 
 2004-09-27  Ross Johnson  <rpj at callisto.canberra.edu.au>
 
-	* pthread_mutex_lock.c (pthread_mutex_lock): Separate
-	PTHREAD_MUTEX_NORMAL logic since we do not need to keep or check some
-	state required by other mutex types; do not check mutex pointer arg
-	for validity - leave this to the system since we are only checking
-	for NULL pointers. This should improve speed of NORMAL mutexes and
+	* pthread_mutex_lock.c (pthread_mutex_lock): Separate
+	PTHREAD_MUTEX_NORMAL logic since we do not need to keep or check some
+	state required by other mutex types; do not check mutex pointer arg
+	for validity - leave this to the system since we are only checking
+	for NULL pointers. This should improve speed of NORMAL mutexes and
 	marginally improve speed of other type.
 	* pthread_mutex_trylock.c (pthread_mutex_trylock): Likewise.
 	* pthread_mutex_unlock.c (pthread_mutex_unlock): Likewise; also avoid
-	entering the critical section for the no-waiters case, with approx.
+	entering the critical section for the no-waiters case, with approx.
 	30% reduction in lock/unlock overhead for this case.
 	* pthread_mutex_timedlock.c (pthread_mutex_timedlock): Likewise; also
-	no longer keeps mutex if post-timeout second attempt succeeds - this
-	will assist applications that wish to impose strict lock deadlines,
+	no longer keeps mutex if post-timeout second attempt succeeds - this
+	will assist applications that wish to impose strict lock deadlines,
 	rather than simply to escape from frozen locks.
 
 2004-09-09  Tristan Savatier  <tristan at mpegtv.com>
@@ -92,7 +107,7 @@
 	[Maintainer's note: the race condition is harmless on SPU systems
 	and only a problem on MPU systems if concurrent access results in an
 	exception (presumably generated by a hardware interrupt). There are
-	other instances of similar harmless race conditions that have not
+	other instances of similar harmless race conditions that have not
 	been identified as issues.]
 
 2004-09-09  Ross Johnson  <rpj at callisto.canberra.edu.au>
diff --git a/Nmakefile.tests b/Nmakefile.tests
index 8f2f527..4647273 100644
--- a/Nmakefile.tests
+++ b/Nmakefile.tests
@@ -93,6 +93,7 @@ rwlock4::	rwlock4.c
 rwlock5::	rwlock5.c
 rwlock6::	rwlock6.c
 rwlock7::	rwlock7.c
+rwlock8::	rwlock8.c
 rwlock2_t::	rwlock2_t.c
 rwlock3_t::	rwlock3_t.c
 rwlock4_t::	rwlock4_t.c
@@ -101,6 +102,7 @@ rwlock6_t::	rwlock6_t.c
 rwlock6_t2::	rwlock6_t2.c
 semaphore1::	semaphore1.c
 semaphore2::	semaphore2.c
+semaphore3::	semaphore3.c
 context1::	context1.c
 cancel3::	cancel3.c
 cancel4::	cancel4.c
@@ -138,10 +140,14 @@ cancel9::	cancel9.c
 
 sizes:		:test:	sizes
 loadfree:	:test:
-semaphore1	:test:	loadfree
-semaphore2	:test:	loadfree
 mutex5		:test:	loadfree
 mutex1		:test:	loadfree
+mutex1n		:test:	loadfree
+mutex1r		:test:	loadfree
+mutex1e		:test:	loadfree
+semaphore1	:test:	loadfree
+semaphore2	:test:	loadfree
+semaphore3	:test:	loadfree
 mutex2		:test:	loadfree
 mutex2r		:test:	loadfree
 mutex2e		:test:	loadfree
@@ -209,6 +215,8 @@ rwlock3		:test:	rwlock2
 rwlock4		:test:	rwlock3
 rwlock5		:test:	rwlock4
 rwlock6		:test:	rwlock5
+rwlock7		:test:	rwlock6
+rwlock8		:test:	rwlock7
 rwlock2_t	:test:	rwlock2
 rwlock3_t	:test:	rwlock2_t
 rwlock4_t	:test:	rwlock3_t
diff --git a/implement.h b/implement.h
index f71f506..9cb3cb3 100644
--- a/implement.h
+++ b/implement.h
@@ -172,6 +172,7 @@ struct sem_t_
   CRITICAL_SECTION sem_lock_cs;
   HANDLE event;
 #else				/* NEED_SEM */
+  int value;
   HANDLE sem;
 #endif				/* NEED_SEM */
 };
diff --git a/ptw32_semwait.c b/ptw32_semwait.c
index 38f31f5..2e9c883 100644
--- a/ptw32_semwait.c
+++ b/ptw32_semwait.c
@@ -83,7 +83,15 @@ ptw32_semwait (sem_t * sem)
 
 #else /* NEED_SEM */
 
-      status = WaitForSingleObject ((*sem)->sem, INFINITE);
+      if (InterlockedDecrement((LPLONG) &(*sem)->value) < 0)
+        {
+          /* Must wait */
+          status = WaitForSingleObject ((*sem)->sem, INFINITE);
+        }
+      else
+	{
+	  return 0;
+	}
 
 #endif
 
@@ -100,6 +108,9 @@ ptw32_semwait (sem_t * sem)
 	}
       else
 	{
+#ifndef NEED_SEM
+	  (void) InterlockedIncrement((LPLONG) &(*sem)->value);
+#endif
 	  result = EINVAL;
 	}
     }
diff --git a/sem_getvalue.c b/sem_getvalue.c
index c34739a..be80bea 100644
--- a/sem_getvalue.c
+++ b/sem_getvalue.c
@@ -77,15 +77,14 @@ sem_getvalue (sem_t * sem, int *sval)
       *      pointed to by sem in the int pointed to by sval.
       */
 {
-  int result = 0;
-  long value;
-
   if (sem == NULL || *sem == NULL || sval == NULL)
     {
-      result = EINVAL;
+      errno = EINVAL;
+      return -1;
     }
   else
     {
+      long value;
       register sem_t s = *sem;
 
 #ifdef NEED_SEM
@@ -96,59 +95,12 @@ sem_getvalue (sem_t * sem, int *sval)
 
 #else
 
-      /*
-       * There appears to be NO atomic means of determining the
-       * value of the semaphore. Using only ReleaseSemaphore()
-       * with either a zero or oversized count parameter has been
-       * suggested but this trick doesn't produce consistent results
-       * across Windows versions (the technique uses values that are
-       * knowingly illegal but hopes to extract the current value
-       * anyway - the zero parameter appears to work for Win9x but
-       * neither work reliably for WinNT).
-       *
-       * The intrusive method below will give false results
-       * at times but it at least errs on the side of
-       * caution. Competing threads might occasionally believe
-       * the semaphore has a count of one less than it actually
-       * would have and possibly block momentarily unecessarily,
-       * but they will never see a higher semaphore value than
-       * there should be.
-       *
-       *
-       * Multiple threads calling sem_getvalue() at the same time
-       * may not all return the same value (assuming no calls to
-       * other semaphore routines). They will always return the
-       * correct value or a lesser value. This problem could be fixed
-       * with a global or a per-semaphore critical section here.
-       *
-       * An equally approximate but IMO slightly riskier approach
-       * would be to keep a separate per-semaphore counter and
-       * decrement/increment it inside of sem_wait() and sem_post()
-       * etc using the Interlocked* functions.
-       */
-      if (WaitForSingleObject (s->sem, 0) == WAIT_TIMEOUT)
-	{
-	  /* Failed - must be zero */
-	  value = 0;
-	}
-      else
-	{
-	  /* Decremented semaphore - release it and note the value */
-	  (void) ReleaseSemaphore (s->sem, 1L, &value);
-	  value++;
-	}
+      value = s->value;
 
 #endif
-    }
 
-
-  if (result != 0)
-    {
-      errno = result;
-      return -1;
+      *sval = value;
+      return 0;
     }
 
-  *sval = value;
-  return 0;
-
 }				/* sem_getvalue */
diff --git a/sem_init.c b/sem_init.c
index eca0026..af0e285 100644
--- a/sem_init.c
+++ b/sem_init.c
@@ -127,8 +127,9 @@ sem_init (sem_t * sem, int pshared, unsigned int value)
 
 #else /* NEED_SEM */
 
+	  s->value = value;
 	  s->sem = CreateSemaphore (NULL,	/* Always NULL */
-				    (long) value,	/* Initial value */
+				    (long) 0,	/* Force threads to wait */
 				    (long) _POSIX_SEM_VALUE_MAX,	/* Maximum value */
 				    NULL);	/* Name */
 
diff --git a/sem_post.c b/sem_post.c
index 3281643..1d607e8 100644
--- a/sem_post.c
+++ b/sem_post.c
@@ -85,7 +85,8 @@ sem_post (sem_t * sem)
 
 #else /* NEED_SEM */
 
-  else if (!ReleaseSemaphore ((*sem)->sem, 1, 0))
+    else if (InterlockedExchangeAdd((LPLONG) &(*sem)->value, (LONG) 1) < 0
+	     && !ReleaseSemaphore((*sem)->sem, 1, NULL))
 
 #endif /* NEED_SEM */
 
diff --git a/sem_post_multiple.c b/sem_post_multiple.c
index 43c71a6..e30e01a 100644
--- a/sem_post_multiple.c
+++ b/sem_post_multiple.c
@@ -76,6 +76,9 @@ sem_post_multiple (sem_t * sem, int count)
       */
 {
   int result = 0;
+#ifndef NEED_SEM
+  long waiters;
+#endif
 
   if (sem == NULL || *sem == NULL || count <= 0)
     {
@@ -88,7 +91,8 @@ sem_post_multiple (sem_t * sem, int count)
 
 #else /* NEED_SEM */
 
-  else if (!ReleaseSemaphore ((*sem)->sem, count, 0))
+    else if ((waiters = -InterlockedExchangeAdd((LPLONG) &(*sem)->value, (LONG) count)) > 0
+	     && !ReleaseSemaphore((*sem)->sem,  (waiters<=count)?waiters:count, 0))
 
 #endif /* NEED_SEM */
 
diff --git a/sem_timedwait.c b/sem_timedwait.c
index e374006..e7fa55c 100644
--- a/sem_timedwait.c
+++ b/sem_timedwait.c
@@ -189,7 +189,15 @@ sem_timedwait (sem_t * sem, const struct timespec *abstime)
 
 #else /* NEED_SEM */
 
-      result = (pthreadCancelableTimedWait ((*sem)->sem, milliseconds));
+      if (InterlockedDecrement((LPLONG) &(*sem)->value) < 0)
+        {
+          /* Must wait */
+	  result = pthreadCancelableTimedWait ((*sem)->sem, milliseconds);
+	  if (result != 0)
+	    {
+              (void) InterlockedIncrement((LPLONG) &(*sem)->value);
+	    }
+        }
 
 #endif
 
diff --git a/sem_trywait.c b/sem_trywait.c
index 342ee06..253c10e 100644
--- a/sem_trywait.c
+++ b/sem_trywait.c
@@ -92,8 +92,9 @@ sem_trywait (sem_t * sem)
     {
       result = EINVAL;
     }
-  else if (WaitForSingleObject ((*sem)->sem, 0) == WAIT_TIMEOUT)
+  else if (InterlockedDecrement((LPLONG) &(*sem)->value) < 0)
     {
+      (void) InterlockedIncrement((LPLONG) &(*sem)->value);
       result = EAGAIN;
     }
 
diff --git a/sem_wait.c b/sem_wait.c
index e2e171c..0c2ed59 100644
--- a/sem_wait.c
+++ b/sem_wait.c
@@ -92,8 +92,14 @@ sem_wait (sem_t * sem)
 
 #else /* NEED_SEM */
 
-      result = pthreadCancelableWait ((*sem)->sem);
-
+      sem_t s = *sem;
+
+      if (InterlockedDecrement((LPLONG) &s->value) < 0)
+	{
+	  /* Must wait */
+	  result = pthreadCancelableWait (s->sem);
+	}
+     
 #endif /* NEED_SEM */
 
     }
diff --git a/tests/ChangeLog b/tests/ChangeLog
index f55b2b6..65d52f1 100644
--- a/tests/ChangeLog
+++ b/tests/ChangeLog
@@ -1,8 +1,12 @@
-2004-10-14  Ross Johnson  <rpj@callisto.canberra.edu.au>
+2004-10-19  Ross Johnson  <rpj@callisto.canberra.edu.au>
 
-	* rwlock7.c (main): Tidy up statistics reporting; randomise
-	update accesses.
-	* rwlock8.c: New test.
+	* semaphore3.c: New test.
+
+2004-10-14  Ross Johnson  <rpj@callisto.canberra.edu.au>
+
+	* rwlock7.c (main): Tidy up statistics reporting; randomise
+	update accesses.
+	* rwlock8.c: New test.
 
 2004-09-08  Alexandre Girao  <alexgirao@gmail.com>
 
diff --git a/tests/GNUmakefile b/tests/GNUmakefile
index 0979fda..aecd718 100644
--- a/tests/GNUmakefile
+++ b/tests/GNUmakefile
@@ -70,7 +70,8 @@ COPYFILES	= $(HDR) $(LIB) $(DLL) $(QAPC)
 # stop.
 
 TESTS	= sizes loadfree \
-	  self1 mutex5 mutex1 mutex1e mutex1n mutex1r semaphore1 semaphore2 \
+	  self1 mutex5 mutex1 mutex1e mutex1n mutex1r \
+	  semaphore1 semaphore2 semaphore3 \
 	  condvar1 condvar1_1 condvar1_2 condvar2 condvar2_1 exit1 \
 	  create1 create2 reuse1 reuse2 equal1 \
 	  kill1 valid1 valid2 \
@@ -251,6 +252,7 @@ self1.pass:
 self2.pass: create1.pass
 semaphore1.pass:
 semaphore2.pass:
+semaphore3.pass: semaphore2.pass
 sizes.pass:
 spin1.pass:
 spin2.pass: spin1.pass
diff --git a/tests/Makefile b/tests/Makefile
index 535347f..71154b3 100644
--- a/tests/Makefile
+++ b/tests/Makefile
@@ -80,8 +80,10 @@ EHFLAGS	=
 # stop.
 
 PASSES= sizes.pass  loadfree.pass \
-	  semaphore1.pass  semaphore2.pass  self1.pass  mutex5.pass  \
-	  mutex1.pass  mutex1n.pass  mutex1e.pass  mutex1r.pass  mutex2.pass  mutex3.pass  \
+	  self1.pass  mutex5.pass  \
+	  mutex1.pass  mutex1n.pass  mutex1e.pass  mutex1r.pass  \
+	  semaphore1.pass  semaphore2.pass  semaphore3.pass  \
+	  mutex2.pass  mutex3.pass  \
 	  mutex2r.pass  mutex2e.pass  mutex3r.pass  mutex3e.pass  \
 	  condvar1.pass  condvar1_1.pass  condvar1_2.pass  condvar2.pass  condvar2_1.pass  \
 	  exit1.pass  create1.pass  create2.pass  reuse1.pass  reuse2.pass  equal1.pass  \
@@ -100,7 +102,7 @@ PASSES= sizes.pass  loadfree.pass \
 	  condvar4.pass  condvar5.pass  condvar6.pass  \
 	  condvar7.pass  condvar8.pass  condvar9.pass  \
 	  errno1.pass  \
-	  rwlock1.pass  rwlock2.pass  rwlock3.pass  rwlock4.pass  \
+	  rwlock1.pass  rwlock2.pass  rwlock3.pass  rwlock4.pass  \
 	  rwlock5.pass  rwlock6.pass  rwlock7.pass  rwlock8.pass  \
 	  rwlock2_t.pass  rwlock3_t.pass  rwlock4_t.pass  rwlock5_t.pass  rwlock6_t.pass  rwlock6_t2.pass  \
 	  context1.pass  \
diff --git a/tests/semaphore2.c b/tests/semaphore2.c
index 8e264ea..e3663d9 100644
--- a/tests/semaphore2.c
+++ b/tests/semaphore2.c
@@ -73,7 +73,7 @@
 
 #include "test.h"
 
-#define MAX_COUNT 100000
+#define MAX_COUNT 100
 
 int
 main()
diff --git a/tests/semaphore3.c b/tests/semaphore3.c
new file mode 100644
index 0000000..c6570f8
--- /dev/null
+++ b/tests/semaphore3.c
@@ -0,0 +1,120 @@
+/*
+ * File: semaphore3.c
+ *
+ *
+ * --------------------------------------------------------------------------
+ *
+ *      Pthreads-win32 - POSIX Threads Library for Win32
+ *      Copyright(C) 1998 John E. Bossom
+ *      Copyright(C) 1999,2003 Pthreads-win32 contributors
+ * 
+ *      Contact Email: rpj@callisto.canberra.edu.au
+ * 
+ *      The current list of contributors is contained
+ *      in the file CONTRIBUTORS included with the source
+ *      code distribution. The list can also be seen at the
+ *      following World Wide Web location:
+ *      http://sources.redhat.com/pthreads-win32/contributors.html
+ * 
+ *      This library is free software; you can redistribute it and/or
+ *      modify it under the terms of the GNU Lesser General Public
+ *      License as published by the Free Software Foundation; either
+ *      version 2 of the License, or (at your option) any later version.
+ * 
+ *      This library is distributed in the hope that it will be useful,
+ *      but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *      MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ *      Lesser General Public License for more details.
+ * 
+ *      You should have received a copy of the GNU Lesser General Public
+ *      License along with this library in the file COPYING.LIB;
+ *      if not, write to the Free Software Foundation, Inc.,
+ *      59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+ *
+ * --------------------------------------------------------------------------
+ *
+ * Test Synopsis: Verify sem_getvalue returns the correct number of waiters.
+ * - 
+ *
+ * Test Method (Validation or Falsification):
+ * - Validation
+ *
+ * Requirements Tested:
+ * - 
+ *
+ * Features Tested:
+ * - 
+ *
+ * Cases Tested:
+ * -
+ *
+ * Description:
+ * -
+ *
+ * Environment:
+ * -
+ *
+ * Input:
+ * - None.
+ *
+ * Output:
+ * - File name, Line number, and failed expression on failure.
+ * - No output on success.
+ *
+ * Assumptions:
+ * -
+ *
+ * Pass Criteria:
+ * - Process returns zero exit status.
+ *
+ * Fail Criteria:
+ * - Process returns non-zero exit status.
+ */
+
+#include "test.h"
+
+#define MAX_COUNT 100
+
+sem_t s;
+
+void *
+thr (void * arg)
+{
+  assert(pthread_detach(pthread_self()) == 0);
+  assert(sem_wait(&s) == 0);
+
+  return NULL;
+}
+
+int
+main()
+{
+	int value = 0;
+	int i;
+	pthread_t t[MAX_COUNT];
+
+  assert(sem_init(&s, PTHREAD_PROCESS_PRIVATE, 0) == 0);
+	assert(sem_getvalue(&s, &value) == 0);
+	assert(value == 0);
+//	  printf("Value = %ld\n", value);
+
+	for (i = 1; i <= MAX_COUNT; i++)
+		{
+			assert(pthread_create(&t[i-1], NULL, thr, NULL) == 0);
+			sched_yield();
+			assert(sem_getvalue(&s, &value) == 0);
+//			  printf("Value = %ld\n", value);
+			assert(-value == i);
+		}
+
+	for (i = MAX_COUNT - 1; i >= 0; i--)
+		{
+			assert(sem_post(&s) == 0);
+			assert(sem_getvalue(&s, &value) == 0);
+//			  printf("Value = %ld\n", value);
+			assert(-value == i);
+		}
+
+  return 0;
+}
+
-- 
cgit v1.2.3