summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorrpj <rpj>2004-10-16 02:34:44 +0000
committerrpj <rpj>2004-10-16 02:34:44 +0000
commit45b1b8cb2a6588f9316f780d8cefe11c181a9a17 (patch)
tree24753e298d9933d48d764177baf183ef97f04156
parent9da8fdcb33373b4b2e1de2a8b7af3ed4b5811245 (diff)
Mutex speedups cont'd
-rw-r--r--ChangeLog117
-rw-r--r--Makefile6
-rw-r--r--global.c2
-rw-r--r--implement.h28
-rw-r--r--pthread_mutex_destroy.c11
-rw-r--r--pthread_mutex_init.c18
-rw-r--r--pthread_mutex_lock.c73
-rw-r--r--pthread_mutex_timedlock.c147
-rw-r--r--pthread_mutex_trylock.c9
-rw-r--r--pthread_mutex_unlock.c36
-rw-r--r--pthread_win32_attach_detach_np.c32
-rw-r--r--ptw32_InterlockedCompareExchange.c259
-rw-r--r--tests/ChangeLog20
-rw-r--r--tests/GNUmakefile5
-rw-r--r--tests/Makefile4
-rw-r--r--tests/benchtest1.c2
-rw-r--r--tests/rwlock7.c56
-rw-r--r--tests/rwlock8.c205
18 files changed, 702 insertions, 328 deletions
diff --git a/ChangeLog b/ChangeLog
index ec65d84..c1fe46f 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,45 +1,88 @@
-2004-10-08 Ross Johnson <rpj at callisto.canberra.edu.au>
+2004-10-15 Ross Johnson <rpj at callisto.canberra.edu.au>
- * pthread_mutex_destroy.c (pthread_mutex_destroy): Critical Section
- element is no longer required.
- * pthread_mutex_init.c (pthread_mutex_init): Likewise.
- * pthread_mutex_lock.c (pthread_mutex_lock): New algorithm following Drepper's
- paper at http://people.redhat.com/drepper/futex.pdf, but using the existing
- semaphore in place of the futex described in the paper. Idea suggested by
- Alexander Terekhov - see:
- http://sources.redhat.com/ml/pthreads-win32/2003/msg00108.html
- * pthread_mutex_timedlock.c pthread_mutex_timedlock(): Similarly.
- * pthread_mutex_trylock.c (pthread_mutex_trylock): Similarly.
- * pthread_mutex_unlock.c (pthread_mutex_unlock): Similarly.
- * pthread_barrier_wait.c (pthread_barrier_wait): Use inlined version of
- InterlockedCompareExchange() if possible - determined at build-time.
- * pthread_spin_destroy.c pthread_spin_destroy(): Likewise.
- * pthread_spin_lock.c pthread_spin_lock():Likewise.
- * pthread_spin_trylock.c (pthread_spin_trylock):Likewise.
- * pthread_spin_unlock.c (pthread_spin_unlock):Likewise.
- * ptw32_InterlockedCompareExchange.c: Sets up macro for inlined use.
- * implement.h (pthread_mutex_t_): Remove Critical Section element.
- (PTW32_INTERLOCKED_COMPARE_EXCHANGE): Set to default non-inlined version of
- InterlockedCompareExchange().
- * private.c: Include ptw32_InterlockedCompareExchange.c first for inlining.
- * GNUmakefile: Add commandline option to use inlined InterlockedCompareExchange().
- * Makefile: Likewise.
+ * implement.h (pthread_mutex_t_): Use an event in place of
+ the POSIX semaphore.
+ * pthread_mutex_init.c: Create the event; remove semaphore init.
+ * pthread_mutex_destroy.c: Delete the event.
+ * pthread_mutex_lock.c: Replace the semaphore wait with the event wait.
+ * pthread_mutex_trylock.c: Likewise.
+ * pthread_mutex_timedlock.c: Likewise.
+ * pthread_mutex_unlock.c: Set the event.
+
+2004-10-14 Ross Johnson <rpj at callisto.canberra.edu.au>
+
+ * pthread_mutex_lock.c (pthread_mutex_lock): New algorithm using
+ Terekhov's xchg based variation of Drepper's cmpxchg model.
+ Theoretically, xchg uses fewer clock cycles than cmpxchg (using IA-32
+ as a reference), however, in my opinion bus locking dominates the
+ equation on smp systems, so the model with the least number of bus
+ lock operations in the execution path should win, which is Terekhov's
+ variant. On IA-32 uni-processor systems, it's faster to use the
+ CMPXCHG instruction without locking the bus than to use the XCHG
+ instruction, which always locks the bus. This makes the two variants
+ equal for the non-contended lock (fast lane) execution path on up
+ IA-32. Testing shows that the xchg variant is faster on up IA-32 as
+ well if the test forces higher lock contention frequency, even though
+ kernel calls should be dominating the times (on up IA-32, both
+ variants used CMPXCHG instructions and neither locked the bus).
+ * pthread_mutex_timedlock.c pthread_mutex_timedlock(): Similarly.
+ * pthread_mutex_trylock.c (pthread_mutex_trylock): Similarly.
+ * pthread_mutex_unlock.c (pthread_mutex_unlock): Similarly.
+ * ptw32_InterlockedCompareExchange.c (ptw32_InterlockedExchange): New
+ function.
+ (PTW32_INTERLOCKED_EXCHANGE): Sets up macro to use inlined
+ ptw32_InterlockedExchange.
+ * implement.h (PTW32_INTERLOCKED_EXCHANGE): Set default to
+ InterlockedExchange().
+ * Makefile: Building using /Ob2 so that asm sections within inline
+ functions are inlined.
+
+2004-10-08 Ross Johnson <rpj at callisto.canberra.edu.au>
+
+ * pthread_mutex_destroy.c (pthread_mutex_destroy): Critical Section
+ element is no longer required.
+ * pthread_mutex_init.c (pthread_mutex_init): Likewise.
+ * pthread_mutex_lock.c (pthread_mutex_lock): New algorithm following
+ Drepper's paper at http://people.redhat.com/drepper/futex.pdf, but
+ using the existing semaphore in place of the futex described in the
+ paper. Idea suggested by Alexander Terekhov - see:
+ http://sources.redhat.com/ml/pthreads-win32/2003/msg00108.html
+ * pthread_mutex_timedlock.c pthread_mutex_timedlock(): Similarly.
+ * pthread_mutex_trylock.c (pthread_mutex_trylock): Similarly.
+ * pthread_mutex_unlock.c (pthread_mutex_unlock): Similarly.
+ * pthread_barrier_wait.c (pthread_barrier_wait): Use inlined version
+ of InterlockedCompareExchange() if possible - determined at
+ build-time.
+ * pthread_spin_destroy.c pthread_spin_destroy(): Likewise.
+ * pthread_spin_lock.c pthread_spin_lock():Likewise.
+ * pthread_spin_trylock.c (pthread_spin_trylock):Likewise.
+ * pthread_spin_unlock.c (pthread_spin_unlock):Likewise.
+ * ptw32_InterlockedCompareExchange.c: Sets up macro for inlined use.
+ * implement.h (pthread_mutex_t_): Remove Critical Section element.
+ (PTW32_INTERLOCKED_COMPARE_EXCHANGE): Set to default non-inlined
+ version of InterlockedCompareExchange().
+ * private.c: Include ptw32_InterlockedCompareExchange.c first for
+ inlining.
+ * GNUmakefile: Add commandline option to use inlined
+ InterlockedCompareExchange().
+ * Makefile: Likewise.
2004-09-27 Ross Johnson <rpj at callisto.canberra.edu.au>
- * pthread_mutex_lock.c (pthread_mutex_lock): Separate PTHREAD_MUTEX_NORMAL
- logic since we do not need to keep or check some state required by other
- mutex types; do not check mutex pointer arg for validity - leave this to
- the system since we are only checking for NULL pointers. This should improve
- speed of NORMAL mutexes and marginally improve speed of other type.
+ * pthread_mutex_lock.c (pthread_mutex_lock): Separate
+ PTHREAD_MUTEX_NORMAL logic since we do not need to keep or check some
+ state required by other mutex types; do not check mutex pointer arg
+ for validity - leave this to the system since we are only checking
+ for NULL pointers. This should improve speed of NORMAL mutexes and
+ marginally improve speed of other type.
* pthread_mutex_trylock.c (pthread_mutex_trylock): Likewise.
* pthread_mutex_unlock.c (pthread_mutex_unlock): Likewise; also avoid
- entering the critical section for the no-waiters case, with approx. 30%
- reduction in lock/unlock overhead for this case..
+ entering the critical section for the no-waiters case, with approx.
+ 30% reduction in lock/unlock overhead for this case.
* pthread_mutex_timedlock.c (pthread_mutex_timedlock): Likewise; also
- no longer keeps mutex if post-timeout second attempt succeeds - this will
- assist applications that wish to impose strict lock deadlines, rather than
- simply to escape from frozen locks.
+ no longer keeps mutex if post-timeout second attempt succeeds - this
+ will assist applications that wish to impose strict lock deadlines,
+ rather than simply to escape from frozen locks.
2004-09-09 Tristan Savatier <tristan at mpegtv.com>
* pthread.h (struct pthread_once_t_): Qualify the 'done' element
@@ -49,8 +92,8 @@
[Maintainer's note: the race condition is harmless on SPU systems
and only a problem on MPU systems if concurrent access results in an
exception (presumably generated by a hardware interrupt). There are
- other instances of similar harmless race conditions that have not been
- identified as issues.]
+ other instances of similar harmless race conditions that have not
+ been identified as issues.]
2004-09-09 Ross Johnson <rpj at callisto.canberra.edu.au>
diff --git a/Makefile b/Makefile
index 9fb97bb..f39412e 100644
--- a/Makefile
+++ b/Makefile
@@ -375,13 +375,13 @@ VC:
# inlining optimisation turned on.
#
VCE-inlined:
- @ nmake /nologo EHFLAGS="/O2 /Ob1 $(VCEFLAGS) /DPTW32_BUILD_INLINED" pthreadVCE.stamp
+ @ nmake /nologo EHFLAGS="/O2 /Ob2 $(VCEFLAGS) /DPTW32_BUILD_INLINED" pthreadVCE.stamp
VSE-inlined:
- @ nmake /nologo EHFLAGS="/O2 /Ob1 $(VSEFLAGS) /DPTW32_BUILD_INLINED" pthreadVSE.stamp
+ @ nmake /nologo EHFLAGS="/O2 /Ob2 $(VSEFLAGS) /DPTW32_BUILD_INLINED" pthreadVSE.stamp
VC-inlined:
- @ nmake /nologo EHFLAGS="/O2 /Ob1 $(VCFLAGS) /DPTW32_BUILD_INLINED" pthreadVC.stamp
+ @ nmake /nologo EHFLAGS="/O2 /Ob2 $(VCFLAGS) /DPTW32_BUILD_INLINED" pthreadVC.stamp
realclean: clean
if exist *.dll del *.dll
diff --git a/global.c b/global.c
index fc9ab91..f3f18cb 100644
--- a/global.c
+++ b/global.c
@@ -51,6 +51,8 @@ int ptw32_concurrency = 0;
/* What features have been auto-detaected */
int ptw32_features = 0;
+BOOL ptw32_smp_system = PTW32_TRUE; /* Safer if assumed true initially. */
+
/*
* Function pointer to InterlockedCompareExchange if it exists, otherwise
* it will be set at runtime to a substitute local version with the same
diff --git a/implement.h b/implement.h
index 2e320bb..f71f506 100644
--- a/implement.h
+++ b/implement.h
@@ -182,15 +182,17 @@ struct sem_t_
struct pthread_mutex_t_
{
LONG lock_idx; /* Provides exclusive access to mutex state
- via the Interlocked* mechanism, as well
- as a count of the number of threads
- waiting on the mutex. */
+ via the Interlocked* mechanism.
+ 0: unlocked/free.
+ 1: locked - no other waiters.
+ -1: locked - with possible other waiters.
+ */
int recursive_count; /* Number of unlocks a thread needs to perform
before the lock is released (recursive
mutexes only). */
int kind; /* Mutex type. */
pthread_t ownerThread;
- sem_t wait_sema; /* Mutex release notification to waiting
+ HANDLE event; /* Mutex release notification to waiting
threads. */
};
@@ -451,6 +453,8 @@ extern int ptw32_concurrency;
extern int ptw32_features;
+extern BOOL ptw32_smp_system; /* True: SMP system, False: Uni-processor system */
+
extern CRITICAL_SECTION ptw32_thread_reuse_lock;
extern CRITICAL_SECTION ptw32_mutex_test_init_lock;
extern CRITICAL_SECTION ptw32_cond_list_lock;
@@ -486,7 +490,11 @@ extern "C"
PTW32_INTERLOCKED_LONG value,
PTW32_INTERLOCKED_LONG comparand);
- DWORD
+ LONG WINAPI
+ ptw32_InterlockedExchange (LPLONG location,
+ LONG value);
+
+ DWORD
ptw32_RegisterCancelation (PAPCFUNC callback,
HANDLE threadH, DWORD callback_arg);
@@ -560,8 +568,7 @@ extern "C"
unsigned, void *);
_CRTIMP void __cdecl _endthread (void);
_CRTIMP unsigned long __cdecl _beginthreadex (void *, unsigned,
- unsigned (__stdcall *) (void
- *),
+ unsigned (__stdcall *) (void *),
void *, unsigned, unsigned *);
_CRTIMP void __cdecl _endthreadex (unsigned);
# ifdef __cplusplus
@@ -574,12 +581,17 @@ extern "C"
/*
- * When not building the inlined version of the dll.
+ * Defaults. Could be overridden when building the inlined version of the dll.
+ * See ptw32_InterlockedCompareExchange.c
*/
#ifndef PTW32_INTERLOCKED_COMPARE_EXCHANGE
#define PTW32_INTERLOCKED_COMPARE_EXCHANGE ptw32_interlocked_compare_exchange
#endif
+#ifndef PTW32_INTERLOCKED_EXCHANGE
+#define PTW32_INTERLOCKED_EXCHANGE InterlockedExchange
+#endif
+
/*
* Check for old and new versions of cygwin. See the FAQ file:
diff --git a/pthread_mutex_destroy.c b/pthread_mutex_destroy.c
index 1ff9ebe..4b0043a 100644
--- a/pthread_mutex_destroy.c
+++ b/pthread_mutex_destroy.c
@@ -82,8 +82,15 @@ pthread_mutex_destroy (pthread_mutex_t * mutex)
if (result == 0)
{
- (void) sem_destroy (&mx->wait_sema);
- free (mx);
+ if (!CloseHandle (mx->event))
+ {
+ *mutex = mx;
+ result = EINVAL;
+ }
+ else
+ {
+ free (mx);
+ }
}
else
{
diff --git a/pthread_mutex_init.c b/pthread_mutex_init.c
index fdb6017..0709690 100644
--- a/pthread_mutex_init.c
+++ b/pthread_mutex_init.c
@@ -80,18 +80,22 @@ pthread_mutex_init (pthread_mutex_t * mutex, const pthread_mutexattr_t * attr)
}
else
{
- mx->lock_idx = -1;
+ mx->lock_idx = 0;
mx->recursive_count = 0;
mx->kind = (attr == NULL || *attr == NULL
? PTHREAD_MUTEX_DEFAULT : (*attr)->kind);
mx->ownerThread = NULL;
- if (0 != sem_init (&mx->wait_sema, 0, 0))
- {
- result = EAGAIN;
- free (mx);
- mx = NULL;
- }
+ mx->event = CreateEvent (NULL, PTW32_FALSE, /* manual reset = No */
+ PTW32_FALSE, /* initial state = not signaled */
+ NULL); /* event name */
+
+ if (0 == mx->event)
+ {
+ result = ENOSPC;
+ free (mx);
+ mx = NULL;
+ }
}
*mutex = mx;
diff --git a/pthread_mutex_lock.c b/pthread_mutex_lock.c
index 6695907..7be77b5 100644
--- a/pthread_mutex_lock.c
+++ b/pthread_mutex_lock.c
@@ -44,7 +44,6 @@ int
pthread_mutex_lock (pthread_mutex_t * mutex)
{
int result = 0;
- LONG c;
pthread_mutex_t mx;
/*
@@ -69,40 +68,29 @@ pthread_mutex_lock (pthread_mutex_t * mutex)
if (mx->kind == PTHREAD_MUTEX_NORMAL)
{
- if ((c = (LONG) PTW32_INTERLOCKED_COMPARE_EXCHANGE(
- (PTW32_INTERLOCKED_LPLONG) &mx->lock_idx,
- (PTW32_INTERLOCKED_LONG) 0,
- (PTW32_INTERLOCKED_LONG) -1)) != -1)
+ if ((LONG) PTW32_INTERLOCKED_EXCHANGE(
+ (LPLONG) &mx->lock_idx,
+ (LONG) 1) != 0)
{
- do
+ while ((LONG) PTW32_INTERLOCKED_EXCHANGE(
+ (LPLONG) &mx->lock_idx,
+ (LONG) -1) != 0)
{
- if (c == 1 ||
- (LONG) PTW32_INTERLOCKED_COMPARE_EXCHANGE(
- (PTW32_INTERLOCKED_LPLONG) &mx->lock_idx,
- (PTW32_INTERLOCKED_LONG) 1,
- (PTW32_INTERLOCKED_LONG) 0) != -1)
- {
- if (ptw32_semwait (&mx->wait_sema) != 0)
- {
- result = errno;
- break;
- }
- }
+ if (WAIT_OBJECT_0 != WaitForSingleObject (mx->event, INFINITE))
+ {
+ result = EINVAL;
+ break;
+ }
}
- while ((c = (LONG) PTW32_INTERLOCKED_COMPARE_EXCHANGE(
- (PTW32_INTERLOCKED_LPLONG) &mx->lock_idx,
- (PTW32_INTERLOCKED_LONG) 1,
- (PTW32_INTERLOCKED_LONG) -1)) != -1);
}
}
else
{
pthread_t self = pthread_self();
- if ((c = (LONG) PTW32_INTERLOCKED_COMPARE_EXCHANGE(
- (PTW32_INTERLOCKED_LPLONG) &mx->lock_idx,
- (PTW32_INTERLOCKED_LONG) 0,
- (PTW32_INTERLOCKED_LONG) -1)) == -1)
+ if ((LONG) PTW32_INTERLOCKED_EXCHANGE(
+ (LPLONG) &mx->lock_idx,
+ (LONG) 1) == 0)
{
mx->recursive_count = 1;
mx->ownerThread = self;
@@ -122,30 +110,21 @@ pthread_mutex_lock (pthread_mutex_t * mutex)
}
else
{
- do
+ while ((LONG) PTW32_INTERLOCKED_EXCHANGE(
+ (LPLONG) &mx->lock_idx,
+ (LONG) -1) != 0)
{
- if (c == 1 ||
- (LONG) PTW32_INTERLOCKED_COMPARE_EXCHANGE(
- (PTW32_INTERLOCKED_LPLONG) &mx->lock_idx,
- (PTW32_INTERLOCKED_LONG) 1,
- (PTW32_INTERLOCKED_LONG) 0) != -1)
+ if (WAIT_OBJECT_0 == WaitForSingleObject (mx->event, INFINITE))
{
- if (ptw32_semwait (&mx->wait_sema) == 0)
- {
- mx->recursive_count = 1;
- mx->ownerThread = self;
- }
- else
- {
- result = errno;
- break;
- }
+ mx->recursive_count = 1;
+ mx->ownerThread = self;
+ }
+ else
+ {
+ result = EINVAL;
+ break;
}
- }
- while ((c = (LONG) PTW32_INTERLOCKED_COMPARE_EXCHANGE(
- (PTW32_INTERLOCKED_LPLONG) &mx->lock_idx,
- (PTW32_INTERLOCKED_LONG) 1,
- (PTW32_INTERLOCKED_LONG) -1)) != -1);
+ }
}
}
}
diff --git a/pthread_mutex_timedlock.c b/pthread_mutex_timedlock.c
index cf929bc..97a625c 100644
--- a/pthread_mutex_timedlock.c
+++ b/pthread_mutex_timedlock.c
@@ -45,15 +45,12 @@
static INLINE int
-ptw32_timed_semwait (sem_t * sem, const struct timespec *abstime)
+ptw32_timed_eventwait (HANDLE event, const struct timespec *abstime)
/*
* ------------------------------------------------------
* DESCRIPTION
- * This function waits on a POSIX semaphore. If the
- * semaphore value is greater than zero, it decreases
- * its value by one. If the semaphore value is zero, then
- * the calling thread (or process) is blocked until it can
- * successfully decrease the value or until abstime.
+ * This function waits on an event until signaled or until
+ * abstime passes.
* If abstime has passed when this routine is called then
* it returns a result to indicate this.
*
@@ -61,25 +58,16 @@ ptw32_timed_semwait (sem_t * sem, const struct timespec *abstime)
* block until it can successfully decrease the value or
* until interrupted by a signal.
*
- * Unlike sem_timedwait(), this routine is not a cancelation point.
- *
- * Unlike sem_timedwait(), this routine is non-cancelable.
+ * This routine is not a cancelation point.
*
* RESULTS
- * 2 abstime has passed already
- * 1 abstime timed out while waiting
- * 0 successfully decreased semaphore,
- * -1 failed, error in errno.
- * ERRNO
- * EINVAL 'sem' is not a valid semaphore,
- * ENOSYS semaphores are not supported,
- * EINTR the function was interrupted by a signal,
- * EDEADLK a deadlock condition was detected.
+ * 0 successfully signaled,
+ * ETIMEDOUT abstime passed
+ * EINVAL 'event' is not a valid event,
*
* ------------------------------------------------------
*/
{
- int result = 0;
#ifdef NEED_FTIME
@@ -96,9 +84,9 @@ ptw32_timed_semwait (sem_t * sem, const struct timespec *abstime)
DWORD milliseconds;
DWORD status;
- if (sem == NULL)
+ if (event == NULL)
{
- result = EINVAL;
+ return EINVAL;
}
else
{
@@ -156,47 +144,26 @@ ptw32_timed_semwait (sem_t * sem, const struct timespec *abstime)
if (((int) milliseconds) < 0)
{
- return 2;
+ return ETIMEDOUT;
}
}
-#ifdef NEED_SEM
-
- status = WaitForSingleObject ((*sem)->event, milliseconds);
-
-#else /* NEED_SEM */
-
- status = WaitForSingleObject ((*sem)->sem, milliseconds);
-
-#endif
+ status = WaitForSingleObject (event, milliseconds);
if (status == WAIT_OBJECT_0)
{
-
-#ifdef NEED_SEM
-
- ptw32_decrease_semaphore (sem);
-
-#endif /* NEED_SEM */
-
return 0;
}
else if (status == WAIT_TIMEOUT)
{
- return 1;
+ return ETIMEDOUT;
}
else
{
- result = EINVAL;
+ return EINVAL;
}
}
- if (result != 0)
- {
- errno = result;
- return -1;
- }
-
return 0;
} /* ptw32_timed_semwait */
@@ -206,7 +173,7 @@ int
pthread_mutex_timedlock (pthread_mutex_t * mutex,
const struct timespec *abstime)
{
- LONG c;
+ int result;
pthread_mutex_t mx;
#ifdef NEED_SEM
@@ -226,8 +193,6 @@ pthread_mutex_timedlock (pthread_mutex_t * mutex,
*/
if (*mutex >= PTHREAD_ERRORCHECK_MUTEX_INITIALIZER)
{
- int result;
-
if ((result = ptw32_mutex_check_need_init (mutex)) != 0)
{
return (result);
@@ -238,51 +203,28 @@ pthread_mutex_timedlock (pthread_mutex_t * mutex,
if (mx->kind == PTHREAD_MUTEX_NORMAL)
{
- if ((c = (LONG) PTW32_INTERLOCKED_COMPARE_EXCHANGE(
- (PTW32_INTERLOCKED_LPLONG) &mx->lock_idx,
- (PTW32_INTERLOCKED_LONG) 0,
- (PTW32_INTERLOCKED_LONG) -1)) != -1)
+ if ((LONG) PTW32_INTERLOCKED_EXCHANGE(
+ (LPLONG) &mx->lock_idx,
+ (LONG) 1) != 0)
{
- do
+ while ((LONG) PTW32_INTERLOCKED_EXCHANGE(
+ (LPLONG) &mx->lock_idx,
+ (LONG) -1) != 0)
{
- if (c == 1 ||
- (LONG) PTW32_INTERLOCKED_COMPARE_EXCHANGE(
- (PTW32_INTERLOCKED_LPLONG) &mx->lock_idx,
- (PTW32_INTERLOCKED_LONG) 1,
- (PTW32_INTERLOCKED_LONG) 0) != -1)
- {
- switch (ptw32_timed_semwait (&mx->wait_sema, abstime))
- {
- case 0: /* We got woken up so try get the lock again. */
- {
- break;
- }
- case 1: /* Timed out. */
- case 2: /* abstime passed before we started to wait. */
- {
- return ETIMEDOUT;
- }
- default:
- {
- return errno;
- }
- }
+ if (0 != (result = ptw32_timed_eventwait (mx->event, abstime)))
+ {
+ return result;
}
- }
- while ((c = (LONG) PTW32_INTERLOCKED_COMPARE_EXCHANGE(
- (PTW32_INTERLOCKED_LPLONG) &mx->lock_idx,
- (PTW32_INTERLOCKED_LONG) 1,
- (PTW32_INTERLOCKED_LONG) -1)) != -1);
+ }
}
}
else
{
pthread_t self = pthread_self();
- if ((c = (LONG) PTW32_INTERLOCKED_COMPARE_EXCHANGE(
- (PTW32_INTERLOCKED_LPLONG) &mx->lock_idx,
- (PTW32_INTERLOCKED_LONG) 0,
- (PTW32_INTERLOCKED_LONG) -1)) == -1)
+ if ((LONG) PTW32_INTERLOCKED_EXCHANGE(
+ (LPLONG) &mx->lock_idx,
+ (LONG) 1) == 0)
{
mx->recursive_count = 1;
mx->ownerThread = self;
@@ -302,36 +244,15 @@ pthread_mutex_timedlock (pthread_mutex_t * mutex,
}
else
{
- do
+ while ((LONG) PTW32_INTERLOCKED_EXCHANGE(
+ (LPLONG) &mx->lock_idx,
+ (LONG) -1) != 0)
{
- if (c == 1 ||
- (LONG) PTW32_INTERLOCKED_COMPARE_EXCHANGE(
- (PTW32_INTERLOCKED_LPLONG) &mx->lock_idx,
- (PTW32_INTERLOCKED_LONG) 1,
- (PTW32_INTERLOCKED_LONG) 0) != -1)
- {
- switch (ptw32_timed_semwait (&mx->wait_sema, abstime))
- {
- case 0: /* We got woken up so try get the lock again. */
- {
- break;
- }
- case 1: /* Timed out. */
- case 2: /* abstime passed before we started to wait. */
- {
- return ETIMEDOUT;
- }
- default:
- {
- return errno;
- }
- }
+ if (0 != (result = ptw32_timed_eventwait (mx->event, abstime)))
+ {
+ return result;
}
- }
- while ((c = (LONG) PTW32_INTERLOCKED_COMPARE_EXCHANGE(
- (PTW32_INTERLOCKED_LPLONG) &mx->lock_idx,
- (PTW32_INTERLOCKED_LONG) 1,
- (PTW32_INTERLOCKED_LONG) -1)) != -1);
+ }
mx->recursive_count = 1;
mx->ownerThread = self;
diff --git a/pthread_mutex_trylock.c b/pthread_mutex_trylock.c
index e8ea57b..a0f063d 100644
--- a/pthread_mutex_trylock.c
+++ b/pthread_mutex_trylock.c
@@ -64,11 +64,10 @@ pthread_mutex_trylock (pthread_mutex_t * mutex)
mx = *mutex;
- if ((PTW32_INTERLOCKED_LONG) -1 ==
- PTW32_INTERLOCKED_COMPARE_EXCHANGE ((PTW32_INTERLOCKED_LPLONG) &
- mx->lock_idx,
- (PTW32_INTERLOCKED_LONG) 0,
- (PTW32_INTERLOCKED_LONG) -1))
+ if (0 == (LONG) PTW32_INTERLOCKED_COMPARE_EXCHANGE (
+ (PTW32_INTERLOCKED_LPLONG) &mx->lock_idx,
+ (PTW32_INTERLOCKED_LONG) 1,
+ (PTW32_INTERLOCKED_LONG) 0))
{
if (mx->kind != PTHREAD_MUTEX_NORMAL)
{
diff --git a/pthread_mutex_unlock.c b/pthread_mutex_unlock.c
index d853178..e28b38a 100644
--- a/pthread_mutex_unlock.c
+++ b/pthread_mutex_unlock.c
@@ -61,30 +61,26 @@ pthread_mutex_unlock (pthread_mutex_t * mutex)
{
LONG idx;
- idx = (LONG) PTW32_INTERLOCKED_COMPARE_EXCHANGE ((PTW32_INTERLOCKED_LPLONG)
- &mx->lock_idx,
- (PTW32_INTERLOCKED_LONG) -1,
- (PTW32_INTERLOCKED_LONG) 0);
-
+ idx = (LONG) PTW32_INTERLOCKED_EXCHANGE ((LPLONG) &mx->lock_idx,
+ (LONG) 0);
if (idx != 0)
{
- if (idx > 0)
+ if (idx < 0)
{
- mx->lock_idx = -1;
/* Someone may be waiting on that mutex */
- if (sem_post (&mx->wait_sema) != 0)
+ if (SetEvent (mx->event) == 0)
{
- result = errno;
+ result = EINVAL;
}
- }
- else
- {
- /*
- * Was not locked (so can't be owned by us).
- */
- result = EPERM;
}
}
+ else
+ {
+ /*
+ * Was not locked (so can't be owned by us).
+ */
+ result = EPERM;
+ }
}
else
{
@@ -95,13 +91,13 @@ pthread_mutex_unlock (pthread_mutex_t * mutex)
{
mx->ownerThread = NULL;
- if (InterlockedDecrement (&mx->lock_idx) >= 0)
+ if ((LONG) PTW32_INTERLOCKED_EXCHANGE ((LPLONG) &mx->lock_idx,
+ (LONG) 0) < 0)
{
/* Someone may be waiting on that mutex */
- mx->lock_idx = -1;
- if (sem_post (&mx->wait_sema) != 0)
+ if (SetEvent (mx->event) == 0)
{
- result = errno;
+ result = EINVAL;
}
}
}
diff --git a/pthread_win32_attach_detach_np.c b/pthread_win32_attach_detach_np.c
index ea0a307..4aedccc 100644
--- a/pthread_win32_attach_detach_np.c
+++ b/pthread_win32_attach_detach_np.c
@@ -52,14 +52,46 @@ BOOL
pthread_win32_process_attach_np ()
{
BOOL result = TRUE;
+ DWORD_PTR vProcessCPUs;
+ DWORD_PTR vSystemCPUs;
result = ptw32_processInitialize ();
+
#ifdef _UWIN
pthread_count++;
#endif
ptw32_features = 0;
+
+#if defined(NEED_PROCESS_AFFINITY_MASK)
+
+ ptw32_smp_system = PTW32_FALSE;
+
+#else
+
+ if (GetProcessAffinityMask (GetCurrentProcess (),
+ &vProcessCPUs, &vSystemCPUs))
+ {
+ int CPUs = 0;
+ DWORD_PTR bit;
+
+ for (bit = 1; bit != 0; bit <<= 1)
+ {
+ if (vSystemCPUs & bit)
+ {
+ CPUs++;
+ }
+ }
+ ptw32_smp_system = (CPUs > 1);
+ }
+ else
+ {
+ ptw32_smp_system = PTW32_FALSE;
+ }
+
+#endif
+
#ifndef TEST_ICE
/*
diff --git a/ptw32_InterlockedCompareExchange.c b/ptw32_InterlockedCompareExchange.c
index e3c5162..6da2bec 100644
--- a/ptw32_InterlockedCompareExchange.c
+++ b/ptw32_InterlockedCompareExchange.c
@@ -42,13 +42,8 @@
/*
* ptw32_InterlockedCompareExchange --
*
- * Needed because W95 doesn't support InterlockedCompareExchange.
- * It is only used when running the dll on W95. Other versions of
- * Windows use the Win32 supported version, which may be running on
- * different processor types.
- *
- * This can't be inlined because we need to know it's address so that
- * we can call it through a pointer.
+ * Originally needed because W9x doesn't support InterlockedCompareExchange.
+ * We now use this version wherever possible so we can inline it.
*/
INLINE PTW32_INTERLOCKED_LONG WINAPI
@@ -64,69 +59,217 @@ ptw32_InterlockedCompareExchange (PTW32_INTERLOCKED_LPLONG location,
PTW32_INTERLOCKED_LONG result;
+ /*
+ * Using the LOCK prefix on uni-processor machines is significantly slower
+ * and it is not necessary. The overhead of the conditional below is
+ * negligible in comparison. Since an optimised DLL will inline this
+ * routine, this will be faster than calling the system supplied
+ * Interlocked routine, which appears to avoid the LOCK prefix on
+ * uniprocessor systems. So one DLL works for all systems.
+ */
+ if (ptw32_smp_system)
+
/* *INDENT-OFF* */
#if defined(_M_IX86) || defined(_X86_)
-#if defined(_MSC_VER) || defined(__WATCOMC__)
+#if defined(_MSC_VER) || defined(__WATCOMC__) || defined(__BORLAND__)
#define HAVE_INLINABLE_INTERLOCKED_CMPXCHG
- _asm {
- PUSH ecx
- PUSH edx
- MOV ecx,dword ptr [location]
- MOV edx,dword ptr [value]
- MOV eax,dword ptr [comparand]
- LOCK CMPXCHG dword ptr [ecx],edx ; if (EAX == [ECX]),
- ; [ECX] = EDX
- ; else
- ; EAX = [ECX]
- MOV dword ptr [result], eax
- POP edx
- POP ecx
- }
-
-#elif defined(__BORLANDC__)
+ {
+ _asm {
+ PUSH ecx
+ PUSH edx
+ MOV ecx,dword ptr [location]
+ MOV edx,dword ptr [value]
+ MOV eax,dword ptr [comparand]
+ LOCK CMPXCHG dword ptr [ecx],edx
+ MOV dword ptr [result], eax
+ POP edx
+ POP ecx
+ }
+ }
+ else
+ {
+ _asm {
+ PUSH ecx
+ PUSH edx
+ MOV ecx,dword ptr [location]
+ MOV edx,dword ptr [value]
+ MOV eax,dword ptr [comparand]
+ CMPXCHG dword ptr [ecx],edx
+ MOV dword ptr [result], eax
+ POP edx
+ POP ecx
+ }
+ }
+
+#elif defined(__GNUC__)
#define HAVE_INLINABLE_INTERLOCKED_CMPXCHG
- _asm {
- PUSH ecx
- PUSH edx
- MOV ecx,dword ptr [location]
- MOV edx,dword ptr [value]
- MOV eax,dword ptr [comparand]
- LOCK CMPXCHG dword ptr [ecx],edx /* if (EAX == [ECX]) */
- /* [ECX] = EDX */
- /* else */
- /* EAX = [ECX] */
- MOV dword ptr [result], eax
- POP edx
- POP ecx
- }
+ {
+ __asm__ __volatile__
+ (
+ "lock\n\t"
+ "cmpxchgl %2,%1" /* if (EAX == [location]) */
+ /* [location] = value */
+ /* else */
+ /* EAX = [location] */
+ :"=a" (result)
+ :"m" (*location), "r" (value), "a" (comparand));
+ }
+ else
+ {
+ __asm__ __volatile__
+ (
+ "cmpxchgl %2,%1" /* if (EAX == [location]) */
+ /* [location] = value */
+ /* else */
+ /* EAX = [location] */
+ :"=a" (result)
+ :"m" (*location), "r" (value), "a" (comparand));
+ }
+
+#endif
+
+#else
+
+ /*
+ * If execution gets to here then we're running on a currently
+ * unsupported processor or compiler.
+ */
+
+ result = 0;
+
+#endif
+
+/* *INDENT-ON* */
+
+ return result;
+
+#if defined(__WATCOMC__)
+#pragma enable_message (200)
+#endif
+
+}
+
+/*
+ * ptw32_InterlockedExchange --
+ *
+ * We now use this version wherever possible so we can inline it.
+ */
+
+INLINE LONG WINAPI
+ptw32_InterlockedExchange (LPLONG location,
+ LONG value)
+{
+
+#if defined(__WATCOMC__)
+/* Don't report that result is not assigned a value before being referenced */
+#pragma disable_message (200)
+#endif
+
+ LONG result;
+
+ /*
+ * The XCHG instruction always locks the bus with or without the
+ * LOCK prefix. This makes it significantly slower than CMPXCHG on
+ * uni-processor machines. The Windows InterlockedExchange function
+ * is nearly 3 times faster than the XCHG instruction, so this routine
+ * is not yet very useful for speeding up pthreads.
+ */
+ if (ptw32_smp_system)
+
+/* *INDENT-OFF* */
+
+#if defined(_M_IX86) || defined(_X86_)
+
+#if defined(_MSC_VER) || defined(__WATCOMC__) || defined(__BORLAND__)
+#define HAVE_INLINABLE_INTERLOCKED_XCHG
+
+ {
+ _asm {
+ PUSH ecx
+ MOV ecx,dword ptr [location]
+ MOV eax,dword ptr [value]
+ XCHG dword ptr [ecx],eax
+ MOV dword ptr [result], eax
+ POP ecx
+ }
+ }
+ else
+ {
+ /*
+ * Faster version of XCHG for uni-processor systems because
+ * it doesn't lock the bus. If an interrupt or context switch
+ * occurs between the MOV and the CMPXCHG then the value in
+ * 'location' may have changed, in which case we will loop
+ * back to do the MOV again. Because both instructions
+ * reference the same location, they will not be re-ordered
+ * in the pipeline.
+ * Tests show that this routine has almost identical timing
+ * to Win32's InterlockedExchange(), which is much faster than
+ * using an inlined 'xchg' instruction, so it's probably
+ * doing something similar to this (on UP systems).
+ *
+ * Can we do without the PUSH/POP instructions?
+ */
+ _asm {
+ PUSH ecx
+ PUSH edx
+ MOV ecx,dword ptr [location]
+ MOV edx,dword ptr [value]
+L1: MOV eax,dword ptr [ecx]
+ CMPXCHG dword ptr [ecx],edx
+ JNZ L1
+ MOV dword ptr [result], eax
+ POP edx
+ POP ecx
+ }
+ }
#elif defined(__GNUC__)
-#define HAVE_INLINABLE_INTERLOCKED_CMPXCHG
+#define HAVE_INLINABLE_INTERLOCKED_XCHG
- __asm__
- (
- "lock\n\t"
- "cmpxchgl %3,(%0)" /* if (EAX == [location]) */
- /* [location] = value */
- /* else */
- /* EAX = [location] */
- :"=r" (location), "=a" (result)
- :"0" (location), "q" (value), "a" (comparand)
- : "memory" );
+ {
+ __asm__ __volatile__
+ (
+ "xchgl %2,%1"
+ :"=r" (result)
+ :"m" (*location), "0" (value));
+ }
+ else
+ {
+ /*
+ * Faster version of XCHG for uni-processor systems because
+ * it doesn't lock the bus. If an interrupt or context switch
+ * occurs between the movl and the cmpxchgl then the value in
+ * 'location' may have changed, in which case we will loop
+ * back to do the movl again. Because both instructions
+ * reference the same location, they will not be re-ordered
+ * in the pipeline.
+ * Tests show that this routine has almost identical timing
+ * to Win32's InterlockedExchange(), which is much faster than
+ * using an inlined 'xchg' instruction, so it's probably
+ * doing something similar to this (on UP systems).
+ */
+ __asm__ __volatile__
+ (
+ "0:\n\t"
+ "movl %1,%%eax\n\t"
+ "cmpxchgl %2,%1\n\t"
+ "jnz 0b"
+ :"=&a" (result)
+ :"m" (*location), "r" (value));
+ }
#endif
#else
/*
- * If execution gets to here then we should be running on a Win95 system
- * but either running on something other than an X86 processor, or a
- * compiler other than MSVC or GCC. Pthreads-win32 doesn't support that
- * platform (yet).
+ * If execution gets to here then we're running on a currently
+ * unsupported processor or compiler.
*/
result = 0;
@@ -143,9 +286,17 @@ ptw32_InterlockedCompareExchange (PTW32_INTERLOCKED_LPLONG location,
}
-#if 0
+
+#if 1
+
#if defined(PTW32_BUILD_INLINED) && defined(HAVE_INLINABLE_INTERLOCKED_CMPXCHG)
#undef PTW32_INTERLOCKED_COMPARE_EXCHANGE
#define PTW32_INTERLOCKED_COMPARE_EXCHANGE ptw32_InterlockedCompareExchange
#endif
+
+#if defined(PTW32_BUILD_INLINED) && defined(HAVE_INLINABLE_INTERLOCKED_XCHG)
+#undef PTW32_INTERLOCKED_EXCHANGE
+#define PTW32_INTERLOCKED_EXCHANGE ptw32_InterlockedExchange
+#endif
+
#endif
diff --git a/tests/ChangeLog b/tests/ChangeLog
index 2d0c570..f55b2b6 100644
--- a/tests/ChangeLog
+++ b/tests/ChangeLog
@@ -1,14 +1,20 @@
-2004-09-08 Alexandre Girao <alexgirao@gmail.com>
+2004-10-14 Ross Johnson <rpj@callisto.canberra.edu.au>
- * cancel7.c (main): Win98 wants a valid (non-NULL) location
- for the last arg of _beginthreadex().
- * cancel8.c (main): Likewise.
- * exit4.c (main): Likewise.
- * exit5.c (main): Likewise.
+ * rwlock7.c (main): Tidy up statistics reporting; randomise
+ update accesses.
+ * rwlock8.c: New test.
+
+2004-09-08 Alexandre Girao <alexgirao@gmail.com>
+
+ * cancel7.c (main): Win98 wants a valid (non-NULL) location
+ for the last arg of _beginthreadex().
+ * cancel8.c (main): Likewise.
+ * exit4.c (main): Likewise.
+ * exit5.c (main): Likewise.
2004-08-26 Ross Johnson <rpj@callisto.canberra.edu.au>
- * create3.c: New test.
+ * create3.c: New test.
2004-06-21 Ross Johnson <rpj@callisto.canberra.edu.au>
diff --git a/tests/GNUmakefile b/tests/GNUmakefile
index cc36b53..0979fda 100644
--- a/tests/GNUmakefile
+++ b/tests/GNUmakefile
@@ -70,7 +70,7 @@ COPYFILES = $(HDR) $(LIB) $(DLL) $(QAPC)
# stop.
TESTS = sizes loadfree \
- semaphore1 semaphore2 self1 mutex5 mutex1 mutex1e mutex1n mutex1r \
+ self1 mutex5 mutex1 mutex1e mutex1n mutex1r semaphore1 semaphore2 \
condvar1 condvar1_1 condvar1_2 condvar2 condvar2_1 exit1 \
create1 create2 reuse1 reuse2 equal1 \
kill1 valid1 valid2 \
@@ -85,7 +85,7 @@ TESTS = sizes loadfree \
condvar3 condvar3_1 condvar3_2 condvar3_3 \
condvar4 condvar5 condvar6 condvar7 condvar8 condvar9 \
errno1 \
- rwlock1 rwlock2 rwlock3 rwlock4 rwlock5 rwlock6 rwlock7 \
+ rwlock1 rwlock2 rwlock3 rwlock4 rwlock5 rwlock6 rwlock7 rwlock8 \
rwlock2_t rwlock3_t rwlock4_t rwlock5_t rwlock6_t rwlock6_t2 \
context1 cancel3 cancel4 cancel5 cancel6a cancel6d \
cancel7 cancel8 \
@@ -240,6 +240,7 @@ rwlock4.pass: rwlock3.pass
rwlock5.pass: rwlock4.pass
rwlock6.pass: rwlock5.pass
rwlock7.pass: rwlock6.pass
+rwlock8.pass: rwlock7.pass
rwlock2_t.pass: rwlock2.pass
rwlock3_t.pass: rwlock2_t.pass
rwlock4_t.pass: rwlock3_t.pass
diff --git a/tests/Makefile b/tests/Makefile
index 31979a4..535347f 100644
--- a/tests/Makefile
+++ b/tests/Makefile
@@ -100,7 +100,8 @@ PASSES= sizes.pass loadfree.pass \
condvar4.pass condvar5.pass condvar6.pass \
condvar7.pass condvar8.pass condvar9.pass \
errno1.pass \
- rwlock1.pass rwlock2.pass rwlock3.pass rwlock4.pass rwlock5.pass rwlock6.pass rwlock7.pass \
+ rwlock1.pass rwlock2.pass rwlock3.pass rwlock4.pass \
+ rwlock5.pass rwlock6.pass rwlock7.pass rwlock8.pass \
rwlock2_t.pass rwlock3_t.pass rwlock4_t.pass rwlock5_t.pass rwlock6_t.pass rwlock6_t2.pass \
context1.pass \
cancel3.pass cancel4.pass cancel5.pass cancel6a.pass cancel6d.pass \
@@ -324,6 +325,7 @@ rwlock4.pass: rwlock3.pass
rwlock5.pass: rwlock4.pass
rwlock6.pass: rwlock5.pass
rwlock7.pass: rwlock6.pass
+rwlock8.pass: rwlock7.pass
rwlock2_t.pass: rwlock2.pass
rwlock3_t.pass: rwlock2_t.pass
rwlock4_t.pass: rwlock3_t.pass
diff --git a/tests/benchtest1.c b/tests/benchtest1.c
index ce45cf2..120eb19 100644
--- a/tests/benchtest1.c
+++ b/tests/benchtest1.c
@@ -179,7 +179,7 @@ main (int argc, char *argv[])
durationMilliSecs = GetDurationMilliSecs(currSysTimeStart, currSysTimeStop) - overHeadMilliSecs;
printf( "%-45s %15ld %15.3f\n",
- "Simple Critical Section x 2",
+ "Simple Critical Section",
durationMilliSecs,
(float) durationMilliSecs * 1E3 / ITERATIONS);
diff --git a/tests/rwlock7.c b/tests/rwlock7.c
index 8706d4a..91466e4 100644
--- a/tests/rwlock7.c
+++ b/tests/rwlock7.c
@@ -13,7 +13,7 @@
#endif
#define THREADS 5
-#define DATASIZE 15
+#define DATASIZE 7
#define ITERATIONS 1000000
/*
@@ -24,7 +24,8 @@ typedef struct thread_tag {
pthread_t thread_id;
int updates;
int reads;
- int interval;
+ int changed;
+ int seed;
} thread_t;
/*
@@ -45,9 +46,12 @@ static data_t data[DATASIZE];
void *thread_routine (void *arg)
{
thread_t *self = (thread_t*)arg;
- int repeats = 0;
int iteration;
int element = 0;
+ int seed = self->seed;
+ int interval = 1 + rand_r (&seed) % 71;
+
+ self->changed = 0;
for (iteration = 0; iteration < ITERATIONS; iteration++)
{
@@ -61,12 +65,13 @@ void *thread_routine (void *arg)
* update operation (write lock instead of read
* lock).
*/
- if ((iteration % self->interval) == 0)
+ if ((iteration % interval) == 0)
{
assert(pthread_rwlock_wrlock (&data[element].lock) == 0);
data[element].data = self->thread_num;
data[element].updates++;
self->updates++;
+ interval = 1 + rand_r (&seed) % 71;
assert(pthread_rwlock_unlock (&data[element].lock) == 0);
} else {
/*
@@ -78,27 +83,17 @@ void *thread_routine (void *arg)
self->reads++;
- if (data[element].data == self->thread_num)
+ if (data[element].data != self->thread_num)
{
- repeats++;
+ self->changed++;
+ interval = 1 + self->changed % 71;
}
assert(pthread_rwlock_unlock (&data[element].lock) == 0);
}
- element++;
-
- if (element >= DATASIZE)
- {
- element = 0;
- }
- }
+ element = (element + 1) % DATASIZE;
- if (repeats > 0)
- {
- printf ("\nThread %d found unchanged elements %d times",
- self->thread_num, repeats);
- fflush(stdout);
}
return NULL;
@@ -137,7 +132,7 @@ main (int argc, char *argv[])
threads[count].thread_num = count;
threads[count].updates = 0;
threads[count].reads = 0;
- threads[count].interval = rand_r (&seed) % 71;
+ threads[count].seed = 1 + rand_r (&seed) % 71;
assert(pthread_create (&threads[count].thread_id,
NULL, thread_routine, (void*)&threads[count]) == 0);
@@ -150,9 +145,28 @@ main (int argc, char *argv[])
for (count = 0; count < THREADS; count++)
{
assert(pthread_join (threads[count].thread_id, NULL) == 0);
+ }
+
+ putchar('\n');
+ fflush(stdout);
+
+ for (count = 0; count < THREADS; count++)
+ {
+ if (threads[count].changed > 0)
+ {
+ printf ("Thread %d found changed elements %d times\n",
+ count, threads[count].changed);
+ }
+ }
+
+ putchar('\n');
+ fflush(stdout);
+
+ for (count = 0; count < THREADS; count++)
+ {
thread_updates += threads[count].updates;
- printf ("\n%02d: interval %d, updates %d, reads %d\n",
- count, threads[count].interval,
+ printf ("%02d: seed %d, updates %d, reads %d\n",
+ count, threads[count].seed,
threads[count].updates, threads[count].reads);
}
diff --git a/tests/rwlock8.c b/tests/rwlock8.c
new file mode 100644
index 0000000..c83a775
--- /dev/null
+++ b/tests/rwlock8.c
@@ -0,0 +1,205 @@
+/*
+ * rwlock8.c
+ *
+ * Hammer on a bunch of rwlocks to test robustness and fairness.
+ * Printed stats should be roughly even for each thread.
+ *
+ * Yield during each access to exercise lock contention code paths
+ * more than rwlock7.c does (particularly on uni-processor systems).
+ */
+
+#include "test.h"
+#include <sys/timeb.h>
+
+#ifdef __GNUC__
+#include <stdlib.h>
+#endif
+
+#define THREADS 5
+#define DATASIZE 7
+#define ITERATIONS 100000
+
+/*
+ * Keep statistics for each thread.
+ */
+typedef struct thread_tag {
+ int thread_num;
+ pthread_t thread_id;
+ int updates;
+ int reads;
+ int changed;
+ int seed;
+} thread_t;
+
+/*
+ * Read-write lock and shared data
+ */
+typedef struct data_tag {
+ pthread_rwlock_t lock;
+ int data;
+ int updates;
+} data_t;
+
+static thread_t threads[THREADS];
+static data_t data[DATASIZE];
+
+/*
+ * Thread start routine: walks data[] taking a write or a read lock on each pass
+ */
+void *thread_routine (void *arg)
+{
+ thread_t *self = (thread_t*)arg;
+ int iteration;
+ int element = 0;
+ int seed = self->seed;
+ int interval = 1 + rand_r (&seed) % 71;
+
+ self->changed = 0;
+
+ for (iteration = 0; iteration < ITERATIONS; iteration++)
+ {
+ if (iteration % (ITERATIONS / 10) == 0)
+ {
+ putchar('.');
+ fflush(stdout);
+ }
+ /*
+ * Whenever the iteration count is a multiple of the local
+ * "interval", perform an update operation (write lock
+ * instead of read lock) and draw a new interval.
+ */
+ if ((iteration % interval) == 0)
+ {
+ assert(pthread_rwlock_wrlock (&data[element].lock) == 0);
+ data[element].data = self->thread_num;
+ data[element].updates++;
+ self->updates++;
+ interval = 1 + rand_r (&seed) % 71;
+ sched_yield();
+ assert(pthread_rwlock_unlock (&data[element].lock) == 0);
+ } else {
+ /*
+ * Look at the current data element to see whether it
+ * holds a value other than this thread's number. Count
+ * the times, to report later, and reset the interval.
+ */
+ assert(pthread_rwlock_rdlock (&data[element].lock) == 0);
+
+ self->reads++;
+
+ if (data[element].data != self->thread_num)
+ {
+ self->changed++;
+ interval = 1 + self->changed % 71;
+ }
+
+ sched_yield();
+
+ assert(pthread_rwlock_unlock (&data[element].lock) == 0);
+ }
+
+ element = (element + 1) % DATASIZE;
+
+ }
+
+ return NULL;
+}
+
+int
+main (int argc, char *argv[])
+{
+ int count;
+ int data_count;
+ int thread_updates = 0;
+ int data_updates = 0;
+ int seed = 1;
+
+ struct _timeb currSysTime1;
+ struct _timeb currSysTime2;
+
+ /*
+ * Zero each shared data element and initialise its read-write lock.
+ */
+ for (data_count = 0; data_count < DATASIZE; data_count++)
+ {
+ data[data_count].data = 0;
+ data[data_count].updates = 0;
+
+ assert(pthread_rwlock_init (&data[data_count].lock, NULL) == 0);
+ }
+
+ _ftime(&currSysTime1);
+
+ /*
+ * Create THREADS threads, each given its own seed, to access shared data.
+ */
+ for (count = 0; count < THREADS; count++)
+ {
+ threads[count].thread_num = count;
+ threads[count].updates = 0;
+ threads[count].reads = 0;
+ threads[count].seed = 1 + rand_r (&seed) % 71;
+
+ assert(pthread_create (&threads[count].thread_id,
+ NULL, thread_routine, (void*)&threads[count]) == 0);
+ }
+
+ /*
+ * Wait for all threads to complete; the per-thread
+ * statistics are reported by the loops that follow.
+ */
+ for (count = 0; count < THREADS; count++)
+ {
+ assert(pthread_join (threads[count].thread_id, NULL) == 0);
+ }
+
+ putchar('\n');
+ fflush(stdout);
+
+ for (count = 0; count < THREADS; count++)
+ {
+ if (threads[count].changed > 0)
+ {
+ printf ("Thread %d found changed elements %d times\n",
+ count, threads[count].changed);
+ }
+ }
+
+ putchar('\n');
+ fflush(stdout);
+
+ for (count = 0; count < THREADS; count++)
+ {
+ thread_updates += threads[count].updates;
+ printf ("%02d: seed %d, updates %d, reads %d\n",
+ count, threads[count].seed,
+ threads[count].updates, threads[count].reads);
+ }
+
+ putchar('\n');
+ fflush(stdout);
+
+ /*
+ * Report statistics for each data element, then destroy its lock.
+ */
+ for (data_count = 0; data_count < DATASIZE; data_count++)
+ {
+ data_updates += data[data_count].updates;
+ printf ("data %02d: value %d, %d updates\n",
+ data_count, data[data_count].data, data[data_count].updates);
+ assert(pthread_rwlock_destroy (&data[data_count].lock) == 0);
+ }
+
+ printf ("%d thread updates, %d data updates\n",
+ thread_updates, data_updates);
+
+ _ftime(&currSysTime2);
+
+ printf( "\nstart: %ld/%d, stop: %ld/%d, duration:%ld\n",
+ currSysTime1.time,currSysTime1.millitm,
+ currSysTime2.time,currSysTime2.millitm,
+ (currSysTime2.time*1000+currSysTime2.millitm) -
+ (currSysTime1.time*1000+currSysTime1.millitm));
+
+ return 0;
+}