From 45b1b8cb2a6588f9316f780d8cefe11c181a9a17 Mon Sep 17 00:00:00 2001 From: rpj Date: Sat, 16 Oct 2004 02:34:44 +0000 Subject: Mutex speedups cont'd --- ChangeLog | 117 +++++++++++------ Makefile | 6 +- global.c | 2 + implement.h | 28 ++-- pthread_mutex_destroy.c | 11 +- pthread_mutex_init.c | 18 ++- pthread_mutex_lock.c | 73 ++++------- pthread_mutex_timedlock.c | 147 +++++---------------- pthread_mutex_trylock.c | 9 +- pthread_mutex_unlock.c | 36 +++--- pthread_win32_attach_detach_np.c | 32 +++++ ptw32_InterlockedCompareExchange.c | 259 +++++++++++++++++++++++++++++-------- tests/ChangeLog | 20 ++- tests/GNUmakefile | 5 +- tests/Makefile | 4 +- tests/benchtest1.c | 2 +- tests/rwlock7.c | 56 +++++--- tests/rwlock8.c | 205 +++++++++++++++++++++++++++++ 18 files changed, 702 insertions(+), 328 deletions(-) create mode 100644 tests/rwlock8.c diff --git a/ChangeLog b/ChangeLog index ec65d84..c1fe46f 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,45 +1,88 @@ -2004-10-08 Ross Johnson +2004-10-15 Ross Johnson - * pthread_mutex_destroy.c (pthread_mutex_destroy): Critical Section - element is no longer required. - * pthread_mutex_init.c (pthread_mutex_init): Likewise. - * pthread_mutex_lock.c (pthread_mutex_lock): New algorithm following Drepper's - paper at http://people.redhat.com/drepper/futex.pdf, but using the existing - semaphore in place of the futex described in the paper. Idea suggested by - Alexander Terekhov - see: - http://sources.redhat.com/ml/pthreads-win32/2003/msg00108.html - * pthread_mutex_timedlock.c pthread_mutex_timedlock(): Similarly. - * pthread_mutex_trylock.c (pthread_mutex_trylock): Similarly. - * pthread_mutex_unlock.c (pthread_mutex_unlock): Similarly. - * pthread_barrier_wait.c (pthread_barrier_wait): Use inlined version of - InterlockedCompareExchange() if possible - determined at build-time. - * pthread_spin_destroy.c pthread_spin_destroy(): Likewise. - * pthread_spin_lock.c pthread_spin_lock():Likewise. 
- * pthread_spin_trylock.c (pthread_spin_trylock):Likewise. - * pthread_spin_unlock.c (pthread_spin_unlock):Likewise. - * ptw32_InterlockedCompareExchange.c: Sets up macro for inlined use. - * implement.h (pthread_mutex_t_): Remove Critical Section element. - (PTW32_INTERLOCKED_COMPARE_EXCHANGE): Set to default non-inlined version of - InterlockedCompareExchange(). - * private.c: Include ptw32_InterlockedCompareExchange.c first for inlining. - * GNUmakefile: Add commandline option to use inlined InterlockedCompareExchange(). - * Makefile: Likewise. + * implement.h (pthread_mutex_t_): Use an event in place of + the POSIX semaphore. + * pthread_mutex_init.c: Create the event; remove semaphore init. + * pthread_mutex_destroy.c: Delete the event. + * pthread_mutex_lock.c: Replace the semaphore wait with the event wait. + * pthread_mutex_trylock.c: Likewise. + * pthread_mutex_timedlock.c: Likewise. + * pthread_mutex_unlock.c: Set the event. + +2004-10-14 Ross Johnson + + * pthread_mutex_lock.c (pthread_mutex_lock): New algorithm using + Terekhov's xchg based variation of Drepper's cmpxchg model. + Theoretically, xchg uses fewer clock cycles than cmpxchg (using IA-32 + as a reference), however, in my opinion bus locking dominates the + equation on smp systems, so the model with the least number of bus + lock operations in the execution path should win, which is Terekhov's + variant. On IA-32 uni-processor systems, it's faster to use the + CMPXCHG instruction without locking the bus than to use the XCHG + instruction, which always locks the bus. This makes the two variants + equal for the non-contended lock (fast lane) execution path on up + IA-32. Testing shows that the xchg variant is faster on up IA-32 as + well if the test forces higher lock contention frequency, even though + kernel calls should be dominating the times (on up IA-32, both + variants used CMPXCHG instructions and neither locked the bus).
+ * pthread_mutex_timedlock.c pthread_mutex_timedlock(): Similarly. + * pthread_mutex_trylock.c (pthread_mutex_trylock): Similarly. + * pthread_mutex_unlock.c (pthread_mutex_unlock): Similarly. + * ptw32_InterlockedCompareExchange.c (ptw32_InterlockedExchange): New + function. + (PTW32_INTERLOCKED_EXCHANGE): Sets up macro to use inlined + ptw32_InterlockedExchange. + * implement.h (PTW32_INTERLOCKED_EXCHANGE): Set default to + InterlockedExchange(). + * Makefile: Building using /Ob2 so that asm sections within inline + functions are inlined. + +2004-10-08 Ross Johnson + + * pthread_mutex_destroy.c (pthread_mutex_destroy): Critical Section + element is no longer required. + * pthread_mutex_init.c (pthread_mutex_init): Likewise. + * pthread_mutex_lock.c (pthread_mutex_lock): New algorithm following + Drepper's paper at http://people.redhat.com/drepper/futex.pdf, but + using the existing semaphore in place of the futex described in the + paper. Idea suggested by Alexander Terekhov - see: + http://sources.redhat.com/ml/pthreads-win32/2003/msg00108.html + * pthread_mutex_timedlock.c pthread_mutex_timedlock(): Similarly. + * pthread_mutex_trylock.c (pthread_mutex_trylock): Similarly. + * pthread_mutex_unlock.c (pthread_mutex_unlock): Similarly. + * pthread_barrier_wait.c (pthread_barrier_wait): Use inlined version + of InterlockedCompareExchange() if possible - determined at + build-time. + * pthread_spin_destroy.c pthread_spin_destroy(): Likewise. + * pthread_spin_lock.c pthread_spin_lock():Likewise. + * pthread_spin_trylock.c (pthread_spin_trylock):Likewise. + * pthread_spin_unlock.c (pthread_spin_unlock):Likewise. + * ptw32_InterlockedCompareExchange.c: Sets up macro for inlined use. + * implement.h (pthread_mutex_t_): Remove Critical Section element. + (PTW32_INTERLOCKED_COMPARE_EXCHANGE): Set to default non-inlined + version of InterlockedCompareExchange(). + * private.c: Include ptw32_InterlockedCompareExchange.c first for + inlining.
+ * GNUmakefile: Add commandline option to use inlined + InterlockedCompareExchange(). + * Makefile: Likewise. 2004-09-27 Ross Johnson - * pthread_mutex_lock.c (pthread_mutex_lock): Separate PTHREAD_MUTEX_NORMAL - logic since we do not need to keep or check some state required by other - mutex types; do not check mutex pointer arg for validity - leave this to - the system since we are only checking for NULL pointers. This should improve - speed of NORMAL mutexes and marginally improve speed of other type. + * pthread_mutex_lock.c (pthread_mutex_lock): Separate + PTHREAD_MUTEX_NORMAL logic since we do not need to keep or check some + state required by other mutex types; do not check mutex pointer arg + for validity - leave this to the system since we are only checking + for NULL pointers. This should improve speed of NORMAL mutexes and + marginally improve speed of other type. * pthread_mutex_trylock.c (pthread_mutex_trylock): Likewise. * pthread_mutex_unlock.c (pthread_mutex_unlock): Likewise; also avoid - entering the critical section for the no-waiters case, with approx. 30% - reduction in lock/unlock overhead for this case.. + entering the critical section for the no-waiters case, with approx. + 30% reduction in lock/unlock overhead for this case. * pthread_mutex_timedlock.c (pthread_mutex_timedlock): Likewise; also - no longer keeps mutex if post-timeout second attempt succeeds - this will - assist applications that wish to impose strict lock deadlines, rather than - simply to escape from frozen locks. + no longer keeps mutex if post-timeout second attempt succeeds - this + will assist applications that wish to impose strict lock deadlines, + rather than simply to escape from frozen locks. 
2004-09-09 Tristan Savatier * pthread.h (struct pthread_once_t_): Qualify the 'done' element @@ -49,8 +92,8 @@ [Maintainer's note: the race condition is harmless on SPU systems and only a problem on MPU systems if concurrent access results in an exception (presumably generated by a hardware interrupt). There are - other instances of similar harmless race conditions that have not been - identified as issues.] + other instances of similar harmless race conditions that have not + been identified as issues.] 2004-09-09 Ross Johnson diff --git a/Makefile b/Makefile index 9fb97bb..f39412e 100644 --- a/Makefile +++ b/Makefile @@ -375,13 +375,13 @@ VC: # inlining optimisation turned on. # VCE-inlined: - @ nmake /nologo EHFLAGS="/O2 /Ob1 $(VCEFLAGS) /DPTW32_BUILD_INLINED" pthreadVCE.stamp + @ nmake /nologo EHFLAGS="/O2 /Ob2 $(VCEFLAGS) /DPTW32_BUILD_INLINED" pthreadVCE.stamp VSE-inlined: - @ nmake /nologo EHFLAGS="/O2 /Ob1 $(VSEFLAGS) /DPTW32_BUILD_INLINED" pthreadVSE.stamp + @ nmake /nologo EHFLAGS="/O2 /Ob2 $(VSEFLAGS) /DPTW32_BUILD_INLINED" pthreadVSE.stamp VC-inlined: - @ nmake /nologo EHFLAGS="/O2 /Ob1 $(VCFLAGS) /DPTW32_BUILD_INLINED" pthreadVC.stamp + @ nmake /nologo EHFLAGS="/O2 /Ob2 $(VCFLAGS) /DPTW32_BUILD_INLINED" pthreadVC.stamp realclean: clean if exist *.dll del *.dll diff --git a/global.c b/global.c index fc9ab91..f3f18cb 100644 --- a/global.c +++ b/global.c @@ -51,6 +51,8 @@ int ptw32_concurrency = 0; /* What features have been auto-detaected */ int ptw32_features = 0; +BOOL ptw32_smp_system = PTW32_TRUE; /* Safer if assumed true initially. 
*/ + /* * Function pointer to InterlockedCompareExchange if it exists, otherwise * it will be set at runtime to a substitute local version with the same diff --git a/implement.h b/implement.h index 2e320bb..f71f506 100644 --- a/implement.h +++ b/implement.h @@ -182,15 +182,17 @@ struct sem_t_ struct pthread_mutex_t_ { LONG lock_idx; /* Provides exclusive access to mutex state - via the Interlocked* mechanism, as well - as a count of the number of threads - waiting on the mutex. */ + via the Interlocked* mechanism. + 0: unlocked/free. + 1: locked - no other waiters. + -1: locked - with possible other waiters. + */ int recursive_count; /* Number of unlocks a thread needs to perform before the lock is released (recursive mutexes only). */ int kind; /* Mutex type. */ pthread_t ownerThread; - sem_t wait_sema; /* Mutex release notification to waiting + HANDLE event; /* Mutex release notification to waiting threads. */ }; @@ -451,6 +453,8 @@ extern int ptw32_concurrency; extern int ptw32_features; +extern BOOL ptw32_smp_system; /* True: SMP system, False: Uni-processor system */ + extern CRITICAL_SECTION ptw32_thread_reuse_lock; extern CRITICAL_SECTION ptw32_mutex_test_init_lock; extern CRITICAL_SECTION ptw32_cond_list_lock; @@ -486,7 +490,11 @@ extern "C" PTW32_INTERLOCKED_LONG value, PTW32_INTERLOCKED_LONG comparand); - DWORD + LONG WINAPI + ptw32_InterlockedExchange (LPLONG location, + LONG value); + + DWORD ptw32_RegisterCancelation (PAPCFUNC callback, HANDLE threadH, DWORD callback_arg); @@ -560,8 +568,7 @@ extern "C" unsigned, void *); _CRTIMP void __cdecl _endthread (void); _CRTIMP unsigned long __cdecl _beginthreadex (void *, unsigned, - unsigned (__stdcall *) (void - *), + unsigned (__stdcall *) (void *), void *, unsigned, unsigned *); _CRTIMP void __cdecl _endthreadex (unsigned); # ifdef __cplusplus @@ -574,12 +581,17 @@ extern "C" /* - * When not building the inlined version of the dll. + * Defaults. 
Could be overridden when building the inlined version of the dll. + * See ptw32_InterlockedCompareExchange.c */ #ifndef PTW32_INTERLOCKED_COMPARE_EXCHANGE #define PTW32_INTERLOCKED_COMPARE_EXCHANGE ptw32_interlocked_compare_exchange #endif +#ifndef PTW32_INTERLOCKED_EXCHANGE +#define PTW32_INTERLOCKED_EXCHANGE InterlockedExchange +#endif + /* * Check for old and new versions of cygwin. See the FAQ file: diff --git a/pthread_mutex_destroy.c b/pthread_mutex_destroy.c index 1ff9ebe..4b0043a 100644 --- a/pthread_mutex_destroy.c +++ b/pthread_mutex_destroy.c @@ -82,8 +82,15 @@ pthread_mutex_destroy (pthread_mutex_t * mutex) if (result == 0) { - (void) sem_destroy (&mx->wait_sema); - free (mx); + if (!CloseHandle (mx->event)) + { + *mutex = mx; + result = EINVAL; + } + else + { + free (mx); + } } else { diff --git a/pthread_mutex_init.c b/pthread_mutex_init.c index fdb6017..0709690 100644 --- a/pthread_mutex_init.c +++ b/pthread_mutex_init.c @@ -80,18 +80,22 @@ pthread_mutex_init (pthread_mutex_t * mutex, const pthread_mutexattr_t * attr) } else { - mx->lock_idx = -1; + mx->lock_idx = 0; mx->recursive_count = 0; mx->kind = (attr == NULL || *attr == NULL ? 
PTHREAD_MUTEX_DEFAULT : (*attr)->kind); mx->ownerThread = NULL; - if (0 != sem_init (&mx->wait_sema, 0, 0)) - { - result = EAGAIN; - free (mx); - mx = NULL; - } + mx->event = CreateEvent (NULL, PTW32_FALSE, /* manual reset = No */ + PTW32_FALSE, /* initial state = not signaled */ + NULL); /* event name */ + + if (0 == mx->event) + { + result = ENOSPC; + free (mx); + mx = NULL; + } } *mutex = mx; diff --git a/pthread_mutex_lock.c b/pthread_mutex_lock.c index 6695907..7be77b5 100644 --- a/pthread_mutex_lock.c +++ b/pthread_mutex_lock.c @@ -44,7 +44,6 @@ int pthread_mutex_lock (pthread_mutex_t * mutex) { int result = 0; - LONG c; pthread_mutex_t mx; /* @@ -69,40 +68,29 @@ pthread_mutex_lock (pthread_mutex_t * mutex) if (mx->kind == PTHREAD_MUTEX_NORMAL) { - if ((c = (LONG) PTW32_INTERLOCKED_COMPARE_EXCHANGE( - (PTW32_INTERLOCKED_LPLONG) &mx->lock_idx, - (PTW32_INTERLOCKED_LONG) 0, - (PTW32_INTERLOCKED_LONG) -1)) != -1) + if ((LONG) PTW32_INTERLOCKED_EXCHANGE( + (LPLONG) &mx->lock_idx, + (LONG) 1) != 0) { - do + while ((LONG) PTW32_INTERLOCKED_EXCHANGE( + (LPLONG) &mx->lock_idx, + (LONG) -1) != 0) { - if (c == 1 || - (LONG) PTW32_INTERLOCKED_COMPARE_EXCHANGE( - (PTW32_INTERLOCKED_LPLONG) &mx->lock_idx, - (PTW32_INTERLOCKED_LONG) 1, - (PTW32_INTERLOCKED_LONG) 0) != -1) - { - if (ptw32_semwait (&mx->wait_sema) != 0) - { - result = errno; - break; - } - } + if (WAIT_OBJECT_0 != WaitForSingleObject (mx->event, INFINITE)) + { + result = EINVAL; + break; + } } - while ((c = (LONG) PTW32_INTERLOCKED_COMPARE_EXCHANGE( - (PTW32_INTERLOCKED_LPLONG) &mx->lock_idx, - (PTW32_INTERLOCKED_LONG) 1, - (PTW32_INTERLOCKED_LONG) -1)) != -1); } } else { pthread_t self = pthread_self(); - if ((c = (LONG) PTW32_INTERLOCKED_COMPARE_EXCHANGE( - (PTW32_INTERLOCKED_LPLONG) &mx->lock_idx, - (PTW32_INTERLOCKED_LONG) 0, - (PTW32_INTERLOCKED_LONG) -1)) == -1) + if ((LONG) PTW32_INTERLOCKED_EXCHANGE( + (LPLONG) &mx->lock_idx, + (LONG) 1) == 0) { mx->recursive_count = 1; mx->ownerThread = self; @@ 
-122,30 +110,21 @@ pthread_mutex_lock (pthread_mutex_t * mutex) } else { - do + while ((LONG) PTW32_INTERLOCKED_EXCHANGE( + (LPLONG) &mx->lock_idx, + (LONG) -1) != 0) { - if (c == 1 || - (LONG) PTW32_INTERLOCKED_COMPARE_EXCHANGE( - (PTW32_INTERLOCKED_LPLONG) &mx->lock_idx, - (PTW32_INTERLOCKED_LONG) 1, - (PTW32_INTERLOCKED_LONG) 0) != -1) + if (WAIT_OBJECT_0 == WaitForSingleObject (mx->event, INFINITE)) { - if (ptw32_semwait (&mx->wait_sema) == 0) - { - mx->recursive_count = 1; - mx->ownerThread = self; - } - else - { - result = errno; - break; - } + mx->recursive_count = 1; + mx->ownerThread = self; + } + else + { + result = EINVAL; + break; } - } - while ((c = (LONG) PTW32_INTERLOCKED_COMPARE_EXCHANGE( - (PTW32_INTERLOCKED_LPLONG) &mx->lock_idx, - (PTW32_INTERLOCKED_LONG) 1, - (PTW32_INTERLOCKED_LONG) -1)) != -1); + } } } } diff --git a/pthread_mutex_timedlock.c b/pthread_mutex_timedlock.c index cf929bc..97a625c 100644 --- a/pthread_mutex_timedlock.c +++ b/pthread_mutex_timedlock.c @@ -45,15 +45,12 @@ static INLINE int -ptw32_timed_semwait (sem_t * sem, const struct timespec *abstime) +ptw32_timed_eventwait (HANDLE event, const struct timespec *abstime) /* * ------------------------------------------------------ * DESCRIPTION - * This function waits on a POSIX semaphore. If the - * semaphore value is greater than zero, it decreases - * its value by one. If the semaphore value is zero, then - * the calling thread (or process) is blocked until it can - * successfully decrease the value or until abstime. + * This function waits on an event until signaled or until + * abstime passes. * If abstime has passed when this routine is called then * it returns a result to indicate this. * @@ -61,25 +58,16 @@ ptw32_timed_semwait (sem_t * sem, const struct timespec *abstime) * block until it can successfully decrease the value or * until interrupted by a signal. * - * Unlike sem_timedwait(), this routine is not a cancelation point. 
- * - * Unlike sem_timedwait(), this routine is non-cancelable. + * This routine is not a cancelation point. * * RESULTS - * 2 abstime has passed already - * 1 abstime timed out while waiting - * 0 successfully decreased semaphore, - * -1 failed, error in errno. - * ERRNO - * EINVAL 'sem' is not a valid semaphore, - * ENOSYS semaphores are not supported, - * EINTR the function was interrupted by a signal, - * EDEADLK a deadlock condition was detected. + * 0 successfully signaled, + * ETIMEDOUT abstime passed + * EINVAL 'event' is not a valid event, * * ------------------------------------------------------ */ { - int result = 0; #ifdef NEED_FTIME @@ -96,9 +84,9 @@ ptw32_timed_semwait (sem_t * sem, const struct timespec *abstime) DWORD milliseconds; DWORD status; - if (sem == NULL) + if (event == NULL) { - result = EINVAL; + return EINVAL; } else { @@ -156,47 +144,26 @@ ptw32_timed_semwait (sem_t * sem, const struct timespec *abstime) if (((int) milliseconds) < 0) { - return 2; + return ETIMEDOUT; } } -#ifdef NEED_SEM - - status = WaitForSingleObject ((*sem)->event, milliseconds); - -#else /* NEED_SEM */ - - status = WaitForSingleObject ((*sem)->sem, milliseconds); - -#endif + status = WaitForSingleObject (event, milliseconds); if (status == WAIT_OBJECT_0) { - -#ifdef NEED_SEM - - ptw32_decrease_semaphore (sem); - -#endif /* NEED_SEM */ - return 0; } else if (status == WAIT_TIMEOUT) { - return 1; + return ETIMEDOUT; } else { - result = EINVAL; + return EINVAL; } } - if (result != 0) - { - errno = result; - return -1; - } - return 0; } /* ptw32_timed_semwait */ @@ -206,7 +173,7 @@ int pthread_mutex_timedlock (pthread_mutex_t * mutex, const struct timespec *abstime) { - LONG c; + int result; pthread_mutex_t mx; #ifdef NEED_SEM @@ -226,8 +193,6 @@ pthread_mutex_timedlock (pthread_mutex_t * mutex, */ if (*mutex >= PTHREAD_ERRORCHECK_MUTEX_INITIALIZER) { - int result; - if ((result = ptw32_mutex_check_need_init (mutex)) != 0) { return (result); @@ -238,51 +203,28 @@ 
pthread_mutex_timedlock (pthread_mutex_t * mutex, if (mx->kind == PTHREAD_MUTEX_NORMAL) { - if ((c = (LONG) PTW32_INTERLOCKED_COMPARE_EXCHANGE( - (PTW32_INTERLOCKED_LPLONG) &mx->lock_idx, - (PTW32_INTERLOCKED_LONG) 0, - (PTW32_INTERLOCKED_LONG) -1)) != -1) + if ((LONG) PTW32_INTERLOCKED_EXCHANGE( + (LPLONG) &mx->lock_idx, + (LONG) 1) != 0) { - do + while ((LONG) PTW32_INTERLOCKED_EXCHANGE( + (LPLONG) &mx->lock_idx, + (LONG) -1) != 0) { - if (c == 1 || - (LONG) PTW32_INTERLOCKED_COMPARE_EXCHANGE( - (PTW32_INTERLOCKED_LPLONG) &mx->lock_idx, - (PTW32_INTERLOCKED_LONG) 1, - (PTW32_INTERLOCKED_LONG) 0) != -1) - { - switch (ptw32_timed_semwait (&mx->wait_sema, abstime)) - { - case 0: /* We got woken up so try get the lock again. */ - { - break; - } - case 1: /* Timed out. */ - case 2: /* abstime passed before we started to wait. */ - { - return ETIMEDOUT; - } - default: - { - return errno; - } - } + if (0 != (result = ptw32_timed_eventwait (mx->event, abstime))) + { + return result; } - } - while ((c = (LONG) PTW32_INTERLOCKED_COMPARE_EXCHANGE( - (PTW32_INTERLOCKED_LPLONG) &mx->lock_idx, - (PTW32_INTERLOCKED_LONG) 1, - (PTW32_INTERLOCKED_LONG) -1)) != -1); + } } } else { pthread_t self = pthread_self(); - if ((c = (LONG) PTW32_INTERLOCKED_COMPARE_EXCHANGE( - (PTW32_INTERLOCKED_LPLONG) &mx->lock_idx, - (PTW32_INTERLOCKED_LONG) 0, - (PTW32_INTERLOCKED_LONG) -1)) == -1) + if ((LONG) PTW32_INTERLOCKED_EXCHANGE( + (LPLONG) &mx->lock_idx, + (LONG) 1) == 0) { mx->recursive_count = 1; mx->ownerThread = self; @@ -302,36 +244,15 @@ pthread_mutex_timedlock (pthread_mutex_t * mutex, } else { - do + while ((LONG) PTW32_INTERLOCKED_EXCHANGE( + (LPLONG) &mx->lock_idx, + (LONG) -1) != 0) { - if (c == 1 || - (LONG) PTW32_INTERLOCKED_COMPARE_EXCHANGE( - (PTW32_INTERLOCKED_LPLONG) &mx->lock_idx, - (PTW32_INTERLOCKED_LONG) 1, - (PTW32_INTERLOCKED_LONG) 0) != -1) - { - switch (ptw32_timed_semwait (&mx->wait_sema, abstime)) - { - case 0: /* We got woken up so try get the lock again. 
*/ - { - break; - } - case 1: /* Timed out. */ - case 2: /* abstime passed before we started to wait. */ - { - return ETIMEDOUT; - } - default: - { - return errno; - } - } + if (0 != (result = ptw32_timed_eventwait (mx->event, abstime))) + { + return result; } - } - while ((c = (LONG) PTW32_INTERLOCKED_COMPARE_EXCHANGE( - (PTW32_INTERLOCKED_LPLONG) &mx->lock_idx, - (PTW32_INTERLOCKED_LONG) 1, - (PTW32_INTERLOCKED_LONG) -1)) != -1); + } mx->recursive_count = 1; mx->ownerThread = self; diff --git a/pthread_mutex_trylock.c b/pthread_mutex_trylock.c index e8ea57b..a0f063d 100644 --- a/pthread_mutex_trylock.c +++ b/pthread_mutex_trylock.c @@ -64,11 +64,10 @@ pthread_mutex_trylock (pthread_mutex_t * mutex) mx = *mutex; - if ((PTW32_INTERLOCKED_LONG) -1 == - PTW32_INTERLOCKED_COMPARE_EXCHANGE ((PTW32_INTERLOCKED_LPLONG) & - mx->lock_idx, - (PTW32_INTERLOCKED_LONG) 0, - (PTW32_INTERLOCKED_LONG) -1)) + if (0 == (LONG) PTW32_INTERLOCKED_COMPARE_EXCHANGE ( + (PTW32_INTERLOCKED_LPLONG) &mx->lock_idx, + (PTW32_INTERLOCKED_LONG) 1, + (PTW32_INTERLOCKED_LONG) 0)) { if (mx->kind != PTHREAD_MUTEX_NORMAL) { diff --git a/pthread_mutex_unlock.c b/pthread_mutex_unlock.c index d853178..e28b38a 100644 --- a/pthread_mutex_unlock.c +++ b/pthread_mutex_unlock.c @@ -61,30 +61,26 @@ pthread_mutex_unlock (pthread_mutex_t * mutex) { LONG idx; - idx = (LONG) PTW32_INTERLOCKED_COMPARE_EXCHANGE ((PTW32_INTERLOCKED_LPLONG) - &mx->lock_idx, - (PTW32_INTERLOCKED_LONG) -1, - (PTW32_INTERLOCKED_LONG) 0); - + idx = (LONG) PTW32_INTERLOCKED_EXCHANGE ((LPLONG) &mx->lock_idx, + (LONG) 0); if (idx != 0) { - if (idx > 0) + if (idx < 0) { - mx->lock_idx = -1; /* Someone may be waiting on that mutex */ - if (sem_post (&mx->wait_sema) != 0) + if (SetEvent (mx->event) == 0) { - result = errno; + result = EINVAL; } - } - else - { - /* - * Was not locked (so can't be owned by us). - */ - result = EPERM; } } + else + { + /* + * Was not locked (so can't be owned by us). 
+ */ + result = EPERM; + } } else { @@ -95,13 +91,13 @@ pthread_mutex_unlock (pthread_mutex_t * mutex) { mx->ownerThread = NULL; - if (InterlockedDecrement (&mx->lock_idx) >= 0) + if ((LONG) PTW32_INTERLOCKED_EXCHANGE ((LPLONG) &mx->lock_idx, + (LONG) 0) < 0) { /* Someone may be waiting on that mutex */ - mx->lock_idx = -1; - if (sem_post (&mx->wait_sema) != 0) + if (SetEvent (mx->event) == 0) { - result = errno; + result = EINVAL; } } } diff --git a/pthread_win32_attach_detach_np.c b/pthread_win32_attach_detach_np.c index ea0a307..4aedccc 100644 --- a/pthread_win32_attach_detach_np.c +++ b/pthread_win32_attach_detach_np.c @@ -52,14 +52,46 @@ BOOL pthread_win32_process_attach_np () { BOOL result = TRUE; + DWORD_PTR vProcessCPUs; + DWORD_PTR vSystemCPUs; result = ptw32_processInitialize (); + #ifdef _UWIN pthread_count++; #endif ptw32_features = 0; + +#if defined(NEED_PROCESS_AFFINITY_MASK) + + ptw32_smp_system = PTW32_FALSE; + +#else + + if (GetProcessAffinityMask (GetCurrentProcess (), + &vProcessCPUs, &vSystemCPUs)) + { + int CPUs = 0; + DWORD_PTR bit; + + for (bit = 1; bit != 0; bit <<= 1) + { + if (vSystemCPUs & bit) + { + CPUs++; + } + } + ptw32_smp_system = (CPUs > 1); + } + else + { + ptw32_smp_system = PTW32_FALSE; + } + +#endif + #ifndef TEST_ICE /* diff --git a/ptw32_InterlockedCompareExchange.c b/ptw32_InterlockedCompareExchange.c index e3c5162..6da2bec 100644 --- a/ptw32_InterlockedCompareExchange.c +++ b/ptw32_InterlockedCompareExchange.c @@ -42,13 +42,8 @@ /* * ptw32_InterlockedCompareExchange -- * - * Needed because W95 doesn't support InterlockedCompareExchange. - * It is only used when running the dll on W95. Other versions of - * Windows use the Win32 supported version, which may be running on - * different processor types. - * - * This can't be inlined because we need to know it's address so that - * we can call it through a pointer. + * Originally needed because W9x doesn't support InterlockedCompareExchange. 
+ * We now use this version wherever possible so we can inline it. */ INLINE PTW32_INTERLOCKED_LONG WINAPI @@ -64,69 +59,217 @@ ptw32_InterlockedCompareExchange (PTW32_INTERLOCKED_LPLONG location, PTW32_INTERLOCKED_LONG result; + /* + * Using the LOCK prefix on uni-processor machines is significantly slower + * and it is not necessary. The overhead of the conditional below is + * negligible in comparison. Since an optimised DLL will inline this + * routine, this will be faster than calling the system supplied + * Interlocked routine, which appears to avoid the LOCK prefix on + * uniprocessor systems. So one DLL works for all systems. + */ + if (ptw32_smp_system) + /* *INDENT-OFF* */ #if defined(_M_IX86) || defined(_X86_) -#if defined(_MSC_VER) || defined(__WATCOMC__) +#if defined(_MSC_VER) || defined(__WATCOMC__) || defined(__BORLAND__) #define HAVE_INLINABLE_INTERLOCKED_CMPXCHG - _asm { - PUSH ecx - PUSH edx - MOV ecx,dword ptr [location] - MOV edx,dword ptr [value] - MOV eax,dword ptr [comparand] - LOCK CMPXCHG dword ptr [ecx],edx ; if (EAX == [ECX]), - ; [ECX] = EDX - ; else - ; EAX = [ECX] - MOV dword ptr [result], eax - POP edx - POP ecx - } - -#elif defined(__BORLANDC__) + { + _asm { + PUSH ecx + PUSH edx + MOV ecx,dword ptr [location] + MOV edx,dword ptr [value] + MOV eax,dword ptr [comparand] + LOCK CMPXCHG dword ptr [ecx],edx + MOV dword ptr [result], eax + POP edx + POP ecx + } + } + else + { + _asm { + PUSH ecx + PUSH edx + MOV ecx,dword ptr [location] + MOV edx,dword ptr [value] + MOV eax,dword ptr [comparand] + CMPXCHG dword ptr [ecx],edx + MOV dword ptr [result], eax + POP edx + POP ecx + } + } + +#elif defined(__GNUC__) #define HAVE_INLINABLE_INTERLOCKED_CMPXCHG - _asm { - PUSH ecx - PUSH edx - MOV ecx,dword ptr [location] - MOV edx,dword ptr [value] - MOV eax,dword ptr [comparand] - LOCK CMPXCHG dword ptr [ecx],edx /* if (EAX == [ECX]) */ - /* [ECX] = EDX */ - /* else */ - /* EAX = [ECX] */ - MOV dword ptr [result], eax - POP edx - POP ecx - } + { + 
__asm__ __volatile__ + ( + "lock\n\t" + "cmpxchgl %2,%1" /* if (EAX == [location]) */ + /* [location] = value */ + /* else */ + /* EAX = [location] */ + :"=a" (result) + :"m" (*location), "r" (value), "a" (comparand)); + } + else + { + __asm__ __volatile__ + ( + "cmpxchgl %2,%1" /* if (EAX == [location]) */ + /* [location] = value */ + /* else */ + /* EAX = [location] */ + :"=a" (result) + :"m" (*location), "r" (value), "a" (comparand)); + } + +#endif + +#else + + /* + * If execution gets to here then we're running on a currently + * unsupported processor or compiler. + */ + + result = 0; + +#endif + +/* *INDENT-ON* */ + + return result; + +#if defined(__WATCOMC__) +#pragma enable_message (200) +#endif + +} + +/* + * ptw32_InterlockedExchange -- + * + * We now use this version wherever possible so we can inline it. + */ + +INLINE LONG WINAPI +ptw32_InterlockedExchange (LPLONG location, + LONG value) +{ + +#if defined(__WATCOMC__) +/* Don't report that result is not assigned a value before being referenced */ +#pragma disable_message (200) +#endif + + LONG result; + + /* + * The XCHG instruction always locks the bus with or without the + * LOCK prefix. This makes it significantly slower than CMPXCHG on + * uni-processor machines. The Windows InterlockedExchange function + * is nearly 3 times faster than the XCHG instruction, so this routine + * is not yet very useful for speeding up pthreads. + */ + if (ptw32_smp_system) + +/* *INDENT-OFF* */ + +#if defined(_M_IX86) || defined(_X86_) + +#if defined(_MSC_VER) || defined(__WATCOMC__) || defined(__BORLAND__) +#define HAVE_INLINABLE_INTERLOCKED_XCHG + + { + _asm { + PUSH ecx + MOV ecx,dword ptr [location] + MOV eax,dword ptr [value] + XCHG dword ptr [ecx],eax + MOV dword ptr [result], eax + POP ecx + } + } + else + { + /* + * Faster version of XCHG for uni-processor systems because + * it doesn't lock the bus.
If an interrupt or context switch + * occurs between the MOV and the CMPXCHG then the value in + * 'location' may have changed, in which case we will loop + * back to do the MOV again. Because both instructions + * reference the same location, they will not be re-ordered + * in the pipeline. + * Tests show that this routine has almost identical timing + * to Win32's InterlockedExchange(), which is much faster than + * using an inlined 'xchg' instruction, so it's probably + * doing something similar to this (on UP systems). + * + * Can we do without the PUSH/POP instructions? + */ + _asm { + PUSH ecx + PUSH edx + MOV ecx,dword ptr [location] + MOV edx,dword ptr [value] +L1: MOV eax,dword ptr [ecx] + CMPXCHG dword ptr [ecx],edx + JNZ L1 + MOV dword ptr [result], eax + POP edx + POP ecx + } + } #elif defined(__GNUC__) -#define HAVE_INLINABLE_INTERLOCKED_CMPXCHG +#define HAVE_INLINABLE_INTERLOCKED_XCHG - __asm__ - ( - "lock\n\t" - "cmpxchgl %3,(%0)" /* if (EAX == [location]) */ - /* [location] = value */ - /* else */ - /* EAX = [location] */ - :"=r" (location), "=a" (result) - :"0" (location), "q" (value), "a" (comparand) - : "memory" ); + { + __asm__ __volatile__ + ( + "xchgl %2,%1" + :"=r" (result) + :"m" (*location), "0" (value)); + } + else + { + /* + * Faster version of XCHG for uni-processor systems because + * it doesn't lock the bus. If an interrupt or context switch + * occurs between the movl and the cmpxchgl then the value in + * 'location' may have changed, in which case we will loop + * back to do the movl again. Because both instructions + * reference the same location, they will not be re-ordered + * in the pipeline. + * Tests show that this routine has almost identical timing + * to Win32's InterlockedExchange(), which is much faster than + * using an inlined 'xchg' instruction, so it's probably + * doing something similar to this (on UP systems).
+ */ + __asm__ __volatile__ + ( + "0:\n\t" + "movl %1,%%eax\n\t" + "cmpxchgl %2,%1\n\t" + "jnz 0b" + :"=&a" (result) + :"m" (*location), "r" (value)); + } #endif #else /* - * If execution gets to here then we should be running on a Win95 system - * but either running on something other than an X86 processor, or a - * compiler other than MSVC or GCC. Pthreads-win32 doesn't support that - * platform (yet). + * If execution gets to here then we're running on a currently + * unsupported processor or compiler. */ result = 0; @@ -143,9 +286,17 @@ ptw32_InterlockedCompareExchange (PTW32_INTERLOCKED_LPLONG location, } -#if 0 + +#if 1 + #if defined(PTW32_BUILD_INLINED) && defined(HAVE_INLINABLE_INTERLOCKED_CMPXCHG) #undef PTW32_INTERLOCKED_COMPARE_EXCHANGE #define PTW32_INTERLOCKED_COMPARE_EXCHANGE ptw32_InterlockedCompareExchange #endif + +#if defined(PTW32_BUILD_INLINED) && defined(HAVE_INLINABLE_INTERLOCKED_XCHG) +#undef PTW32_INTERLOCKED_EXCHANGE +#define PTW32_INTERLOCKED_EXCHANGE ptw32_InterlockedExchange +#endif + #endif diff --git a/tests/ChangeLog b/tests/ChangeLog index 2d0c570..f55b2b6 100644 --- a/tests/ChangeLog +++ b/tests/ChangeLog @@ -1,14 +1,20 @@ -2004-09-08 Alexandre Girao +2004-10-14 Ross Johnson - * cancel7.c (main): Win98 wants a valid (non-NULL) location - for the last arg of _beginthreadex(). - * cancel8.c (main): Likewise. - * exit4.c (main): Likewise. - * exit5.c (main): Likewise. + * rwlock7.c (main): Tidy up statistics reporting; randomise + update accesses. + * rwlock8.c: New test. + +2004-09-08 Alexandre Girao + + * cancel7.c (main): Win98 wants a valid (non-NULL) location + for the last arg of _beginthreadex(). + * cancel8.c (main): Likewise. + * exit4.c (main): Likewise. + * exit5.c (main): Likewise. 2004-08-26 Ross Johnson - * create3.c: New test. + * create3.c: New test. 
2004-06-21 Ross Johnson diff --git a/tests/GNUmakefile b/tests/GNUmakefile index cc36b53..0979fda 100644 --- a/tests/GNUmakefile +++ b/tests/GNUmakefile @@ -70,7 +70,7 @@ COPYFILES = $(HDR) $(LIB) $(DLL) $(QAPC) # stop. TESTS = sizes loadfree \ - semaphore1 semaphore2 self1 mutex5 mutex1 mutex1e mutex1n mutex1r \ + self1 mutex5 mutex1 mutex1e mutex1n mutex1r semaphore1 semaphore2 \ condvar1 condvar1_1 condvar1_2 condvar2 condvar2_1 exit1 \ create1 create2 reuse1 reuse2 equal1 \ kill1 valid1 valid2 \ @@ -85,7 +85,7 @@ TESTS = sizes loadfree \ condvar3 condvar3_1 condvar3_2 condvar3_3 \ condvar4 condvar5 condvar6 condvar7 condvar8 condvar9 \ errno1 \ - rwlock1 rwlock2 rwlock3 rwlock4 rwlock5 rwlock6 rwlock7 \ + rwlock1 rwlock2 rwlock3 rwlock4 rwlock5 rwlock6 rwlock7 rwlock8 \ rwlock2_t rwlock3_t rwlock4_t rwlock5_t rwlock6_t rwlock6_t2 \ context1 cancel3 cancel4 cancel5 cancel6a cancel6d \ cancel7 cancel8 \ @@ -240,6 +240,7 @@ rwlock4.pass: rwlock3.pass rwlock5.pass: rwlock4.pass rwlock6.pass: rwlock5.pass rwlock7.pass: rwlock6.pass +rwlock8.pass: rwlock7.pass rwlock2_t.pass: rwlock2.pass rwlock3_t.pass: rwlock2_t.pass rwlock4_t.pass: rwlock3_t.pass diff --git a/tests/Makefile b/tests/Makefile index 31979a4..535347f 100644 --- a/tests/Makefile +++ b/tests/Makefile @@ -100,7 +100,8 @@ PASSES= sizes.pass loadfree.pass \ condvar4.pass condvar5.pass condvar6.pass \ condvar7.pass condvar8.pass condvar9.pass \ errno1.pass \ - rwlock1.pass rwlock2.pass rwlock3.pass rwlock4.pass rwlock5.pass rwlock6.pass rwlock7.pass \ + rwlock1.pass rwlock2.pass rwlock3.pass rwlock4.pass \ + rwlock5.pass rwlock6.pass rwlock7.pass rwlock8.pass \ rwlock2_t.pass rwlock3_t.pass rwlock4_t.pass rwlock5_t.pass rwlock6_t.pass rwlock6_t2.pass \ context1.pass \ cancel3.pass cancel4.pass cancel5.pass cancel6a.pass cancel6d.pass \ @@ -324,6 +325,7 @@ rwlock4.pass: rwlock3.pass rwlock5.pass: rwlock4.pass rwlock6.pass: rwlock5.pass rwlock7.pass: rwlock6.pass +rwlock8.pass: rwlock7.pass rwlock2_t.pass: 
rwlock2.pass rwlock3_t.pass: rwlock2_t.pass rwlock4_t.pass: rwlock3_t.pass diff --git a/tests/benchtest1.c b/tests/benchtest1.c index ce45cf2..120eb19 100644 --- a/tests/benchtest1.c +++ b/tests/benchtest1.c @@ -179,7 +179,7 @@ main (int argc, char *argv[]) durationMilliSecs = GetDurationMilliSecs(currSysTimeStart, currSysTimeStop) - overHeadMilliSecs; printf( "%-45s %15ld %15.3f\n", - "Simple Critical Section x 2", + "Simple Critical Section", durationMilliSecs, (float) durationMilliSecs * 1E3 / ITERATIONS); diff --git a/tests/rwlock7.c b/tests/rwlock7.c index 8706d4a..91466e4 100644 --- a/tests/rwlock7.c +++ b/tests/rwlock7.c @@ -13,7 +13,7 @@ #endif #define THREADS 5 -#define DATASIZE 15 +#define DATASIZE 7 #define ITERATIONS 1000000 /* @@ -24,7 +24,8 @@ typedef struct thread_tag { pthread_t thread_id; int updates; int reads; - int interval; + int changed; + int seed; } thread_t; /* @@ -45,9 +46,12 @@ static data_t data[DATASIZE]; void *thread_routine (void *arg) { thread_t *self = (thread_t*)arg; - int repeats = 0; int iteration; int element = 0; + int seed = self->seed; + int interval = 1 + rand_r (&seed) % 71; + + self->changed = 0; for (iteration = 0; iteration < ITERATIONS; iteration++) { @@ -61,12 +65,13 @@ void *thread_routine (void *arg) * update operation (write lock instead of read * lock). 
*/ - if ((iteration % self->interval) == 0) + if ((iteration % interval) == 0) { assert(pthread_rwlock_wrlock (&data[element].lock) == 0); data[element].data = self->thread_num; data[element].updates++; self->updates++; + interval = 1 + rand_r (&seed) % 71; assert(pthread_rwlock_unlock (&data[element].lock) == 0); } else { /* @@ -78,27 +83,17 @@ void *thread_routine (void *arg) self->reads++; - if (data[element].data == self->thread_num) + if (data[element].data != self->thread_num) { - repeats++; + self->changed++; + interval = 1 + self->changed % 71; } assert(pthread_rwlock_unlock (&data[element].lock) == 0); } - element++; - - if (element >= DATASIZE) - { - element = 0; - } - } + element = (element + 1) % DATASIZE; - if (repeats > 0) - { - printf ("\nThread %d found unchanged elements %d times", - self->thread_num, repeats); - fflush(stdout); } return NULL; @@ -137,7 +132,7 @@ main (int argc, char *argv[]) threads[count].thread_num = count; threads[count].updates = 0; threads[count].reads = 0; - threads[count].interval = rand_r (&seed) % 71; + threads[count].seed = 1 + rand_r (&seed) % 71; assert(pthread_create (&threads[count].thread_id, NULL, thread_routine, (void*)&threads[count]) == 0); @@ -150,9 +145,28 @@ main (int argc, char *argv[]) for (count = 0; count < THREADS; count++) { assert(pthread_join (threads[count].thread_id, NULL) == 0); + } + + putchar('\n'); + fflush(stdout); + + for (count = 0; count < THREADS; count++) + { + if (threads[count].changed > 0) + { + printf ("Thread %d found changed elements %d times\n", + count, threads[count].changed); + } + } + + putchar('\n'); + fflush(stdout); + + for (count = 0; count < THREADS; count++) + { thread_updates += threads[count].updates; - printf ("\n%02d: interval %d, updates %d, reads %d\n", - count, threads[count].interval, + printf ("%02d: seed %d, updates %d, reads %d\n", + count, threads[count].seed, threads[count].updates, threads[count].reads); } diff --git a/tests/rwlock8.c b/tests/rwlock8.c new 
file mode 100644 index 0000000..c83a775 --- /dev/null +++ b/tests/rwlock8.c @@ -0,0 +1,205 @@ +/* + * rwlock8.c + * + * Hammer on a bunch of rwlocks to test robustness and fairness. + * Printed stats should be roughly even for each thread. + * + * Yield during each access to exercise lock contention code paths + * more than rwlock7.c does (particularly on uni-processor systems). + */ + +#include "test.h" +#include <sys/timeb.h> + +#ifdef __GNUC__ +#include <stdlib.h> +#endif + +#define THREADS 5 +#define DATASIZE 7 +#define ITERATIONS 100000 + +/* + * Keep statistics for each thread. + */ +typedef struct thread_tag { + int thread_num; + pthread_t thread_id; + int updates; + int reads; + int changed; + int seed; +} thread_t; + +/* + * Read-write lock and shared data + */ +typedef struct data_tag { + pthread_rwlock_t lock; + int data; + int updates; +} data_t; + +static thread_t threads[THREADS]; +static data_t data[DATASIZE]; + +/* + * Thread start routine that uses read-write locks + */ +void *thread_routine (void *arg) +{ + thread_t *self = (thread_t*)arg; + int iteration; + int element = 0; + int seed = self->seed; + int interval = 1 + rand_r (&seed) % 71; + + self->changed = 0; + + for (iteration = 0; iteration < ITERATIONS; iteration++) + { + if (iteration % (ITERATIONS / 10) == 0) + { + putchar('.'); + fflush(stdout); + } + /* + * Every "interval" iterations, perform an + * update operation (write lock instead of read + * lock). + */ + if ((iteration % interval) == 0) + { + assert(pthread_rwlock_wrlock (&data[element].lock) == 0); + data[element].data = self->thread_num; + data[element].updates++; + self->updates++; + interval = 1 + rand_r (&seed) % 71; + sched_yield(); + assert(pthread_rwlock_unlock (&data[element].lock) == 0); + } else { + /* + * Look at the current data element to see whether + * another thread has changed it. Count the + * times, to report later.
+ */ + assert(pthread_rwlock_rdlock (&data[element].lock) == 0); + + self->reads++; + + if (data[element].data != self->thread_num) + { + self->changed++; + interval = 1 + self->changed % 71; + } + + sched_yield(); + + assert(pthread_rwlock_unlock (&data[element].lock) == 0); + } + + element = (element + 1) % DATASIZE; + + } + + return NULL; +} + +int +main (int argc, char *argv[]) +{ + int count; + int data_count; + int thread_updates = 0; + int data_updates = 0; + int seed = 1; + + struct _timeb currSysTime1; + struct _timeb currSysTime2; + + /* + * Initialize the shared data. + */ + for (data_count = 0; data_count < DATASIZE; data_count++) + { + data[data_count].data = 0; + data[data_count].updates = 0; + + assert(pthread_rwlock_init (&data[data_count].lock, NULL) == 0); + } + + _ftime(&currSysTime1); + + /* + * Create THREADS threads to access shared data. + */ + for (count = 0; count < THREADS; count++) + { + threads[count].thread_num = count; + threads[count].updates = 0; + threads[count].reads = 0; + threads[count].seed = 1 + rand_r (&seed) % 71; + + assert(pthread_create (&threads[count].thread_id, + NULL, thread_routine, (void*)&threads[count]) == 0); + } + + /* + * Wait for all threads to complete, and collect + * statistics. + */ + for (count = 0; count < THREADS; count++) + { + assert(pthread_join (threads[count].thread_id, NULL) == 0); + } + + putchar('\n'); + fflush(stdout); + + for (count = 0; count < THREADS; count++) + { + if (threads[count].changed > 0) + { + printf ("Thread %d found changed elements %d times\n", + count, threads[count].changed); + } + } + + putchar('\n'); + fflush(stdout); + + for (count = 0; count < THREADS; count++) + { + thread_updates += threads[count].updates; + printf ("%02d: seed %d, updates %d, reads %d\n", + count, threads[count].seed, + threads[count].updates, threads[count].reads); + } + + putchar('\n'); + fflush(stdout); + + /* + * Collect statistics for the data. 
+ */ + for (data_count = 0; data_count < DATASIZE; data_count++) + { + data_updates += data[data_count].updates; + printf ("data %02d: value %d, %d updates\n", + data_count, data[data_count].data, data[data_count].updates); + assert(pthread_rwlock_destroy (&data[data_count].lock) == 0); + } + + printf ("%d thread updates, %d data updates\n", + thread_updates, data_updates); + + _ftime(&currSysTime2); + + printf( "\nstart: %ld/%d, stop: %ld/%d, duration:%ld\n", + currSysTime1.time,currSysTime1.millitm, + currSysTime2.time,currSysTime2.millitm, + (currSysTime2.time*1000+currSysTime2.millitm) - + (currSysTime1.time*1000+currSysTime1.millitm)); + + return 0; +} -- cgit v1.2.3