diff options
author | root <root> | 2008-05-09 15:52:13 +0000 |
---|---|---|
committer | root <root> | 2008-05-09 15:52:13 +0000 |
commit | 27a989dd483bf314f84f2a0b0f7708ab200acf7b (patch) | |
tree | a83fdc13577d2d285bb4ca1641c7f0e1ba75e8c2 | |
parent | 2d31af87182b532206cb9522146514bedf7624b7 (diff) |
*** empty log message ***
-rw-r--r-- | Changes | 3 | ||||
-rw-r--r-- | ev.c | 32 | ||||
-rw-r--r-- | ev.pod | 22 |
3 files changed, 47 insertions, 10 deletions
@@ -6,6 +6,9 @@ Revision history for libev, a high-performance and full-featured event loop. - use 3-based 4-heap for !EV_MINIMAL. this makes better use of cpu cache lines and gives better growth behaviour than 2-based heaps. + - cache timestamp within heap for !EV_MINIMAL, to avoid random + memory accesses. + - document/add EV_USE_4HEAP and EV_HEAP_CACHE_AT. - fix a potential aliasing issue in ev_timer_again. - add/document ev_periodic_at, retract direct access to ->at. - improve ev_stat docs. @@ -237,6 +237,14 @@ extern "C" { # endif #endif +#ifndef EV_USE_4HEAP +# define EV_USE_4HEAP !EV_MINIMAL +#endif + +#ifndef EV_HEAP_CACHE_AT +# define EV_HEAP_CACHE_AT !EV_MINIMAL +#endif + /* this block fixes any misconfiguration where we know we run into trouble otherwise */ #ifndef CLOCK_MONOTONIC @@ -432,11 +440,10 @@ typedef struct #endif /* Heap Entry */ -#define EV_HEAP_CACHE_AT 0 #if EV_HEAP_CACHE_AT typedef struct { - WT w; ev_tstamp at; + WT w; } ANHE; #define ANHE_w(he) (he).w /* access watcher, read-write */ @@ -793,7 +800,6 @@ fd_rearm_all (EV_P) * which is more cache-efficient. * the difference is about 5% with 50000+ watchers. */ -#define EV_USE_4HEAP !EV_MINIMAL #if EV_USE_4HEAP #define DHEAP 4 @@ -888,7 +894,7 @@ upheap (ANHE *heap, int k) k = p; } - heap [k] = w; + heap [k] = he; ev_active (ANHE_w (heap [k])) = k; } @@ -908,7 +914,7 @@ downheap (ANHE *heap, int N, int k) c += c + 1 < N && ANHE_at (heap [c]) > ANHE_at (heap [c + 1]) ? 1 : 0; - if (w->at <= ANHE_at (heap [c])) + if (ANHE_at (he) <= ANHE_at (heap [c])) break; heap [k] = heap [c]; @@ -1606,12 +1612,12 @@ timers_reify (EV_P) /* first reschedule or stop timer */ if (w->repeat) { - assert (("negative ev_timer repeat value found while processing timers", w->repeat > 0.)); - ev_at (w) += w->repeat; if (ev_at (w) < mn_now) ev_at (w) = mn_now; + assert (("negative ev_timer repeat value found while processing timers", w->repeat > 0.)); + ANHE_at_set (timers [HEAP0]); downheap (timers, timercnt, HEAP0); } @@ -1636,7 +1642,9 @@ periodics_reify (EV_P) if (w->reschedule_cb) { ev_at (w) = w->reschedule_cb (w, ev_rt_now + TIME_EPSILON); + assert (("ev_periodic reschedule callback returned time in the past", ev_at (w) > ev_rt_now)); + ANHE_at_set (periodics [HEAP0]); downheap (periodics, periodiccnt, HEAP0); } @@ -1644,7 +1652,9 @@ periodics_reify (EV_P) { ev_at (w) = w->offset + ceil ((ev_rt_now - w->offset) / w->interval) * w->interval; if (ev_at (w) - ev_rt_now <= TIME_EPSILON) ev_at (w) += w->interval; + assert (("ev_periodic timeout in the past detected while processing timers, negative interval?", ev_at (w) > ev_rt_now)); + ANHE_at_set (periodics [HEAP0]); downheap (periodics, periodiccnt, HEAP0); } @@ -1673,9 +1683,10 @@ periodics_reschedule (EV_P) ANHE_at_set (periodics [i]); } - /* now rebuild the heap, this for the 2-heap, inefficient for the 4-heap, but correct */ - for (i = periodiccnt >> 1; --i; ) - downheap (periodics, periodiccnt, i + HEAP0); + /* we don't use floyds algorithm, uphead is simpler and is more cache-efficient */ + /* also, this is easy and corretc for both 2-heaps and 4-heaps */ + for (i = 0; i < periodiccnt; ++i) + upheap (periodics, i + HEAP0); } #endif @@ -2088,6 +2099,7 @@ ev_periodic_start (EV_P_ ev_periodic *w) ev_start (EV_A_ (W)w, ++periodiccnt + HEAP0 - 1); array_needsize (ANHE, periodics, periodicmax, ev_active (w) + 1, EMPTY2); ANHE_w (periodics [ev_active (w)]) = (WT)w; + ANHE_at_set (periodics [ev_active (w)]); upheap (periodics, ev_active (w)); /*assert (("internal periodic heap corruption", ANHE_w (periodics [ev_active (w)]) == (WT)w));*/ @@ -3001,6 +3001,28 @@ usually more than enough. If you need to manage thousands of C<ev_stat> watchers you might want to increase this value (I<must> be a power of two). +=item EV_USE_4HEAP + +Heaps are not very cache-efficient. To improve the cache-efficiency of the +timer and periodics heap, libev uses a 4-heap when this symbol is defined +to C<1>. The 4-heap uses more complicated (longer) code but has a +noticable after performance with many (thousands) of watchers. + +The default is C<1> unless C<EV_MINIMAL> is set in which case it is C<0> +(disabled). + +=item EV_HEAP_CACHE_AT + +Heaps are not very cache-efficient. To improve the cache-efficiency of the +timer and periodics heap, libev can cache the timestamp (I<at>) within +the heap structure (selected by defining C<EV_HEAP_CACHE_AT> to C<1>), +which uses 8-12 bytes more per watcher and a few hundred bytes more code, +but avoids random read accesses on heap changes. This noticably improves +performance noticably with with many (hundreds) of watchers. + +The default is C<1> unless C<EV_MINIMAL> is set in which case it is C<0> +(disabled). + =item EV_COMMON By default, all watchers have a C<void *data> member. By redefining |