diff options
| -rw-r--r-- | Changes | 3 | ||||
| -rw-r--r-- | ev.c | 32 | ||||
| -rw-r--r-- | ev.pod | 22 | 
3 files changed, 47 insertions, 10 deletions
| @@ -6,6 +6,9 @@ Revision history for libev, a high-performance and full-featured event loop.          - use 3-based 4-heap for !EV_MINIMAL. this makes better use            of cpu cache lines and gives better growth behaviour than            2-based heaps. +        - cache timestamp within heap for !EV_MINIMAL, to avoid random +          memory accesses. +        - document/add EV_USE_4HEAP and EV_HEAP_CACHE_AT.          - fix a potential aliasing issue in ev_timer_again.          - add/document ev_periodic_at, retract direct access to ->at.          - improve ev_stat docs. @@ -237,6 +237,14 @@ extern "C" {  # endif  #endif +#ifndef EV_USE_4HEAP +# define EV_USE_4HEAP !EV_MINIMAL +#endif + +#ifndef EV_HEAP_CACHE_AT +# define EV_HEAP_CACHE_AT !EV_MINIMAL +#endif +  /* this block fixes any misconfiguration where we know we run into trouble otherwise */  #ifndef CLOCK_MONOTONIC @@ -432,11 +440,10 @@ typedef struct  #endif  /* Heap Entry */ -#define EV_HEAP_CACHE_AT 0  #if EV_HEAP_CACHE_AT    typedef struct { -    WT w;      ev_tstamp at; +    WT w;    } ANHE;    #define ANHE_w(he)      (he).w     /* access watcher, read-write */ @@ -793,7 +800,6 @@ fd_rearm_all (EV_P)   * which is more cache-efficient.   * the difference is about 5% with 50000+ watchers.   */ -#define EV_USE_4HEAP !EV_MINIMAL  #if EV_USE_4HEAP  #define DHEAP 4 @@ -888,7 +894,7 @@ upheap (ANHE *heap, int k)        k = p;      } -  heap [k] = w; +  heap [k] = he;    ev_active (ANHE_w (heap [k])) = k;  } @@ -908,7 +914,7 @@ downheap (ANHE *heap, int N, int k)        c += c + 1 < N && ANHE_at (heap [c]) > ANHE_at (heap [c + 1])             ? 1 : 0; -      if (w->at <= ANHE_at (heap [c])) +      if (ANHE_at (he) <= ANHE_at (heap [c]))          break;        heap [k] = heap [c]; @@ -1606,12 +1612,12 @@ timers_reify (EV_P)        /* first reschedule or stop timer */        if (w->repeat)          { -          assert (("negative ev_timer repeat value found while processing timers", w->repeat > 0.)); -            ev_at (w) += w->repeat;            if (ev_at (w) < mn_now)              ev_at (w) = mn_now; +          assert (("negative ev_timer repeat value found while processing timers", w->repeat > 0.)); +            ANHE_at_set (timers [HEAP0]);            downheap (timers, timercnt, HEAP0);          } @@ -1636,7 +1642,9 @@ periodics_reify (EV_P)        if (w->reschedule_cb)          {            ev_at (w) = w->reschedule_cb (w, ev_rt_now + TIME_EPSILON); +            assert (("ev_periodic reschedule callback returned time in the past", ev_at (w) > ev_rt_now)); +            ANHE_at_set (periodics [HEAP0]);            downheap (periodics, periodiccnt, HEAP0);          } @@ -1644,7 +1652,9 @@ periodics_reify (EV_P)          {            ev_at (w) = w->offset + ceil ((ev_rt_now - w->offset) / w->interval) * w->interval;            if (ev_at (w) - ev_rt_now <= TIME_EPSILON) ev_at (w) += w->interval; +            assert (("ev_periodic timeout in the past detected while processing timers, negative interval?", ev_at (w) > ev_rt_now)); +            ANHE_at_set (periodics [HEAP0]);            downheap (periodics, periodiccnt, HEAP0);          } @@ -1673,9 +1683,10 @@ periodics_reschedule (EV_P)        ANHE_at_set (periodics [i]);      } -  /* now rebuild the heap, this for the 2-heap, inefficient for the 4-heap, but correct */ -  for (i = periodiccnt >> 1; --i; ) -    downheap (periodics, periodiccnt, i + HEAP0); +  /* we don't use floyds algorithm, uphead is simpler and is more cache-efficient */ +  /* also, this is easy and corretc for both 2-heaps and 4-heaps */ +  for (i = 0; i < periodiccnt; ++i) +    upheap (periodics, i + HEAP0);  }  #endif @@ -2088,6 +2099,7 @@ ev_periodic_start (EV_P_ ev_periodic *w)    ev_start (EV_A_ (W)w, ++periodiccnt + HEAP0 - 1);    array_needsize (ANHE, periodics, periodicmax, ev_active (w) + 1, EMPTY2);    ANHE_w (periodics [ev_active (w)]) = (WT)w; +  ANHE_at_set (periodics [ev_active (w)]);    upheap (periodics, ev_active (w));    /*assert (("internal periodic heap corruption", ANHE_w (periodics [ev_active (w)]) == (WT)w));*/ @@ -3001,6 +3001,28 @@ usually more than enough. If you need to manage thousands of C<ev_stat>  watchers you might want to increase this value (I<must> be a power of  two). +=item EV_USE_4HEAP + +Heaps are not very cache-efficient. To improve the cache-efficiency of the +timer and periodics heap, libev uses a 4-heap when this symbol is defined +to C<1>. The 4-heap uses more complicated (longer) code but has a +noticable after performance with many (thousands) of watchers. + +The default is C<1> unless C<EV_MINIMAL> is set in which case it is C<0> +(disabled). + +=item EV_HEAP_CACHE_AT + +Heaps are not very cache-efficient. To improve the cache-efficiency of the +timer and periodics heap, libev can cache the timestamp (I<at>) within +the heap structure (selected by defining C<EV_HEAP_CACHE_AT> to C<1>), +which uses 8-12 bytes more per watcher and a few hundred bytes more code, +but avoids random read accesses on heap changes. This noticably improves +performance noticably with with many (hundreds) of watchers. + +The default is C<1> unless C<EV_MINIMAL> is set in which case it is C<0> +(disabled). +  =item EV_COMMON  By default, all watchers have a C<void *data> member. By redefining | 
