summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--Changes3
-rw-r--r--ev.c32
-rw-r--r--ev.pod22
3 files changed, 47 insertions, 10 deletions
diff --git a/Changes b/Changes
index d3d5820..3216dd0 100644
--- a/Changes
+++ b/Changes
@@ -6,6 +6,9 @@ Revision history for libev, a high-performance and full-featured event loop.
- use 3-based 4-heap for !EV_MINIMAL. this makes better use
of cpu cache lines and gives better growth behaviour than
2-based heaps.
+ - cache timestamp within heap for !EV_MINIMAL, to avoid random
+ memory accesses.
+ - document/add EV_USE_4HEAP and EV_HEAP_CACHE_AT.
- fix a potential aliasing issue in ev_timer_again.
- add/document ev_periodic_at, retract direct access to ->at.
- improve ev_stat docs.
diff --git a/ev.c b/ev.c
index b70e452..3147d40 100644
--- a/ev.c
+++ b/ev.c
@@ -237,6 +237,14 @@ extern "C" {
# endif
#endif
+#ifndef EV_USE_4HEAP
+# define EV_USE_4HEAP !EV_MINIMAL
+#endif
+
+#ifndef EV_HEAP_CACHE_AT
+# define EV_HEAP_CACHE_AT !EV_MINIMAL
+#endif
+
/* this block fixes any misconfiguration where we know we run into trouble otherwise */
#ifndef CLOCK_MONOTONIC
@@ -432,11 +440,10 @@ typedef struct
#endif
/* Heap Entry */
-#define EV_HEAP_CACHE_AT 0
#if EV_HEAP_CACHE_AT
typedef struct {
- WT w;
ev_tstamp at;
+ WT w;
} ANHE;
#define ANHE_w(he) (he).w /* access watcher, read-write */
@@ -793,7 +800,6 @@ fd_rearm_all (EV_P)
* which is more cache-efficient.
* the difference is about 5% with 50000+ watchers.
*/
-#define EV_USE_4HEAP !EV_MINIMAL
#if EV_USE_4HEAP
#define DHEAP 4
@@ -888,7 +894,7 @@ upheap (ANHE *heap, int k)
k = p;
}
- heap [k] = w;
+ heap [k] = he;
ev_active (ANHE_w (heap [k])) = k;
}
@@ -908,7 +914,7 @@ downheap (ANHE *heap, int N, int k)
c += c + 1 < N && ANHE_at (heap [c]) > ANHE_at (heap [c + 1])
? 1 : 0;
- if (w->at <= ANHE_at (heap [c]))
+ if (ANHE_at (he) <= ANHE_at (heap [c]))
break;
heap [k] = heap [c];
@@ -1606,12 +1612,12 @@ timers_reify (EV_P)
/* first reschedule or stop timer */
if (w->repeat)
{
- assert (("negative ev_timer repeat value found while processing timers", w->repeat > 0.));
-
ev_at (w) += w->repeat;
if (ev_at (w) < mn_now)
ev_at (w) = mn_now;
+ assert (("negative ev_timer repeat value found while processing timers", w->repeat > 0.));
+
ANHE_at_set (timers [HEAP0]);
downheap (timers, timercnt, HEAP0);
}
@@ -1636,7 +1642,9 @@ periodics_reify (EV_P)
if (w->reschedule_cb)
{
ev_at (w) = w->reschedule_cb (w, ev_rt_now + TIME_EPSILON);
+
assert (("ev_periodic reschedule callback returned time in the past", ev_at (w) > ev_rt_now));
+
ANHE_at_set (periodics [HEAP0]);
downheap (periodics, periodiccnt, HEAP0);
}
@@ -1644,7 +1652,9 @@ periodics_reify (EV_P)
{
ev_at (w) = w->offset + ceil ((ev_rt_now - w->offset) / w->interval) * w->interval;
if (ev_at (w) - ev_rt_now <= TIME_EPSILON) ev_at (w) += w->interval;
+
assert (("ev_periodic timeout in the past detected while processing timers, negative interval?", ev_at (w) > ev_rt_now));
+
ANHE_at_set (periodics [HEAP0]);
downheap (periodics, periodiccnt, HEAP0);
}
@@ -1673,9 +1683,10 @@ periodics_reschedule (EV_P)
ANHE_at_set (periodics [i]);
}
- /* now rebuild the heap, this for the 2-heap, inefficient for the 4-heap, but correct */
- for (i = periodiccnt >> 1; --i; )
- downheap (periodics, periodiccnt, i + HEAP0);
+ /* we don't use floyds algorithm, uphead is simpler and is more cache-efficient */
+ /* also, this is easy and corretc for both 2-heaps and 4-heaps */
+ for (i = 0; i < periodiccnt; ++i)
+ upheap (periodics, i + HEAP0);
}
#endif
@@ -2088,6 +2099,7 @@ ev_periodic_start (EV_P_ ev_periodic *w)
ev_start (EV_A_ (W)w, ++periodiccnt + HEAP0 - 1);
array_needsize (ANHE, periodics, periodicmax, ev_active (w) + 1, EMPTY2);
ANHE_w (periodics [ev_active (w)]) = (WT)w;
+ ANHE_at_set (periodics [ev_active (w)]);
upheap (periodics, ev_active (w));
/*assert (("internal periodic heap corruption", ANHE_w (periodics [ev_active (w)]) == (WT)w));*/
diff --git a/ev.pod b/ev.pod
index 2f44e55..0c09ceb 100644
--- a/ev.pod
+++ b/ev.pod
@@ -3001,6 +3001,28 @@ usually more than enough. If you need to manage thousands of C<ev_stat>
watchers you might want to increase this value (I<must> be a power of
two).
+=item EV_USE_4HEAP
+
+Heaps are not very cache-efficient. To improve the cache-efficiency of the
+timer and periodics heap, libev uses a 4-heap when this symbol is defined
+to C<1>. The 4-heap uses more complicated (longer) code but has a
+noticable after performance with many (thousands) of watchers.
+
+The default is C<1> unless C<EV_MINIMAL> is set in which case it is C<0>
+(disabled).
+
+=item EV_HEAP_CACHE_AT
+
+Heaps are not very cache-efficient. To improve the cache-efficiency of the
+timer and periodics heap, libev can cache the timestamp (I<at>) within
+the heap structure (selected by defining C<EV_HEAP_CACHE_AT> to C<1>),
+which uses 8-12 bytes more per watcher and a few hundred bytes more code,
+but avoids random read accesses on heap changes. This noticably improves
+performance noticably with with many (hundreds) of watchers.
+
+The default is C<1> unless C<EV_MINIMAL> is set in which case it is C<0>
+(disabled).
+
=item EV_COMMON
By default, all watchers have a C<void *data> member. By redefining