diff options
| author | root <root> | 2008-10-27 11:08:29 +0000 | 
|---|---|---|
| committer | root <root> | 2008-10-27 11:08:29 +0000 | 
| commit | ed3f7d82632357b7ba925cb742cd9ef61ceed26d (patch) | |
| tree | 1c03ccb4ac30a7c147b4b5be39f99b5fe1325e14 | |
| parent | 591339236dd05d5184c977fea78de5f5d82e70e1 (diff) | |
work around epoll spurious notifications
| -rw-r--r-- | Changes | 2 | ||||
| -rw-r--r-- | ev.c | 2 | ||||
| -rw-r--r-- | ev.pod | 11 | ||||
| -rw-r--r-- | ev_epoll.c | 20 | 
4 files changed, 26 insertions, 9 deletions
| @@ -5,6 +5,8 @@ WISH? monotonic clocks times/GetTickCount for coarse corrections?  	- further optimise away the EPOLL_CTL_ADD/MOD combo in the epoll            backend by assuming the kernel event mask hasn't changed if            ADD fails with EEXIST. +        - work around spurious event notification bugs in epoll by using +          an 8-bit generation counter.          - use memset to initialise most arrays now and do away with the            init functions.          - expand time-out strategies into a "Be smart about timeouts" section. @@ -452,7 +452,7 @@ typedef struct    unsigned char events;    unsigned char reify;    unsigned char emask; /* the epoll backend stores the actual kernel mask in here */ -  unsigned char unused; /* currently unused padding */ +  unsigned char egen;  /* generation counter to counter epoll bugs */  #if EV_SELECT_IS_WINSOCKET    SOCKET handle;  #endif @@ -391,16 +391,19 @@ of shortcomings, such as silently dropping events in some hard-to-detect  cases and requiring a system call per fd change, no fork support and bad  support for dup. +Epoll is also notoriously buggy - embedding epoll fds should work, but +of course doesn't, and epoll just loves to report events for totally +I<different> file descriptors (even already closed ones) than registered +in the set (especially on SMP systems). Libev tries to counter these +spurious notifications by employing an additional generation counter and +comparing that against the events to filter out spurious ones. +  While stopping, setting and starting an I/O watcher in the same iteration  will result in some caching, there is still a system call per such incident  (because the fd could point to a different file description now), so its  best to avoid that. Also, C<dup ()>'ed file descriptors might not work  very well if you register events for both fds. -Please note that epoll sometimes generates spurious notifications, so you -need to use non-blocking I/O or other means to avoid blocking when no data -(or space) is available. -  Best performance from this backend is achieved by not unregistering all  watchers for a file descriptor until it has been closed, if possible,  i.e. keep at least one watcher active per fd at all times. Stopping and @@ -85,7 +85,8 @@ epoll_modify (EV_P_ int fd, int oev, int nev)    oldmask = anfds [fd].emask;    anfds [fd].emask = nev; -  ev.data.u64 = fd; /* use u64 to fully initialise the struct, for nicer strace etc. */ +  /* store the generation counter in the upper 32 bits */ +  ev.data.u64 = fd | ((uint64_t)++anfds [fd].egen << 32);    ev.events   = (nev & EV_READ  ? EPOLLIN  : 0)                | (nev & EV_WRITE ? EPOLLOUT : 0); @@ -96,7 +97,7 @@ epoll_modify (EV_P_ int fd, int oev, int nev)      {        /* if ENOENT then the fd went away, so try to do the right thing */        if (!nev) -        return; +        goto dec_egen;        if (!epoll_ctl (backend_fd, EPOLL_CTL_ADD, fd, &ev))          return; @@ -105,11 +106,18 @@ epoll_modify (EV_P_ int fd, int oev, int nev)      {        /* EEXIST means we ignored a previous DEL, but the fd is still active */        /* if the kernel mask is the same as the new mask, we assume it hasn't changed */ -      if (oldmask == nev || !epoll_ctl (backend_fd, EPOLL_CTL_MOD, fd, &ev)) +      if (oldmask == nev) +        goto dec_egen; + +      if (!epoll_ctl (backend_fd, EPOLL_CTL_MOD, fd, &ev))          return;      }    fd_kill (EV_A_ fd); + +dec_egen: +  /* we didn't successfully call epoll_ctl, so decrement the generation counter again */ +  --anfds [fd].egen;  }  static void @@ -130,11 +138,15 @@ epoll_poll (EV_P_ ev_tstamp timeout)      {        struct epoll_event *ev = epoll_events + i; -      int fd   = ev->data.u64; +      int fd   = (uint32_t)ev->data.u64; /* mask out the lower 32 bits */        int got  = (ev->events & (EPOLLOUT | EPOLLERR | EPOLLHUP) ? EV_WRITE : 0)                 | (ev->events & (EPOLLIN  | EPOLLERR | EPOLLHUP) ? EV_READ  : 0);        int want = anfds [fd].events; +      if (anfds [fd].egen != (unsigned char)(ev->data.u64 >> 32)) +        /*fprintf (stderr, "spurious notification fd %d, %d vs %d\n", fd, (int)(ev->data.u64 >> 32), anfds [fd].egen);*/ +        continue; +        if (expect_false (got & ~want))          {            anfds [fd].emask = want; | 
