epoll: make epoll_wait() use the hrtimer range feature

This makes epoll use hrtimers for the timeout value, which prevents
epoll_wait() from timing out up to a millisecond early.

This mirrors the behavior of select() and poll().

Signed-off-by: Shawn Bohrer <shawn.bohrer@gmail.com>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Acked-by: Davide Libenzi <davidel@xmailserver.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
This commit is contained in:
Shawn Bohrer 2010-10-27 15:34:54 -07:00 committed by Linus Torvalds
parent 231f3d393f
commit 95aac7b1cd
3 changed files with 21 additions and 16 deletions

View File

@ -77,9 +77,6 @@
/* Maximum number of nesting allowed inside epoll sets */ /* Maximum number of nesting allowed inside epoll sets */
#define EP_MAX_NESTS 4 #define EP_MAX_NESTS 4
/* Maximum msec timeout value storeable in a long int */
#define EP_MAX_MSTIMEO min(1000ULL * MAX_SCHEDULE_TIMEOUT / HZ, (LONG_MAX - 999ULL) / HZ)
#define EP_MAX_EVENTS (INT_MAX / sizeof(struct epoll_event)) #define EP_MAX_EVENTS (INT_MAX / sizeof(struct epoll_event))
#define EP_UNACTIVE_PTR ((void *) -1L) #define EP_UNACTIVE_PTR ((void *) -1L)
@ -1117,18 +1114,22 @@ static int ep_send_events(struct eventpoll *ep,
static int ep_poll(struct eventpoll *ep, struct epoll_event __user *events, static int ep_poll(struct eventpoll *ep, struct epoll_event __user *events,
int maxevents, long timeout) int maxevents, long timeout)
{ {
int res, eavail; int res, eavail, timed_out = 0;
unsigned long flags; unsigned long flags;
long jtimeout; long slack;
wait_queue_t wait; wait_queue_t wait;
struct timespec end_time;
ktime_t expires, *to = NULL;
/* if (timeout > 0) {
* Calculate the timeout by checking for the "infinite" value (-1) ktime_get_ts(&end_time);
* and the overflow condition. The passed timeout is in milliseconds, timespec_add_ns(&end_time, (u64)timeout * NSEC_PER_MSEC);
* that why (t * HZ) / 1000. slack = select_estimate_accuracy(&end_time);
*/ to = &expires;
jtimeout = (timeout < 0 || timeout >= EP_MAX_MSTIMEO) ? *to = timespec_to_ktime(end_time);
MAX_SCHEDULE_TIMEOUT : (timeout * HZ + 999) / 1000; } else if (timeout == 0) {
timed_out = 1;
}
retry: retry:
spin_lock_irqsave(&ep->lock, flags); spin_lock_irqsave(&ep->lock, flags);
@ -1150,7 +1151,7 @@ retry:
* to TASK_INTERRUPTIBLE before doing the checks. * to TASK_INTERRUPTIBLE before doing the checks.
*/ */
set_current_state(TASK_INTERRUPTIBLE); set_current_state(TASK_INTERRUPTIBLE);
if (!list_empty(&ep->rdllist) || !jtimeout) if (!list_empty(&ep->rdllist) || timed_out)
break; break;
if (signal_pending(current)) { if (signal_pending(current)) {
res = -EINTR; res = -EINTR;
@ -1158,7 +1159,9 @@ retry:
} }
spin_unlock_irqrestore(&ep->lock, flags); spin_unlock_irqrestore(&ep->lock, flags);
jtimeout = schedule_timeout(jtimeout); if (!schedule_hrtimeout_range(to, slack, HRTIMER_MODE_ABS))
timed_out = 1;
spin_lock_irqsave(&ep->lock, flags); spin_lock_irqsave(&ep->lock, flags);
} }
__remove_wait_queue(&ep->wq, &wait); __remove_wait_queue(&ep->wq, &wait);
@ -1176,7 +1179,7 @@ retry:
* more luck. * more luck.
*/ */
if (!res && eavail && if (!res && eavail &&
!(res = ep_send_events(ep, events, maxevents)) && jtimeout) !(res = ep_send_events(ep, events, maxevents)) && !timed_out)
goto retry; goto retry;
return res; return res;

View File

@ -67,7 +67,7 @@ static long __estimate_accuracy(struct timespec *tv)
return slack; return slack;
} }
static long select_estimate_accuracy(struct timespec *tv) long select_estimate_accuracy(struct timespec *tv)
{ {
unsigned long ret; unsigned long ret;
struct timespec now; struct timespec now;

View File

@ -73,6 +73,8 @@ extern void poll_initwait(struct poll_wqueues *pwq);
extern void poll_freewait(struct poll_wqueues *pwq); extern void poll_freewait(struct poll_wqueues *pwq);
extern int poll_schedule_timeout(struct poll_wqueues *pwq, int state, extern int poll_schedule_timeout(struct poll_wqueues *pwq, int state,
ktime_t *expires, unsigned long slack); ktime_t *expires, unsigned long slack);
extern long select_estimate_accuracy(struct timespec *tv);
static inline int poll_schedule(struct poll_wqueues *pwq, int state) static inline int poll_schedule(struct poll_wqueues *pwq, int state)
{ {