Skip to content

Commit

Permalink
poll: avoid extra wakeups in select/poll
Browse files Browse the repository at this point in the history
After the introduction of keyed wakeups, which Davide Libenzi did for epoll, we
are able to avoid spurious wakeups in the poll()/select() code too.

For example, a typical use of poll()/select() is to wait for incoming
network frames on many sockets.  But TX completion for UDP/TCP frames calls
sock_wfree(), which in turn schedules the thread.

When scheduled, the thread does a full scan of all polled fds and can sleep
again, because nothing is really available.  If the number of fds is large,
this causes significant load.

This patch makes select()/poll() aware of keyed wakeups, so that useless
wakeups are avoided.  This reduces the number of context switches by about 50%
on some setups, and also reduces the work performed by softirq handlers.

Signed-off-by: Eric Dumazet <dada1@cosmosbay.com>
Acked-by: David S. Miller <davem@davemloft.net>
Acked-by: Andi Kleen <ak@linux.intel.com>
Acked-by: Ingo Molnar <mingo@elte.hu>
Acked-by: Davide Libenzi <davidel@xmailserver.org>
Cc: Christoph Lameter <cl@linux-foundation.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
  • Loading branch information
Eric Dumazet authored and Linus Torvalds committed Jun 17, 2009
1 parent 02d5341 commit 4938d7e
Show file tree
Hide file tree
Showing 2 changed files with 39 additions and 4 deletions.
40 changes: 36 additions & 4 deletions fs/select.c
Original file line number Diff line number Diff line change
Expand Up @@ -168,7 +168,7 @@ static struct poll_table_entry *poll_get_entry(struct poll_wqueues *p)
return table->entry++;
}

static int pollwake(wait_queue_t *wait, unsigned mode, int sync, void *key)
static int __pollwake(wait_queue_t *wait, unsigned mode, int sync, void *key)
{
struct poll_wqueues *pwq = wait->private;
DECLARE_WAITQUEUE(dummy_wait, pwq->polling_task);
Expand All @@ -194,6 +194,16 @@ static int pollwake(wait_queue_t *wait, unsigned mode, int sync, void *key)
return default_wake_function(&dummy_wait, mode, sync, key);
}

/*
 * Wake function installed on every poll_table_entry.
 *
 * A keyed wakeup (non-NULL @key) is dropped, returning 0, when none of the
 * bits carried in @key are set in the entry's own key.  Unkeyed wakeups and
 * keyed wakeups with at least one matching bit are passed to __pollwake().
 */
static int pollwake(wait_queue_t *wait, unsigned mode, int sync, void *key)
{
	struct poll_table_entry *entry =
		container_of(wait, struct poll_table_entry, wait);
	unsigned long events = (unsigned long)key;

	/* No key at all, or the key overlaps what this entry waits for. */
	if (!key || (events & entry->key))
		return __pollwake(wait, mode, sync, key);

	return 0;
}

/* Add a new entry */
static void __pollwait(struct file *filp, wait_queue_head_t *wait_address,
poll_table *p)
Expand All @@ -205,6 +215,7 @@ static void __pollwait(struct file *filp, wait_queue_head_t *wait_address,
get_file(filp);
entry->filp = filp;
entry->wait_address = wait_address;
entry->key = p->key;
init_waitqueue_func_entry(&entry->wait, pollwake);
entry->wait.private = pwq;
add_wait_queue(wait_address, &entry->wait);
Expand Down Expand Up @@ -362,6 +373,18 @@ static int max_select_fd(unsigned long n, fd_set_bits *fds)
#define POLLOUT_SET (POLLWRBAND | POLLWRNORM | POLLOUT | POLLERR)
#define POLLEX_SET (POLLPRI)

/*
 * Store in @wait->key the set of events select() cares about for one fd.
 *
 * POLLEX_SET is always included; POLLIN_SET is added when @bit is set in
 * @in and POLLOUT_SET when @bit is set in @out.  Does nothing when @wait
 * is NULL.
 */
static inline void wait_key_set(poll_table *wait, unsigned long in,
				unsigned long out, unsigned long bit)
{
	unsigned long events = POLLEX_SET;

	if (!wait)
		return;

	if (in & bit)
		events |= POLLIN_SET;
	if (out & bit)
		events |= POLLOUT_SET;

	wait->key = events;
}

int do_select(int n, fd_set_bits *fds, struct timespec *end_time)
{
ktime_t expire, *to = NULL;
Expand Down Expand Up @@ -418,20 +441,25 @@ int do_select(int n, fd_set_bits *fds, struct timespec *end_time)
if (file) {
f_op = file->f_op;
mask = DEFAULT_POLLMASK;
if (f_op && f_op->poll)
mask = (*f_op->poll)(file, retval ? NULL : wait);
if (f_op && f_op->poll) {
wait_key_set(wait, in, out, bit);
mask = (*f_op->poll)(file, wait);
}
fput_light(file, fput_needed);
if ((mask & POLLIN_SET) && (in & bit)) {
res_in |= bit;
retval++;
wait = NULL;
}
if ((mask & POLLOUT_SET) && (out & bit)) {
res_out |= bit;
retval++;
wait = NULL;
}
if ((mask & POLLEX_SET) && (ex & bit)) {
res_ex |= bit;
retval++;
wait = NULL;
}
}
}
Expand Down Expand Up @@ -685,8 +713,12 @@ static inline unsigned int do_pollfd(struct pollfd *pollfd, poll_table *pwait)
mask = POLLNVAL;
if (file != NULL) {
mask = DEFAULT_POLLMASK;
if (file->f_op && file->f_op->poll)
if (file->f_op && file->f_op->poll) {
if (pwait)
pwait->key = pollfd->events |
POLLERR | POLLHUP;
mask = file->f_op->poll(file, pwait);
}
/* Mask out unneeded events. */
mask &= pollfd->events | POLLERR | POLLHUP;
fput_light(file, fput_needed);
Expand Down
3 changes: 3 additions & 0 deletions include/linux/poll.h
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ typedef void (*poll_queue_proc)(struct file *, wait_queue_head_t *, struct poll_

/*
 * Poll table passed to f_op->poll() implementations by select()/poll().
 */
typedef struct poll_table_struct {
/* queueing callback, stored by init_poll_funcptr() */
poll_queue_proc qproc;
/* mask of events the caller is interested in; init_poll_funcptr() sets it to ~0UL (all events) */
unsigned long key;
} poll_table;

static inline void poll_wait(struct file * filp, wait_queue_head_t * wait_address, poll_table *p)
Expand All @@ -43,10 +44,12 @@ static inline void poll_wait(struct file * filp, wait_queue_head_t * wait_addres
/*
 * Initialise a poll_table: record @qproc as its queueing callback and
 * start with every event bit set in the wakeup key.
 */
static inline void init_poll_funcptr(poll_table *pt, poll_queue_proc qproc)
{
	/* All events are enabled by default. */
	pt->key = ~0UL;
	pt->qproc = qproc;
}

/*
 * One registration of a poller on a wait queue, filled in by __pollwait().
 */
struct poll_table_entry {
/* polled file; __pollwait() calls get_file() on it before storing it here */
struct file *filp;
/* event mask copied from the poll_table's key; pollwake() drops keyed wakeups that share no bit with it */
unsigned long key;
/* wait queue entry, initialised with pollwake() as its wake function */
wait_queue_t wait;
/* wait queue head that 'wait' was added to */
wait_queue_head_t *wait_address;
};
Expand Down

0 comments on commit 4938d7e

Please sign in to comment.