From 3bd012060f962567aadb52b27b2fc8fdc91102c7 Mon Sep 17 00:00:00 2001 From: Arjan van de Ven Date: Mon, 8 Sep 2008 08:58:59 -0700 Subject: [PATCH 1/5] hrtimer: make the nanosleep() syscall use the per process slack This patch makes the nanosleep() system call use the per process slack value; with this users are able to externally control existing applications to reduce the wakeup rate. Signed-off-by: Arjan van de Ven --- kernel/hrtimer.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c index a0222097c57e..9a4c90185566 100644 --- a/kernel/hrtimer.c +++ b/kernel/hrtimer.c @@ -1563,9 +1563,14 @@ long hrtimer_nanosleep(struct timespec *rqtp, struct timespec __user *rmtp, struct restart_block *restart; struct hrtimer_sleeper t; int ret = 0; + unsigned long slack; + + slack = current->timer_slack_ns; + if (rt_task(current)) + slack = 0; hrtimer_init_on_stack(&t.timer, clockid, mode); - hrtimer_set_expires(&t.timer, timespec_to_ktime(*rqtp)); + hrtimer_set_expires_range_ns(&t.timer, timespec_to_ktime(*rqtp), slack); if (do_nanosleep(&t, mode)) goto out; From ae4b748e81b7e366f04f55229d5e372e372c33af Mon Sep 17 00:00:00 2001 From: Arjan van de Ven Date: Mon, 8 Sep 2008 09:03:57 -0700 Subject: [PATCH 2/5] hrtimer: make the futex() system call use the per process slack value This patch makes the futex() system call use the per process slack value; with this users are able to externally control existing applications to reduce the wakeup rate. 
Signed-off-by: Arjan van de Ven --- kernel/futex.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/kernel/futex.c b/kernel/futex.c index 4cd5b4319b04..8af10027514b 100644 --- a/kernel/futex.c +++ b/kernel/futex.c @@ -1296,10 +1296,14 @@ static int futex_wait(u32 __user *uaddr, struct rw_semaphore *fshared, if (!abs_time) schedule(); else { + unsigned long slack; + slack = current->timer_slack_ns; + if (rt_task(current)) + slack = 0; hrtimer_init_on_stack(&t.timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS); hrtimer_init_sleeper(&t, current); - hrtimer_set_expires(&t.timer, *abs_time); + hrtimer_set_expires_range_ns(&t.timer, *abs_time, slack); hrtimer_start_expires(&t.timer, HRTIMER_MODE_ABS); if (!hrtimer_active(&t.timer)) From 2e94d1f71f7e4404d997e6fb4f1618aa147d76f9 Mon Sep 17 00:00:00 2001 From: Arjan van de Ven Date: Wed, 10 Sep 2008 16:06:00 -0700 Subject: [PATCH 3/5] hrtimer: peek at the timer queue just before going idle As part of going idle, we already look at the time of the next timer event to determine which C-state to select etc. This patch adds functionality that causes the timers that are past their soft expire time, to fire at this time, before we calculate the next wakeup time. This functionality will thus avoid wakeups by running timers before going idle rather than specially waking up for it. Signed-off-by: Arjan van de Ven --- drivers/cpuidle/cpuidle.c | 7 +++++++ include/linux/hrtimer.h | 5 +++++ kernel/hrtimer.c | 30 ++++++++++++++++++++++++++++++ 3 files changed, 42 insertions(+) diff --git a/drivers/cpuidle/cpuidle.c b/drivers/cpuidle/cpuidle.c index 5ce07b517c58..2e3148499368 100644 --- a/drivers/cpuidle/cpuidle.c +++ b/drivers/cpuidle/cpuidle.c @@ -16,6 +16,7 @@ #include #include #include +#include #include "cpuidle.h" @@ -60,6 +61,12 @@ static void cpuidle_idle_call(void) return; } + /* + * run any timers that can be run now, at this point + * before calculating the idle duration etc. 
+ */ + hrtimer_peek_ahead_timers(); + /* ask the governor for the next state */ next_state = cpuidle_curr_governor->select(dev); if (need_resched()) diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h index 95db11f62ff2..d93b1e1dc169 100644 --- a/include/linux/hrtimer.h +++ b/include/linux/hrtimer.h @@ -326,6 +326,11 @@ static inline int hrtimer_is_hres_active(struct hrtimer *timer) extern ktime_t ktime_get(void); extern ktime_t ktime_get_real(void); + +DECLARE_PER_CPU(struct tick_device, tick_cpu_device); +extern void hrtimer_peek_ahead_timers(void); + + /* Exported timer functions: */ /* Initialize timers: */ diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c index 9a4c90185566..eb2cf984959f 100644 --- a/kernel/hrtimer.c +++ b/kernel/hrtimer.c @@ -1381,6 +1381,36 @@ void hrtimer_interrupt(struct clock_event_device *dev) raise_softirq(HRTIMER_SOFTIRQ); } +/** + * hrtimer_peek_ahead_timers -- run soft-expired timers now + * + * hrtimer_peek_ahead_timers will peek at the timer queue of + * the current cpu and check if there are any timers for which + * the soft expires time has passed. If any such timers exist, + * they are run immediately and then removed from the timer queue. + * Does nothing unless high resolution timers are active. + */ +void hrtimer_peek_ahead_timers(void) +{ + unsigned long flags; + struct tick_device *td; + struct clock_event_device *dev; + + if (!hrtimer_hres_active()) + return; + + local_irq_save(flags); + td = &__get_cpu_var(tick_cpu_device); + if (!td) + goto out; + dev = td->evtdev; + if (!dev) + goto out; + hrtimer_interrupt(dev); +out: + local_irq_restore(flags); +} + static void run_hrtimer_softirq(struct softirq_action *h) { run_hrtimer_pending(&__get_cpu_var(hrtimer_bases)); From 80a4b18d19bf1f7b88a261088c00a0d6b310a722 Mon Sep 17 00:00:00 2001 From: Arjan van de Ven Date: Mon, 6 Oct 2008 13:01:53 -0700 Subject: [PATCH 4/5] select: fix alpha OSF wrapper ...
alpha calls the core select code from inside its architecture code for emulating OSF; this patch makes it compile again Signed-off-by: Arjan van de Ven --- arch/alpha/kernel/osf_sys.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/arch/alpha/kernel/osf_sys.c b/arch/alpha/kernel/osf_sys.c index 8509dad31204..8e19acbf2886 100644 --- a/arch/alpha/kernel/osf_sys.c +++ b/arch/alpha/kernel/osf_sys.c @@ -986,10 +986,12 @@ asmlinkage int osf_select(int n, fd_set __user *inp, fd_set __user *outp, fd_set __user *exp, struct timeval32 __user *tvp) { - s64 timeout = MAX_SCHEDULE_TIMEOUT; + struct timespec end_time, *to = NULL; if (tvp) { time_t sec, usec; + to = &end_time; + if (!access_ok(VERIFY_READ, tvp, sizeof(*tvp)) || __get_user(sec, &tvp->tv_sec) || __get_user(usec, &tvp->tv_usec)) { @@ -999,14 +1001,13 @@ osf_select(int n, fd_set __user *inp, fd_set __user *outp, fd_set __user *exp, if (sec < 0 || usec < 0) return -EINVAL; - if ((unsigned long) sec < MAX_SELECT_SECONDS) { - timeout = (usec + 1000000/HZ - 1) / (1000000/HZ); - timeout += sec * (unsigned long) HZ; - } + if (poll_select_set_timeout(to, sec, usec * NSEC_PER_USEC)) + return -EINVAL; + } /* OSF does not copy back the remaining time. */ - return core_sys_select(n, inp, outp, exp, &timeout); + return core_sys_select(n, inp, outp, exp, to); } struct rusage32 { From 2075eb8d95612cadde91ef5be82691d97a2ea6c5 Mon Sep 17 00:00:00 2001 From: Arjan van de Ven Date: Tue, 7 Oct 2008 10:57:54 -0700 Subject: [PATCH 5/5] rangetimer: fix x86 build failure for the !HRTIMERS case the timer peek function was on the wrong side of an ifdef, breaking the build for the !HRTIMERS case. Just provide an empty inline for that case since it doesn't make sense in that scenario.
Signed-off-by: Arjan van de Ven --- include/linux/hrtimer.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h index d93b1e1dc169..508ce20b8f9c 100644 --- a/include/linux/hrtimer.h +++ b/include/linux/hrtimer.h @@ -283,6 +283,8 @@ static inline int hrtimer_is_hres_active(struct hrtimer *timer) return timer->base->cpu_base->hres_active; } +extern void hrtimer_peek_ahead_timers(void); + /* * The resolution of the clocks. The resolution value is returned in * the clock_getres() system call to give application programmers an @@ -305,6 +307,7 @@ static inline int hrtimer_is_hres_active(struct hrtimer *timer) * is expired in the next softirq when the clock was advanced. */ static inline void clock_was_set(void) { } +static inline void hrtimer_peek_ahead_timers(void) { } static inline void hres_timers_resume(void) { } @@ -328,7 +331,6 @@ extern ktime_t ktime_get_real(void); DECLARE_PER_CPU(struct tick_device, tick_cpu_device); -extern void hrtimer_peek_ahead_timers(void); /* Exported timer functions: */