From 724a371396e8162cc883a40cd8e525dfc7e5f3ff Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker
Date: Thu, 10 Oct 2013 16:55:57 +0200
Subject: [PATCH 01/10] posix-timers: Remove dead thread posix cpu timers caching

When a task is exiting or has exited, its posix cpu timers don't tick
anymore and won't elapse further, so it's too late for them to expire.
Any further call to timer_gettime() on these timers will therefore
return the same remaining expiry time.

The current code optimizes this by caching the remaining delta and
storing it where we used to save the absolute expiration time. This
way, future calls to timer_gettime() don't need to compute the
difference between the absolute expiration time and the current time
anymore.

Now this optimization doesn't seem to bring much value. Computing the
remaining timer delta is not very costly. Fetching the timer value OTOH
can be costly in two ways:

* A CPUCLOCK_SCHED read requires locking the target's rq. But some
  optimizations are on the way to make task_sched_runtime() not hold
  the rq lock of a non-running target.

* A CPUCLOCK_VIRT/CPUCLOCK_PROF read simply consists in fetching
  current->utime/current->stime, except when the system uses full
  dynticks cputime accounting. The latter requires a per task lock in
  order to correctly compute user and system time. But once the target
  is dead, this lock shouldn't be contended anyway.

All in all this caching doesn't seem to be justified. Given that it
complicates the code significantly for few wins, let's remove it on
single thread timers.

Signed-off-by: Frederic Weisbecker
Cc: Thomas Gleixner
Cc: Ingo Molnar
Cc: Peter Zijlstra
Cc: Oleg Nesterov
Cc: Kosaki Motohiro
Cc: Andrew Morton
---
 kernel/posix-cpu-timers.c | 34 ++++++++++++++++------------------
 1 file changed, 16 insertions(+), 18 deletions(-)

diff --git a/kernel/posix-cpu-timers.c b/kernel/posix-cpu-timers.c
index 79747b7d94205..3b7df86539138 100644
--- a/kernel/posix-cpu-timers.c
+++ b/kernel/posix-cpu-timers.c
@@ -788,7 +788,6 @@ static void posix_cpu_timer_get(struct k_itimer *timer, struct itimerspec *itp)
 {
 	unsigned long long now;
 	struct task_struct *p = timer->it.cpu.task;
-	int clear_dead;
 
 	/*
 	 * Easy part: convert the reload time.
@@ -802,6 +801,7 @@ static void posix_cpu_timer_get(struct k_itimer *timer, struct itimerspec *itp)
 	}
 
 	if (unlikely(p == NULL)) {
+		WARN_ON_ONCE(CPUCLOCK_PERTHREAD(timer->it_clock));
 		/*
 		 * This task already died and the timer will never fire.
 		 * In this case, expires is actually the dead value.
@@ -817,7 +817,6 @@ static void posix_cpu_timer_get(struct k_itimer *timer, struct itimerspec *itp)
 	 */
 	if (CPUCLOCK_PERTHREAD(timer->it_clock)) {
 		cpu_clock_sample(timer->it_clock, p, &now);
-		clear_dead = p->exit_state;
 	} else {
 		read_lock(&tasklist_lock);
 		if (unlikely(p->sighand == NULL)) {
@@ -833,22 +832,20 @@ static void posix_cpu_timer_get(struct k_itimer *timer, struct itimerspec *itp)
 			goto dead;
 		} else {
 			cpu_timer_sample_group(timer->it_clock, p, &now);
-			clear_dead = (unlikely(p->exit_state) &&
-				      thread_group_empty(p));
+			if (unlikely(p->exit_state) && thread_group_empty(p)) {
+				read_unlock(&tasklist_lock);
+				/*
+				 * We've noticed that the thread is dead, but
+				 * not yet reaped. Take this opportunity to
+				 * drop our task ref.
+				 */
+				clear_dead_task(timer, now);
+				goto dead;
+			}
 		}
 		read_unlock(&tasklist_lock);
 	}
 
-	if (unlikely(clear_dead)) {
-		/*
-		 * We've noticed that the thread is dead, but
-		 * not yet reaped. Take this opportunity to
-		 * drop our task ref.
-		 */
-		clear_dead_task(timer, now);
-		goto dead;
-	}
-
 	if (now < timer->it.cpu.expires) {
 		sample_to_timespec(timer->it_clock,
 				   timer->it.cpu.expires - now,
@@ -1063,11 +1060,13 @@ void posix_cpu_timer_schedule(struct k_itimer *timer)
 	struct task_struct *p = timer->it.cpu.task;
 	unsigned long long now;
 
-	if (unlikely(p == NULL))
+	if (unlikely(p == NULL)) {
+		WARN_ON_ONCE(CPUCLOCK_PERTHREAD(timer->it_clock));
 		/*
 		 * The task was cleaned up already, no future firings.
 		 */
 		goto out;
+	}
 
 	/*
 	 * Fetch the current sample and update the timer's expiry time.
@@ -1075,10 +1074,9 @@ void posix_cpu_timer_schedule(struct k_itimer *timer)
 	if (CPUCLOCK_PERTHREAD(timer->it_clock)) {
 		cpu_clock_sample(timer->it_clock, p, &now);
 		bump_cpu_timer(timer, now);
-		if (unlikely(p->exit_state)) {
-			clear_dead_task(timer, now);
+		if (unlikely(p->exit_state))
 			goto out;
-		}
+
 		read_lock(&tasklist_lock); /* arm_timer needs it. */
 		spin_lock(&p->sighand->siglock);
 	} else {

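[Illustration, not part of the series: a minimal userspace sketch of the behaviour described in the changelog above. Once the target thread has exited, its CPU clock stops, so repeated timer_gettime() calls on a timer armed against that clock should keep reporting the same remaining time. The worker body, durations and the lack of error handling are invented for the example; build with something like "gcc demo.c -lrt -lpthread".]

#include <pthread.h>
#include <signal.h>
#include <stdio.h>
#include <time.h>
#include <unistd.h>

static void *worker(void *arg)
{
	volatile unsigned long i;

	for (i = 0; i < 100000000UL; i++)	/* burn a bit of CPU time */
		;
	return NULL;
}

int main(void)
{
	pthread_t th;
	clockid_t clk;
	timer_t tid;
	struct sigevent sev = { .sigev_notify = SIGEV_NONE };
	struct itimerspec its = { .it_value = { .tv_sec = 10 } };
	struct itimerspec r1, r2;

	pthread_create(&th, NULL, worker, NULL);
	pthread_getcpuclockid(th, &clk);	/* the worker's CPU-time clock */
	timer_create(clk, &sev, &tid);		/* timer elapses on that clock */
	timer_settime(tid, 0, &its, NULL);

	pthread_join(th, NULL);			/* worker is gone: its clock stops */

	timer_gettime(tid, &r1);
	sleep(1);
	timer_gettime(tid, &r2);		/* expected: same value as r1 */
	printf("%lld.%09ld vs %lld.%09ld\n",
	       (long long)r1.it_value.tv_sec, r1.it_value.tv_nsec,
	       (long long)r2.it_value.tv_sec, r2.it_value.tv_nsec);

	timer_delete(tid);
	return 0;
}
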
From d430b9173a9a50a83e10d1c70baead3e625b522f Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker
Date: Thu, 10 Oct 2013 16:55:57 +0200
Subject: [PATCH 02/10] posix-timers: Remove dead process posix cpu timers caching

Now that we have removed the dead thread posix cpu timers caching,
let's remove the dead process wide version. This caching is similar to
the per thread version, but it should be hit even more rarely:

* If the process is dead, we are not reading its timers status from a
  thread belonging to its group since they are all dead. So this
  caching only concerns remote process timer reads. Now posix cpu
  timers using itimers or timer_settime() can't target remote processes
  anyway, so it's not even clear if there is actually a user for this
  caching.

* Unlike the per thread timers caching, this only applies to zombie
  targets. Buried targets' process wide timers return 0 values. But
  then again, timer_gettime() can't read remote process timers, so if
  the process is dead, there can't be any reader left anyway.

In the end this caching only complicates the code for corner cases that
are probably not worth it. So let's get rid of it.

Also remove the sample snapshot on the dying process timer that is now
useless, as suggested by Kosaki.

Signed-off-by: Frederic Weisbecker
Cc: Thomas Gleixner
Cc: Ingo Molnar
Cc: Peter Zijlstra
Cc: Oleg Nesterov
Cc: Kosaki Motohiro
Cc: Andrew Morton
---
 kernel/posix-cpu-timers.c | 35 +----------------------------------
 1 file changed, 1 insertion(+), 34 deletions(-)

diff --git a/kernel/posix-cpu-timers.c b/kernel/posix-cpu-timers.c
index 3b7df86539138..c5d1ef5302682 100644
--- a/kernel/posix-cpu-timers.c
+++ b/kernel/posix-cpu-timers.c
@@ -453,23 +453,6 @@ void posix_cpu_timers_exit_group(struct task_struct *tsk)
 			   tsk->se.sum_exec_runtime + sig->sum_sched_runtime);
 }
 
-static void clear_dead_task(struct k_itimer *itimer, unsigned long long now)
-{
-	struct cpu_timer_list *timer = &itimer->it.cpu;
-
-	/*
-	 * That's all for this thread or process.
-	 * We leave our residual in expires to be reported.
-	 */
-	put_task_struct(timer->task);
-	timer->task = NULL;
-	if (timer->expires < now) {
-		timer->expires = 0;
-	} else {
-		timer->expires -= now;
-	}
-}
-
 static inline int expires_gt(cputime_t expires, cputime_t new_exp)
 {
 	return expires == 0 || expires > new_exp;
@@ -832,16 +815,6 @@ static void posix_cpu_timer_get(struct k_itimer *timer, struct itimerspec *itp)
 			goto dead;
 		} else {
 			cpu_timer_sample_group(timer->it_clock, p, &now);
-			if (unlikely(p->exit_state) && thread_group_empty(p)) {
-				read_unlock(&tasklist_lock);
-				/*
-				 * We've noticed that the thread is dead, but
-				 * not yet reaped. Take this opportunity to
-				 * drop our task ref.
-				 */
-				clear_dead_task(timer, now);
-				goto dead;
-			}
 		}
 		read_unlock(&tasklist_lock);
 	}
@@ -1092,14 +1065,8 @@ void posix_cpu_timer_schedule(struct k_itimer *timer)
 			read_unlock(&tasklist_lock);
 			goto out;
 		} else if (unlikely(p->exit_state) && thread_group_empty(p)) {
-			/*
-			 * We've noticed that the thread is dead, but
-			 * not yet reaped. Take this opportunity to
-			 * drop our task ref.
-			 */
-			cpu_timer_sample_group(timer->it_clock, p, &now);
-			clear_dead_task(timer, now);
 			read_unlock(&tasklist_lock);
+			/* Optimizations: if the process is dying, no need to rearm */
 			goto out;
 		}
 		spin_lock(&p->sighand->siglock);

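[Illustration, not part of the series: stripped of the locking and refcounting, the caching removed by the two patches above amounted to freezing the timer's absolute expiry into a "remaining time" once the target died, roughly:]

/* Paraphrase of the removed clear_dead_task() caching, not kernel code. */
static unsigned long long cache_remaining_delta(unsigned long long expires,
						unsigned long long now)
{
	return (expires < now) ? 0 : expires - now;
}

[Later timer_gettime() calls could then return that frozen delta directly instead of sampling the dead task's clock again.]
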
From e26d70d271ee1a68a925796b411cb0239394c7a1 Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker
Date: Fri, 11 Oct 2013 00:27:19 +0200
Subject: [PATCH 03/10] posix-timers: Cleanup reaped target handling

When a timer's target is seen to be buried, for example on calls to
timer_gettime(), the posix cpu timers code behaves a bit like a garbage
collector and releases the reference to the task early.

Then again, this optimization complicates the code for not much value:
it's up to the user to release the timer and its associated resources
by calling timer_delete() after the target task has been buried.

Remove this to simplify the code.

Signed-off-by: Frederic Weisbecker
Cc: Thomas Gleixner
Cc: Ingo Molnar
Cc: Peter Zijlstra
Cc: Oleg Nesterov
Cc: Kosaki Motohiro
Cc: Andrew Morton
---
 kernel/posix-cpu-timers.c | 6 ------
 1 file changed, 6 deletions(-)

diff --git a/kernel/posix-cpu-timers.c b/kernel/posix-cpu-timers.c
index c5d1ef5302682..dc4355b967dbd 100644
--- a/kernel/posix-cpu-timers.c
+++ b/kernel/posix-cpu-timers.c
@@ -639,8 +639,6 @@ static int posix_cpu_timer_set(struct k_itimer *timer, int flags,
 	 */
 	if (unlikely(p->sighand == NULL)) {
 		read_unlock(&tasklist_lock);
-		put_task_struct(p);
-		timer->it.cpu.task = NULL;
 		return -ESRCH;
 	}
 
@@ -808,8 +806,6 @@ static void posix_cpu_timer_get(struct k_itimer *timer, struct itimerspec *itp)
 			 * We can't even collect a sample any more.
 			 * Call the timer disarmed, nothing else to do.
 			 */
-			put_task_struct(p);
-			timer->it.cpu.task = NULL;
 			timer->it.cpu.expires = 0;
 			read_unlock(&tasklist_lock);
 			goto dead;
@@ -1059,8 +1055,6 @@ void posix_cpu_timer_schedule(struct k_itimer *timer)
 			 * The process has been reaped.
 			 * We can't even collect a sample any more.
 			 */
-			put_task_struct(p);
-			timer->it.cpu.task = p = NULL;
 			timer->it.cpu.expires = 0;
 			read_unlock(&tasklist_lock);
 			goto out;

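[Illustration, not part of the patch: from userspace, the resource lifecycle the changelog refers to stays the usual create/arm/delete sequence — the timer keeps its (possibly dead) target pinned until timer_delete() is called. The function name and values below are invented for the example; error handling is omitted.]

#include <signal.h>
#include <time.h>

int run_oneshot(clockid_t target_clock)
{
	timer_t tid;
	struct sigevent sev = { .sigev_notify = SIGEV_NONE };
	struct itimerspec its = { .it_value = { .tv_sec = 1 } };

	timer_create(target_clock, &sev, &tid);	/* kernel pins the target task */
	timer_settime(tid, 0, &its, NULL);	/* arm it */
	/* ... timer_gettime()/timer_settime() remain usable here,
	 * even after the target task has died ... */
	return timer_delete(tid);		/* releases the timer and its target */
}
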
From a3222f88fa4f2ebec4632aef527dd2c9a41b997d Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker
Date: Fri, 11 Oct 2013 00:37:39 +0200
Subject: [PATCH 04/10] posix-timers: Remove dead task special case

Now that we've removed all the optimizations that could result in NULL
timer targets, we can remove all the associated special case handling.

Also add some warnings on NULL targets to spot any possible leftovers.

Signed-off-by: Frederic Weisbecker
Cc: Thomas Gleixner
Cc: Ingo Molnar
Cc: Peter Zijlstra
Cc: Oleg Nesterov
Cc: Kosaki Motohiro
Cc: Andrew Morton
---
 kernel/posix-cpu-timers.c | 70 ++++++++++++++-------------------------
 1 file changed, 25 insertions(+), 45 deletions(-)

diff --git a/kernel/posix-cpu-timers.c b/kernel/posix-cpu-timers.c
index dc4355b967dbd..ab9911b54fafe 100644
--- a/kernel/posix-cpu-timers.c
+++ b/kernel/posix-cpu-timers.c
@@ -374,27 +374,27 @@ static int posix_cpu_timer_del(struct k_itimer *timer)
 	struct task_struct *p = timer->it.cpu.task;
 	int ret = 0;
 
-	if (likely(p != NULL)) {
-		read_lock(&tasklist_lock);
-		if (unlikely(p->sighand == NULL)) {
-			/*
-			 * We raced with the reaping of the task.
-			 * The deletion should have cleared us off the list.
-			 */
-			BUG_ON(!list_empty(&timer->it.cpu.entry));
-		} else {
-			spin_lock(&p->sighand->siglock);
-			if (timer->it.cpu.firing)
-				ret = TIMER_RETRY;
-			else
-				list_del(&timer->it.cpu.entry);
-			spin_unlock(&p->sighand->siglock);
-		}
-		read_unlock(&tasklist_lock);
+	WARN_ON_ONCE(p == NULL);
 
-		if (!ret)
-			put_task_struct(p);
+	read_lock(&tasklist_lock);
+	if (unlikely(p->sighand == NULL)) {
+		/*
+		 * We raced with the reaping of the task.
+		 * The deletion should have cleared us off the list.
+		 */
+		BUG_ON(!list_empty(&timer->it.cpu.entry));
+	} else {
+		spin_lock(&p->sighand->siglock);
+		if (timer->it.cpu.firing)
+			ret = TIMER_RETRY;
+		else
+			list_del(&timer->it.cpu.entry);
+		spin_unlock(&p->sighand->siglock);
 	}
+	read_unlock(&tasklist_lock);
+
+	if (!ret)
+		put_task_struct(p);
 
 	return ret;
 }
@@ -622,12 +622,7 @@ static int posix_cpu_timer_set(struct k_itimer *timer, int flags,
 	unsigned long long old_expires, new_expires, old_incr, val;
 	int ret;
 
-	if (unlikely(p == NULL)) {
-		/*
-		 * Timer refers to a dead task's clock.
-		 */
-		return -ESRCH;
-	}
+	WARN_ON_ONCE(p == NULL);
 
 	new_expires = timespec_to_sample(timer->it_clock, &new->it_value);
 
@@ -770,6 +765,8 @@ static void posix_cpu_timer_get(struct k_itimer *timer, struct itimerspec *itp)
 	unsigned long long now;
 	struct task_struct *p = timer->it.cpu.task;
 
+	WARN_ON_ONCE(p == NULL);
+
 	/*
 	 * Easy part: convert the reload time.
 	 */
@@ -781,18 +778,6 @@ static void posix_cpu_timer_get(struct k_itimer *timer, struct itimerspec *itp)
 		return;
 	}
 
-	if (unlikely(p == NULL)) {
-		WARN_ON_ONCE(CPUCLOCK_PERTHREAD(timer->it_clock));
-		/*
-		 * This task already died and the timer will never fire.
-		 * In this case, expires is actually the dead value.
-		 */
-	dead:
-		sample_to_timespec(timer->it_clock, timer->it.cpu.expires,
-				   &itp->it_value);
-		return;
-	}
-
 	/*
 	 * Sample the clock to take the difference with the expiry time.
 	 */
@@ -807,8 +792,9 @@ static void posix_cpu_timer_get(struct k_itimer *timer, struct itimerspec *itp)
 			 * Call the timer disarmed, nothing else to do.
 			 */
 			timer->it.cpu.expires = 0;
+			sample_to_timespec(timer->it_clock, timer->it.cpu.expires,
+					   &itp->it_value);
 			read_unlock(&tasklist_lock);
-			goto dead;
 		} else {
 			cpu_timer_sample_group(timer->it_clock, p, &now);
 		}
@@ -1029,13 +1015,7 @@ void posix_cpu_timer_schedule(struct k_itimer *timer)
 	struct task_struct *p = timer->it.cpu.task;
 	unsigned long long now;
 
-	if (unlikely(p == NULL)) {
-		WARN_ON_ONCE(CPUCLOCK_PERTHREAD(timer->it_clock));
-		/*
-		 * The task was cleaned up already, no future firings.
-		 */
-		goto out;
-	}
+	WARN_ON_ONCE(p == NULL);
 
 	/*
 	 * Fetch the current sample and update the timer's expiry time.

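[Illustration, not part of the series: the WARN_ON_ONCE() calls added above document an invariant rather than guard a code path. In kernel code WARN_ON_ONCE() evaluates to the tested condition, so the same idiom can also be combined with an early return when a caller still wants to bail out. The function below is a generic, made-up sketch of that pattern, not code from this series.]

/* Generic sketch: warn once about a broken invariant, but fail gracefully. */
static int sample_target(struct task_struct *p)
{
	if (WARN_ON_ONCE(p == NULL))
		return -EINVAL;

	/* ... normal handling of a valid target ... */
	return 0;
}
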
From af82eb3c3068877a6b1989796a06b846b1e9e1c3 Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker
Date: Fri, 11 Oct 2013 16:11:43 +0200
Subject: [PATCH 05/10] posix-timers: Remove useless clock sample on timers cleanup

Commit a0b2062b0904ef07944c4a6e4d0f88ee44f1e9f2 ("posix_timers: fix
racy timer delta caching on task exit") forgot to remove the arguments
used for timer caching.

Fix this leftover.

Signed-off-by: Frederic Weisbecker
Cc: Thomas Gleixner
Cc: Ingo Molnar
Cc: Peter Zijlstra
Cc: Oleg Nesterov
Cc: Kosaki Motohiro
Cc: Andrew Morton
---
 kernel/posix-cpu-timers.c | 30 +++++++-----------------------
 1 file changed, 7 insertions(+), 23 deletions(-)

diff --git a/kernel/posix-cpu-timers.c b/kernel/posix-cpu-timers.c
index ab9911b54fafe..e6389f915bcb3 100644
--- a/kernel/posix-cpu-timers.c
+++ b/kernel/posix-cpu-timers.c
@@ -399,8 +399,7 @@ static int posix_cpu_timer_del(struct k_itimer *timer)
 	return ret;
 }
 
-static void cleanup_timers_list(struct list_head *head,
-				unsigned long long curr)
+static void cleanup_timers_list(struct list_head *head)
 {
 	struct cpu_timer_list *timer, *next;
 
@@ -414,16 +413,11 @@ static void cleanup_timers_list(struct list_head *head,
  * time for later timer_gettime calls to return.
  * This must be called with the siglock held.
  */
-static void cleanup_timers(struct list_head *head,
-			   cputime_t utime, cputime_t stime,
-			   unsigned long long sum_exec_runtime)
+static void cleanup_timers(struct list_head *head)
 {
-
-	cputime_t ptime = utime + stime;
-
-	cleanup_timers_list(head, cputime_to_expires(ptime));
-	cleanup_timers_list(++head, cputime_to_expires(utime));
-	cleanup_timers_list(++head, sum_exec_runtime);
+	cleanup_timers_list(head);
+	cleanup_timers_list(++head);
+	cleanup_timers_list(++head);
 }
 
 /*
@@ -433,24 +427,14 @@ static void cleanup_timers(struct list_head *head,
  */
 void posix_cpu_timers_exit(struct task_struct *tsk)
 {
-	cputime_t utime, stime;
-
 	add_device_randomness((const void*) &tsk->se.sum_exec_runtime,
 						sizeof(unsigned long long));
-	task_cputime(tsk, &utime, &stime);
-	cleanup_timers(tsk->cpu_timers,
-		       utime, stime, tsk->se.sum_exec_runtime);
+	cleanup_timers(tsk->cpu_timers);
 }
 
 void posix_cpu_timers_exit_group(struct task_struct *tsk)
 {
-	struct signal_struct *const sig = tsk->signal;
-	cputime_t utime, stime;
-
-	task_cputime(tsk, &utime, &stime);
-	cleanup_timers(tsk->signal->cpu_timers,
-		       utime + sig->utime, stime + sig->stime,
-		       tsk->se.sum_exec_runtime + sig->sum_sched_runtime);
+	cleanup_timers(tsk->signal->cpu_timers);
 }
 
 static inline int expires_gt(cputime_t expires, cputime_t new_exp)

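[Illustration, not part of the patch: the pointer-bumping in cleanup_timers() above walks the three per-clock lists of the cpu_timers[] array. Assuming the usual CPUCLOCK_PROF/CPUCLOCK_VIRT/CPUCLOCK_SCHED indices (0, 1 and 2), an equivalent spelled-out form would be:]

static void cleanup_timers(struct list_head *head)
{
	/* head points at a 3-entry array, one timer list per clock type */
	cleanup_timers_list(&head[CPUCLOCK_PROF]);	/* 0: utime + stime */
	cleanup_timers_list(&head[CPUCLOCK_VIRT]);	/* 1: utime only */
	cleanup_timers_list(&head[CPUCLOCK_SCHED]);	/* 2: sum_exec_runtime */
}
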
From 33ab0fec33527e8b5ab124cff6aefd4746508e04 Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker
Date: Fri, 11 Oct 2013 17:41:11 +0200
Subject: [PATCH 06/10] posix-timers: Consolidate posix_cpu_clock_get()

Consolidate the common clock sampling code used for both local and
remote targets.

Note that this introduces a tiny user ABI change: if a PID is passed to
clock_gettime() along with the clockid, we used to forbid a process
wide clock sample when that PID didn't belong to a group leader. After
this patch we allow a process wide clock sample if that PID belongs to
the current task, even when the current task is not the group leader.

But local process wide clock samples were already allowed with PID == 0
(current task) even if the current task is not the group leader. So in
the end this should be no big deal as it actually harmonizes the
behaviour when the remote sample is actually a local one.

Signed-off-by: Frederic Weisbecker
Cc: Thomas Gleixner
Cc: Ingo Molnar
Cc: Peter Zijlstra
Cc: Oleg Nesterov
Cc: Kosaki Motohiro
Cc: Andrew Morton
---
 kernel/posix-cpu-timers.c | 64 ++++++++++++++++++---------------------
 1 file changed, 30 insertions(+), 34 deletions(-)

diff --git a/kernel/posix-cpu-timers.c b/kernel/posix-cpu-timers.c
index e6389f915bcb3..03c5d6c3e614d 100644
--- a/kernel/posix-cpu-timers.c
+++ b/kernel/posix-cpu-timers.c
@@ -260,30 +260,43 @@ static int cpu_clock_sample_group(const clockid_t which_clock,
 	return 0;
 }
 
+static int posix_cpu_clock_get_task(struct task_struct *tsk,
+				    const clockid_t which_clock,
+				    struct timespec *tp)
+{
+	int err = -EINVAL;
+	unsigned long long rtn;
+
+	if (CPUCLOCK_PERTHREAD(which_clock)) {
+		if (same_thread_group(tsk, current))
+			err = cpu_clock_sample(which_clock, tsk, &rtn);
+	} else {
+		read_lock(&tasklist_lock);
+
+		if (tsk->sighand && (tsk == current || thread_group_leader(tsk)))
+			err = cpu_clock_sample_group(which_clock, tsk, &rtn);
+
+		read_unlock(&tasklist_lock);
+	}
+
+	if (!err)
+		sample_to_timespec(which_clock, rtn, tp);
+
+	return err;
+}
+
 static int posix_cpu_clock_get(const clockid_t which_clock, struct timespec *tp)
 {
 	const pid_t pid = CPUCLOCK_PID(which_clock);
-	int error = -EINVAL;
-	unsigned long long rtn;
+	int err = -EINVAL;
 
 	if (pid == 0) {
 		/*
 		 * Special case constant value for our own clocks.
 		 * We don't have to do any lookup to find ourselves.
 		 */
-		if (CPUCLOCK_PERTHREAD(which_clock)) {
-			/*
-			 * Sampling just ourselves we can do with no locking.
-			 */
-			error = cpu_clock_sample(which_clock,
-						 current, &rtn);
-		} else {
-			read_lock(&tasklist_lock);
-			error = cpu_clock_sample_group(which_clock,
-						       current, &rtn);
-			read_unlock(&tasklist_lock);
-		}
+		err = posix_cpu_clock_get_task(current, which_clock, tp);
 	} else {
 		/*
 		 * Find the given PID, and validate that the caller
@@ -292,29 +305,12 @@ static int posix_cpu_clock_get(const clockid_t which_clock, struct timespec *tp)
 		struct task_struct *p;
 		rcu_read_lock();
 		p = find_task_by_vpid(pid);
-		if (p) {
-			if (CPUCLOCK_PERTHREAD(which_clock)) {
-				if (same_thread_group(p, current)) {
-					error = cpu_clock_sample(which_clock,
-								 p, &rtn);
-				}
-			} else {
-				read_lock(&tasklist_lock);
-				if (thread_group_leader(p) && p->sighand) {
-					error =
-					    cpu_clock_sample_group(which_clock,
-								   p, &rtn);
-				}
-				read_unlock(&tasklist_lock);
-			}
-		}
+		if (p)
+			err = posix_cpu_clock_get_task(p, which_clock, tp);
 		rcu_read_unlock();
 	}
 
-	if (error)
-		return error;
-	sample_to_timespec(which_clock, rtn, tp);
-	return 0;
+	return err;
 }

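[Illustration, not from the patch: in userspace terms, the consolidated path above is what serves dynamic CPU clockids. The sketch below shows the kind of call involved, with error handling omitted; whether the corner case mentioned in the changelog (a clockid derived from a non-group-leader thread of the calling process) succeeds is exactly what changes with this patch.]

#include <stdio.h>
#include <time.h>
#include <unistd.h>

int sample_process_cpu_clock(pid_t pid)
{
	clockid_t clk;
	struct timespec ts;

	if (clock_getcpuclockid(pid, &clk))	/* pid == 0 means "myself" */
		return -1;
	if (clock_gettime(clk, &ts))
		return -1;
	printf("process CPU time: %lld.%09ld\n",
	       (long long)ts.tv_sec, ts.tv_nsec);
	return 0;
}
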
From 50875788a1d4a3f662a27ed13cd05282d835939a Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker
Date: Fri, 11 Oct 2013 17:41:11 +0200
Subject: [PATCH 07/10] posix-timers: Use sighand lock instead of tasklist_lock for task clock sample

There is no need for the tasklist_lock just to take a process wide
clock sample.

All we need is to get a coherent sample that doesn't race with exit()
and exec():

* exit() may be concurrently reaping a task and flushing its time

* sighand is unstable under exit() and exec(), and the latter also
  results in a group leader that can change

To protect against these, locking the target's sighand is enough.

Signed-off-by: Frederic Weisbecker
Cc: Thomas Gleixner
Cc: Ingo Molnar
Cc: Peter Zijlstra
Cc: Oleg Nesterov
Cc: Kosaki Motohiro
Cc: Andrew Morton
---
 kernel/posix-cpu-timers.c | 16 +++++++++++++---
 1 file changed, 13 insertions(+), 3 deletions(-)

diff --git a/kernel/posix-cpu-timers.c b/kernel/posix-cpu-timers.c
index 03c5d6c3e614d..71a07699a36bd 100644
--- a/kernel/posix-cpu-timers.c
+++ b/kernel/posix-cpu-timers.c
@@ -271,12 +271,22 @@ static int posix_cpu_clock_get_task(struct task_struct *tsk,
 		if (same_thread_group(tsk, current))
 			err = cpu_clock_sample(which_clock, tsk, &rtn);
 	} else {
-		read_lock(&tasklist_lock);
+		unsigned long flags;
+		struct sighand_struct *sighand;
 
-		if (tsk->sighand && (tsk == current || thread_group_leader(tsk)))
+		/*
+		 * while_each_thread() is not yet entirely RCU safe,
+		 * keep locking the group while sampling process
+		 * clock for now.
+		 */
+		sighand = lock_task_sighand(tsk, &flags);
+		if (!sighand)
+			return err;
+
+		if (tsk == current || thread_group_leader(tsk))
 			err = cpu_clock_sample_group(which_clock, tsk, &rtn);
 
-		read_unlock(&tasklist_lock);
+		unlock_task_sighand(tsk, &flags);
 	}

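[Illustration, not part of the series: the locking pattern this and the following patches converge on is the standard lock_task_sighand()/unlock_task_sighand() pair, which both pins the sighand against exit()/exec() and reports a reaped target via a NULL return. The wrapper name below is made up.]

static int with_sighand_locked(struct task_struct *p)
{
	struct sighand_struct *sighand;
	unsigned long flags;

	sighand = lock_task_sighand(p, &flags);	/* NULL if p was reaped */
	if (!sighand)
		return -ESRCH;

	/* ... safe here: sample group time, touch the timer lists ... */

	unlock_task_sighand(p, &flags);
	return 0;
}
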
From 3d7a1427e4ce545e949e9bccb75d0ca8d941d93c Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker
Date: Fri, 11 Oct 2013 17:41:11 +0200
Subject: [PATCH 08/10] posix-timers: Use sighand lock instead of tasklist_lock on timer deletion

Timer deletion doesn't need the tasklist lock. We need to protect
against:

* concurrent access to the lists p->cputime_expires and
  p->sighand->cputime_expires

* task reaping that may also delete the timer list entry

* timer firing

We already hold the timer lock, which protects us against concurrent
timer firing. The rest only needs the target's sighand to be locked.
So hold it and drop the use of tasklist_lock there.

Signed-off-by: Frederic Weisbecker
Cc: Thomas Gleixner
Cc: Ingo Molnar
Cc: Peter Zijlstra
Cc: Oleg Nesterov
Cc: Kosaki Motohiro
Cc: Andrew Morton
---
 kernel/posix-cpu-timers.c | 17 +++++++++++------
 1 file changed, 11 insertions(+), 6 deletions(-)

diff --git a/kernel/posix-cpu-timers.c b/kernel/posix-cpu-timers.c
index 71a07699a36bd..9641958ddb3ee 100644
--- a/kernel/posix-cpu-timers.c
+++ b/kernel/posix-cpu-timers.c
@@ -377,27 +377,32 @@ static int posix_cpu_timer_create(struct k_itimer *new_timer)
  */
 static int posix_cpu_timer_del(struct k_itimer *timer)
 {
-	struct task_struct *p = timer->it.cpu.task;
 	int ret = 0;
+	unsigned long flags;
+	struct sighand_struct *sighand;
+	struct task_struct *p = timer->it.cpu.task;
 
 	WARN_ON_ONCE(p == NULL);
 
-	read_lock(&tasklist_lock);
-	if (unlikely(p->sighand == NULL)) {
+	/*
+	 * Protect against sighand release/switch in exit/exec and process/
+	 * thread timer list entry concurrent read/writes.
+	 */
+	sighand = lock_task_sighand(p, &flags);
+	if (unlikely(sighand == NULL)) {
 		/*
 		 * We raced with the reaping of the task.
 		 * The deletion should have cleared us off the list.
 		 */
 		BUG_ON(!list_empty(&timer->it.cpu.entry));
 	} else {
-		spin_lock(&p->sighand->siglock);
 		if (timer->it.cpu.firing)
			ret = TIMER_RETRY;
 		else
 			list_del(&timer->it.cpu.entry);
-		spin_unlock(&p->sighand->siglock);
+
+		unlock_task_sighand(p, &flags);
 	}
-	read_unlock(&tasklist_lock);
 
 	if (!ret)
 		put_task_struct(p);

From e73d84e33f15c099ed1df60437700093cb14e46e Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker
Date: Fri, 11 Oct 2013 18:56:49 +0200
Subject: [PATCH 09/10] posix-timers: Remove remaining uses of tasklist_lock

The remaining uses of tasklist_lock were mostly about synchronizing
against sighand modifications, getting coherent and safe group samples,
and thread/process wide timer list handling. All of this is already
safely synchronizable with the target's sighand lock. Let's use it in
these places instead.

Also update the comments about locking.

Signed-off-by: Frederic Weisbecker
Cc: Thomas Gleixner
Cc: Ingo Molnar
Cc: Peter Zijlstra
Cc: Oleg Nesterov
Cc: Kosaki Motohiro
Cc: Andrew Morton
---
 kernel/posix-cpu-timers.c | 76 ++++++++++++++++++++++-----------------
 1 file changed, 44 insertions(+), 32 deletions(-)

diff --git a/kernel/posix-cpu-timers.c b/kernel/posix-cpu-timers.c
index 9641958ddb3ee..d9dc5edc318c3 100644
--- a/kernel/posix-cpu-timers.c
+++ b/kernel/posix-cpu-timers.c
@@ -233,7 +233,8 @@ void thread_group_cputimer(struct task_struct *tsk, struct task_cputime *times)
 
 /*
  * Sample a process (thread group) clock for the given group_leader task.
- * Must be called with tasklist_lock held for reading.
+ * Must be called with task sighand lock held for safe while_each_thread()
+ * traversal.
  */
 static int cpu_clock_sample_group(const clockid_t which_clock,
 				  struct task_struct *p,
@@ -455,8 +456,7 @@ static inline int expires_gt(cputime_t expires, cputime_t new_exp)
 
 /*
  * Insert the timer on the appropriate list before any timers that
- * expire later. This must be called with the tasklist_lock held
- * for reading, interrupts disabled and p->sighand->siglock taken.
+ * expire later. This must be called with the sighand lock held.
  */
 static void arm_timer(struct k_itimer *timer)
 {
@@ -547,7 +547,8 @@ static void cpu_timer_fire(struct k_itimer *timer)
 
 /*
  * Sample a process (thread group) timer for the given group_leader task.
- * Must be called with tasklist_lock held for reading.
+ * Must be called with task sighand lock held for safe while_each_thread()
+ * traversal.
 */
 static int cpu_timer_sample_group(const clockid_t which_clock,
 				  struct task_struct *p,
@@ -610,9 +611,11 @@ static inline void posix_cpu_timer_kick_nohz(void) { }
 * If we return TIMER_RETRY, it's necessary to release the timer's lock
 * and try again. (This happens when the timer is in the middle of firing.)
 */
-static int posix_cpu_timer_set(struct k_itimer *timer, int flags,
+static int posix_cpu_timer_set(struct k_itimer *timer, int timer_flags,
 			       struct itimerspec *new, struct itimerspec *old)
 {
+	unsigned long flags;
+	struct sighand_struct *sighand;
 	struct task_struct *p = timer->it.cpu.task;
 	unsigned long long old_expires, new_expires, old_incr, val;
 	int ret;
@@ -621,14 +624,16 @@ static int posix_cpu_timer_set(struct k_itimer *timer, int flags,
 
 	new_expires = timespec_to_sample(timer->it_clock, &new->it_value);
 
-	read_lock(&tasklist_lock);
 	/*
-	 * We need the tasklist_lock to protect against reaping that
-	 * clears p->sighand. If p has just been reaped, we can no
+	 * Protect against sighand release/switch in exit/exec and p->cpu_timers
+	 * and p->signal->cpu_timers read/write in arm_timer()
+	 */
+	sighand = lock_task_sighand(p, &flags);
+	/*
+	 * If p has just been reaped, we can no
 	 * longer get any information about it at all.
 	 */
-	if (unlikely(p->sighand == NULL)) {
-		read_unlock(&tasklist_lock);
+	if (unlikely(sighand == NULL)) {
 		return -ESRCH;
 	}
 
@@ -639,7 +644,6 @@ static int posix_cpu_timer_set(struct k_itimer *timer, int flags,
 
 	ret = 0;
 	old_incr = timer->it.cpu.incr;
-	spin_lock(&p->sighand->siglock);
 	old_expires = timer->it.cpu.expires;
 	if (unlikely(timer->it.cpu.firing)) {
 		timer->it.cpu.firing = -1;
@@ -696,12 +700,11 @@ static int posix_cpu_timer_set(struct k_itimer *timer, int flags,
 		 * disable this firing since we are already reporting
 		 * it as an overrun (thanks to bump_cpu_timer above).
 		 */
-		spin_unlock(&p->sighand->siglock);
-		read_unlock(&tasklist_lock);
+		unlock_task_sighand(p, &flags);
 		goto out;
 	}
 
-	if (new_expires != 0 && !(flags & TIMER_ABSTIME)) {
+	if (new_expires != 0 && !(timer_flags & TIMER_ABSTIME)) {
 		new_expires += val;
 	}
 
@@ -715,9 +718,7 @@ static int posix_cpu_timer_set(struct k_itimer *timer, int flags,
 		arm_timer(timer);
 	}
 
-	spin_unlock(&p->sighand->siglock);
-	read_unlock(&tasklist_lock);
-
+	unlock_task_sighand(p, &flags);
 	/*
 	 * Install the new reload setting, and
 	 * set up the signal and overrun bookkeeping.
@@ -779,8 +780,16 @@ static void posix_cpu_timer_get(struct k_itimer *timer, struct itimerspec *itp)
 	if (CPUCLOCK_PERTHREAD(timer->it_clock)) {
 		cpu_clock_sample(timer->it_clock, p, &now);
 	} else {
-		read_lock(&tasklist_lock);
-		if (unlikely(p->sighand == NULL)) {
+		struct sighand_struct *sighand;
+		unsigned long flags;
+
+		/*
+		 * Protect against sighand release/switch in exit/exec and
+		 * also make timer sampling safe if it ends up calling
+		 * thread_group_cputime().
+		 */
+		sighand = lock_task_sighand(p, &flags);
+		if (unlikely(sighand == NULL)) {
 			/*
 			 * The process has been reaped.
 			 * We can't even collect a sample any more.
@@ -789,11 +798,10 @@ static void posix_cpu_timer_get(struct k_itimer *timer, struct itimerspec *itp)
 			timer->it.cpu.expires = 0;
 			sample_to_timespec(timer->it_clock, timer->it.cpu.expires,
 					   &itp->it_value);
-			read_unlock(&tasklist_lock);
 		} else {
 			cpu_timer_sample_group(timer->it_clock, p, &now);
+			unlock_task_sighand(p, &flags);
 		}
-		read_unlock(&tasklist_lock);
 	}
 
 	if (now < timer->it.cpu.expires) {
@@ -1007,6 +1015,8 @@ static void check_process_timers(struct task_struct *tsk,
 */
 void posix_cpu_timer_schedule(struct k_itimer *timer)
 {
+	struct sighand_struct *sighand;
+	unsigned long flags;
 	struct task_struct *p = timer->it.cpu.task;
 	unsigned long long now;
 
@@ -1021,27 +1031,31 @@ void posix_cpu_timer_schedule(struct k_itimer *timer)
 		if (unlikely(p->exit_state))
 			goto out;
 
-		read_lock(&tasklist_lock); /* arm_timer needs it. */
-		spin_lock(&p->sighand->siglock);
+		/* Protect timer list r/w in arm_timer() */
+		sighand = lock_task_sighand(p, &flags);
+		if (!sighand)
+			goto out;
 	} else {
-		read_lock(&tasklist_lock);
-		if (unlikely(p->sighand == NULL)) {
+		/*
+		 * Protect arm_timer() and timer sampling in case of call to
+		 * thread_group_cputime().
+		 */
+		sighand = lock_task_sighand(p, &flags);
+		if (unlikely(sighand == NULL)) {
 			/*
 			 * The process has been reaped.
 			 * We can't even collect a sample any more.
 			 */
 			timer->it.cpu.expires = 0;
-			read_unlock(&tasklist_lock);
 			goto out;
 		} else if (unlikely(p->exit_state) && thread_group_empty(p)) {
-			read_unlock(&tasklist_lock);
+			unlock_task_sighand(p, &flags);
 			/* Optimizations: if the process is dying, no need to rearm */
 			goto out;
 		}
-		spin_lock(&p->sighand->siglock);
 		cpu_timer_sample_group(timer->it_clock, p, &now);
 		bump_cpu_timer(timer, now);
-		/* Leave the tasklist_lock locked for the call below. */
+		/* Leave the sighand locked for the call below. */
 	}
 
 	/*
@@ -1049,12 +1063,10 @@ void posix_cpu_timer_schedule(struct k_itimer *timer)
 	 */
 	BUG_ON(!irqs_disabled());
 	arm_timer(timer);
-	spin_unlock(&p->sighand->siglock);
-	read_unlock(&tasklist_lock);
+	unlock_task_sighand(p, &flags);
 
 	/* Kick full dynticks CPUs in case they need to tick on the new timer */
 	posix_cpu_timer_kick_nohz();
-
 out:
 	timer->it_overrun_last = timer->it_overrun;
 	timer->it_overrun = -1;

From 531f64fd6f46a3f2a3edb1b97ecc827c775932c5 Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker
Date: Fri, 11 Oct 2013 17:58:08 +0200
Subject: [PATCH 10/10] posix-timers: Convert abuses of BUG_ON to WARN_ON

The posix cpu timers code makes heavy use of BUG_ON(), but none of
these assertions concern fatal issues that require stopping the
machine. So let's just warn the user when some internal state slips
out of our hands.

Signed-off-by: Frederic Weisbecker
Cc: Thomas Gleixner
Cc: Ingo Molnar
Cc: Peter Zijlstra
Cc: Oleg Nesterov
Cc: Kosaki Motohiro
Cc: Andrew Morton
---
 kernel/posix-cpu-timers.c | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/kernel/posix-cpu-timers.c b/kernel/posix-cpu-timers.c
index d9dc5edc318c3..3b8946416a5f8 100644
--- a/kernel/posix-cpu-timers.c
+++ b/kernel/posix-cpu-timers.c
@@ -395,7 +395,7 @@ static int posix_cpu_timer_del(struct k_itimer *timer)
 		 * We raced with the reaping of the task.
 		 * The deletion should have cleared us off the list.
 		 */
-		BUG_ON(!list_empty(&timer->it.cpu.entry));
+		WARN_ON_ONCE(!list_empty(&timer->it.cpu.entry));
 	} else {
 		if (timer->it.cpu.firing)
 			ret = TIMER_RETRY;
@@ -640,7 +640,7 @@ static int posix_cpu_timer_set(struct k_itimer *timer, int timer_flags,
 	/*
 	 * Disarm any old timer after extracting its expiry time.
 	 */
-	BUG_ON(!irqs_disabled());
+	WARN_ON_ONCE(!irqs_disabled());
 
 	ret = 0;
 	old_incr = timer->it.cpu.incr;
@@ -1061,7 +1061,7 @@ void posix_cpu_timer_schedule(struct k_itimer *timer)
 	/*
 	 * Now re-arm for the new expiry time.
 	 */
-	BUG_ON(!irqs_disabled());
+	WARN_ON_ONCE(!irqs_disabled());
 	arm_timer(timer);
 	unlock_task_sighand(p, &flags);
 
@@ -1150,7 +1150,7 @@ void run_posix_cpu_timers(struct task_struct *tsk)
 	struct k_itimer *timer, *next;
 	unsigned long flags;
 
-	BUG_ON(!irqs_disabled());
+	WARN_ON_ONCE(!irqs_disabled());
 
 	/*
 	 * The fast path checks that there are no expired thread or thread
@@ -1217,7 +1217,7 @@ void set_process_cpu_timer(struct task_struct *tsk, unsigned int clock_idx,
 {
 	unsigned long long now;
 
-	BUG_ON(clock_idx == CPUCLOCK_SCHED);
+	WARN_ON_ONCE(clock_idx == CPUCLOCK_SCHED);
 	cpu_timer_sample_group(clock_idx, tsk, &now);
 
 	if (oldval) {

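[Illustration, not part of the series: the practical difference is that BUG_ON() halts the machine on a failed assertion, while WARN_ON()/WARN_ON_ONCE() print a single stack trace and let the code carry on — appropriate for sanity checks like the ones above that don't indicate unrecoverable corruption. A generic, made-up sketch:]

static void rearm_timer_checked(struct k_itimer *timer)
{
	/*
	 * A BUG_ON() here would kill the box over a locking slip;
	 * warning once and continuing is enough for this kind of check.
	 */
	WARN_ON_ONCE(!irqs_disabled());
	arm_timer(timer);
}
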