sched,dl: Fix sched class hopping CBS hole
We still have a few pending issues with the deadline code, one of which
is that switching between scheduling classes can 'leak' CBS state.

Close the hole by retaining the current CBS state when leaving
SCHED_DEADLINE and unconditionally programming the deadline timer.
The timer will then reset the CBS state if the task is still
!SCHED_DEADLINE by the time it hits.
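
As a rough userspace model of that mechanism (all names here --
cbs_state, leave_deadline(), dl_timer_fired() -- are invented for
illustration and are not the kernel's):

/*
 * Illustrative sketch only; models the "retain state, let the timer
 * reset it" idea, not the actual kernel code.
 */
#include <stdbool.h>
#include <stdio.h>

struct cbs_state {
        long long runtime;      /* remaining CBS budget (ns) */
        long long deadline;     /* absolute deadline (ns) */
        bool dl_task;           /* still in SCHED_DEADLINE? */
};

/* Leaving SCHED_DEADLINE: keep the CBS state and (conceptually) arm
 * the deadline timer instead of clearing the state right here. */
static void leave_deadline(struct cbs_state *s)
{
        s->dl_task = false;
}

/* Timer callback: reset the CBS state only if the task is still
 * outside SCHED_DEADLINE when its old deadline passes.  A task that
 * hopped back in the meantime keeps consuming its current slice, so
 * class hopping can no longer mint fresh bandwidth. */
static void dl_timer_fired(struct cbs_state *s)
{
        if (!s->dl_task) {
                s->runtime = 0;
                s->deadline = 0;
        }
}

int main(void)
{
        struct cbs_state s = { .runtime = 5, .deadline = 100, .dl_task = true };

        leave_deadline(&s);
        s.dl_task = true;               /* hop back before the deadline */
        dl_timer_fired(&s);
        printf("runtime=%lld\n", s.runtime);    /* 5: slice retained */

        leave_deadline(&s);             /* stay outside until it fires */
        dl_timer_fired(&s);
        printf("runtime=%lld\n", s.runtime);    /* 0: state reset */
        return 0;
}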

If the task left SCHED_DEADLINE it will not call task_dead_dl() and
we'll not cancel the hrtimer, leaving us a pending timer in freed
memory. Avoid this by giving the timer a task reference, which also
avoids littering the task exit path for this rather uncommon case.
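
The reference counting works roughly like this sketch, where
get_ref()/put_ref() stand in for get_task_struct()/put_task_struct();
again an illustrative model, not the kernel code:

#include <stdio.h>
#include <stdlib.h>

struct task {
        int refs;
        int timer_queued;       /* stand-in for hrtimer_is_queued() */
};

static void get_ref(struct task *t) { t->refs++; }

static void put_ref(struct task *t)
{
        if (--t->refs == 0)
                free(t);        /* last reference frees the task */
}

/* Arming the timer pins the task: one reference per pending callback,
 * dropped by the callback itself rather than by the exit path. */
static void arm_timer(struct task *t)
{
        if (!t->timer_queued) {
                t->timer_queued = 1;
                get_ref(t);
        }
}

static void timer_callback(struct task *t)
{
        t->timer_queued = 0;
        /* ... the actual timer work would go here ... */
        put_ref(t);     /* t may be freed here; don't touch it after */
}

int main(void)
{
        struct task *t = calloc(1, sizeof(*t));

        t->refs = 1;            /* reference held by the task itself */
        arm_timer(t);           /* refs == 2 */
        put_ref(t);             /* task exits; pending timer keeps t alive */
        timer_callback(t);      /* refs == 0; freed here, not at exit */
        return 0;
}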

In order to do this, I had to move dl_task_offline_migration() below
the replenishment, such that the task_rq()->lock fully covers that.
While doing this, I noticed that it was buggy in assuming the task is
enqueued, or that we need to enqueue it now. Fixing this means
select_task_rq_dl() might encounter an offline rq -- look into that.
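
Schematically, the new ordering is (a compile-only sketch with stubbed
locks; the function names mirror the kernel's but the real code is in
the dl_task_timer() hunk below):

struct rq { int online; };
struct task_struct { struct rq *rq; };

static void rq_lock(struct rq *rq)   { (void)rq; }
static void rq_unlock(struct rq *rq) { (void)rq; }
static void replenish(struct task_struct *p) { (void)p; }
static struct rq *dl_task_offline_migration(struct rq *rq,
                                            struct task_struct *p)
{ (void)p; return rq; } /* may drop and retake locks internally */

static void dl_timer_callback(struct task_struct *p)
{
        struct rq *rq = p->rq;

        rq_lock(rq);
        /* Replenishment first, fully covered by task_rq()->lock ... */
        replenish(p);
        /* ... and only then the balancing that may break the lock. */
        if (!rq->online)
                rq = dl_task_offline_migration(rq, p);
        rq_unlock(rq);
}

int main(void)
{
        struct rq rq = { .online = 1 };
        struct task_struct p = { .rq = &rq };

        dl_timer_callback(&p);
        return 0;
}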

As a result this kills cancel_dl_timer() which included a rq->lock
break.

Fixes: 40767b0 ("sched/deadline: Fix deadline parameter modification handling")
Cc: Wanpeng Li <wanpeng.li@linux.intel.com>
Cc: Luca Abeni <luca.abeni@unitn.it>
Cc: Juri Lelli <juri.lelli@arm.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: ktkhai@parallels.com
Cc: rostedt@goodmis.org
Cc: juri.lelli@gmail.com
Cc: pang.xunlei@linaro.org
Cc: oleg@redhat.com
Cc: umgwanakikbuti@gmail.com
Link: http://lkml.kernel.org/r/20150611124743.574192138@infradead.org
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Peter Zijlstra authored and Thomas Gleixner committed Jun 18, 2015
1 parent 9916e21 commit a649f23
kernel/sched/deadline.c: 86 additions, 66 deletions

--- a/kernel/sched/deadline.c
+++ b/kernel/sched/deadline.c
@@ -234,7 +234,7 @@ static inline void queue_pull_task(struct rq *rq)
 
 static struct rq *find_lock_later_rq(struct task_struct *task, struct rq *rq);
 
-static void dl_task_offline_migration(struct rq *rq, struct task_struct *p)
+static struct rq *dl_task_offline_migration(struct rq *rq, struct task_struct *p)
 {
         struct rq *later_rq = NULL;
         bool fallback = false;
@@ -268,14 +268,19 @@ static struct rq *dl_task_offline_migration(struct rq *rq, struct task_struct *p)
                 double_lock_balance(rq, later_rq);
         }
 
+        /*
+         * By now the task is replenished and enqueued; migrate it.
+         */
         deactivate_task(rq, p, 0);
         set_task_cpu(p, later_rq->cpu);
-        activate_task(later_rq, p, ENQUEUE_REPLENISH);
+        activate_task(later_rq, p, 0);
 
         if (!fallback)
                 resched_curr(later_rq);
 
-        double_unlock_balance(rq, later_rq);
+        double_unlock_balance(later_rq, rq);
+
+        return later_rq;
 }
 
 #else
@@ -515,22 +520,23 @@ static void update_dl_entity(struct sched_dl_entity *dl_se,
  * actually started or not (i.e., the replenishment instant is in
  * the future or in the past).
  */
-static int start_dl_timer(struct sched_dl_entity *dl_se, bool boosted)
+static int start_dl_timer(struct task_struct *p)
 {
-        struct dl_rq *dl_rq = dl_rq_of_se(dl_se);
-        struct rq *rq = rq_of_dl_rq(dl_rq);
+        struct sched_dl_entity *dl_se = &p->dl;
+        struct hrtimer *timer = &dl_se->dl_timer;
+        struct rq *rq = task_rq(p);
         ktime_t now, act;
         s64 delta;
 
-        if (boosted)
-                return 0;
+        lockdep_assert_held(&rq->lock);
+
         /*
          * We want the timer to fire at the deadline, but considering
          * that it is actually coming from rq->clock and not from
          * hrtimer's time base reading.
          */
         act = ns_to_ktime(dl_se->deadline);
-        now = hrtimer_cb_get_time(&dl_se->dl_timer);
+        now = hrtimer_cb_get_time(timer);
         delta = ktime_to_ns(now) - rq_clock(rq);
         act = ktime_add_ns(act, delta);

@@ -542,7 +548,19 @@ static int start_dl_timer(struct task_struct *p)
         if (ktime_us_delta(act, now) < 0)
                 return 0;
 
-        hrtimer_start(&dl_se->dl_timer, act, HRTIMER_MODE_ABS);
+        /*
+         * !enqueued will guarantee another callback; even if one is already in
+         * progress. This ensures a balanced {get,put}_task_struct().
+         *
+         * The race against __run_timer() clearing the enqueued state is
+         * harmless because we're holding task_rq()->lock, therefore the timer
+         * expiring after we've done the check will wait on its task_rq_lock()
+         * and observe our state.
+         */
+        if (!hrtimer_is_queued(timer)) {
+                get_task_struct(p);
+                hrtimer_start(timer, act, HRTIMER_MODE_ABS);
+        }
 
         return 1;
 }
@@ -572,35 +590,40 @@ static enum hrtimer_restart dl_task_timer(struct hrtimer *timer)
         rq = task_rq_lock(p, &flags);
 
         /*
-         * We need to take care of several possible races here:
-         *
-         *   - the task might have changed its scheduling policy
-         *     to something different than SCHED_DEADLINE
-         *   - the task might have changed its reservation parameters
-         *     (through sched_setattr())
-         *   - the task might have been boosted by someone else and
-         *     might be in the boosting/deboosting path
+         * The task might have changed its scheduling policy to something
+         * different than SCHED_DEADLINE (through switched_from_dl()).
+         */
+        if (!dl_task(p)) {
+                __dl_clear_params(p);
+                goto unlock;
+        }
+
+        /*
+         * This is possible if switched_from_dl() raced against a running
+         * callback that took the above !dl_task() path and we've since then
+         * switched back into SCHED_DEADLINE.
          *
-         * In all this cases we bail out, as the task is already
-         * in the runqueue or is going to be enqueued back anyway.
+         * There's nothing to do except drop our task reference.
          */
-        if (!dl_task(p) || dl_se->dl_new ||
-            dl_se->dl_boosted || !dl_se->dl_throttled)
+        if (dl_se->dl_new)
                 goto unlock;
 
-        sched_clock_tick();
-        update_rq_clock(rq);
+        /*
+         * The task might have been boosted by someone else and might be in the
+         * boosting/deboosting path, it's not throttled.
+         */
+        if (dl_se->dl_boosted)
+                goto unlock;
 
-#ifdef CONFIG_SMP
         /*
-         * If we find that the rq the task was on is no longer
-         * available, we need to select a new rq.
+         * Spurious timer due to start_dl_timer() race; or we already received
+         * a replenishment from rt_mutex_setprio().
          */
-        if (unlikely(!rq->online)) {
-                dl_task_offline_migration(rq, p);
+        if (!dl_se->dl_throttled)
                 goto unlock;
-        }
-#endif
+
+        sched_clock_tick();
+        update_rq_clock(rq);
 
         /*
          * If the throttle happened during sched-out; like:
@@ -626,17 +649,38 @@ static enum hrtimer_restart dl_task_timer(struct hrtimer *timer)
                 check_preempt_curr_dl(rq, p, 0);
         else
                 resched_curr(rq);
+
 #ifdef CONFIG_SMP
         /*
-         * Queueing this task back might have overloaded rq,
-         * check if we need to kick someone away.
+         * Perform balancing operations here; after the replenishments. We
+         * cannot drop rq->lock before this, otherwise the assertion in
+         * start_dl_timer() about not missing updates is not true.
+         *
+         * If we find that the rq the task was on is no longer available, we
+         * need to select a new rq.
+         *
+         * XXX figure out if select_task_rq_dl() deals with offline cpus.
+         */
+        if (unlikely(!rq->online))
+                rq = dl_task_offline_migration(rq, p);
+
+        /*
+         * Queueing this task back might have overloaded rq, check if we need
+         * to kick someone away.
          */
         if (has_pushable_dl_tasks(rq))
                 push_dl_task(rq);
 #endif
+
 unlock:
         task_rq_unlock(rq, p, &flags);
 
+        /*
+         * This can free the task_struct, including this hrtimer, do not touch
+         * anything related to that after this.
+         */
+        put_task_struct(p);
+
         return HRTIMER_NORESTART;
 }

@@ -696,7 +740,7 @@ static void update_curr_dl(struct rq *rq)
         if (dl_runtime_exceeded(rq, dl_se)) {
                 dl_se->dl_throttled = 1;
                 __dequeue_task_dl(rq, curr, 0);
-                if (unlikely(!start_dl_timer(dl_se, curr->dl.dl_boosted)))
+                if (unlikely(dl_se->dl_boosted || !start_dl_timer(curr)))
                         enqueue_task_dl(rq, curr, ENQUEUE_REPLENISH);
 
                 if (!is_leftmost(curr, &rq->dl))
@@ -1178,7 +1222,6 @@ static void task_fork_dl(struct task_struct *p)
 
 static void task_dead_dl(struct task_struct *p)
 {
-        struct hrtimer *timer = &p->dl.dl_timer;
         struct dl_bw *dl_b = dl_bw_of(task_cpu(p));
 
         /*
@@ -1188,8 +1231,6 @@ static void task_dead_dl(struct task_struct *p)
         /* XXX we should retain the bw until 0-lag */
         dl_b->total_bw -= p->dl.dl_bw;
         raw_spin_unlock_irq(&dl_b->lock);
-
-        hrtimer_cancel(timer);
 }
 
 static void set_curr_task_dl(struct rq *rq)
@@ -1674,37 +1715,16 @@ void init_sched_dl_class(void)
 
 #endif /* CONFIG_SMP */
 
-/*
- * Ensure p's dl_timer is cancelled. May drop rq->lock for a while.
- */
-static void cancel_dl_timer(struct rq *rq, struct task_struct *p)
-{
-        struct hrtimer *dl_timer = &p->dl.dl_timer;
-
-        /* Nobody will change task's class if pi_lock is held */
-        lockdep_assert_held(&p->pi_lock);
-
-        if (hrtimer_active(dl_timer)) {
-                int ret = hrtimer_try_to_cancel(dl_timer);
-
-                if (unlikely(ret == -1)) {
-                        /*
-                         * Note, p may migrate OR new deadline tasks
-                         * may appear in rq when we are unlocking it.
-                         * A caller of us must be fine with that.
-                         */
-                        raw_spin_unlock(&rq->lock);
-                        hrtimer_cancel(dl_timer);
-                        raw_spin_lock(&rq->lock);
-                }
-        }
-}
-
 static void switched_from_dl(struct rq *rq, struct task_struct *p)
 {
         /* XXX we should retain the bw until 0-lag */
-        cancel_dl_timer(rq, p);
-        __dl_clear_params(p);
+        /*
+         * Start the deadline timer; if we switch back to dl before this we'll
+         * continue consuming our current CBS slice. If we stay outside of
+         * SCHED_DEADLINE until the deadline passes, the timer will reset the
+         * task.
+         */
+        if (!start_dl_timer(p))
+                __dl_clear_params(p);
 
         /*
          * Since this might be the only -deadline task on the rq,
