sched: Add support for unthrottling group entities
At the start of each period we refresh the global bandwidth pool. At this time
we must also unthrottle any cfs_rq entities that are once again within
bandwidth (as quota permits).

An unthrottled cfs_rq has its cfs_rq->throttled flag cleared and its
sched_entities re-enqueued.

Signed-off-by: Paul Turner <pjt@google.com>
Reviewed-by: Hidetoshi Seto <seto.hidetoshi@jp.fujitsu.com>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Link: http://lkml.kernel.org/r/20110721184757.574628950@google.com
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Paul Turner authored and Ingo Molnar committed Aug 14, 2011
1 parent 85dac90 commit 671fd9d
Showing 2 changed files with 126 additions and 4 deletions.
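
[Editorial note] Before the diffs, the arithmetic being added is worth seeing in isolation. Below is a minimal userspace C sketch of the distribution rule that distribute_cfs_runtime() implements in kernel/sched_fair.c: a throttled runqueue carries a non-positive runtime_remaining (its bandwidth deficit), and each is handed just enough runtime to go one nanosecond positive, capped by what is left in the freshly refilled pool. The struct and function names here are illustrative stand-ins, not the kernel's:

#include <stdio.h>

/* Illustrative stand-in for a throttled runqueue's bandwidth state. */
struct model_rq {
	long long runtime_remaining;	/* ns; non-positive while throttled */
	int throttled;
};

/*
 * Model of the patch's distribution rule: grant each throttled rq
 * (-runtime_remaining + 1) ns -- just enough to go positive and be
 * unthrottled -- capped by what remains in the global pool.
 * Returns the undistributed remainder.
 */
static long long distribute(struct model_rq *rqs, int n, long long remaining)
{
	int i;

	for (i = 0; i < n && remaining > 0; i++) {
		long long grant = -rqs[i].runtime_remaining + 1;

		if (grant > remaining)
			grant = remaining;
		remaining -= grant;

		rqs[i].runtime_remaining += grant;
		if (rqs[i].runtime_remaining > 0)
			rqs[i].throttled = 0;
	}
	return remaining;
}

int main(void)
{
	/* two rqs with 6ms and 8ms deficits; a fresh 10ms quota refill */
	struct model_rq rqs[2] = { { -6000000LL, 1 }, { -8000000LL, 1 } };
	long long left = distribute(rqs, 2, 10000000LL);
	int i;

	for (i = 0; i < 2; i++)
		printf("rq%d: runtime_remaining=%lld throttled=%d\n",
		       i, rqs[i].runtime_remaining, rqs[i].throttled);
	/* prints 0: the second rq keeps a deficit and stays throttled */
	printf("pool remainder: %lld\n", left);
	return 0;
}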
3 changes: 3 additions & 0 deletions kernel/sched.c
@@ -9192,6 +9192,9 @@ static int tg_set_cfs_bandwidth(struct task_group *tg, u64 period, u64 quota)
 		raw_spin_lock_irq(&rq->lock);
 		cfs_rq->runtime_enabled = runtime_enabled;
 		cfs_rq->runtime_remaining = 0;
+
+		if (cfs_rq_throttled(cfs_rq))
+			unthrottle_cfs_rq(cfs_rq);
 		raw_spin_unlock_irq(&rq->lock);
 	}
 out_unlock:
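[Editorial note] The tg_set_cfs_bandwidth() hunk above runs when a group's limits are rewritten from userspace; with the cgroup v1 cpu controller that is a write to cpu.cfs_quota_us or cpu.cfs_period_us. As a hedged sketch only (the cgroup mount point and the group name "demo" are assumptions, not part of this commit), the path can be exercised like so:

/*
 * Hedged illustration: writing cpu.cfs_period_us / cpu.cfs_quota_us
 * lands in tg_set_cfs_bandwidth(), which as of this commit unthrottles
 * any cfs_rq still throttled under the old limits.
 */
#include <stdio.h>

static int write_val(const char *path, long val)
{
	FILE *f = fopen(path, "w");

	if (!f) {
		perror(path);
		return -1;
	}
	fprintf(f, "%ld\n", val);
	return fclose(f);
}

int main(void)
{
	/* 100ms period, 10ms quota: at most 10% of one CPU per period */
	write_val("/sys/fs/cgroup/cpu/demo/cpu.cfs_period_us", 100000L);
	write_val("/sys/fs/cgroup/cpu/demo/cpu.cfs_quota_us", 10000L);
	return 0;
}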
127 changes: 123 additions & 4 deletions kernel/sched_fair.c
@@ -1439,6 +1439,84 @@ static __used void throttle_cfs_rq(struct cfs_rq *cfs_rq)
 	raw_spin_unlock(&cfs_b->lock);
 }
 
+static void unthrottle_cfs_rq(struct cfs_rq *cfs_rq)
+{
+	struct rq *rq = rq_of(cfs_rq);
+	struct cfs_bandwidth *cfs_b = tg_cfs_bandwidth(cfs_rq->tg);
+	struct sched_entity *se;
+	int enqueue = 1;
+	long task_delta;
+
+	se = cfs_rq->tg->se[cpu_of(rq_of(cfs_rq))];
+
+	cfs_rq->throttled = 0;
+	raw_spin_lock(&cfs_b->lock);
+	list_del_rcu(&cfs_rq->throttled_list);
+	raw_spin_unlock(&cfs_b->lock);
+
+	if (!cfs_rq->load.weight)
+		return;
+
+	task_delta = cfs_rq->h_nr_running;
+	for_each_sched_entity(se) {
+		if (se->on_rq)
+			enqueue = 0;
+
+		cfs_rq = cfs_rq_of(se);
+		if (enqueue)
+			enqueue_entity(cfs_rq, se, ENQUEUE_WAKEUP);
+		cfs_rq->h_nr_running += task_delta;
+
+		if (cfs_rq_throttled(cfs_rq))
+			break;
+	}
+
+	if (!se)
+		rq->nr_running += task_delta;
+
+	/* determine whether we need to wake up potentially idle cpu */
+	if (rq->curr == rq->idle && rq->cfs.nr_running)
+		resched_task(rq->curr);
+}
+
+static u64 distribute_cfs_runtime(struct cfs_bandwidth *cfs_b,
+		u64 remaining, u64 expires)
+{
+	struct cfs_rq *cfs_rq;
+	u64 runtime = remaining;
+
+	rcu_read_lock();
+	list_for_each_entry_rcu(cfs_rq, &cfs_b->throttled_cfs_rq,
+				throttled_list) {
+		struct rq *rq = rq_of(cfs_rq);
+
+		raw_spin_lock(&rq->lock);
+		if (!cfs_rq_throttled(cfs_rq))
+			goto next;
+
+		runtime = -cfs_rq->runtime_remaining + 1;
+		if (runtime > remaining)
+			runtime = remaining;
+		remaining -= runtime;
+
+		cfs_rq->runtime_remaining += runtime;
+		cfs_rq->runtime_expires = expires;
+
+		/* we check whether we're throttled above */
+		if (cfs_rq->runtime_remaining > 0)
+			unthrottle_cfs_rq(cfs_rq);
+
+next:
+		raw_spin_unlock(&rq->lock);
+
+		if (!remaining)
+			break;
+	}
+	rcu_read_unlock();
+
+	return remaining;
+}
+
 /*
  * Responsible for refilling a task_group's bandwidth and unthrottling its
  * cfs_rqs as appropriate. If there has been no activity within the last
@@ -1447,23 +1525,64 @@ static __used void throttle_cfs_rq(struct cfs_rq *cfs_rq)
  */
 static int do_sched_cfs_period_timer(struct cfs_bandwidth *cfs_b, int overrun)
 {
-	int idle = 1;
+	u64 runtime, runtime_expires;
+	int idle = 1, throttled;
 
 	raw_spin_lock(&cfs_b->lock);
 	/* no need to continue the timer with no bandwidth constraint */
 	if (cfs_b->quota == RUNTIME_INF)
 		goto out_unlock;
 
-	idle = cfs_b->idle;
+	throttled = !list_empty(&cfs_b->throttled_cfs_rq);
+	/* idle depends on !throttled (for the case of a large deficit) */
+	idle = cfs_b->idle && !throttled;
+
 	/* if we're going inactive then everything else can be deferred */
 	if (idle)
 		goto out_unlock;
 
 	__refill_cfs_bandwidth_runtime(cfs_b);
 
+	if (!throttled) {
+		/* mark as potentially idle for the upcoming period */
+		cfs_b->idle = 1;
+		goto out_unlock;
+	}
+
+	/*
+	 * There are throttled entities so we must first use the new bandwidth
+	 * to unthrottle them before making it generally available. This
+	 * ensures that all existing debts will be paid before a new cfs_rq is
+	 * allowed to run.
+	 */
+	runtime = cfs_b->runtime;
+	runtime_expires = cfs_b->runtime_expires;
+	cfs_b->runtime = 0;
+
+	/*
+	 * This check is repeated as we are holding onto the new bandwidth
+	 * while we unthrottle. This can potentially race with an unthrottled
+	 * group trying to acquire new bandwidth from the global pool.
+	 */
+	while (throttled && runtime > 0) {
+		raw_spin_unlock(&cfs_b->lock);
+		/* we can't nest cfs_b->lock while distributing bandwidth */
+		runtime = distribute_cfs_runtime(cfs_b, runtime,
+						 runtime_expires);
+		raw_spin_lock(&cfs_b->lock);
+
+		throttled = !list_empty(&cfs_b->throttled_cfs_rq);
+	}
-	/* mark as potentially idle for the upcoming period */
-	cfs_b->idle = 1;
+	/* return (any) remaining runtime */
+	cfs_b->runtime = runtime;
+	/*
+	 * While we are ensured activity in the period following an
+	 * unthrottle, this also covers the case in which the new bandwidth is
+	 * insufficient to cover the existing bandwidth deficit. (Forcing the
+	 * timer to remain active while there are any throttled entities.)
+	 */
+	cfs_b->idle = 0;
 out_unlock:
 	if (idle)
 		cfs_b->timer_active = 0;
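[Editorial note] A worked period through the new timer path, with illustrative numbers: take a quota of 10ms per 100ms period, and two cfs_rqs throttled with deficits of 6ms and 8ms (runtime_remaining of -6ms and -8ms). When the period timer fires, __refill_cfs_bandwidth_runtime() restores cfs_b->runtime to 10ms; the throttled list is non-empty, so the pool is zeroed and distribute_cfs_runtime() runs with cfs_b->lock dropped. The first cfs_rq is granted 6ms + 1ns, goes positive, and is unthrottled; the second receives the remaining ~4ms, is left at about -4ms, and stays throttled. Nothing returns to the pool, cfs_b->idle is set to 0, and the timer remains active, so the residual debt is paid from the next period's refill.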
