Commit 73ed387

---
r: 223985
b: refs/heads/master
c: 2069dd7
h: refs/heads/master
i:
  223983: 4c2204d
v: v3
Peter Zijlstra authored and Ingo Molnar committed Nov 18, 2010
1 parent 930e5c6 commit 73ed387
Showing 7 changed files with 163 additions and 214 deletions.
2 changes: 1 addition & 1 deletion [refs]
@@ -1,2 +1,2 @@
 ---
-refs/heads/master: 48c5ccae88dcd989d9de507e8510313c6cbd352b
+refs/heads/master: 2069dd75c7d0f49355939e5586daf5a9ab216db7
2 changes: 0 additions & 2 deletions trunk/include/linux/sched.h
@@ -1885,8 +1885,6 @@ static inline void wake_up_idle_cpu(int cpu) { }
 extern unsigned int sysctl_sched_latency;
 extern unsigned int sysctl_sched_min_granularity;
 extern unsigned int sysctl_sched_wakeup_granularity;
-extern unsigned int sysctl_sched_shares_ratelimit;
-extern unsigned int sysctl_sched_shares_thresh;
 extern unsigned int sysctl_sched_child_runs_first;
 
 enum sched_tunable_scaling {
173 changes: 44 additions & 129 deletions trunk/kernel/sched.c
@@ -253,6 +253,8 @@ struct task_group {
 	/* runqueue "owned" by this group on each cpu */
 	struct cfs_rq **cfs_rq;
 	unsigned long shares;
+
+	atomic_t load_weight;
 #endif
 
 #ifdef CONFIG_RT_GROUP_SCHED
@@ -359,15 +361,11 @@ struct cfs_rq {
 	 */
 	unsigned long h_load;
 
-	/*
-	 * this cpu's part of tg->shares
-	 */
-	unsigned long shares;
+	u64 load_avg;
+	u64 load_period;
+	u64 load_stamp;
 
-	/*
-	 * load.weight at the time we set shares
-	 */
-	unsigned long rq_weight;
+	unsigned long load_contribution;
 #endif
 #endif
 };
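
The four new cfs_rq fields track a local load average: load_avg accumulates weight integrated over time, load_period the time integrated over, load_stamp the clock at the last update, and load_contribution what this cpu last folded into tg->load_weight. The code maintaining them lives in kernel/sched_fair.c, which this page does not show; the sketch below is only a user-space model of that scheme, with an arbitrary decay window standing in for the kernel's averaging period.

#include <stdint.h>
#include <stdio.h>

struct cfs_rq_model {
	uint64_t weight;      /* stands in for cfs_rq->load.weight */
	uint64_t load_avg;    /* weight integrated over time */
	uint64_t load_period; /* time integrated over */
	uint64_t load_stamp;  /* clock at last update */
};

/* Fold the elapsed time into both running sums, then halve them
 * together while the window exceeds the period, so recent history
 * dominates. The 4ms window here is an arbitrary stand-in. */
static void update_cfs_load_model(struct cfs_rq_model *c, uint64_t now_ns)
{
	uint64_t delta = now_ns - c->load_stamp;

	c->load_stamp = now_ns;
	c->load_period += delta;
	c->load_avg += delta * c->weight;

	while (c->load_period > 4000000ULL) {
		c->load_period /= 2;
		c->load_avg /= 2;
	}
}

int main(void)
{
	struct cfs_rq_model c = { .weight = 1024 };
	uint64_t t;

	for (t = 1; t <= 4; t++) {
		update_cfs_load_model(&c, t * 1000000ULL);
		/* same division the new tg_shares_up() uses below */
		printf("avg load = %llu\n", (unsigned long long)
		       (c.load_avg / (c.load_period + 1)));
	}
	return 0;
}
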
@@ -806,20 +804,6 @@ late_initcall(sched_init_debug);
  */
 const_debug unsigned int sysctl_sched_nr_migrate = 32;
 
-/*
- * ratelimit for updating the group shares.
- * default: 0.25ms
- */
-unsigned int sysctl_sched_shares_ratelimit = 250000;
-unsigned int normalized_sysctl_sched_shares_ratelimit = 250000;
-
-/*
- * Inject some fuzzyness into changing the per-cpu group shares
- * this avoids remote rq-locks at the expense of fairness.
- * default: 4
- */
-unsigned int sysctl_sched_shares_thresh = 4;
-
 /*
  * period over which we average the RT time consumption, measured
  * in ms.
@@ -1369,6 +1353,12 @@ static inline void update_load_sub(struct load_weight *lw, unsigned long dec)
 	lw->inv_weight = 0;
 }
 
+static inline void update_load_set(struct load_weight *lw, unsigned long w)
+{
+	lw->weight = w;
+	lw->inv_weight = 0;
+}
+
 /*
  * To aid in avoiding the subversion of "niceness" due to uneven distribution
  * of tasks with abnormal "nice" values across CPUs the contribution that
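
update_load_set() gives the "set weight and invalidate the cached inverse" pattern a name; two open-coded copies of it disappear later in this diff (init_tg_cfs_entry() and __set_se_shares()). inv_weight caches a fixed-point reciprocal of the weight so hot paths can multiply-and-shift instead of divide, and zeroing it forces lazy recomputation. A stand-alone illustration of that contract follows; the 32-bit shift is an assumption standing in for the kernel's WMULT arithmetic, not the exact constants.

#include <stdint.h>
#include <stdio.h>

struct load_weight {
	unsigned long weight;
	uint32_t inv_weight; /* cached ~2^32 / weight; 0 means stale */
};

/* Same shape as the helper added in this hunk. */
static inline void update_load_set(struct load_weight *lw, unsigned long w)
{
	lw->weight = w;
	lw->inv_weight = 0; /* invalidate: recompute on next use */
}

/* delta / weight computed as a multiply-and-shift, refreshing the
 * reciprocal only when it was invalidated. */
static uint64_t div_by_weight(uint64_t delta, struct load_weight *lw)
{
	if (!lw->inv_weight)
		lw->inv_weight = (uint32_t)((1ULL << 32) / lw->weight);
	return (delta * lw->inv_weight) >> 32;
}

int main(void)
{
	struct load_weight lw;

	update_load_set(&lw, 1024);
	printf("%llu\n", (unsigned long long)div_by_weight(1 << 20, &lw));
	update_load_set(&lw, 2048); /* stale reciprocal cannot leak through */
	printf("%llu\n", (unsigned long long)div_by_weight(1 << 20, &lw));
	return 0;
}
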
@@ -1557,97 +1547,44 @@ static unsigned long cpu_avg_load_per_task(int cpu)
 
 #ifdef CONFIG_FAIR_GROUP_SCHED
 
-static __read_mostly unsigned long __percpu *update_shares_data;
-
-static void __set_se_shares(struct sched_entity *se, unsigned long shares);
-
-/*
- * Calculate and set the cpu's group shares.
- */
-static void update_group_shares_cpu(struct task_group *tg, int cpu,
-				    unsigned long sd_shares,
-				    unsigned long sd_rq_weight,
-				    unsigned long *usd_rq_weight)
-{
-	unsigned long shares, rq_weight;
-	int boost = 0;
-
-	rq_weight = usd_rq_weight[cpu];
-	if (!rq_weight) {
-		boost = 1;
-		rq_weight = NICE_0_LOAD;
-	}
-
-	/*
-	 *             \Sum_j shares_j * rq_weight_i
-	 * shares_i =  -----------------------------
-	 *                  \Sum_j rq_weight_j
-	 */
-	shares = (sd_shares * rq_weight) / sd_rq_weight;
-	shares = clamp_t(unsigned long, shares, MIN_SHARES, MAX_SHARES);
-
-	if (abs(shares - tg->se[cpu]->load.weight) >
-			sysctl_sched_shares_thresh) {
-		struct rq *rq = cpu_rq(cpu);
-		unsigned long flags;
-
-		raw_spin_lock_irqsave(&rq->lock, flags);
-		tg->cfs_rq[cpu]->rq_weight = boost ? 0 : rq_weight;
-		tg->cfs_rq[cpu]->shares = boost ? 0 : shares;
-		__set_se_shares(tg->se[cpu], shares);
-		raw_spin_unlock_irqrestore(&rq->lock, flags);
-	}
-}
+static void update_cfs_load(struct cfs_rq *cfs_rq);
+static void update_cfs_shares(struct cfs_rq *cfs_rq);
 
 /*
- * Re-compute the task group their per cpu shares over the given domain.
- * This needs to be done in a bottom-up fashion because the rq weight of a
- * parent group depends on the shares of its child groups.
+ * update tg->load_weight by folding this cpu's load_avg
  */
 static int tg_shares_up(struct task_group *tg, void *data)
 {
-	unsigned long weight, rq_weight = 0, sum_weight = 0, shares = 0;
-	unsigned long *usd_rq_weight;
-	struct sched_domain *sd = data;
+	long load_avg;
+	struct cfs_rq *cfs_rq;
 	unsigned long flags;
-	int i;
+	int cpu = (long)data;
+	struct rq *rq;
 
-	if (!tg->se[0])
+	if (!tg->se[cpu])
 		return 0;
 
-	local_irq_save(flags);
-	usd_rq_weight = per_cpu_ptr(update_shares_data, smp_processor_id());
-
-	for_each_cpu(i, sched_domain_span(sd)) {
-		weight = tg->cfs_rq[i]->load.weight;
-		usd_rq_weight[i] = weight;
-
-		rq_weight += weight;
-		/*
-		 * If there are currently no tasks on the cpu pretend there
-		 * is one of average load so that when a new task gets to
-		 * run here it will not get delayed by group starvation.
-		 */
-		if (!weight)
-			weight = NICE_0_LOAD;
+	rq = cpu_rq(cpu);
+	cfs_rq = tg->cfs_rq[cpu];
 
-		sum_weight += weight;
-		shares += tg->cfs_rq[i]->shares;
-	}
+	raw_spin_lock_irqsave(&rq->lock, flags);
 
-	if (!rq_weight)
-		rq_weight = sum_weight;
+	update_rq_clock(rq);
+	update_cfs_load(cfs_rq);
 
-	if ((!shares && rq_weight) || shares > tg->shares)
-		shares = tg->shares;
+	load_avg = div64_u64(cfs_rq->load_avg, cfs_rq->load_period+1);
+	load_avg -= cfs_rq->load_contribution;
 
-	if (!sd->parent || !(sd->parent->flags & SD_LOAD_BALANCE))
-		shares = tg->shares;
+	atomic_add(load_avg, &tg->load_weight);
+	cfs_rq->load_contribution += load_avg;
 
-	for_each_cpu(i, sched_domain_span(sd))
-		update_group_shares_cpu(tg, i, shares, rq_weight, usd_rq_weight);
+	/*
+	 * We need to update shares after updating tg->load_weight in
	 * order to adjust the weight of groups with long running tasks.
+	 */
+	update_cfs_shares(cfs_rq);
 
-	local_irq_restore(flags);
+	raw_spin_unlock_irqrestore(&rq->lock, flags);
 
 	return 0;
 }
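
This is the core of the rewrite. The old tg_shares_up() walked every cpu in a sched domain, summed rq weights, and distributed shares_i = Σ_j shares_j × rq_weight_i / Σ_j rq_weight_j, taking remote runqueue locks along the way. The new one touches only local state plus one atomic: each cpu folds the difference between its current load average and its previous contribution into tg->load_weight. A compilable model of that delta-folding idiom, with C11 atomics standing in for the kernel's atomic_t:

#include <stdatomic.h>
#include <stdio.h>

/* Group-wide total, tg->load_weight in the patch. */
static atomic_long tg_load_weight;

struct cpu_load {
	long load_avg;          /* current local average */
	long load_contribution; /* what we folded in last time */
};

/* Publish only the change since the last fold: no other cpu's data
 * is read, and the total stays equal to the sum of contributions. */
static void fold_load(struct cpu_load *cl)
{
	long delta = cl->load_avg - cl->load_contribution;

	atomic_fetch_add(&tg_load_weight, delta);
	cl->load_contribution += delta;
}

int main(void)
{
	struct cpu_load cpu0 = {0, 0}, cpu1 = {0, 0};

	cpu0.load_avg = 300; fold_load(&cpu0);
	cpu1.load_avg = 100; fold_load(&cpu1);
	cpu0.load_avg = 250; fold_load(&cpu0); /* folds -50, not 250 */

	printf("tg load = %ld\n", atomic_load(&tg_load_weight)); /* 350 */
	return 0;
}
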
@@ -1666,7 +1603,7 @@ static int tg_load_down(struct task_group *tg, void *data)
 		load = cpu_rq(cpu)->load.weight;
 	} else {
 		load = tg->parent->cfs_rq[cpu]->h_load;
-		load *= tg->cfs_rq[cpu]->shares;
+		load *= tg->se[cpu]->load.weight;
 		load /= tg->parent->cfs_rq[cpu]->load.weight + 1;
 	}
 
@@ -1675,21 +1612,16 @@ static int tg_load_down(struct task_group *tg, void *data)
 	return 0;
 }
 
-static void update_shares(struct sched_domain *sd)
+static void update_shares(long cpu)
 {
-	s64 elapsed;
-	u64 now;
-
 	if (root_task_group_empty())
 		return;
 
-	now = local_clock();
-	elapsed = now - sd->last_update;
+	/*
+	 * XXX: replace with an on-demand list
+	 */
 
-	if (elapsed >= (s64)(u64)sysctl_sched_shares_ratelimit) {
-		sd->last_update = now;
-		walk_tg_tree(tg_nop, tg_shares_up, sd);
-	}
+	walk_tg_tree(tg_nop, tg_shares_up, (void *)cpu);
 }
 
 static void update_h_load(long cpu)
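
update_shares() now takes a cpu number instead of a sched_domain, and walk_tg_tree() applies one callback on the way down the group hierarchy and another on the way back up; passing tg_nop down and tg_shares_up up yields the children-before-parents order the old code achieved with its domain walk. Below is a recursive sketch of that visitor pattern over a hypothetical node type standing in for struct task_group; the kernel's walk is iterative and starts from the root group implicitly, so treat the shape, not the signature, as the point.

#include <stdio.h>

/* Hypothetical stand-in for the task_group hierarchy. */
struct tg {
	const char *name;
	struct tg *children[4];
};

typedef int (*tg_visitor)(struct tg *tg, void *data);

static int tg_nop(struct tg *tg, void *data) { return 0; }

/* Call 'down' pre-order and 'up' post-order. Bottom-up ordering
 * matters for tg_shares_up() because a parent's weight depends on
 * what its children just folded in. */
static int walk_tg_tree(struct tg *root, tg_visitor down, tg_visitor up,
			void *data)
{
	int i, ret;

	if ((ret = down(root, data)))
		return ret;
	for (i = 0; i < 4 && root->children[i]; i++)
		if ((ret = walk_tg_tree(root->children[i], down, up, data)))
			return ret;
	return up(root, data);
}

static int print_up(struct tg *tg, void *data)
{
	printf("fold %s (cpu %ld)\n", tg->name, (long)data);
	return 0;
}

int main(void)
{
	struct tg leaf1 = { "leaf1", { 0 } }, leaf2 = { "leaf2", { 0 } };
	struct tg root = { "root", { &leaf1, &leaf2, 0 } };

	/* leaves print before root, like children folding before parents */
	walk_tg_tree(&root, tg_nop, print_up, (void *)1L);
	return 0;
}
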
@@ -1699,7 +1631,7 @@ static void update_h_load(long cpu)
 
 #else
 
-static inline void update_shares(struct sched_domain *sd)
+static inline void update_shares(int cpu)
 {
 }
 
@@ -1824,15 +1756,6 @@ static void double_rq_unlock(struct rq *rq1, struct rq *rq2)
 
 #endif
 
-#ifdef CONFIG_FAIR_GROUP_SCHED
-static void cfs_rq_set_shares(struct cfs_rq *cfs_rq, unsigned long shares)
-{
-#ifdef CONFIG_SMP
-	cfs_rq->shares = shares;
-#endif
-}
-#endif
-
 static void calc_load_account_idle(struct rq *this_rq);
 static void update_sysctl(void);
 static int get_update_sysctl_factor(void);
@@ -5551,7 +5474,6 @@ static void update_sysctl(void)
 	SET_SYSCTL(sched_min_granularity);
 	SET_SYSCTL(sched_latency);
 	SET_SYSCTL(sched_wakeup_granularity);
-	SET_SYSCTL(sched_shares_ratelimit);
 #undef SET_SYSCTL
 }
 
@@ -7787,8 +7709,7 @@ static void init_tg_cfs_entry(struct task_group *tg, struct cfs_rq *cfs_rq,
 		se->cfs_rq = parent->my_q;
 
 	se->my_q = cfs_rq;
-	se->load.weight = tg->shares;
-	se->load.inv_weight = 0;
+	update_load_set(&se->load, tg->shares);
 	se->parent = parent;
 }
 #endif
@@ -7881,10 +7802,6 @@ void __init sched_init(void)
 
 #endif /* CONFIG_CGROUP_SCHED */
 
-#if defined CONFIG_FAIR_GROUP_SCHED && defined CONFIG_SMP
-	update_shares_data = __alloc_percpu(nr_cpu_ids * sizeof(unsigned long),
-					    __alignof__(unsigned long));
-#endif
 	for_each_possible_cpu(i) {
 		struct rq *rq;
 
@@ -8452,8 +8369,7 @@ static void __set_se_shares(struct sched_entity *se, unsigned long shares)
 	if (on_rq)
 		dequeue_entity(cfs_rq, se, 0);
 
-	se->load.weight = shares;
-	se->load.inv_weight = 0;
+	update_load_set(&se->load, shares);
 
 	if (on_rq)
 		enqueue_entity(cfs_rq, se, 0);
@@ -8510,7 +8426,6 @@ int sched_group_set_shares(struct task_group *tg, unsigned long shares)
 		/*
 		 * force a rebalance
 		 */
-		cfs_rq_set_shares(tg->cfs_rq[i], 0);
 		set_se_shares(tg->se[i], shares);
 	}
 
15 changes: 11 additions & 4 deletions trunk/kernel/sched_debug.c
@@ -202,15 +202,22 @@ void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq)
 	spread0 = min_vruntime - rq0_min_vruntime;
 	SEQ_printf(m, "  .%-30s: %Ld.%06ld\n", "spread0",
 			SPLIT_NS(spread0));
-	SEQ_printf(m, "  .%-30s: %ld\n", "nr_running", cfs_rq->nr_running);
-	SEQ_printf(m, "  .%-30s: %ld\n", "load", cfs_rq->load.weight);
-
 	SEQ_printf(m, "  .%-30s: %d\n", "nr_spread_over",
 			cfs_rq->nr_spread_over);
+	SEQ_printf(m, "  .%-30s: %ld\n", "nr_running", cfs_rq->nr_running);
+	SEQ_printf(m, "  .%-30s: %ld\n", "load", cfs_rq->load.weight);
 #ifdef CONFIG_FAIR_GROUP_SCHED
 #ifdef CONFIG_SMP
-	SEQ_printf(m, "  .%-30s: %lu\n", "shares", cfs_rq->shares);
+	SEQ_printf(m, "  .%-30s: %Ld.%06ld\n", "load_avg",
+			SPLIT_NS(cfs_rq->load_avg));
+	SEQ_printf(m, "  .%-30s: %Ld.%06ld\n", "load_period",
+			SPLIT_NS(cfs_rq->load_period));
+	SEQ_printf(m, "  .%-30s: %ld\n", "load_contrib",
+			cfs_rq->load_contribution);
+	SEQ_printf(m, "  .%-30s: %d\n", "load_tg",
+			atomic_read(&tg->load_weight));
 #endif
+
 	print_cfs_group_stats(m, cpu, cfs_rq->tg);
 #endif
 }
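
With these format strings, each cfs_rq stanza in /proc/sched_debug gains lines shaped like the following (values invented for illustration; SPLIT_NS renders a nanosecond count with a decimal point, while the contribution and group totals print as raw integers):

  .load_avg                      : 4199.123456
  .load_period                   : 7.954422
  .load_contrib                  : 527
  .load_tg                       : 1052
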
