From 115869f435f7ff8bf35df9eb5605d417f3220028 Mon Sep 17 00:00:00 2001
From: David Howells
Date: Fri, 11 Jul 2008 13:34:54 +0100
Subject: [PATCH]

--- yaml ---
r: 105242
b: refs/heads/master
c: 577b4a58d2e74a4d48050eeea3e3f952ce04eb86
h: refs/heads/master
v: v3
---
 [refs]                        |   2 +-
 trunk/include/linux/cpumask.h |   6 +-
 trunk/include/linux/cpuset.h  |   7 ---
 trunk/include/linux/sched.h   |  11 +---
 trunk/init/main.c             |   7 ---
 trunk/kernel/cpu.c            |  40 ++-----------
 trunk/kernel/cpuset.c         |   2 +-
 trunk/kernel/sched.c          | 108 ++++++++++++++++++++--------------
 trunk/kernel/sched_fair.c     |   3 -
 trunk/kernel/sched_rt.c       |  77 ++++++++++--------------
 10 files changed, 103 insertions(+), 160 deletions(-)

diff --git a/[refs] b/[refs]
index bd1e859d257d..f7b901cca55d 100644
--- a/[refs]
+++ b/[refs]
@@ -1,2 +1,2 @@
 ---
-refs/heads/master: 1b427c153a08fdbc092c2bdbf845b92fda58d857
+refs/heads/master: 577b4a58d2e74a4d48050eeea3e3f952ce04eb86
diff --git a/trunk/include/linux/cpumask.h b/trunk/include/linux/cpumask.h
index d614d2472798..c24875bd9c5b 100644
--- a/trunk/include/linux/cpumask.h
+++ b/trunk/include/linux/cpumask.h
@@ -359,14 +359,13 @@ static inline void __cpus_fold(cpumask_t *dstp, const cpumask_t *origp,
 
 /*
  * The following particular system cpumasks and operations manage
- * possible, present, active and online cpus. Each of them is a fixed size
+ * possible, present and online cpus. Each of them is a fixed size
  * bitmap of size NR_CPUS.
  *
  *  #ifdef CONFIG_HOTPLUG_CPU
  *     cpu_possible_map - has bit 'cpu' set iff cpu is populatable
  *     cpu_present_map  - has bit 'cpu' set iff cpu is populated
  *     cpu_online_map   - has bit 'cpu' set iff cpu available to scheduler
- *     cpu_active_map   - has bit 'cpu' set iff cpu available to migration
 *  #else
 *     cpu_possible_map - has bit 'cpu' set iff cpu is populated
 *     cpu_present_map  - copy of cpu_possible_map
@@ -417,7 +416,6 @@ static inline void __cpus_fold(cpumask_t *dstp, const cpumask_t *origp,
 extern cpumask_t cpu_possible_map;
 extern cpumask_t cpu_online_map;
 extern cpumask_t cpu_present_map;
-extern cpumask_t cpu_active_map;
 
 #if NR_CPUS > 1
 #define num_online_cpus() cpus_weight(cpu_online_map)
@@ -426,7 +424,6 @@ extern cpumask_t cpu_active_map;
 #define cpu_online(cpu) cpu_isset((cpu), cpu_online_map)
 #define cpu_possible(cpu) cpu_isset((cpu), cpu_possible_map)
 #define cpu_present(cpu) cpu_isset((cpu), cpu_present_map)
-#define cpu_active(cpu) cpu_isset((cpu), cpu_active_map)
 #else
 #define num_online_cpus() 1
 #define num_possible_cpus() 1
@@ -434,7 +431,6 @@ extern cpumask_t cpu_active_map;
 #define cpu_online(cpu) ((cpu) == 0)
 #define cpu_possible(cpu) ((cpu) == 0)
 #define cpu_present(cpu) ((cpu) == 0)
-#define cpu_active(cpu) ((cpu) == 0)
 #endif
 
 #define cpu_is_offline(cpu) unlikely(!cpu_online(cpu))
diff --git a/trunk/include/linux/cpuset.h b/trunk/include/linux/cpuset.h
index e8f450c499b0..038578362b47 100644
--- a/trunk/include/linux/cpuset.h
+++ b/trunk/include/linux/cpuset.h
@@ -78,8 +78,6 @@ extern void cpuset_track_online_nodes(void);
 
 extern int current_cpuset_is_being_rebound(void);
 
-extern void rebuild_sched_domains(void);
-
 #else /* !CONFIG_CPUSETS */
 
 static inline int cpuset_init_early(void) { return 0; }
@@ -158,11 +156,6 @@ static inline int current_cpuset_is_being_rebound(void)
 	return 0;
 }
 
-static inline void rebuild_sched_domains(void)
-{
-	partition_sched_domains(0, NULL, NULL);
-}
-
 #endif /* !CONFIG_CPUSETS */
 
 #endif /* _LINUX_CPUSET_H */
diff --git a/trunk/include/linux/sched.h b/trunk/include/linux/sched.h
index 26da921530fe..1941d8b5cf11 100644
--- a/trunk/include/linux/sched.h
+++ b/trunk/include/linux/sched.h
@@ -824,16 +824,7 @@ extern void partition_sched_domains(int ndoms_new, cpumask_t *doms_new,
 				    struct sched_domain_attr *dattr_new);
 extern int arch_reinit_sched_domains(void);
 
-#else /* CONFIG_SMP */
-
-struct sched_domain_attr;
-
-static inline void
-partition_sched_domains(int ndoms_new, cpumask_t *doms_new,
-			struct sched_domain_attr *dattr_new)
-{
-}
-#endif /* !CONFIG_SMP */
+#endif /* CONFIG_SMP */
 
 struct io_context; /* See blkdev.h */
 #define NGROUPS_SMALL 32
diff --git a/trunk/init/main.c b/trunk/init/main.c
index dd25259530ea..edeace036fd9 100644
--- a/trunk/init/main.c
+++ b/trunk/init/main.c
@@ -415,13 +415,6 @@ static void __init smp_init(void)
 {
 	unsigned int cpu;
 
-	/*
-	 * Set up the current CPU as possible to migrate to.
-	 * The other ones will be done by cpu_up/cpu_down()
-	 */
-	cpu = smp_processor_id();
-	cpu_set(cpu, cpu_active_map);
-
 	/* FIXME: This should be done in userspace --RR */
 	for_each_present_cpu(cpu) {
 		if (num_online_cpus() >= setup_max_cpus)
diff --git a/trunk/kernel/cpu.c b/trunk/kernel/cpu.c
index 033603c1d7c3..cfb1d43ab801 100644
--- a/trunk/kernel/cpu.c
+++ b/trunk/kernel/cpu.c
@@ -64,8 +64,6 @@ void __init cpu_hotplug_init(void)
 	cpu_hotplug.refcount = 0;
 }
 
-cpumask_t cpu_active_map;
-
 #ifdef CONFIG_HOTPLUG_CPU
 
 void get_online_cpus(void)
@@ -293,30 +291,11 @@ int __ref cpu_down(unsigned int cpu)
 	int err = 0;
 
 	cpu_maps_update_begin();
-
-	if (cpu_hotplug_disabled) {
+	if (cpu_hotplug_disabled)
 		err = -EBUSY;
-		goto out;
-	}
-
-	cpu_clear(cpu, cpu_active_map);
-
-	/*
-	 * Make sure the all cpus did the reschedule and are not
-	 * using stale version of the cpu_active_map.
-	 * This is not strictly necessary becuase stop_machine()
-	 * that we run down the line already provides the required
-	 * synchronization. But it's really a side effect and we do not
-	 * want to depend on the innards of the stop_machine here.
-	 */
-	synchronize_sched();
-
-	err = _cpu_down(cpu, 0);
+	else
+		err = _cpu_down(cpu, 0);
 
-	if (cpu_online(cpu))
-		cpu_set(cpu, cpu_active_map);
-
-out:
 	cpu_maps_update_done();
 	return err;
 }
@@ -376,18 +355,11 @@ int __cpuinit cpu_up(unsigned int cpu)
 	}
 
 	cpu_maps_update_begin();
-
-	if (cpu_hotplug_disabled) {
+	if (cpu_hotplug_disabled)
 		err = -EBUSY;
-		goto out;
-	}
-
-	err = _cpu_up(cpu, 0);
+	else
+		err = _cpu_up(cpu, 0);
 
-	if (cpu_online(cpu))
-		cpu_set(cpu, cpu_active_map);
-
-out:
 	cpu_maps_update_done();
 	return err;
 }
diff --git a/trunk/kernel/cpuset.c b/trunk/kernel/cpuset.c
index 3c3ef02f65f1..459d601947a8 100644
--- a/trunk/kernel/cpuset.c
+++ b/trunk/kernel/cpuset.c
@@ -564,7 +564,7 @@ update_domain_attr(struct sched_domain_attr *dattr, struct cpuset *c)
  * partition_sched_domains().
  */
 
-void rebuild_sched_domains(void)
+static void rebuild_sched_domains(void)
 {
 	struct kfifo *q; /* queue of cpusets to be scanned */
 	struct cpuset *cp; /* scans q */
diff --git a/trunk/kernel/sched.c b/trunk/kernel/sched.c
index c237624a8a04..1ee18dbb4516 100644
--- a/trunk/kernel/sched.c
+++ b/trunk/kernel/sched.c
@@ -2881,7 +2881,7 @@ static void sched_migrate_task(struct task_struct *p, int dest_cpu)
 
 	rq = task_rq_lock(p, &flags);
 	if (!cpu_isset(dest_cpu, p->cpus_allowed)
-	    || unlikely(!cpu_active(dest_cpu)))
+	    || unlikely(cpu_is_offline(dest_cpu)))
 		goto out;
 
 	/* force the process onto the specified CPU */
@@ -3849,7 +3849,7 @@ int select_nohz_load_balancer(int stop_tick)
 		/*
 		 * If we are going offline and still the leader, give up!
 		 */
-		if (!cpu_active(cpu) &&
+		if (cpu_is_offline(cpu) &&
 		    atomic_read(&nohz.load_balancer) == cpu) {
 			if (atomic_cmpxchg(&nohz.load_balancer, cpu, -1) != cpu)
 				BUG();
@@ -5876,7 +5876,7 @@ static int __migrate_task(struct task_struct *p, int src_cpu, int dest_cpu)
 	struct rq *rq_dest, *rq_src;
 	int ret = 0, on_rq;
 
-	if (unlikely(!cpu_active(dest_cpu)))
+	if (unlikely(cpu_is_offline(dest_cpu)))
 		return ret;
 
 	rq_src = cpu_rq(src_cpu);
@@ -7553,6 +7553,18 @@ void __attribute__((weak)) arch_update_cpu_topology(void)
 {
 }
 
+/*
+ * Free current domain masks.
+ * Called after all cpus are attached to NULL domain.
+ */
+static void free_sched_domains(void)
+{
+	ndoms_cur = 0;
+	if (doms_cur != &fallback_doms)
+		kfree(doms_cur);
+	doms_cur = &fallback_doms;
+}
+
 /*
  * Set up scheduler domains and groups. Callers must hold the hotplug lock.
  * For now this just excludes isolated cpus, but could be used to
@@ -7631,7 +7643,7 @@ static int dattrs_equal(struct sched_domain_attr *cur, int idx_cur,
  * ownership of it and will kfree it when done with it. If the caller
 * failed the kmalloc call, then it can pass in doms_new == NULL,
 * and partition_sched_domains() will fallback to the single partition
- * 'fallback_doms', it also forces the domains to be rebuilt.
+ * 'fallback_doms'.
 *
 * Call with hotplug lock held
 */
@@ -7645,8 +7657,12 @@ void partition_sched_domains(int ndoms_new, cpumask_t *doms_new,
 	/* always unregister in case we don't destroy any domains */
 	unregister_sched_domain_sysctl();
 
-	if (doms_new == NULL)
-		ndoms_new = 0;
+	if (doms_new == NULL) {
+		ndoms_new = 1;
+		doms_new = &fallback_doms;
+		cpus_andnot(doms_new[0], cpu_online_map, cpu_isolated_map);
+		dattr_new = NULL;
+	}
 
 	/* Destroy deleted domains */
 	for (i = 0; i < ndoms_cur; i++) {
@@ -7661,14 +7677,6 @@ void partition_sched_domains(int ndoms_new, cpumask_t *doms_new,
 		;
 	}
 
-	if (doms_new == NULL) {
-		ndoms_cur = 0;
-		ndoms_new = 1;
-		doms_new = &fallback_doms;
-		cpus_andnot(doms_new[0], cpu_online_map, cpu_isolated_map);
-		dattr_new = NULL;
-	}
-
 	/* Build new domains */
 	for (i = 0; i < ndoms_new; i++) {
 		for (j = 0; j < ndoms_cur; j++) {
@@ -7699,10 +7707,17 @@ void partition_sched_domains(int ndoms_new, cpumask_t *doms_new,
 #if defined(CONFIG_SCHED_MC) || defined(CONFIG_SCHED_SMT)
 int arch_reinit_sched_domains(void)
 {
+	int err;
+
 	get_online_cpus();
-	rebuild_sched_domains();
+	mutex_lock(&sched_domains_mutex);
+	detach_destroy_domains(&cpu_online_map);
+	free_sched_domains();
+	err = arch_init_sched_domains(&cpu_online_map);
+	mutex_unlock(&sched_domains_mutex);
 	put_online_cpus();
-	return 0;
+
+	return err;
 }
 
 static ssize_t sched_power_savings_store(const char *buf, size_t count, int smt)
@@ -7768,30 +7783,14 @@ int sched_create_sysfs_power_savings_entries(struct sysdev_class *cls)
 }
 #endif /* CONFIG_SCHED_MC || CONFIG_SCHED_SMT */
 
-#ifndef CONFIG_CPUSETS
 /*
- * Add online and remove offline CPUs from the scheduler domains.
- * When cpusets are enabled they take over this function.
+ * Force a reinitialization of the sched domains hierarchy. The domains
+ * and groups cannot be updated in place without racing with the balancing
+ * code, so we temporarily attach all running cpus to the NULL domain
+ * which will prevent rebalancing while the sched domains are recalculated.
 */
 static int update_sched_domains(struct notifier_block *nfb,
 				unsigned long action, void *hcpu)
-{
-	switch (action) {
-	case CPU_ONLINE:
-	case CPU_ONLINE_FROZEN:
-	case CPU_DEAD:
-	case CPU_DEAD_FROZEN:
-		partition_sched_domains(0, NULL, NULL);
-		return NOTIFY_OK;
-
-	default:
-		return NOTIFY_DONE;
-	}
-}
-#endif
-
-static int update_runtime(struct notifier_block *nfb,
-				unsigned long action, void *hcpu)
 {
 	int cpu = (int)(long)hcpu;
 
@@ -7799,18 +7798,44 @@ static int update_runtime(struct notifier_block *nfb,
 	case CPU_DOWN_PREPARE:
 	case CPU_DOWN_PREPARE_FROZEN:
 		disable_runtime(cpu_rq(cpu));
+		/* fall-through */
+	case CPU_UP_PREPARE:
+	case CPU_UP_PREPARE_FROZEN:
+		detach_destroy_domains(&cpu_online_map);
+		free_sched_domains();
 		return NOTIFY_OK;
+
 
 	case CPU_DOWN_FAILED:
 	case CPU_DOWN_FAILED_FROZEN:
 	case CPU_ONLINE:
 	case CPU_ONLINE_FROZEN:
 		enable_runtime(cpu_rq(cpu));
-		return NOTIFY_OK;
-
+		/* fall-through */
+	case CPU_UP_CANCELED:
+	case CPU_UP_CANCELED_FROZEN:
+	case CPU_DEAD:
+	case CPU_DEAD_FROZEN:
+		/*
+		 * Fall through and re-initialise the domains.
+		 */
+		break;
 	default:
 		return NOTIFY_DONE;
 	}
+
+#ifndef CONFIG_CPUSETS
+	/*
+	 * Create default domain partitioning if cpusets are disabled.
+	 * Otherwise we let cpusets rebuild the domains based on the
+	 * current setup.
+	 */
+
+	/* The hotplug lock is already held by cpu_up/cpu_down */
+	arch_init_sched_domains(&cpu_online_map);
+#endif
+
+	return NOTIFY_OK;
 }
 
 void __init sched_init_smp(void)
@@ -7830,15 +7855,8 @@ void __init sched_init_smp(void)
 	cpu_set(smp_processor_id(), non_isolated_cpus);
 	mutex_unlock(&sched_domains_mutex);
 	put_online_cpus();
-
-#ifndef CONFIG_CPUSETS
 	/* XXX: Theoretical race here - CPU may be hotplugged now */
 	hotcpu_notifier(update_sched_domains, 0);
-#endif
-
-	/* RT runtime code needs to handle some hotplug events */
-	hotcpu_notifier(update_runtime, 0);
-
 	init_hrtick();
 
 	/* Move init over to a non-isolated CPU */
diff --git a/trunk/kernel/sched_fair.c b/trunk/kernel/sched_fair.c
index d924c679dfac..f2aa987027d6 100644
--- a/trunk/kernel/sched_fair.c
+++ b/trunk/kernel/sched_fair.c
@@ -1004,8 +1004,6 @@ static void yield_task_fair(struct rq *rq)
 * not idle and an idle cpu is available. The span of cpus to
 * search starts with cpus closest then further out as needed,
 * so we always favor a closer, idle cpu.
- * Domains may include CPUs that are not usable for migration,
- * hence we need to mask them out (cpu_active_map)
 *
 * Returns the CPU we should wake onto.
 */
@@ -1033,7 +1031,6 @@ static int wake_idle(int cpu, struct task_struct *p)
 		    || ((sd->flags & SD_WAKE_IDLE_FAR)
 			&& !task_hot(p, task_rq(p)->clock, sd))) {
 			cpus_and(tmp, sd->span, p->cpus_allowed);
-			cpus_and(tmp, tmp, cpu_active_map);
 			for_each_cpu_mask(i, tmp) {
 				if (idle_cpu(i)) {
 					if (i != task_cpu(p)) {
diff --git a/trunk/kernel/sched_rt.c b/trunk/kernel/sched_rt.c
index 50735bb96149..147004c651c0 100644
--- a/trunk/kernel/sched_rt.c
+++ b/trunk/kernel/sched_rt.c
@@ -505,7 +505,9 @@ void inc_rt_tasks(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
 	rt_rq->rt_nr_running++;
 #if defined CONFIG_SMP || defined CONFIG_RT_GROUP_SCHED
 	if (rt_se_prio(rt_se) < rt_rq->highest_prio) {
+#ifdef CONFIG_SMP
 		struct rq *rq = rq_of_rt_rq(rt_rq);
+#endif
 
 		rt_rq->highest_prio = rt_se_prio(rt_se);
 #ifdef CONFIG_SMP
@@ -599,7 +601,11 @@ static void __enqueue_rt_entity(struct sched_rt_entity *rt_se)
 	if (group_rq && (rt_rq_throttled(group_rq) || !group_rq->rt_nr_running))
 		return;
 
-	list_add_tail(&rt_se->run_list, queue);
+	if (rt_se->nr_cpus_allowed == 1)
+		list_add(&rt_se->run_list, queue);
+	else
+		list_add_tail(&rt_se->run_list, queue);
+
 	__set_bit(rt_se_prio(rt_se), array->bitmap);
 
 	inc_rt_tasks(rt_se, rt_rq);
@@ -684,34 +690,32 @@ static void dequeue_task_rt(struct rq *rq, struct task_struct *p, int sleep)
 * Put task to the end of the run list without the overhead of dequeue
 * followed by enqueue.
 */
-static void
-requeue_rt_entity(struct rt_rq *rt_rq, struct sched_rt_entity *rt_se, int head)
+static
+void requeue_rt_entity(struct rt_rq *rt_rq, struct sched_rt_entity *rt_se)
 {
-	if (on_rt_rq(rt_se)) {
-		struct rt_prio_array *array = &rt_rq->active;
-		struct list_head *queue = array->queue + rt_se_prio(rt_se);
+	struct rt_prio_array *array = &rt_rq->active;
 
-		if (head)
-			list_move(&rt_se->run_list, queue);
-		else
-			list_move_tail(&rt_se->run_list, queue);
+	if (on_rt_rq(rt_se)) {
+		list_del_init(&rt_se->run_list);
+		list_add_tail(&rt_se->run_list,
+			      array->queue + rt_se_prio(rt_se));
 	}
 }
 
-static void requeue_task_rt(struct rq *rq, struct task_struct *p, int head)
+static void requeue_task_rt(struct rq *rq, struct task_struct *p)
 {
 	struct sched_rt_entity *rt_se = &p->rt;
 	struct rt_rq *rt_rq;
 
 	for_each_sched_rt_entity(rt_se) {
 		rt_rq = rt_rq_of_se(rt_se);
-		requeue_rt_entity(rt_rq, rt_se, head);
+		requeue_rt_entity(rt_rq, rt_se);
 	}
 }
 
 static void yield_task_rt(struct rq *rq)
 {
-	requeue_task_rt(rq, rq->curr, 0);
+	requeue_task_rt(rq, rq->curr);
 }
 
 #ifdef CONFIG_SMP
@@ -751,30 +755,6 @@ static int select_task_rq_rt(struct task_struct *p, int sync)
 	 */
 	return task_cpu(p);
 }
-
-static void check_preempt_equal_prio(struct rq *rq, struct task_struct *p)
-{
-	cpumask_t mask;
-
-	if (rq->curr->rt.nr_cpus_allowed == 1)
-		return;
-
-	if (p->rt.nr_cpus_allowed != 1
-	    && cpupri_find(&rq->rd->cpupri, p, &mask))
-		return;
-
-	if (!cpupri_find(&rq->rd->cpupri, rq->curr, &mask))
-		return;
-
-	/*
-	 * There appears to be other cpus that can accept
-	 * current and none to run 'p', so lets reschedule
-	 * to try and push current away:
-	 */
-	requeue_task_rt(rq, p, 1);
-	resched_task(rq->curr);
-}
-
 #endif /* CONFIG_SMP */
 
 /*
@@ -800,8 +780,18 @@ static void check_preempt_curr_rt(struct rq *rq, struct task_struct *p)
 	 * to move current somewhere else, making room for our non-migratable
 	 * task.
 	 */
-	if (p->prio == rq->curr->prio && !need_resched())
-		check_preempt_equal_prio(rq, p);
+	if((p->prio == rq->curr->prio)
+	   && p->rt.nr_cpus_allowed == 1
+	   && rq->curr->rt.nr_cpus_allowed != 1) {
+		cpumask_t mask;
+
+		if (cpupri_find(&rq->rd->cpupri, rq->curr, &mask))
+			/*
+			 * There appears to be other cpus that can accept
+			 * current, so lets reschedule to try and push it away
+			 */
+			resched_task(rq->curr);
+	}
 #endif
 }
 
@@ -933,13 +923,6 @@ static int find_lowest_rq(struct task_struct *task)
 	if (!cpupri_find(&task_rq(task)->rd->cpupri, task, lowest_mask))
 		return -1; /* No targets found */
 
-	/*
-	 * Only consider CPUs that are usable for migration.
-	 * I guess we might want to change cpupri_find() to ignore those
-	 * in the first place.
-	 */
-	cpus_and(*lowest_mask, *lowest_mask, cpu_active_map);
-
 	/*
 	 * At this point we have built a mask of cpus representing the
 	 * lowest priority tasks in the system. Now we want to elect
@@ -1434,7 +1417,7 @@ static void task_tick_rt(struct rq *rq, struct task_struct *p, int queued)
 	 * on the queue:
 	 */
 	if (p->rt.run_list.prev != p->rt.run_list.next) {
-		requeue_task_rt(rq, p, 0);
+		requeue_task_rt(rq, p);
 		set_tsk_need_resched(p);
 	}
 }
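
A note on the mechanism the patch restores: update_sched_domains() is wired into CPU hotplug through hotcpu_notifier(), so the scheduler tears down and rebuilds its domains on events such as CPU_ONLINE and CPU_DEAD rather than relying on cpu_up()/cpu_down() touching a cpu_active_map. Below is a minimal sketch of that registration pattern under the 2.6.26-era API; the callback and init-function names are illustrative only and are not part of this patch.

#include <linux/cpu.h>
#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/notifier.h>

/*
 * Hypothetical hotplug callback, following the same shape as
 * update_sched_domains() above: react to the transitions of interest,
 * return NOTIFY_DONE for everything else.
 */
static int example_cpu_callback(struct notifier_block *nfb,
				unsigned long action, void *hcpu)
{
	int cpu = (int)(long)hcpu;

	switch (action) {
	case CPU_ONLINE:
	case CPU_ONLINE_FROZEN:
	case CPU_DEAD:
	case CPU_DEAD_FROZEN:
		/* Rebuild whatever per-CPU state depends on 'cpu'. */
		printk(KERN_INFO "cpu %d changed state (action %lu)\n",
		       cpu, action);
		return NOTIFY_OK;
	default:
		return NOTIFY_DONE;
	}
}

static int __init example_hotplug_init(void)
{
	/* Priority 0, matching hotcpu_notifier(update_sched_domains, 0). */
	hotcpu_notifier(example_cpu_callback, 0);
	return 0;
}
device_initcall(example_hotplug_init);

The callback runs with the hotplug lock already held for the UP/DOWN prepare and done notifications, which is why the restored update_sched_domains() can call arch_init_sched_domains() directly without taking get_online_cpus() itself.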