diff --git a/[refs] b/[refs]
index 7a7c5dbaf84e..13beca71559f 100644
--- a/[refs]
+++ b/[refs]
@@ -1,2 +1,2 @@
 ---
-refs/heads/master: c40c6f85a7594ad842233885386a0ca4cd40eafe
+refs/heads/master: b342501cd31e5546d0c9ca8ceff5ded1832f9e5b
diff --git a/trunk/include/linux/init_task.h b/trunk/include/linux/init_task.h
index af1de95e711e..e752d973fa21 100644
--- a/trunk/include/linux/init_task.h
+++ b/trunk/include/linux/init_task.h
@@ -147,7 +147,6 @@ extern struct cred init_cred;
 		.nr_cpus_allowed = NR_CPUS, \
 	}, \
 	.tasks = LIST_HEAD_INIT(tsk.tasks), \
-	.pushable_tasks = PLIST_NODE_INIT(tsk.pushable_tasks, MAX_PRIO), \
 	.ptraced = LIST_HEAD_INIT(tsk.ptraced), \
 	.ptrace_entry = LIST_HEAD_INIT(tsk.ptrace_entry), \
 	.real_parent = &tsk, \
diff --git a/trunk/include/linux/latencytop.h b/trunk/include/linux/latencytop.h
index b0e99898527c..901c2d6377a8 100644
--- a/trunk/include/linux/latencytop.h
+++ b/trunk/include/linux/latencytop.h
@@ -9,7 +9,6 @@
 #ifndef _INCLUDE_GUARD_LATENCYTOP_H_
 #define _INCLUDE_GUARD_LATENCYTOP_H_
-#include
 #ifdef CONFIG_LATENCYTOP
 #define LT_SAVECOUNT 32
@@ -25,14 +24,7 @@ struct latency_record {
 struct task_struct;
-extern int latencytop_enabled;
-void __account_scheduler_latency(struct task_struct *task, int usecs, int inter);
-static inline void
-account_scheduler_latency(struct task_struct *task, int usecs, int inter)
-{
-	if (unlikely(latencytop_enabled))
-		__account_scheduler_latency(task, usecs, inter);
-}
+void account_scheduler_latency(struct task_struct *task, int usecs, int inter);
 void clear_all_latency_tracing(struct task_struct *p);
diff --git a/trunk/include/linux/plist.h b/trunk/include/linux/plist.h
index 45926d77d6ac..85de2f055874 100644
--- a/trunk/include/linux/plist.h
+++ b/trunk/include/linux/plist.h
@@ -96,10 +96,6 @@ struct plist_node {
 # define PLIST_HEAD_LOCK_INIT(_lock)
 #endif
-#define _PLIST_HEAD_INIT(head) \
-	.prio_list = LIST_HEAD_INIT((head).prio_list), \
-	.node_list = LIST_HEAD_INIT((head).node_list)
-
 /**
  * PLIST_HEAD_INIT - static struct plist_head initializer
  * @head: struct plist_head variable name
@@ -107,7 +103,8 @@ struct plist_node {
  */
 #define PLIST_HEAD_INIT(head, _lock) \
 { \
-	_PLIST_HEAD_INIT(head), \
+	.prio_list = LIST_HEAD_INIT((head).prio_list), \
+	.node_list = LIST_HEAD_INIT((head).node_list), \
 	PLIST_HEAD_LOCK_INIT(&(_lock)) \
 }
@@ -119,7 +116,7 @@ struct plist_node {
 #define PLIST_NODE_INIT(node, __prio) \
 { \
 	.prio = (__prio), \
-	.plist = { _PLIST_HEAD_INIT((node).plist) }, \
+	.plist = PLIST_HEAD_INIT((node).plist, NULL), \
 }
 /**
diff --git a/trunk/include/linux/sched.h b/trunk/include/linux/sched.h
index b339a0bef024..a063d19b7a7d 100644
--- a/trunk/include/linux/sched.h
+++ b/trunk/include/linux/sched.h
@@ -998,7 +998,6 @@ struct sched_class {
 			struct rq *busiest, struct sched_domain *sd,
 			enum cpu_idle_type idle);
 	void (*pre_schedule) (struct rq *this_rq, struct task_struct *task);
-	int (*needs_post_schedule) (struct rq *this_rq);
 	void (*post_schedule) (struct rq *this_rq);
 	void (*task_wake_up) (struct rq *this_rq, struct task_struct *task);
@@ -1053,10 +1052,6 @@ struct sched_entity {
 	u64 last_wakeup;
 	u64 avg_overlap;
-	u64 start_runtime;
-	u64 avg_wakeup;
-	u64 nr_migrations;
-
 #ifdef CONFIG_SCHEDSTATS
 	u64 wait_start;
 	u64 wait_max;
@@ -1072,6 +1067,7 @@ struct sched_entity {
 	u64 exec_max;
 	u64 slice_max;
+	u64 nr_migrations;
 	u64 nr_migrations_cold;
 	u64 nr_failed_migrations_affine;
 	u64 nr_failed_migrations_running;
@@ -1168,7 +1164,6 @@ struct task_struct {
 #endif
 	struct list_head tasks;
-	struct plist_node pushable_tasks;
 	struct mm_struct *mm, *active_mm;
@@ -1675,6 +1670,16 @@ static inline int set_cpus_allowed(struct task_struct *p, cpumask_t new_mask)
 	return set_cpus_allowed_ptr(p, &new_mask);
 }
+/*
+ * Architectures can set this to 1 if they have specified
+ * CONFIG_HAVE_UNSTABLE_SCHED_CLOCK in their arch Kconfig,
+ * but then during bootup it turns out that sched_clock()
+ * is reliable after all:
+ */
+#ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
+extern int sched_clock_stable;
+#endif
+
 extern unsigned long long sched_clock(void);
 extern void sched_clock_init(void);
diff --git a/trunk/init/Kconfig b/trunk/init/Kconfig
index a90fcbeeb9d2..f068071fcc5d 100644
--- a/trunk/init/Kconfig
+++ b/trunk/init/Kconfig
@@ -966,6 +966,7 @@ config SLABINFO
 config RT_MUTEXES
 	boolean
+	select PLIST
 config BASE_SMALL
 	int
diff --git a/trunk/kernel/latencytop.c b/trunk/kernel/latencytop.c
index ca07c5c0c914..449db466bdbc 100644
--- a/trunk/kernel/latencytop.c
+++ b/trunk/kernel/latencytop.c
@@ -9,44 +9,6 @@
  * as published by the Free Software Foundation; version 2
  * of the License.
  */
-
-/*
- * CONFIG_LATENCYTOP enables a kernel latency tracking infrastructure that is
- * used by the "latencytop" userspace tool. The latency that is tracked is not
- * the 'traditional' interrupt latency (which is primarily caused by something
- * else consuming CPU), but instead, it is the latency an application encounters
- * because the kernel sleeps on its behalf for various reasons.
- *
- * This code tracks 2 levels of statistics:
- * 1) System level latency
- * 2) Per process latency
- *
- * The latency is stored in fixed sized data structures in an accumulated form;
- * if the "same" latency cause is hit twice, this will be tracked as one entry
- * in the data structure. Both the count, total accumulated latency and maximum
- * latency are tracked in this data structure. When the fixed size structure is
- * full, no new causes are tracked until the buffer is flushed by writing to
- * the /proc file; the userspace tool does this on a regular basis.
- *
- * A latency cause is identified by a stringified backtrace at the point that
- * the scheduler gets invoked. The userland tool will use this string to
- * identify the cause of the latency in human readable form.
- *
- * The information is exported via /proc/latency_stats and /proc/<pid>/latency.
- * These files look like this:
- *
- * Latency Top version : v0.1
- * 70 59433 4897 i915_irq_wait drm_ioctl vfs_ioctl do_vfs_ioctl sys_ioctl
- * |    |    |    |
- * |    |    |    +----> the stringified backtrace
- * |    |    +---------> The maximum latency for this entry in microseconds
- * |    +--------------> The accumulated latency for this entry (microseconds)
- * +-------------------> The number of times this entry is hit
- *
- * (note: the average latency is the accumulated latency divided by the number
- * of times)
- */
-
 #include
 #include
 #include
@@ -110,7 +72,7 @@ account_global_scheduler_latency(struct task_struct *tsk, struct latency_record
 			firstnonnull = i;
 			continue;
 		}
-		for (q = 0; q < LT_BACKTRACEDEPTH; q++) {
+		for (q = 0 ; q < LT_BACKTRACEDEPTH ; q++) {
 			unsigned long record = lat->backtrace[q];
 			if (latency_record[i].backtrace[q] != record) {
@@ -139,50 +101,29 @@ account_global_scheduler_latency(struct task_struct *tsk, struct latency_record
 	memcpy(&latency_record[i], lat, sizeof(struct latency_record));
 }
-/*
- * Iterator to store a backtrace into a latency record entry
- */
-static inline void store_stacktrace(struct task_struct *tsk,
-					struct latency_record *lat)
+static inline void store_stacktrace(struct task_struct *tsk, struct latency_record *lat)
 {
 	struct stack_trace trace;
 	memset(&trace, 0, sizeof(trace));
 	trace.max_entries = LT_BACKTRACEDEPTH;
 	trace.entries = &lat->backtrace[0];
+	trace.skip = 0;
 	save_stack_trace_tsk(tsk, &trace);
 }
-/**
- * __account_scheduler_latency - record an occured latency
- * @tsk - the task struct of the task hitting the latency
- * @usecs - the duration of the latency in microseconds
- * @inter - 1 if the sleep was interruptible, 0 if uninterruptible
- *
- * This function is the main entry point for recording latency entries
- * as called by the scheduler.
- *
- * This function has a few special cases to deal with normal 'non-latency'
- * sleeps: specifically, interruptible sleep longer than 5 msec is skipped
- * since this usually is caused by waiting for events via select() and co.
- *
- * Negative latencies (caused by time going backwards) are also explicitly
- * skipped.
- */
 void __sched
-__account_scheduler_latency(struct task_struct *tsk, int usecs, int inter)
+account_scheduler_latency(struct task_struct *tsk, int usecs, int inter)
 {
 	unsigned long flags;
 	int i, q;
 	struct latency_record lat;
-	/* Long interruptible waits are generally user requested... */
-	if (inter && usecs > 5000)
+	if (!latencytop_enabled)
 		return;
-	/* Negative sleeps are time going backwards */
-	/* Zero-time sleeps are non-interesting */
-	if (usecs <= 0)
+	/* Long interruptible waits are generally user requested... */
+	if (inter && usecs > 5000)
 		return;
 	memset(&lat, 0, sizeof(lat));
@@ -202,12 +143,12 @@ __account_scheduler_latency(struct task_struct *tsk, int usecs, int inter)
 	if (tsk->latency_record_count >= LT_SAVECOUNT)
 		goto out_unlock;
-	for (i = 0; i < LT_SAVECOUNT; i++) {
+	for (i = 0; i < LT_SAVECOUNT ; i++) {
 		struct latency_record *mylat;
 		int same = 1;
 		mylat = &tsk->latency_record[i];
-		for (q = 0; q < LT_BACKTRACEDEPTH; q++) {
+		for (q = 0 ; q < LT_BACKTRACEDEPTH ; q++) {
 			unsigned long record = lat.backtrace[q];
 			if (mylat->backtrace[q] != record) {
@@ -245,7 +186,7 @@ static int lstats_show(struct seq_file *m, void *v)
 	for (i = 0; i < MAXLR; i++) {
 		if (latency_record[i].backtrace[0]) {
 			int q;
-			seq_printf(m, "%i %lu %lu ",
+			seq_printf(m, "%i %li %li ",
 				latency_record[i].count,
 				latency_record[i].time,
 				latency_record[i].max);
@@ -282,7 +223,7 @@ static int lstats_open(struct inode *inode, struct file *filp)
 	return single_open(filp, lstats_show, NULL);
 }
-static const struct file_operations lstats_fops = {
+static struct file_operations lstats_fops = {
 	.open = lstats_open,
 	.read = seq_read,
 	.write = lstats_write,
@@ -295,4 +236,4 @@ static int __init init_lstats_procfs(void)
 	proc_create("latency_stats", 0644, NULL, &lstats_fops);
 	return 0;
 }
-device_initcall(init_lstats_procfs);
+__initcall(init_lstats_procfs);
diff --git a/trunk/kernel/sched.c b/trunk/kernel/sched.c
index 8e63ffb6ed05..410eec404133 100644
--- a/trunk/kernel/sched.c
+++ b/trunk/kernel/sched.c
@@ -467,17 +467,11 @@ struct rt_rq {
 	struct rt_prio_array active;
 	unsigned long rt_nr_running;
 #if defined CONFIG_SMP || defined CONFIG_RT_GROUP_SCHED
-	struct {
-		int curr; /* highest queued rt task prio */
-#ifdef CONFIG_SMP
-		int next; /* next highest */
-#endif
-	} highest_prio;
+	int highest_prio; /* highest queued rt task prio */
 #endif
 #ifdef CONFIG_SMP
 	unsigned long rt_nr_migratory;
 	int overloaded;
-	struct plist_head pushable_tasks;
 #endif
 	int rt_throttled;
 	u64 rt_time;
@@ -555,6 +549,7 @@ struct rq {
 	unsigned long nr_running;
 #define CPU_LOAD_IDX_MAX 5
 	unsigned long cpu_load[CPU_LOAD_IDX_MAX];
+	unsigned char idle_at_tick;
 #ifdef CONFIG_NO_HZ
 	unsigned long last_tick_seen;
 	unsigned char in_nohz_recently;
@@ -595,7 +590,6 @@ struct rq {
 	struct root_domain *rd;
 	struct sched_domain *sd;
-	unsigned char idle_at_tick;
 	/* For active balancing */
 	int active_balance;
 	int push_cpu;
@@ -1616,42 +1610,21 @@ static inline void update_shares_locked(struct rq *rq, struct sched_domain *sd)
 #endif
-#ifdef CONFIG_PREEMPT
-
-/*
- * fair double_lock_balance: Safely acquires both rq->locks in a fair
- * way at the expense of forcing extra atomic operations in all
- * invocations. This assures that the double_lock is acquired using the
- * same underlying policy as the spinlock_t on this architecture, which
- * reduces latency compared to the unfair variant below. However, it
- * also adds more overhead and therefore may reduce throughput.
- */
-static inline int _double_lock_balance(struct rq *this_rq, struct rq *busiest)
-	__releases(this_rq->lock)
-	__acquires(busiest->lock)
-	__acquires(this_rq->lock)
-{
-	spin_unlock(&this_rq->lock);
-	double_rq_lock(this_rq, busiest);
-
-	return 1;
-}
-
-#else
 /*
- * Unfair double_lock_balance: Optimizes throughput at the expense of
- * latency by eliminating extra atomic operations when the locks are
- * already in proper order on entry. This favors lower cpu-ids and will
- * grant the double lock to lower cpus over higher ids under contention,
- * regardless of entry order into the function.
+ * double_lock_balance - lock the busiest runqueue, this_rq is locked already.
  */
-static int _double_lock_balance(struct rq *this_rq, struct rq *busiest)
+static int double_lock_balance(struct rq *this_rq, struct rq *busiest)
 	__releases(this_rq->lock)
 	__acquires(busiest->lock)
 	__acquires(this_rq->lock)
 {
 	int ret = 0;
+	if (unlikely(!irqs_disabled())) {
+		/* printk() doesn't work good under rq->lock */
+		spin_unlock(&this_rq->lock);
+		BUG_ON(1);
+	}
 	if (unlikely(!spin_trylock(&busiest->lock))) {
 		if (busiest < this_rq) {
 			spin_unlock(&this_rq->lock);
@@ -1664,22 +1637,6 @@ static int _double_lock_balance(struct rq *this_rq, struct rq *busiest)
 	return ret;
 }
-#endif /* CONFIG_PREEMPT */
-
-/*
- * double_lock_balance - lock the busiest runqueue, this_rq is locked already.
- */
-static int double_lock_balance(struct rq *this_rq, struct rq *busiest)
-{
-	if (unlikely(!irqs_disabled())) {
-		/* printk() doesn't work good under rq->lock */
-		spin_unlock(&this_rq->lock);
-		BUG_ON(1);
-	}
-
-	return _double_lock_balance(this_rq, busiest);
-}
-
 static inline void double_unlock_balance(struct rq *this_rq, struct rq *busiest)
 	__releases(busiest->lock)
 {
@@ -1748,9 +1705,6 @@ static void update_avg(u64 *avg, u64 sample)
 static void enqueue_task(struct rq *rq, struct task_struct *p, int wakeup)
 {
-	if (wakeup)
-		p->se.start_runtime = p->se.sum_exec_runtime;
-
 	sched_info_queued(p);
 	p->sched_class->enqueue_task(rq, p, wakeup);
 	p->se.on_rq = 1;
@@ -1758,15 +1712,10 @@ static void enqueue_task(struct rq *rq, struct task_struct *p, int wakeup)
 static void dequeue_task(struct rq *rq, struct task_struct *p, int sleep)
 {
-	if (sleep) {
-		if (p->se.last_wakeup) {
-			update_avg(&p->se.avg_overlap,
-				p->se.sum_exec_runtime - p->se.last_wakeup);
-			p->se.last_wakeup = 0;
-		} else {
-			update_avg(&p->se.avg_wakeup,
-				sysctl_sched_wakeup_granularity);
-		}
+	if (sleep && p->se.last_wakeup) {
+		update_avg(&p->se.avg_overlap,
+			p->se.sum_exec_runtime - p->se.last_wakeup);
+		p->se.last_wakeup = 0;
 	}
 	sched_info_dequeued(p);
@@ -2396,22 +2345,6 @@ static int try_to_wake_up(struct task_struct *p, unsigned int state, int sync)
 	activate_task(rq, p, 1);
 	success = 1;
-	/*
-	 * Only attribute actual wakeups done by this task.
-	 */
-	if (!in_interrupt()) {
-		struct sched_entity *se = &current->se;
-		u64 sample = se->sum_exec_runtime;
-
-		if (se->last_wakeup)
-			sample -= se->last_wakeup;
-		else
-			sample -= se->start_runtime;
-		update_avg(&se->avg_wakeup, sample);
-
-		se->last_wakeup = se->sum_exec_runtime;
-	}
-
 out_running:
 	trace_sched_wakeup(rq, p, success);
 	check_preempt_curr(rq, p, sync);
@@ -2422,6 +2355,8 @@ static int try_to_wake_up(struct task_struct *p, unsigned int state, int sync)
 		p->sched_class->task_wake_up(rq, p);
 #endif
 out:
+	current->se.last_wakeup = current->se.sum_exec_runtime;
+
 	task_rq_unlock(rq, &flags);
 	return success;
@@ -2451,8 +2386,6 @@ static void __sched_fork(struct task_struct *p)
 	p->se.prev_sum_exec_runtime = 0;
 	p->se.last_wakeup = 0;
 	p->se.avg_overlap = 0;
-	p->se.start_runtime = 0;
-	p->se.avg_wakeup = sysctl_sched_wakeup_granularity;
 #ifdef CONFIG_SCHEDSTATS
 	p->se.wait_start = 0;
@@ -2515,8 +2448,6 @@ void sched_fork(struct task_struct *p, int clone_flags)
 	/* Want to start with kernel preemption disabled. */
 	task_thread_info(p)->preempt_count = 1;
 #endif
-	plist_node_init(&p->pushable_tasks, MAX_PRIO);
-
 	put_cpu();
 }
@@ -2657,12 +2588,6 @@ static void finish_task_switch(struct rq *rq, struct task_struct *prev)
 {
 	struct mm_struct *mm = rq->prev_mm;
 	long prev_state;
-#ifdef CONFIG_SMP
-	int post_schedule = 0;
-
-	if (current->sched_class->needs_post_schedule)
-		post_schedule = current->sched_class->needs_post_schedule(rq);
-#endif
 	rq->prev_mm = NULL;
@@ -2681,7 +2606,7 @@ static void finish_task_switch(struct rq *rq, struct task_struct *prev)
 	finish_arch_switch(prev);
 	finish_lock_switch(rq, prev);
 #ifdef CONFIG_SMP
-	if (post_schedule)
+	if (current->sched_class->post_schedule)
 		current->sched_class->post_schedule(rq);
 #endif
@@ -3062,16 +2987,6 @@ balance_tasks(struct rq *this_rq, int this_cpu, struct rq *busiest,
 		pulled++;
 		rem_load_move -= p->se.load.weight;
-#ifdef CONFIG_PREEMPT
-		/*
-		 * NEWIDLE balancing is a source of latency, so preemptible kernels
-		 * will stop after the first task is pulled to minimize the critical
-		 * section.
-		 */
-		if (idle == CPU_NEWLY_IDLE)
-			goto out;
-#endif
-
 		/*
 		 * We only want to steal up to the prescribed amount of weighted load.
 		 */
@@ -3118,15 +3033,9 @@ static int move_tasks(struct rq *this_rq, int this_cpu, struct rq *busiest,
 				sd, idle, all_pinned, &this_best_prio);
 		class = class->next;
-#ifdef CONFIG_PREEMPT
-		/*
-		 * NEWIDLE balancing is a source of latency, so preemptible
-		 * kernels will stop after the first task is pulled to minimize
-		 * the critical section.
-		 */
 		if (idle == CPU_NEWLY_IDLE && this_rq->nr_running)
 			break;
-#endif
+
 	} while (class && max_load_move > total_load_moved);
 	return total_load_moved > 0;
@@ -5236,7 +5145,7 @@ SYSCALL_DEFINE1(nice, int, increment)
 	if (increment > 40)
 		increment = 40;
-	nice = TASK_NICE(current) + increment;
+	nice = PRIO_TO_NICE(current->static_prio) + increment;
 	if (nice < -20)
 		nice = -20;
 	if (nice > 19)
@@ -8309,15 +8218,11 @@ static void init_rt_rq(struct rt_rq *rt_rq, struct rq *rq)
 	__set_bit(MAX_RT_PRIO, array->bitmap);
 #if defined CONFIG_SMP || defined CONFIG_RT_GROUP_SCHED
-	rt_rq->highest_prio.curr = MAX_RT_PRIO;
-#ifdef CONFIG_SMP
-	rt_rq->highest_prio.next = MAX_RT_PRIO;
-#endif
+	rt_rq->highest_prio = MAX_RT_PRIO;
 #endif
 #ifdef CONFIG_SMP
 	rt_rq->rt_nr_migratory = 0;
 	rt_rq->overloaded = 0;
-	plist_head_init(&rq->rt.pushable_tasks, &rq->lock);
 #endif
 	rt_rq->rt_time = 0;
@@ -9684,7 +9589,7 @@ static void cpuacct_charge(struct task_struct *tsk, u64 cputime)
 	struct cpuacct *ca;
 	int cpu;
-	if (unlikely(!cpuacct_subsys.active))
+	if (!cpuacct_subsys.active)
 		return;
 	cpu = task_cpu(tsk);
diff --git a/trunk/kernel/sched_clock.c b/trunk/kernel/sched_clock.c
index a0b0852414cc..a755d023805a 100644
--- a/trunk/kernel/sched_clock.c
+++ b/trunk/kernel/sched_clock.c
@@ -24,11 +24,11 @@
  * The clock: sched_clock_cpu() is monotonic per cpu, and should be somewhat
  * consistent between cpus (never more than 2 jiffies difference).
  */
-#include
-#include
 #include
-#include
 #include
+#include
+#include
+#include
 /*
  * Scheduler clock - returns current time in nanosec units.
@@ -43,6 +43,10 @@ unsigned long long __attribute__((weak)) sched_clock(void)
 static __read_mostly int sched_clock_running;
 #ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
+__read_mostly int sched_clock_stable;
+#else
+static const int sched_clock_stable = 1;
+#endif
 struct sched_clock_data {
 	/*
@@ -87,7 +91,7 @@ void sched_clock_init(void)
 }
 /*
- * min,max except they take wrapping into account
+ * min, max except they take wrapping into account
 */
 static inline u64 wrap_min(u64 x, u64 y)
@@ -116,10 +120,13 @@ static u64 __update_sched_clock(struct sched_clock_data *scd, u64 now)
 	if (unlikely(delta < 0))
 		delta = 0;
+	if (unlikely(!sched_clock_running))
+		return 0ull;
+
 	/*
 	 * scd->clock = clamp(scd->tick_gtod + delta,
-	 *		      max(scd->tick_gtod, scd->clock),
-	 *		      scd->tick_gtod + TICK_NSEC);
+	 *		max(scd->tick_gtod, scd->clock),
+	 *		scd->tick_gtod + TICK_NSEC);
 	 */
 	clock = scd->tick_gtod + delta;
@@ -148,12 +155,13 @@ static void lock_double_clock(struct sched_clock_data *data1,
 u64 sched_clock_cpu(int cpu)
 {
-	struct sched_clock_data *scd = cpu_sdc(cpu);
 	u64 now, clock, this_clock, remote_clock;
+	struct sched_clock_data *scd;
-	if (unlikely(!sched_clock_running))
-		return 0ull;
+	if (sched_clock_stable)
+		return sched_clock();
+	scd = cpu_sdc(cpu);
 	WARN_ON_ONCE(!irqs_disabled());
 	now = sched_clock();
@@ -193,6 +201,8 @@ u64 sched_clock_cpu(int cpu)
 	return clock;
 }
+#ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
+
 void sched_clock_tick(void)
 {
 	struct sched_clock_data *scd = this_scd();
@@ -235,22 +245,7 @@ void sched_clock_idle_wakeup_event(u64 delta_ns)
 }
 EXPORT_SYMBOL_GPL(sched_clock_idle_wakeup_event);
-#else /* CONFIG_HAVE_UNSTABLE_SCHED_CLOCK */
-
-void sched_clock_init(void)
-{
-	sched_clock_running = 1;
-}
-
-u64 sched_clock_cpu(int cpu)
-{
-	if (unlikely(!sched_clock_running))
-		return 0;
-
-	return sched_clock();
-}
-
-#endif
+#endif /* CONFIG_HAVE_UNSTABLE_SCHED_CLOCK */
 unsigned long long cpu_clock(int cpu)
 {
diff --git a/trunk/kernel/sched_debug.c b/trunk/kernel/sched_debug.c
index 2b1260f0e800..16eeba4e4169 100644
--- a/trunk/kernel/sched_debug.c
+++ b/trunk/kernel/sched_debug.c
@@ -397,7 +397,6 @@ void proc_sched_show_task(struct task_struct *p, struct seq_file *m)
 	PN(se.vruntime);
 	PN(se.sum_exec_runtime);
 	PN(se.avg_overlap);
-	PN(se.avg_wakeup);
 	nr_switches = p->nvcsw + p->nivcsw;
diff --git a/trunk/kernel/sched_fair.c b/trunk/kernel/sched_fair.c
index 3816f217f119..0566f2a03c42 100644
--- a/trunk/kernel/sched_fair.c
+++ b/trunk/kernel/sched_fair.c
@@ -1314,63 +1314,16 @@ static int select_task_rq_fair(struct task_struct *p, int sync)
 }
 #endif /* CONFIG_SMP */
-/*
- * Adaptive granularity
- *
- * se->avg_wakeup gives the average time a task runs until it does a wakeup,
- * with the limit of wakeup_gran -- when it never does a wakeup.
- *
- * So the smaller avg_wakeup is the faster we want this task to preempt,
- * but we don't want to treat the preemptee unfairly and therefore allow it
- * to run for at least the amount of time we'd like to run.
- *
- * NOTE: we use 2*avg_wakeup to increase the probability of actually doing one
- *
- * NOTE: we use *nr_running to scale with load, this nicely matches the
- * degrading latency on load.
- */
-static unsigned long
-adaptive_gran(struct sched_entity *curr, struct sched_entity *se)
-{
-	u64 this_run = curr->sum_exec_runtime - curr->prev_sum_exec_runtime;
-	u64 expected_wakeup = 2*se->avg_wakeup * cfs_rq_of(se)->nr_running;
-	u64 gran = 0;
-
-	if (this_run < expected_wakeup)
-		gran = expected_wakeup - this_run;
-
-	return min_t(s64, gran, sysctl_sched_wakeup_granularity);
-}
-
-static unsigned long
-wakeup_gran(struct sched_entity *curr, struct sched_entity *se)
+static unsigned long wakeup_gran(struct sched_entity *se)
 {
 	unsigned long gran = sysctl_sched_wakeup_granularity;
-	if (cfs_rq_of(curr)->curr && sched_feat(ADAPTIVE_GRAN))
-		gran = adaptive_gran(curr, se);
-
 	/*
-	 * Since its curr running now, convert the gran from real-time
-	 * to virtual-time in his units.
+	 * More easily preempt - nice tasks, while not making it harder for
+	 * + nice tasks.
 	 */
-	if (sched_feat(ASYM_GRAN)) {
-		/*
-		 * By using 'se' instead of 'curr' we penalize light tasks, so
-		 * they get preempted easier. That is, if 'se' < 'curr' then
-		 * the resulting gran will be larger, therefore penalizing the
-		 * lighter, if otoh 'se' > 'curr' then the resulting gran will
-		 * be smaller, again penalizing the lighter task.
-		 *
-		 * This is especially important for buddies when the leftmost
-		 * task is higher priority than the buddy.
-		 */
-		if (unlikely(se->load.weight != NICE_0_LOAD))
-			gran = calc_delta_fair(gran, se);
-	} else {
-		if (unlikely(curr->load.weight != NICE_0_LOAD))
-			gran = calc_delta_fair(gran, curr);
-	}
+	if (!sched_feat(ASYM_GRAN) || se->load.weight > NICE_0_LOAD)
+		gran = calc_delta_fair(sysctl_sched_wakeup_granularity, se);
 	return gran;
 }
@@ -1397,7 +1350,7 @@ wakeup_preempt_entity(struct sched_entity *curr, struct sched_entity *se)
 	if (vdiff <= 0)
 		return -1;
-	gran = wakeup_gran(curr, se);
+	gran = wakeup_gran(curr);
 	if (vdiff > gran)
 		return 1;
diff --git a/trunk/kernel/sched_features.h b/trunk/kernel/sched_features.h
index 76f61756e677..da5d93b5d2c6 100644
--- a/trunk/kernel/sched_features.h
+++ b/trunk/kernel/sched_features.h
@@ -1,6 +1,5 @@
 SCHED_FEAT(NEW_FAIR_SLEEPERS, 1)
-SCHED_FEAT(NORMALIZED_SLEEPER, 0)
-SCHED_FEAT(ADAPTIVE_GRAN, 1)
+SCHED_FEAT(NORMALIZED_SLEEPER, 1)
 SCHED_FEAT(WAKEUP_PREEMPT, 1)
 SCHED_FEAT(START_DEBIT, 1)
 SCHED_FEAT(AFFINE_WAKEUPS, 1)
diff --git a/trunk/kernel/sched_rt.c b/trunk/kernel/sched_rt.c
index c79dc7844012..bac1061cea2f 100644
--- a/trunk/kernel/sched_rt.c
+++ b/trunk/kernel/sched_rt.c
@@ -3,40 +3,6 @@
  * policies)
  */
-static inline struct task_struct *rt_task_of(struct sched_rt_entity *rt_se)
-{
-	return container_of(rt_se, struct task_struct, rt);
-}
-
-#ifdef CONFIG_RT_GROUP_SCHED
-
-static inline struct rq *rq_of_rt_rq(struct rt_rq *rt_rq)
-{
-	return rt_rq->rq;
-}
-
-static inline struct rt_rq *rt_rq_of_se(struct sched_rt_entity *rt_se)
-{
-	return rt_se->rt_rq;
-}
-
-#else /* CONFIG_RT_GROUP_SCHED */
-
-static inline struct rq *rq_of_rt_rq(struct rt_rq *rt_rq)
-{
-	return container_of(rt_rq, struct rq, rt);
-}
-
-static inline struct rt_rq *rt_rq_of_se(struct sched_rt_entity *rt_se)
-{
-	struct task_struct *p = rt_task_of(rt_se);
-	struct rq *rq = task_rq(p);
-
-	return &rq->rt;
-}
-
-#endif /* CONFIG_RT_GROUP_SCHED */
-
 #ifdef CONFIG_SMP
 static inline int rt_overloaded(struct rq *rq)
@@ -71,69 +37,25 @@ static inline void rt_clear_overload(struct rq *rq)
 	cpumask_clear_cpu(rq->cpu, rq->rd->rto_mask);
 }
-static void update_rt_migration(struct rt_rq *rt_rq)
+static void update_rt_migration(struct rq *rq)
 {
-	if (rt_rq->rt_nr_migratory && (rt_rq->rt_nr_running > 1)) {
-		if (!rt_rq->overloaded) {
-			rt_set_overload(rq_of_rt_rq(rt_rq));
-			rt_rq->overloaded = 1;
+	if (rq->rt.rt_nr_migratory && (rq->rt.rt_nr_running > 1)) {
+		if (!rq->rt.overloaded) {
+			rt_set_overload(rq);
+			rq->rt.overloaded = 1;
 		}
-	} else if (rt_rq->overloaded) {
-		rt_clear_overload(rq_of_rt_rq(rt_rq));
-		rt_rq->overloaded = 0;
+	} else if (rq->rt.overloaded) {
+		rt_clear_overload(rq);
+		rq->rt.overloaded = 0;
 	}
 }
+#endif /* CONFIG_SMP */
-static void inc_rt_migration(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
-{
-	if (rt_se->nr_cpus_allowed > 1)
-		rt_rq->rt_nr_migratory++;
-
-	update_rt_migration(rt_rq);
-}
-
-static void dec_rt_migration(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
-{
-	if (rt_se->nr_cpus_allowed > 1)
-		rt_rq->rt_nr_migratory--;
-
-	update_rt_migration(rt_rq);
-}
-
-static void enqueue_pushable_task(struct rq *rq, struct task_struct *p)
-{
-	plist_del(&p->pushable_tasks, &rq->rt.pushable_tasks);
-	plist_node_init(&p->pushable_tasks, p->prio);
-	plist_add(&p->pushable_tasks, &rq->rt.pushable_tasks);
-}
-
-static void dequeue_pushable_task(struct rq *rq, struct task_struct *p)
-{
-	plist_del(&p->pushable_tasks, &rq->rt.pushable_tasks);
-}
-
-#else
-
-static inline void enqueue_pushable_task(struct rq *rq, struct task_struct *p)
-{
-}
-
-static inline void dequeue_pushable_task(struct rq *rq, struct task_struct *p)
-{
-}
-
-static inline
-void inc_rt_migration(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
-{
-}
-
-static inline
-void dec_rt_migration(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
+static inline struct task_struct *rt_task_of(struct sched_rt_entity *rt_se)
 {
+	return container_of(rt_se, struct task_struct, rt);
 }
-#endif /* CONFIG_SMP */
-
 static inline int on_rt_rq(struct sched_rt_entity *rt_se)
 {
 	return !list_empty(&rt_se->run_list);
@@ -157,6 +79,16 @@ static inline u64 sched_rt_period(struct rt_rq *rt_rq)
 #define for_each_leaf_rt_rq(rt_rq, rq) \
 	list_for_each_entry_rcu(rt_rq, &rq->leaf_rt_rq_list, leaf_rt_rq_list)
+static inline struct rq *rq_of_rt_rq(struct rt_rq *rt_rq)
+{
+	return rt_rq->rq;
+}
+
+static inline struct rt_rq *rt_rq_of_se(struct sched_rt_entity *rt_se)
+{
+	return rt_se->rt_rq;
+}
+
 #define for_each_sched_rt_entity(rt_se) \
 	for (; rt_se; rt_se = rt_se->parent)
@@ -176,7 +108,7 @@ static void sched_rt_rq_enqueue(struct rt_rq *rt_rq)
 	if (rt_rq->rt_nr_running) {
 		if (rt_se && !on_rt_rq(rt_se))
 			enqueue_rt_entity(rt_se);
-		if (rt_rq->highest_prio.curr < curr->prio)
+		if (rt_rq->highest_prio < curr->prio)
 			resched_task(curr);
 	}
 }
@@ -244,6 +176,19 @@ static inline u64 sched_rt_period(struct rt_rq *rt_rq)
 #define for_each_leaf_rt_rq(rt_rq, rq) \
 	for (rt_rq = &rq->rt; rt_rq; rt_rq = NULL)
+static inline struct rq *rq_of_rt_rq(struct rt_rq *rt_rq)
+{
+	return container_of(rt_rq, struct rq, rt);
+}
+
+static inline struct rt_rq *rt_rq_of_se(struct sched_rt_entity *rt_se)
+{
+	struct task_struct *p = rt_task_of(rt_se);
+	struct rq *rq = task_rq(p);
+
+	return &rq->rt;
+}
+
 #define for_each_sched_rt_entity(rt_se) \
 	for (; rt_se; rt_se = NULL)
@@ -528,7 +473,7 @@ static inline int rt_se_prio(struct sched_rt_entity *rt_se)
 	struct rt_rq *rt_rq = group_rt_rq(rt_se);
 	if (rt_rq)
-		return rt_rq->highest_prio.curr;
+		return rt_rq->highest_prio;
 #endif
 	return rt_task_of(rt_se)->prio;
@@ -602,174 +547,91 @@ static void update_curr_rt(struct rq *rq)
 	}
 }
-#if defined CONFIG_SMP
-
-static struct task_struct *pick_next_highest_task_rt(struct rq *rq, int cpu);
-
-static inline int next_prio(struct rq *rq)
-{
-	struct task_struct *next = pick_next_highest_task_rt(rq, rq->cpu);
-
-	if (next && rt_prio(next->prio))
-		return next->prio;
-	else
-		return MAX_RT_PRIO;
-}
-
-static void
-inc_rt_prio_smp(struct rt_rq *rt_rq, int prio, int prev_prio)
+static inline
+void inc_rt_tasks(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
 {
-	struct rq *rq = rq_of_rt_rq(rt_rq);
-
-	if (prio < prev_prio) {
-
-		/*
-		 * If the new task is higher in priority than anything on the
-		 * run-queue, we know that the previous high becomes our
-		 * next-highest.
-		 */
-		rt_rq->highest_prio.next = prev_prio;
+	WARN_ON(!rt_prio(rt_se_prio(rt_se)));
+	rt_rq->rt_nr_running++;
+#if defined CONFIG_SMP || defined CONFIG_RT_GROUP_SCHED
+	if (rt_se_prio(rt_se) < rt_rq->highest_prio) {
+#ifdef CONFIG_SMP
+		struct rq *rq = rq_of_rt_rq(rt_rq);
+#endif
+		rt_rq->highest_prio = rt_se_prio(rt_se);
+#ifdef CONFIG_SMP
 		if (rq->online)
-			cpupri_set(&rq->rd->cpupri, rq->cpu, prio);
-
-	} else if (prio == rt_rq->highest_prio.curr)
-		/*
-		 * If the next task is equal in priority to the highest on
-		 * the run-queue, then we implicitly know that the next highest
-		 * task cannot be any lower than current
-		 */
-		rt_rq->highest_prio.next = prio;
-	else if (prio < rt_rq->highest_prio.next)
-		/*
-		 * Otherwise, we need to recompute next-highest
-		 */
-		rt_rq->highest_prio.next = next_prio(rq);
-}
+			cpupri_set(&rq->rd->cpupri, rq->cpu,
+				   rt_se_prio(rt_se));
+#endif
+	}
+#endif
+#ifdef CONFIG_SMP
+	if (rt_se->nr_cpus_allowed > 1) {
+		struct rq *rq = rq_of_rt_rq(rt_rq);
-static void
-dec_rt_prio_smp(struct rt_rq *rt_rq, int prio, int prev_prio)
-{
-	struct rq *rq = rq_of_rt_rq(rt_rq);
+		rq->rt.rt_nr_migratory++;
+	}
-	if (rt_rq->rt_nr_running && (prio <= rt_rq->highest_prio.next))
-		rt_rq->highest_prio.next = next_prio(rq);
+	update_rt_migration(rq_of_rt_rq(rt_rq));
+#endif
+#ifdef CONFIG_RT_GROUP_SCHED
+	if (rt_se_boosted(rt_se))
+		rt_rq->rt_nr_boosted++;
-	if (rq->online && rt_rq->highest_prio.curr != prev_prio)
-		cpupri_set(&rq->rd->cpupri, rq->cpu, rt_rq->highest_prio.curr);
+	if (rt_rq->tg)
+		start_rt_bandwidth(&rt_rq->tg->rt_bandwidth);
+#else
+	start_rt_bandwidth(&def_rt_bandwidth);
+#endif
 }
-#else /* CONFIG_SMP */
-
 static inline
-void inc_rt_prio_smp(struct rt_rq *rt_rq, int prio, int prev_prio) {}
-static inline
-void dec_rt_prio_smp(struct rt_rq *rt_rq, int prio, int prev_prio) {}
-
-#endif /* CONFIG_SMP */
-
-#if defined CONFIG_SMP || defined CONFIG_RT_GROUP_SCHED
-static void
-inc_rt_prio(struct rt_rq *rt_rq, int prio)
-{
-	int prev_prio = rt_rq->highest_prio.curr;
-
-	if (prio < prev_prio)
-		rt_rq->highest_prio.curr = prio;
-
-	inc_rt_prio_smp(rt_rq, prio, prev_prio);
-}
-
-static void
-dec_rt_prio(struct rt_rq *rt_rq, int prio)
+void dec_rt_tasks(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
 {
-	int prev_prio = rt_rq->highest_prio.curr;
+#ifdef CONFIG_SMP
+	int highest_prio = rt_rq->highest_prio;
+#endif
+	WARN_ON(!rt_prio(rt_se_prio(rt_se)));
+	WARN_ON(!rt_rq->rt_nr_running);
+	rt_rq->rt_nr_running--;
+#if defined CONFIG_SMP || defined CONFIG_RT_GROUP_SCHED
 	if (rt_rq->rt_nr_running) {
+		struct rt_prio_array *array;
-		WARN_ON(prio < prev_prio);
-
-		/*
-		 * This may have been our highest task, and therefore
-		 * we may have some recomputation to do
-		 */
-		if (prio == prev_prio) {
-			struct rt_prio_array *array = &rt_rq->active;
-
-			rt_rq->highest_prio.curr =
+		WARN_ON(rt_se_prio(rt_se) < rt_rq->highest_prio);
+		if (rt_se_prio(rt_se) == rt_rq->highest_prio) {
+			/* recalculate */
+			array = &rt_rq->active;
+			rt_rq->highest_prio =
 				sched_find_first_bit(array->bitmap);
-		}
-
+		} /* otherwise leave rq->highest prio alone */
 	} else
-		rt_rq->highest_prio.curr = MAX_RT_PRIO;
-
-	dec_rt_prio_smp(rt_rq, prio, prev_prio);
-}
-
-#else
+		rt_rq->highest_prio = MAX_RT_PRIO;
+#endif
+#ifdef CONFIG_SMP
+	if (rt_se->nr_cpus_allowed > 1) {
+		struct rq *rq = rq_of_rt_rq(rt_rq);
+		rq->rt.rt_nr_migratory--;
+	}
-static inline void inc_rt_prio(struct rt_rq *rt_rq, int prio) {}
-static inline void dec_rt_prio(struct rt_rq *rt_rq, int prio) {}
+	if (rt_rq->highest_prio != highest_prio) {
+		struct rq *rq = rq_of_rt_rq(rt_rq);
-#endif /* CONFIG_SMP || CONFIG_RT_GROUP_SCHED */
+		if (rq->online)
+			cpupri_set(&rq->rd->cpupri, rq->cpu,
+				   rt_rq->highest_prio);
+	}
+	update_rt_migration(rq_of_rt_rq(rt_rq));
+#endif /* CONFIG_SMP */
 #ifdef CONFIG_RT_GROUP_SCHED
-
-static void
-inc_rt_group(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
-{
-	if (rt_se_boosted(rt_se))
-		rt_rq->rt_nr_boosted++;
-
-	if (rt_rq->tg)
-		start_rt_bandwidth(&rt_rq->tg->rt_bandwidth);
-}
-
-static void
-dec_rt_group(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
-{
 	if (rt_se_boosted(rt_se))
 		rt_rq->rt_nr_boosted--;
 	WARN_ON(!rt_rq->rt_nr_running && rt_rq->rt_nr_boosted);
-}
-
-#else /* CONFIG_RT_GROUP_SCHED */
-
-static void
-inc_rt_group(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
-{
-	start_rt_bandwidth(&def_rt_bandwidth);
-}
-
-static inline
-void dec_rt_group(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq) {}
-
-#endif /* CONFIG_RT_GROUP_SCHED */
-
-static inline
-void inc_rt_tasks(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
-{
-	int prio = rt_se_prio(rt_se);
-
-	WARN_ON(!rt_prio(prio));
-	rt_rq->rt_nr_running++;
-
-	inc_rt_prio(rt_rq, prio);
-	inc_rt_migration(rt_se, rt_rq);
-	inc_rt_group(rt_se, rt_rq);
-}
-
-static inline
-void dec_rt_tasks(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
-{
-	WARN_ON(!rt_prio(rt_se_prio(rt_se)));
-	WARN_ON(!rt_rq->rt_nr_running);
-	rt_rq->rt_nr_running--;
-
-	dec_rt_prio(rt_rq, rt_se_prio(rt_se));
-	dec_rt_migration(rt_se, rt_rq);
-	dec_rt_group(rt_se, rt_rq);
+#endif
 }
 static void __enqueue_rt_entity(struct sched_rt_entity *rt_se)
@@ -856,9 +718,6 @@ static void enqueue_task_rt(struct rq *rq, struct task_struct *p, int wakeup)
 	enqueue_rt_entity(rt_se);
-	if (!task_current(rq, p) && p->rt.nr_cpus_allowed > 1)
-		enqueue_pushable_task(rq, p);
-
 	inc_cpu_load(rq, p->se.load.weight);
 }
@@ -869,8 +728,6 @@ static void dequeue_task_rt(struct rq *rq, struct task_struct *p, int sleep)
 	update_curr_rt(rq);
 	dequeue_rt_entity(rt_se);
-	dequeue_pushable_task(rq, p);
-
 	dec_cpu_load(rq, p->se.load.weight);
 }
@@ -1021,7 +878,7 @@ static struct sched_rt_entity *pick_next_rt_entity(struct rq *rq,
 	return next;
 }
-static struct task_struct *_pick_next_task_rt(struct rq *rq)
+static struct task_struct *pick_next_task_rt(struct rq *rq)
 {
 	struct sched_rt_entity *rt_se;
 	struct task_struct *p;
@@ -1043,18 +900,6 @@ static struct task_struct *_pick_next_task_rt(struct rq *rq)
 	p = rt_task_of(rt_se);
 	p->se.exec_start = rq->clock;
-
-	return p;
-}
-
-static struct task_struct *pick_next_task_rt(struct rq *rq)
-{
-	struct task_struct *p = _pick_next_task_rt(rq);
-
-	/* The running task is never eligible for pushing */
-	if (p)
-		dequeue_pushable_task(rq, p);
-
 	return p;
 }
@@ -1062,13 +907,6 @@ static void put_prev_task_rt(struct rq *rq, struct task_struct *p)
 {
 	update_curr_rt(rq);
 	p->se.exec_start = 0;
-
-	/*
-	 * The previous task needs to be made eligible for pushing
-	 * if it is still active
-	 */
-	if (p->se.on_rq && p->rt.nr_cpus_allowed > 1)
-		enqueue_pushable_task(rq, p);
 }
 #ifdef CONFIG_SMP
@@ -1234,7 +1072,7 @@ static struct rq *find_lock_lowest_rq(struct task_struct *task, struct rq *rq)
 		}
 		/* If this rq is still suitable use it. */
-		if (lowest_rq->rt.highest_prio.curr > task->prio)
+		if (lowest_rq->rt.highest_prio > task->prio)
 			break;
 		/* try again */
@@ -1245,31 +1083,6 @@ static struct rq *find_lock_lowest_rq(struct task_struct *task, struct rq *rq)
 	return lowest_rq;
 }
-static inline int has_pushable_tasks(struct rq *rq)
-{
-	return !plist_head_empty(&rq->rt.pushable_tasks);
-}
-
-static struct task_struct *pick_next_pushable_task(struct rq *rq)
-{
-	struct task_struct *p;
-
-	if (!has_pushable_tasks(rq))
-		return NULL;
-
-	p = plist_first_entry(&rq->rt.pushable_tasks,
-			      struct task_struct, pushable_tasks);
-
-	BUG_ON(rq->cpu != task_cpu(p));
-	BUG_ON(task_current(rq, p));
-	BUG_ON(p->rt.nr_cpus_allowed <= 1);
-
-	BUG_ON(!p->se.on_rq);
-	BUG_ON(!rt_task(p));
-
-	return p;
-}
-
 /*
  * If the current CPU has more than one RT task, see if the non
  * running task can migrate over to a CPU that is running a task
@@ -1279,11 +1092,13 @@ static int push_rt_task(struct rq *rq)
 {
 	struct task_struct *next_task;
 	struct rq *lowest_rq;
+	int ret = 0;
+	int paranoid = RT_MAX_TRIES;
 	if (!rq->rt.overloaded)
 		return 0;
-	next_task = pick_next_pushable_task(rq);
+	next_task = pick_next_highest_task_rt(rq, -1);
 	if (!next_task)
 		return 0;
@@ -1312,34 +1127,16 @@ static int push_rt_task(struct rq *rq)
 		struct task_struct *task;
 		/*
 		 * find lock_lowest_rq releases rq->lock
-		 * so it is possible that next_task has migrated.
-		 *
-		 * We need to make sure that the task is still on the same
-		 * run-queue and is also still the next task eligible for
-		 * pushing.
+		 * so it is possible that next_task has changed.
+		 * If it has, then try again.
 		 */
-		task = pick_next_pushable_task(rq);
-		if (task_cpu(next_task) == rq->cpu && task == next_task) {
-			/*
-			 * If we get here, the task hasnt moved at all, but
-			 * it has failed to push. We will not try again,
-			 * since the other cpus will pull from us when they
-			 * are ready.
-			 */
-			dequeue_pushable_task(rq, next_task);
-			goto out;
+		task = pick_next_highest_task_rt(rq, -1);
+		if (unlikely(task != next_task) && task && paranoid--) {
+			put_task_struct(next_task);
+			next_task = task;
+			goto retry;
 		}
-
-		if (!task)
-			/* No more tasks, just exit */
-			goto out;
-
-		/*
-		 * Something has shifted, try again.
-		 */
-		put_task_struct(next_task);
-		next_task = task;
-		goto retry;
+		goto out;
 	}
 	deactivate_task(rq, next_task, 0);
@@ -1350,12 +1147,23 @@ static int push_rt_task(struct rq *rq)
 	double_unlock_balance(rq, lowest_rq);
+	ret = 1;
 out:
 	put_task_struct(next_task);
-	return 1;
+	return ret;
 }
+/*
+ * TODO: Currently we just use the second highest prio task on
+ *       the queue, and stop when it can't migrate (or there's
+ *       no more RT tasks).  There may be a case where a lower
+ *       priority RT task has a different affinity than the
+ *       higher RT task. In this case the lower RT task could
+ *       possibly be able to migrate where as the higher priority
+ *       RT task could not.  We currently ignore this issue.
+ *       Enhancements are welcome!
+ */
 static void push_rt_tasks(struct rq *rq)
 {
 	/* push_rt_task will return true if it moved an RT */
@@ -1366,35 +1174,33 @@ static void push_rt_tasks(struct rq *rq)
 static int pull_rt_task(struct rq *this_rq)
 {
 	int this_cpu = this_rq->cpu, ret = 0, cpu;
-	struct task_struct *p;
+	struct task_struct *p, *next;
 	struct rq *src_rq;
 	if (likely(!rt_overloaded(this_rq)))
 		return 0;
+	next = pick_next_task_rt(this_rq);
+
 	for_each_cpu(cpu, this_rq->rd->rto_mask) {
 		if (this_cpu == cpu)
 			continue;
 		src_rq = cpu_rq(cpu);
-
-		/*
-		 * Don't bother taking the src_rq->lock if the next highest
-		 * task is known to be lower-priority than our current task.
-		 * This may look racy, but if this value is about to go
-		 * logically higher, the src_rq will push this task away.
-		 * And if its going logically lower, we do not care
-		 */
-		if (src_rq->rt.highest_prio.next >=
-		    this_rq->rt.highest_prio.curr)
-			continue;
-
 		/*
 		 * We can potentially drop this_rq's lock in
 		 * double_lock_balance, and another CPU could
-		 * alter this_rq
+		 * steal our next task - hence we must cause
+		 * the caller to recalculate the next task
+		 * in that case:
 		 */
-		double_lock_balance(this_rq, src_rq);
+		if (double_lock_balance(this_rq, src_rq)) {
+			struct task_struct *old_next = next;
+
+			next = pick_next_task_rt(this_rq);
+			if (next != old_next)
+				ret = 1;
+		}
 		/*
 		 * Are there still pullable RT tasks?
@@ -1408,7 +1214,7 @@ static int pull_rt_task(struct rq *this_rq)
 		 * Do we have an RT task that preempts
		 * the to-be-scheduled task?
 		 */
-		if (p && (p->prio < this_rq->rt.highest_prio.curr)) {
+		if (p && (!next || (p->prio < next->prio))) {
 			WARN_ON(p == src_rq->curr);
 			WARN_ON(!p->se.on_rq);
@@ -1418,9 +1224,12 @@ static int pull_rt_task(struct rq *this_rq)
 			 * This is just that p is wakeing up and hasn't
 			 * had a chance to schedule. We only pull
 			 * p if it is lower in priority than the
-			 * current task on the run queue
+			 * current task on the run queue or
+			 * this_rq next task is lower in prio than
+			 * the current task on that rq.
 			 */
-			if (p->prio < src_rq->curr->prio)
+			if (p->prio < src_rq->curr->prio ||
+			    (next && next->prio < src_rq->curr->prio))
 				goto skip;
 			ret = 1;
@@ -1433,7 +1242,13 @@ static int pull_rt_task(struct rq *this_rq)
 			 * case there's an even higher prio task
 			 * in another runqueue. (low likelyhood
 			 * but possible)
+			 *
+			 * Update next so that we won't pick a task
+			 * on another cpu with a priority lower (or equal)
+			 * than the one we just picked.
 			 */
+			next = p;
+
 		}
 skip:
 		double_unlock_balance(this_rq, src_rq);
@@ -1445,27 +1260,24 @@ static int pull_rt_task(struct rq *this_rq)
 static void pre_schedule_rt(struct rq *rq, struct task_struct *prev)
 {
 	/* Try to pull RT tasks here if we lower this rq's prio */
-	if (unlikely(rt_task(prev)) && rq->rt.highest_prio.curr > prev->prio)
+	if (unlikely(rt_task(prev)) && rq->rt.highest_prio > prev->prio)
 		pull_rt_task(rq);
 }
-/*
- * assumes rq->lock is held
- */
-static int needs_post_schedule_rt(struct rq *rq)
-{
-	return has_pushable_tasks(rq);
-}
-
 static void post_schedule_rt(struct rq *rq)
 {
 	/*
-	 * This is only called if needs_post_schedule_rt() indicates that
-	 * we need to push tasks away
+	 * If we have more than one rt_task queued, then
+	 * see if we can push the other rt_tasks off to other CPUS.
+	 * Note we may release the rq lock, and since
+	 * the lock was owned by prev, we need to release it
+	 * first via finish_lock_switch and then reaquire it here.
 	 */
-	spin_lock_irq(&rq->lock);
-	push_rt_tasks(rq);
-	spin_unlock_irq(&rq->lock);
+	if (unlikely(rq->rt.overloaded)) {
+		spin_lock_irq(&rq->lock);
+		push_rt_tasks(rq);
+		spin_unlock_irq(&rq->lock);
+	}
 }
 /*
@@ -1476,8 +1288,7 @@ static void task_wake_up_rt(struct rq *rq, struct task_struct *p)
 {
 	if (!task_running(rq, p) &&
 	    !test_tsk_need_resched(rq->curr) &&
-	    has_pushable_tasks(rq) &&
-	    p->rt.nr_cpus_allowed > 1)
+	    rq->rt.overloaded)
 		push_rt_tasks(rq);
 }
@@ -1513,24 +1324,6 @@ static void set_cpus_allowed_rt(struct task_struct *p,
 	if (p->se.on_rq && (weight != p->rt.nr_cpus_allowed)) {
 		struct rq *rq = task_rq(p);
-		if (!task_current(rq, p)) {
-			/*
-			 * Make sure we dequeue this task from the pushable list
-			 * before going further.  It will either remain off of
-			 * the list because we are no longer pushable, or it
-			 * will be requeued.
-			 */
-			if (p->rt.nr_cpus_allowed > 1)
-				dequeue_pushable_task(rq, p);
-
-			/*
-			 * Requeue if our weight is changing and still > 1
-			 */
-			if (weight > 1)
-				enqueue_pushable_task(rq, p);
-
-		}
-
 		if ((p->rt.nr_cpus_allowed <= 1) && (weight > 1)) {
 			rq->rt.rt_nr_migratory++;
 		} else if ((p->rt.nr_cpus_allowed > 1) && (weight <= 1)) {
@@ -1538,7 +1331,7 @@ static void set_cpus_allowed_rt(struct task_struct *p,
 			rq->rt.rt_nr_migratory--;
 		}
-		update_rt_migration(&rq->rt);
+		update_rt_migration(rq);
 	}
 	cpumask_copy(&p->cpus_allowed, new_mask);
@@ -1553,7 +1346,7 @@ static void rq_online_rt(struct rq *rq)
 	__enable_runtime(rq);
-	cpupri_set(&rq->rd->cpupri, rq->cpu, rq->rt.highest_prio.curr);
+	cpupri_set(&rq->rd->cpupri, rq->cpu, rq->rt.highest_prio);
 }
 /* Assumes rq->lock is held */
@@ -1645,7 +1438,7 @@ static void prio_changed_rt(struct rq *rq, struct task_struct *p,
 		 * can release the rq lock and p could migrate.
 		 * Only reschedule if p is still on the same runqueue.
 		 */
-		if (p->prio > rq->rt.highest_prio.curr && rq->curr == p)
+		if (p->prio > rq->rt.highest_prio && rq->curr == p)
 			resched_task(p);
 #else
 		/* For UP simply resched on drop of prio */
@@ -1716,9 +1509,6 @@ static void set_curr_task_rt(struct rq *rq)
 	struct task_struct *p = rq->curr;
 	p->se.exec_start = rq->clock;
-
-	/* The running task is never eligible for pushing */
-	dequeue_pushable_task(rq, p);
 }
 static const struct sched_class rt_sched_class = {
@@ -1741,7 +1531,6 @@ static const struct sched_class rt_sched_class = {
 	.rq_online = rq_online_rt,
 	.rq_offline = rq_offline_rt,
 	.pre_schedule = pre_schedule_rt,
-	.needs_post_schedule = needs_post_schedule_rt,
 	.post_schedule = post_schedule_rt,
 	.task_wake_up = task_wake_up_rt,
 	.switched_from = switched_from_rt,
diff --git a/trunk/lib/Kconfig b/trunk/lib/Kconfig
index fc8ea1ca59d8..03c2c24b9083 100644
--- a/trunk/lib/Kconfig
+++ b/trunk/lib/Kconfig
@@ -136,6 +136,12 @@ config TEXTSEARCH_BM
 config TEXTSEARCH_FSM
 	tristate
+#
+# plist support is select#ed if needed
+#
+config PLIST
+	boolean
+
 config HAS_IOMEM
 	boolean
 	depends on !NO_IOMEM
diff --git a/trunk/lib/Makefile b/trunk/lib/Makefile
index 902d73851044..32b0e64ded27 100644
--- a/trunk/lib/Makefile
+++ b/trunk/lib/Makefile
@@ -11,8 +11,7 @@ lib-y := ctype.o string.o vsprintf.o cmdline.o \
 	 rbtree.o radix-tree.o dump_stack.o \
 	 idr.o int_sqrt.o extable.o prio_tree.o \
 	 sha1.o irq_regs.o reciprocal_div.o argv_split.o \
-	 proportions.o prio_heap.o ratelimit.o show_mem.o \
-	 is_single_threaded.o plist.o
+	 proportions.o prio_heap.o ratelimit.o show_mem.o is_single_threaded.o
 lib-$(CONFIG_MMU) += ioremap.o
 lib-$(CONFIG_SMP) += cpumask.o
@@ -41,6 +40,7 @@ lib-$(CONFIG_GENERIC_FIND_NEXT_BIT) += find_next_bit.o
 lib-$(CONFIG_GENERIC_FIND_LAST_BIT) += find_last_bit.o
 obj-$(CONFIG_GENERIC_HWEIGHT) += hweight.o
 obj-$(CONFIG_LOCK_KERNEL) += kernel_lock.o
+obj-$(CONFIG_PLIST) += plist.o
 obj-$(CONFIG_DEBUG_PREEMPT) += smp_processor_id.o
 obj-$(CONFIG_DEBUG_LIST) += list_debug.o
 obj-$(CONFIG_DEBUG_OBJECTS) += debugobjects.o
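
For reference, the plist.h and lib/ hunks above move plist behind CONFIG_PLIST and rework its static initializers. The sketch below is not part of the patch; it is a minimal, hedged illustration of how the plist calls that appear in the removed scheduler code (plist_head_init(), plist_node_init(), plist_add(), plist_del()) are typically used, assuming the plist API of this kernel era. The demo_* names, the priority value and the spinlock are illustrative assumptions only.

#include <linux/plist.h>
#include <linux/spinlock.h>

static DEFINE_SPINLOCK(demo_lock);		/* lock assumed to protect the list */
static struct plist_head demo_head;
static struct plist_node demo_node;

static void demo_plist_usage(void)
{
	/* runtime counterparts of PLIST_HEAD_INIT / PLIST_NODE_INIT */
	plist_head_init(&demo_head, &demo_lock);
	plist_node_init(&demo_node, 10);	/* lower ->prio values sort first */

	spin_lock(&demo_lock);
	plist_add(&demo_node, &demo_head);	/* insert in priority order */
	plist_del(&demo_node, &demo_head);	/* remove again */
	spin_unlock(&demo_lock);
}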