From ce09e0683a6bc8ebce6fa88ae7cfef54d94d3657 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 28 Nov 2011 21:12:40 +0100 Subject: [PATCH] --- yaml --- r: 276371 b: refs/heads/master c: 52cef189165d74a5d6030184a8e05595194c69ca h: refs/heads/master i: 276369: dbcde413ee6d8c5ab94f384413375608f15f19f9 276367: d160019213b3b0bf3a0b85975af65df11e483c33 v: v3 --- [refs] | 2 +- trunk/arch/s390/include/asm/pgtable.h | 8 +- trunk/arch/s390/kernel/ptrace.c | 30 ++--- trunk/arch/s390/kernel/setup.c | 2 +- trunk/arch/s390/kernel/signal.c | 8 +- trunk/arch/x86/include/asm/timer.h | 23 +--- trunk/drivers/gpio/Makefile | 2 +- trunk/drivers/s390/cio/chsc.c | 7 +- trunk/drivers/s390/cio/cio.h | 5 - trunk/drivers/s390/cio/css.c | 104 ++++++++--------- trunk/drivers/s390/cio/device.c | 4 +- trunk/drivers/s390/cio/device_fsm.c | 30 ++--- trunk/drivers/s390/cio/device_ops.c | 20 +--- trunk/drivers/s390/cio/io_sch.h | 5 +- trunk/drivers/s390/crypto/ap_bus.c | 2 - trunk/include/linux/init_task.h | 4 +- trunk/kernel/sched.c | 17 --- trunk/kernel/sched_fair.c | 159 ++++++-------------------- trunk/kernel/sched_features.h | 1 - trunk/kernel/sched_rt.c | 3 - trunk/mm/slab.c | 5 +- 21 files changed, 130 insertions(+), 311 deletions(-) diff --git a/[refs] b/[refs] index 2867ac0b92f5..81c8ca026c3c 100644 --- a/[refs] +++ b/[refs] @@ -1,2 +1,2 @@ --- -refs/heads/master: 7125faceabe43067293d0c9e2ef7154ecea51721 +refs/heads/master: 52cef189165d74a5d6030184a8e05595194c69ca diff --git a/trunk/arch/s390/include/asm/pgtable.h b/trunk/arch/s390/include/asm/pgtable.h index 4f289ff0b7fe..524d23b8610c 100644 --- a/trunk/arch/s390/include/asm/pgtable.h +++ b/trunk/arch/s390/include/asm/pgtable.h @@ -599,10 +599,10 @@ static inline pgste_t pgste_update_all(pte_t *ptep, pgste_t pgste) skey = page_get_storage_key(address); bits = skey & (_PAGE_CHANGED | _PAGE_REFERENCED); /* Clear page changed & referenced bit in the storage key */ - if (bits & _PAGE_CHANGED) - page_set_storage_key(address, skey ^ bits, 1); - else if (bits) - page_reset_referenced(address); + if (bits) { + skey ^= bits; + page_set_storage_key(address, skey, 1); + } /* Transfer page changed & referenced bit to guest bits in pgste */ pgste_val(pgste) |= bits << 48; /* RCP_GR_BIT & RCP_GC_BIT */ /* Get host changed & referenced bits from pgste */ diff --git a/trunk/arch/s390/kernel/ptrace.c b/trunk/arch/s390/kernel/ptrace.c index 573bc29551ef..450931a45b68 100644 --- a/trunk/arch/s390/kernel/ptrace.c +++ b/trunk/arch/s390/kernel/ptrace.c @@ -296,6 +296,13 @@ static int __poke_user(struct task_struct *child, addr_t addr, addr_t data) ((data & PSW_MASK_EA) && !(data & PSW_MASK_BA)))) /* Invalid psw mask. */ return -EINVAL; + if (addr == (addr_t) &dummy->regs.psw.addr) + /* + * The debugger changed the instruction address, + * reset system call restart, see signal.c:do_signal + */ + task_thread_info(child)->system_call = 0; + *(addr_t *)((addr_t) &task_pt_regs(child)->psw + addr) = data; } else if (addr < (addr_t) (&dummy->regs.orig_gpr2)) { @@ -607,6 +614,11 @@ static int __poke_user_compat(struct task_struct *child, /* Transfer 31 bit amode bit to psw mask. */ regs->psw.mask = (regs->psw.mask & ~PSW_MASK_BA) | (__u64)(tmp & PSW32_ADDR_AMODE); + /* + * The debugger changed the instruction address, + * reset system call restart, see signal.c:do_signal + */ + task_thread_info(child)->system_call = 0; } else { /* gpr 0-15 */ *(__u32*)((addr_t) ®s->psw + addr*2 + 4) = tmp; @@ -893,14 +905,6 @@ static int s390_last_break_get(struct task_struct *target, return 0; } -static int s390_last_break_set(struct task_struct *target, - const struct user_regset *regset, - unsigned int pos, unsigned int count, - const void *kbuf, const void __user *ubuf) -{ - return 0; -} - #endif static int s390_system_call_get(struct task_struct *target, @@ -947,7 +951,6 @@ static const struct user_regset s390_regsets[] = { .size = sizeof(long), .align = sizeof(long), .get = s390_last_break_get, - .set = s390_last_break_set, }, #endif [REGSET_SYSTEM_CALL] = { @@ -1113,14 +1116,6 @@ static int s390_compat_last_break_get(struct task_struct *target, return 0; } -static int s390_compat_last_break_set(struct task_struct *target, - const struct user_regset *regset, - unsigned int pos, unsigned int count, - const void *kbuf, const void __user *ubuf) -{ - return 0; -} - static const struct user_regset s390_compat_regsets[] = { [REGSET_GENERAL] = { .core_note_type = NT_PRSTATUS, @@ -1144,7 +1139,6 @@ static const struct user_regset s390_compat_regsets[] = { .size = sizeof(long), .align = sizeof(long), .get = s390_compat_last_break_get, - .set = s390_compat_last_break_set, }, [REGSET_SYSTEM_CALL] = { .core_note_type = NT_S390_SYSTEM_CALL, diff --git a/trunk/arch/s390/kernel/setup.c b/trunk/arch/s390/kernel/setup.c index e54c4ff8abaa..e58a462949b1 100644 --- a/trunk/arch/s390/kernel/setup.c +++ b/trunk/arch/s390/kernel/setup.c @@ -579,7 +579,7 @@ static unsigned long __init find_crash_base(unsigned long crash_size, *msg = "first memory chunk must be at least crashkernel size"; return 0; } - if (OLDMEM_BASE && crash_size == OLDMEM_SIZE) + if (is_kdump_kernel() && (crash_size == OLDMEM_SIZE)) return OLDMEM_BASE; for (i = MEMORY_CHUNKS - 1; i >= 0; i--) { diff --git a/trunk/arch/s390/kernel/signal.c b/trunk/arch/s390/kernel/signal.c index 7f6f9f354545..05a85bc14c98 100644 --- a/trunk/arch/s390/kernel/signal.c +++ b/trunk/arch/s390/kernel/signal.c @@ -460,9 +460,9 @@ void do_signal(struct pt_regs *regs) regs->svc_code >> 16); break; } + /* No longer in a system call */ + clear_thread_flag(TIF_SYSCALL); } - /* No longer in a system call */ - clear_thread_flag(TIF_SYSCALL); if ((is_compat_task() ? handle_signal32(signr, &ka, &info, oldset, regs) : @@ -486,7 +486,6 @@ void do_signal(struct pt_regs *regs) } /* No handlers present - check for system call restart */ - clear_thread_flag(TIF_SYSCALL); if (current_thread_info()->system_call) { regs->svc_code = current_thread_info()->system_call; switch (regs->gprs[2]) { @@ -501,6 +500,9 @@ void do_signal(struct pt_regs *regs) regs->gprs[2] = regs->orig_gpr2; set_thread_flag(TIF_SYSCALL); break; + default: + clear_thread_flag(TIF_SYSCALL); + break; } } diff --git a/trunk/arch/x86/include/asm/timer.h b/trunk/arch/x86/include/asm/timer.h index 431793e5d484..fa7b9176b76c 100644 --- a/trunk/arch/x86/include/asm/timer.h +++ b/trunk/arch/x86/include/asm/timer.h @@ -32,22 +32,6 @@ extern int no_timer_check; * (mathieu.desnoyers@polymtl.ca) * * -johnstul@us.ibm.com "math is hard, lets go shopping!" - * - * In: - * - * ns = cycles * cyc2ns_scale / SC - * - * Although we may still have enough bits to store the value of ns, - * in some cases, we may not have enough bits to store cycles * cyc2ns_scale, - * leading to an incorrect result. - * - * To avoid this, we can decompose 'cycles' into quotient and remainder - * of division by SC. Then, - * - * ns = (quot * SC + rem) * cyc2ns_scale / SC - * = quot * cyc2ns_scale + (rem * cyc2ns_scale) / SC - * - * - sqazi@google.com */ DECLARE_PER_CPU(unsigned long, cyc2ns); @@ -57,14 +41,9 @@ DECLARE_PER_CPU(unsigned long long, cyc2ns_offset); static inline unsigned long long __cycles_2_ns(unsigned long long cyc) { - unsigned long long quot; - unsigned long long rem; int cpu = smp_processor_id(); unsigned long long ns = per_cpu(cyc2ns_offset, cpu); - quot = (cyc >> CYC2NS_SCALE_FACTOR); - rem = cyc & ((1ULL << CYC2NS_SCALE_FACTOR) - 1); - ns += quot * per_cpu(cyc2ns, cpu) + - ((rem * per_cpu(cyc2ns, cpu)) >> CYC2NS_SCALE_FACTOR); + ns += cyc * per_cpu(cyc2ns, cpu) >> CYC2NS_SCALE_FACTOR; return ns; } diff --git a/trunk/drivers/gpio/Makefile b/trunk/drivers/gpio/Makefile index 4e018d6a7639..dbcb0bcfd8da 100644 --- a/trunk/drivers/gpio/Makefile +++ b/trunk/drivers/gpio/Makefile @@ -18,7 +18,7 @@ obj-$(CONFIG_ARCH_DAVINCI) += gpio-davinci.o obj-$(CONFIG_GPIO_EP93XX) += gpio-ep93xx.o obj-$(CONFIG_GPIO_IT8761E) += gpio-it8761e.o obj-$(CONFIG_GPIO_JANZ_TTL) += gpio-janz-ttl.o -obj-$(CONFIG_ARCH_KS8695) += gpio-ks8695.o +obj-$(CONFIG_MACH_KS8695) += gpio-ks8695.o obj-$(CONFIG_GPIO_LANGWELL) += gpio-langwell.o obj-$(CONFIG_ARCH_LPC32XX) += gpio-lpc32xx.o obj-$(CONFIG_GPIO_MAX730X) += gpio-max730x.o diff --git a/trunk/drivers/s390/cio/chsc.c b/trunk/drivers/s390/cio/chsc.c index a84631a7391d..75c3f1f8fd43 100644 --- a/trunk/drivers/s390/cio/chsc.c +++ b/trunk/drivers/s390/cio/chsc.c @@ -529,7 +529,10 @@ __s390_vary_chpid_on(struct subchannel_id schid, void *data) int chsc_chp_vary(struct chp_id chpid, int on) { struct channel_path *chp = chpid_to_chp(chpid); + struct chp_link link; + memset(&link, 0, sizeof(struct chp_link)); + link.chpid = chpid; /* Wait until previous actions have settled. */ css_wait_for_slow_path(); /* @@ -539,10 +542,10 @@ int chsc_chp_vary(struct chp_id chpid, int on) /* Try to update the channel path descritor. */ chsc_determine_base_channel_path_desc(chpid, &chp->desc); for_each_subchannel_staged(s390_subchannel_vary_chpid_on, - __s390_vary_chpid_on, &chpid); + __s390_vary_chpid_on, &link); } else for_each_subchannel_staged(s390_subchannel_vary_chpid_off, - NULL, &chpid); + NULL, &link); return 0; } diff --git a/trunk/drivers/s390/cio/cio.h b/trunk/drivers/s390/cio/cio.h index 4a1ff5c2eb88..155a82bcb9e5 100644 --- a/trunk/drivers/s390/cio/cio.h +++ b/trunk/drivers/s390/cio/cio.h @@ -68,13 +68,8 @@ struct schib { __u8 mda[4]; /* model dependent area */ } __attribute__ ((packed,aligned(4))); -/* - * When rescheduled, todo's with higher values will overwrite those - * with lower values. - */ enum sch_todo { SCH_TODO_NOTHING, - SCH_TODO_EVAL, SCH_TODO_UNREG, }; diff --git a/trunk/drivers/s390/cio/css.c b/trunk/drivers/s390/cio/css.c index 21908e67bf67..92d7324acb1c 100644 --- a/trunk/drivers/s390/cio/css.c +++ b/trunk/drivers/s390/cio/css.c @@ -195,6 +195,51 @@ void css_sch_device_unregister(struct subchannel *sch) } EXPORT_SYMBOL_GPL(css_sch_device_unregister); +static void css_sch_todo(struct work_struct *work) +{ + struct subchannel *sch; + enum sch_todo todo; + + sch = container_of(work, struct subchannel, todo_work); + /* Find out todo. */ + spin_lock_irq(sch->lock); + todo = sch->todo; + CIO_MSG_EVENT(4, "sch_todo: sch=0.%x.%04x, todo=%d\n", sch->schid.ssid, + sch->schid.sch_no, todo); + sch->todo = SCH_TODO_NOTHING; + spin_unlock_irq(sch->lock); + /* Perform todo. */ + if (todo == SCH_TODO_UNREG) + css_sch_device_unregister(sch); + /* Release workqueue ref. */ + put_device(&sch->dev); +} + +/** + * css_sched_sch_todo - schedule a subchannel operation + * @sch: subchannel + * @todo: todo + * + * Schedule the operation identified by @todo to be performed on the slow path + * workqueue. Do nothing if another operation with higher priority is already + * scheduled. Needs to be called with subchannel lock held. + */ +void css_sched_sch_todo(struct subchannel *sch, enum sch_todo todo) +{ + CIO_MSG_EVENT(4, "sch_todo: sched sch=0.%x.%04x todo=%d\n", + sch->schid.ssid, sch->schid.sch_no, todo); + if (sch->todo >= todo) + return; + /* Get workqueue ref. */ + if (!get_device(&sch->dev)) + return; + sch->todo = todo; + if (!queue_work(cio_work_q, &sch->todo_work)) { + /* Already queued, release workqueue ref. */ + put_device(&sch->dev); + } +} + static void ssd_from_pmcw(struct chsc_ssd_info *ssd, struct pmcw *pmcw) { int i; @@ -421,65 +466,6 @@ static void css_evaluate_subchannel(struct subchannel_id schid, int slow) css_schedule_eval(schid); } -/** - * css_sched_sch_todo - schedule a subchannel operation - * @sch: subchannel - * @todo: todo - * - * Schedule the operation identified by @todo to be performed on the slow path - * workqueue. Do nothing if another operation with higher priority is already - * scheduled. Needs to be called with subchannel lock held. - */ -void css_sched_sch_todo(struct subchannel *sch, enum sch_todo todo) -{ - CIO_MSG_EVENT(4, "sch_todo: sched sch=0.%x.%04x todo=%d\n", - sch->schid.ssid, sch->schid.sch_no, todo); - if (sch->todo >= todo) - return; - /* Get workqueue ref. */ - if (!get_device(&sch->dev)) - return; - sch->todo = todo; - if (!queue_work(cio_work_q, &sch->todo_work)) { - /* Already queued, release workqueue ref. */ - put_device(&sch->dev); - } -} - -static void css_sch_todo(struct work_struct *work) -{ - struct subchannel *sch; - enum sch_todo todo; - int ret; - - sch = container_of(work, struct subchannel, todo_work); - /* Find out todo. */ - spin_lock_irq(sch->lock); - todo = sch->todo; - CIO_MSG_EVENT(4, "sch_todo: sch=0.%x.%04x, todo=%d\n", sch->schid.ssid, - sch->schid.sch_no, todo); - sch->todo = SCH_TODO_NOTHING; - spin_unlock_irq(sch->lock); - /* Perform todo. */ - switch (todo) { - case SCH_TODO_NOTHING: - break; - case SCH_TODO_EVAL: - ret = css_evaluate_known_subchannel(sch, 1); - if (ret == -EAGAIN) { - spin_lock_irq(sch->lock); - css_sched_sch_todo(sch, todo); - spin_unlock_irq(sch->lock); - } - break; - case SCH_TODO_UNREG: - css_sch_device_unregister(sch); - break; - } - /* Release workqueue ref. */ - put_device(&sch->dev); -} - static struct idset *slow_subchannel_set; static spinlock_t slow_subchannel_lock; static wait_queue_head_t css_eval_wq; diff --git a/trunk/drivers/s390/cio/device.c b/trunk/drivers/s390/cio/device.c index 47269858ecb6..d734f4a0ecac 100644 --- a/trunk/drivers/s390/cio/device.c +++ b/trunk/drivers/s390/cio/device.c @@ -1868,9 +1868,9 @@ static void __ccw_device_pm_restore(struct ccw_device *cdev) */ cdev->private->flags.resuming = 1; cdev->private->path_new_mask = LPM_ANYPATH; - css_sched_sch_todo(sch, SCH_TODO_EVAL); + css_schedule_eval(sch->schid); spin_unlock_irq(sch->lock); - css_wait_for_slow_path(); + css_complete_work(); /* cdev may have been moved to a different subchannel. */ sch = to_subchannel(cdev->dev.parent); diff --git a/trunk/drivers/s390/cio/device_fsm.c b/trunk/drivers/s390/cio/device_fsm.c index 1b853513c891..52c233fa2b12 100644 --- a/trunk/drivers/s390/cio/device_fsm.c +++ b/trunk/drivers/s390/cio/device_fsm.c @@ -496,26 +496,8 @@ static void ccw_device_reset_path_events(struct ccw_device *cdev) cdev->private->pgid_reset_mask = 0; } -static void create_fake_irb(struct irb *irb, int type) -{ - memset(irb, 0, sizeof(*irb)); - if (type == FAKE_CMD_IRB) { - struct cmd_scsw *scsw = &irb->scsw.cmd; - scsw->cc = 1; - scsw->fctl = SCSW_FCTL_START_FUNC; - scsw->actl = SCSW_ACTL_START_PEND; - scsw->stctl = SCSW_STCTL_STATUS_PEND; - } else if (type == FAKE_TM_IRB) { - struct tm_scsw *scsw = &irb->scsw.tm; - scsw->x = 1; - scsw->cc = 1; - scsw->fctl = SCSW_FCTL_START_FUNC; - scsw->actl = SCSW_ACTL_START_PEND; - scsw->stctl = SCSW_STCTL_STATUS_PEND; - } -} - -void ccw_device_verify_done(struct ccw_device *cdev, int err) +void +ccw_device_verify_done(struct ccw_device *cdev, int err) { struct subchannel *sch; @@ -538,8 +520,12 @@ void ccw_device_verify_done(struct ccw_device *cdev, int err) ccw_device_done(cdev, DEV_STATE_ONLINE); /* Deliver fake irb to device driver, if needed. */ if (cdev->private->flags.fake_irb) { - create_fake_irb(&cdev->private->irb, - cdev->private->flags.fake_irb); + memset(&cdev->private->irb, 0, sizeof(struct irb)); + cdev->private->irb.scsw.cmd.cc = 1; + cdev->private->irb.scsw.cmd.fctl = SCSW_FCTL_START_FUNC; + cdev->private->irb.scsw.cmd.actl = SCSW_ACTL_START_PEND; + cdev->private->irb.scsw.cmd.stctl = + SCSW_STCTL_STATUS_PEND; cdev->private->flags.fake_irb = 0; if (cdev->handler) cdev->handler(cdev, cdev->private->intparm, diff --git a/trunk/drivers/s390/cio/device_ops.c b/trunk/drivers/s390/cio/device_ops.c index ec7fb6d3b479..f98698d5735e 100644 --- a/trunk/drivers/s390/cio/device_ops.c +++ b/trunk/drivers/s390/cio/device_ops.c @@ -198,7 +198,7 @@ int ccw_device_start_key(struct ccw_device *cdev, struct ccw1 *cpa, if (cdev->private->state == DEV_STATE_VERIFY) { /* Remember to fake irb when finished. */ if (!cdev->private->flags.fake_irb) { - cdev->private->flags.fake_irb = FAKE_CMD_IRB; + cdev->private->flags.fake_irb = 1; cdev->private->intparm = intparm; return 0; } else @@ -213,9 +213,9 @@ int ccw_device_start_key(struct ccw_device *cdev, struct ccw1 *cpa, ret = cio_set_options (sch, flags); if (ret) return ret; - /* Adjust requested path mask to exclude unusable paths. */ + /* Adjust requested path mask to excluded varied off paths. */ if (lpm) { - lpm &= sch->lpm; + lpm &= sch->opm; if (lpm == 0) return -EACCES; } @@ -605,21 +605,11 @@ int ccw_device_tm_start_key(struct ccw_device *cdev, struct tcw *tcw, sch = to_subchannel(cdev->dev.parent); if (!sch->schib.pmcw.ena) return -EINVAL; - if (cdev->private->state == DEV_STATE_VERIFY) { - /* Remember to fake irb when finished. */ - if (!cdev->private->flags.fake_irb) { - cdev->private->flags.fake_irb = FAKE_TM_IRB; - cdev->private->intparm = intparm; - return 0; - } else - /* There's already a fake I/O around. */ - return -EBUSY; - } if (cdev->private->state != DEV_STATE_ONLINE) return -EIO; - /* Adjust requested path mask to exclude unusable paths. */ + /* Adjust requested path mask to excluded varied off paths. */ if (lpm) { - lpm &= sch->lpm; + lpm &= sch->opm; if (lpm == 0) return -EACCES; } diff --git a/trunk/drivers/s390/cio/io_sch.h b/trunk/drivers/s390/cio/io_sch.h index 76253dfcc1be..2ebb492a5c17 100644 --- a/trunk/drivers/s390/cio/io_sch.h +++ b/trunk/drivers/s390/cio/io_sch.h @@ -111,9 +111,6 @@ enum cdev_todo { CDEV_TODO_UNREG_EVAL, }; -#define FAKE_CMD_IRB 1 -#define FAKE_TM_IRB 2 - struct ccw_device_private { struct ccw_device *cdev; struct subchannel *sch; @@ -141,7 +138,7 @@ struct ccw_device_private { unsigned int doverify:1; /* delayed path verification */ unsigned int donotify:1; /* call notify function */ unsigned int recog_done:1; /* dev. recog. complete */ - unsigned int fake_irb:2; /* deliver faked irb */ + unsigned int fake_irb:1; /* deliver faked irb */ unsigned int resuming:1; /* recognition while resume */ unsigned int pgroup:1; /* pathgroup is set up */ unsigned int mpath:1; /* multipathing is set up */ diff --git a/trunk/drivers/s390/crypto/ap_bus.c b/trunk/drivers/s390/crypto/ap_bus.c index 96bbe9d12a79..ec94f049e995 100644 --- a/trunk/drivers/s390/crypto/ap_bus.c +++ b/trunk/drivers/s390/crypto/ap_bus.c @@ -1552,8 +1552,6 @@ static void ap_reset(struct ap_device *ap_dev) rc = ap_init_queue(ap_dev->qid); if (rc == -ENODEV) ap_dev->unregistered = 1; - else - __ap_schedule_poll_timer(); } static int __ap_poll_device(struct ap_device *ap_dev, unsigned long *flags) diff --git a/trunk/include/linux/init_task.h b/trunk/include/linux/init_task.h index 32574eef9394..94b1e356c02a 100644 --- a/trunk/include/linux/init_task.h +++ b/trunk/include/linux/init_task.h @@ -126,8 +126,6 @@ extern struct cred init_cred; # define INIT_PERF_EVENTS(tsk) #endif -#define INIT_TASK_COMM "swapper" - /* * INIT_TASK is used to set up the first task table, touch at * your own risk!. Base=0, limit=0x1fffff (=2MB) @@ -164,7 +162,7 @@ extern struct cred init_cred; .group_leader = &tsk, \ RCU_INIT_POINTER(.real_cred, &init_cred), \ RCU_INIT_POINTER(.cred, &init_cred), \ - .comm = INIT_TASK_COMM, \ + .comm = "swapper", \ .thread = INIT_THREAD, \ .fs = &init_fs, \ .files = &init_files, \ diff --git a/trunk/kernel/sched.c b/trunk/kernel/sched.c index d6b149ccf925..0e9344a71be3 100644 --- a/trunk/kernel/sched.c +++ b/trunk/kernel/sched.c @@ -71,7 +71,6 @@ #include #include #include -#include #include #include @@ -4811,9 +4810,6 @@ EXPORT_SYMBOL(wait_for_completion); * This waits for either a completion of a specific task to be signaled or for a * specified timeout to expire. The timeout is in jiffies. It is not * interruptible. - * - * The return value is 0 if timed out, and positive (at least 1, or number of - * jiffies left till timeout) if completed. */ unsigned long __sched wait_for_completion_timeout(struct completion *x, unsigned long timeout) @@ -4828,8 +4824,6 @@ EXPORT_SYMBOL(wait_for_completion_timeout); * * This waits for completion of a specific task to be signaled. It is * interruptible. - * - * The return value is -ERESTARTSYS if interrupted, 0 if completed. */ int __sched wait_for_completion_interruptible(struct completion *x) { @@ -4847,9 +4841,6 @@ EXPORT_SYMBOL(wait_for_completion_interruptible); * * This waits for either a completion of a specific task to be signaled or for a * specified timeout to expire. It is interruptible. The timeout is in jiffies. - * - * The return value is -ERESTARTSYS if interrupted, 0 if timed out, - * positive (at least 1, or number of jiffies left till timeout) if completed. */ long __sched wait_for_completion_interruptible_timeout(struct completion *x, @@ -4865,8 +4856,6 @@ EXPORT_SYMBOL(wait_for_completion_interruptible_timeout); * * This waits to be signaled for completion of a specific task. It can be * interrupted by a kill signal. - * - * The return value is -ERESTARTSYS if interrupted, 0 if completed. */ int __sched wait_for_completion_killable(struct completion *x) { @@ -4885,9 +4874,6 @@ EXPORT_SYMBOL(wait_for_completion_killable); * This waits for either a completion of a specific task to be * signaled or for a specified timeout to expire. It can be * interrupted by a kill signal. The timeout is in jiffies. - * - * The return value is -ERESTARTSYS if interrupted, 0 if timed out, - * positive (at least 1, or number of jiffies left till timeout) if completed. */ long __sched wait_for_completion_killable_timeout(struct completion *x, @@ -6113,9 +6099,6 @@ void __cpuinit init_idle(struct task_struct *idle, int cpu) */ idle->sched_class = &idle_sched_class; ftrace_graph_init_idle_task(idle, cpu); -#if defined(CONFIG_SMP) - sprintf(idle->comm, "%s/%d", INIT_TASK_COMM, cpu); -#endif } /* diff --git a/trunk/kernel/sched_fair.c b/trunk/kernel/sched_fair.c index a78ed2736ba7..5c9e67923b7c 100644 --- a/trunk/kernel/sched_fair.c +++ b/trunk/kernel/sched_fair.c @@ -772,32 +772,19 @@ static void update_cfs_load(struct cfs_rq *cfs_rq, int global_update) list_del_leaf_cfs_rq(cfs_rq); } -static inline long calc_tg_weight(struct task_group *tg, struct cfs_rq *cfs_rq) -{ - long tg_weight; - - /* - * Use this CPU's actual weight instead of the last load_contribution - * to gain a more accurate current total weight. See - * update_cfs_rq_load_contribution(). - */ - tg_weight = atomic_read(&tg->load_weight); - tg_weight -= cfs_rq->load_contribution; - tg_weight += cfs_rq->load.weight; - - return tg_weight; -} - static long calc_cfs_shares(struct cfs_rq *cfs_rq, struct task_group *tg) { - long tg_weight, load, shares; + long load_weight, load, shares; - tg_weight = calc_tg_weight(tg, cfs_rq); load = cfs_rq->load.weight; + load_weight = atomic_read(&tg->load_weight); + load_weight += load; + load_weight -= cfs_rq->load_contribution; + shares = (tg->shares * load); - if (tg_weight) - shares /= tg_weight; + if (load_weight) + shares /= load_weight; if (shares < MIN_SHARES) shares = MIN_SHARES; @@ -1756,7 +1743,7 @@ static void __return_cfs_rq_runtime(struct cfs_rq *cfs_rq) static __always_inline void return_cfs_rq_runtime(struct cfs_rq *cfs_rq) { - if (!cfs_rq->runtime_enabled || cfs_rq->nr_running) + if (!cfs_rq->runtime_enabled || !cfs_rq->nr_running) return; __return_cfs_rq_runtime(cfs_rq); @@ -2049,100 +2036,36 @@ static void task_waking_fair(struct task_struct *p) * Adding load to a group doesn't make a group heavier, but can cause movement * of group shares between cpus. Assuming the shares were perfectly aligned one * can calculate the shift in shares. - * - * Calculate the effective load difference if @wl is added (subtracted) to @tg - * on this @cpu and results in a total addition (subtraction) of @wg to the - * total group weight. - * - * Given a runqueue weight distribution (rw_i) we can compute a shares - * distribution (s_i) using: - * - * s_i = rw_i / \Sum rw_j (1) - * - * Suppose we have 4 CPUs and our @tg is a direct child of the root group and - * has 7 equal weight tasks, distributed as below (rw_i), with the resulting - * shares distribution (s_i): - * - * rw_i = { 2, 4, 1, 0 } - * s_i = { 2/7, 4/7, 1/7, 0 } - * - * As per wake_affine() we're interested in the load of two CPUs (the CPU the - * task used to run on and the CPU the waker is running on), we need to - * compute the effect of waking a task on either CPU and, in case of a sync - * wakeup, compute the effect of the current task going to sleep. - * - * So for a change of @wl to the local @cpu with an overall group weight change - * of @wl we can compute the new shares distribution (s'_i) using: - * - * s'_i = (rw_i + @wl) / (@wg + \Sum rw_j) (2) - * - * Suppose we're interested in CPUs 0 and 1, and want to compute the load - * differences in waking a task to CPU 0. The additional task changes the - * weight and shares distributions like: - * - * rw'_i = { 3, 4, 1, 0 } - * s'_i = { 3/8, 4/8, 1/8, 0 } - * - * We can then compute the difference in effective weight by using: - * - * dw_i = S * (s'_i - s_i) (3) - * - * Where 'S' is the group weight as seen by its parent. - * - * Therefore the effective change in loads on CPU 0 would be 5/56 (3/8 - 2/7) - * times the weight of the group. The effect on CPU 1 would be -4/56 (4/8 - - * 4/7) times the weight of the group. */ static long effective_load(struct task_group *tg, int cpu, long wl, long wg) { struct sched_entity *se = tg->se[cpu]; - if (!tg->parent) /* the trivial, non-cgroup case */ + if (!tg->parent) return wl; for_each_sched_entity(se) { - long w, W; + long lw, w; tg = se->my_q->tg; + w = se->my_q->load.weight; - /* - * W = @wg + \Sum rw_j - */ - W = wg + calc_tg_weight(tg, se->my_q); + /* use this cpu's instantaneous contribution */ + lw = atomic_read(&tg->load_weight); + lw -= se->my_q->load_contribution; + lw += w + wg; - /* - * w = rw_i + @wl - */ - w = se->my_q->load.weight + wl; + wl += w; - /* - * wl = S * s'_i; see (2) - */ - if (W > 0 && w < W) - wl = (w * tg->shares) / W; + if (lw > 0 && wl < lw) + wl = (wl * tg->shares) / lw; else wl = tg->shares; - /* - * Per the above, wl is the new se->load.weight value; since - * those are clipped to [MIN_SHARES, ...) do so now. See - * calc_cfs_shares(). - */ + /* zero point is MIN_SHARES */ if (wl < MIN_SHARES) wl = MIN_SHARES; - - /* - * wl = dw_i = S * (s'_i - s_i); see (3) - */ wl -= se->load.weight; - - /* - * Recursively apply this logic to all parent groups to compute - * the final effective load change on the root group. Since - * only the @tg group gets extra weight, all parent groups can - * only redistribute existing shares. @wl is the shift in shares - * resulting from this level per the above. - */ wg = 0; } @@ -2326,8 +2249,7 @@ static int select_idle_sibling(struct task_struct *p, int target) int cpu = smp_processor_id(); int prev_cpu = task_cpu(p); struct sched_domain *sd; - struct sched_group *sg; - int i, smt = 0; + int i; /* * If the task is going to be woken-up on this cpu and if it is @@ -2347,38 +2269,25 @@ static int select_idle_sibling(struct task_struct *p, int target) * Otherwise, iterate the domains and find an elegible idle cpu. */ rcu_read_lock(); -again: for_each_domain(target, sd) { - if (!smt && (sd->flags & SD_SHARE_CPUPOWER)) - continue; - - if (!(sd->flags & SD_SHARE_PKG_RESOURCES)) { - if (!smt) { - smt = 1; - goto again; - } + if (!(sd->flags & SD_SHARE_PKG_RESOURCES)) break; - } - - sg = sd->groups; - do { - if (!cpumask_intersects(sched_group_cpus(sg), - tsk_cpus_allowed(p))) - goto next; - for_each_cpu(i, sched_group_cpus(sg)) { - if (!idle_cpu(i)) - goto next; + for_each_cpu_and(i, sched_domain_span(sd), tsk_cpus_allowed(p)) { + if (idle_cpu(i)) { + target = i; + break; } + } - target = cpumask_first_and(sched_group_cpus(sg), - tsk_cpus_allowed(p)); - goto done; -next: - sg = sg->next; - } while (sg != sd->groups); + /* + * Lets stop looking for an idle sibling when we reached + * the domain that spans the current cpu and prev_cpu. + */ + if (cpumask_test_cpu(cpu, sched_domain_span(sd)) && + cpumask_test_cpu(prev_cpu, sched_domain_span(sd))) + break; } -done: rcu_read_unlock(); return target; @@ -3602,7 +3511,7 @@ static bool update_sd_pick_busiest(struct sched_domain *sd, } /** - * update_sd_lb_stats - Update sched_domain's statistics for load balancing. + * update_sd_lb_stats - Update sched_group's statistics for load balancing. * @sd: sched_domain whose statistics are to be updated. * @this_cpu: Cpu for which load balance is currently performed. * @idle: Idle status of this_cpu diff --git a/trunk/kernel/sched_features.h b/trunk/kernel/sched_features.h index 84802245abd2..efa0a7b75dde 100644 --- a/trunk/kernel/sched_features.h +++ b/trunk/kernel/sched_features.h @@ -67,4 +67,3 @@ SCHED_FEAT(NONTASK_POWER, 1) SCHED_FEAT(TTWU_QUEUE, 1) SCHED_FEAT(FORCE_SD_OVERLAP, 0) -SCHED_FEAT(RT_RUNTIME_SHARE, 1) diff --git a/trunk/kernel/sched_rt.c b/trunk/kernel/sched_rt.c index 583a1368afe6..056cbd2e2a27 100644 --- a/trunk/kernel/sched_rt.c +++ b/trunk/kernel/sched_rt.c @@ -560,9 +560,6 @@ static int balance_runtime(struct rt_rq *rt_rq) { int more = 0; - if (!sched_feat(RT_RUNTIME_SHARE)) - return more; - if (rt_rq->rt_time > rt_rq->rt_runtime) { raw_spin_unlock(&rt_rq->rt_runtime_lock); more = do_balance_runtime(rt_rq); diff --git a/trunk/mm/slab.c b/trunk/mm/slab.c index 708efe886154..83311c9aaf9d 100644 --- a/trunk/mm/slab.c +++ b/trunk/mm/slab.c @@ -595,6 +595,7 @@ static enum { PARTIAL_AC, PARTIAL_L3, EARLY, + LATE, FULL } g_cpucache_up; @@ -671,7 +672,7 @@ static void init_node_lock_keys(int q) { struct cache_sizes *s = malloc_sizes; - if (g_cpucache_up != FULL) + if (g_cpucache_up < LATE) return; for (s = malloc_sizes; s->cs_size != ULONG_MAX; s++) { @@ -1666,6 +1667,8 @@ void __init kmem_cache_init_late(void) { struct kmem_cache *cachep; + g_cpucache_up = LATE; + /* Annotate slab for lockdep -- annotate the malloc caches */ init_lock_keys();