diff --git a/[refs] b/[refs] index cb171ed747ed..e042bed48697 100644 --- a/[refs] +++ b/[refs] @@ -1,2 +1,2 @@ --- -refs/heads/master: db670dac49b5423b39b5e523d28fe32045d71b10 +refs/heads/master: ec514c487c3d4b652943da7b0afbc094eee08cfa diff --git a/trunk/Documentation/trace/kprobetrace.txt b/trunk/Documentation/trace/kprobetrace.txt index c83bd6b4e6e8..6d27ab8d6e9f 100644 --- a/trunk/Documentation/trace/kprobetrace.txt +++ b/trunk/Documentation/trace/kprobetrace.txt @@ -120,6 +120,7 @@ format: field:unsigned char common_flags; offset:2; size:1; signed:0; field:unsigned char common_preempt_count; offset:3; size:1;signed:0; field:int common_pid; offset:4; size:4; signed:1; + field:int common_lock_depth; offset:8; size:4; signed:1; field:unsigned long __probe_ip; offset:12; size:4; signed:0; field:int __probe_nargs; offset:16; size:4; signed:1; diff --git a/trunk/MAINTAINERS b/trunk/MAINTAINERS index 16a5c5f2c6a6..69f19f10314a 100644 --- a/trunk/MAINTAINERS +++ b/trunk/MAINTAINERS @@ -2813,38 +2813,19 @@ F: Documentation/gpio.txt F: drivers/gpio/ F: include/linux/gpio* +GRE DEMULTIPLEXER DRIVER +M: Dmitry Kozlov +L: netdev@vger.kernel.org +S: Maintained +F: net/ipv4/gre.c +F: include/net/gre.h + GRETH 10/100/1G Ethernet MAC device driver M: Kristoffer Glembo L: netdev@vger.kernel.org S: Maintained F: drivers/net/greth* -HARD DRIVE ACTIVE PROTECTION SYSTEM (HDAPS) DRIVER -M: Frank Seidel -L: platform-driver-x86@vger.kernel.org -W: http://www.kernel.org/pub/linux/kernel/people/fseidel/hdaps/ -S: Maintained -F: drivers/platform/x86/hdaps.c - -HWPOISON MEMORY FAILURE HANDLING -M: Andi Kleen -L: linux-mm@kvack.org -T: git git://git.kernel.org/pub/scm/linux/kernel/git/ak/linux-mce-2.6.git hwpoison -S: Maintained -F: mm/memory-failure.c -F: mm/hwpoison-inject.c - -HYPERVISOR VIRTUAL CONSOLE DRIVER -L: linuxppc-dev@lists.ozlabs.org -S: Odd Fixes -F: drivers/tty/hvc/ - -iSCSI BOOT FIRMWARE TABLE (iBFT) DRIVER -M: Peter Jones -M: Konrad Rzeszutek Wilk -S: Maintained -F: drivers/firmware/iscsi_ibft* - GSPCA FINEPIX SUBDRIVER M: Frank Zago L: linux-media@vger.kernel.org @@ -2895,6 +2876,26 @@ T: git git://git.kernel.org/pub/scm/linux/kernel/git/mchehab/linux-2.6.git S: Maintained F: drivers/media/video/gspca/ +HARD DRIVE ACTIVE PROTECTION SYSTEM (HDAPS) DRIVER +M: Frank Seidel +L: platform-driver-x86@vger.kernel.org +W: http://www.kernel.org/pub/linux/kernel/people/fseidel/hdaps/ +S: Maintained +F: drivers/platform/x86/hdaps.c + +HWPOISON MEMORY FAILURE HANDLING +M: Andi Kleen +L: linux-mm@kvack.org +T: git git://git.kernel.org/pub/scm/linux/kernel/git/ak/linux-mce-2.6.git hwpoison +S: Maintained +F: mm/memory-failure.c +F: mm/hwpoison-inject.c + +HYPERVISOR VIRTUAL CONSOLE DRIVER +L: linuxppc-dev@lists.ozlabs.org +S: Odd Fixes +F: drivers/tty/hvc/ + HARDWARE MONITORING M: Jean Delvare M: Guenter Roeck @@ -3478,6 +3479,12 @@ F: Documentation/isapnp.txt F: drivers/pnp/isapnp/ F: include/linux/isapnp.h +iSCSI BOOT FIRMWARE TABLE (iBFT) DRIVER +M: Peter Jones +M: Konrad Rzeszutek Wilk +S: Maintained +F: drivers/firmware/iscsi_ibft* + ISCSI M: Mike Christie L: open-iscsi@googlegroups.com @@ -4989,6 +4996,13 @@ F: Documentation/pps/ F: drivers/pps/ F: include/linux/pps*.h +PPTP DRIVER +M: Dmitry Kozlov +L: netdev@vger.kernel.org +S: Maintained +F: drivers/net/pptp.c +W: http://sourceforge.net/projects/accel-pptp + PREEMPTIBLE KERNEL M: Robert Love L: kpreempt-tech@lists.sourceforge.net @@ -7024,20 +7038,6 @@ M: "Maciej W. Rozycki" S: Maintained F: drivers/tty/serial/zs.* -GRE DEMULTIPLEXER DRIVER -M: Dmitry Kozlov -L: netdev@vger.kernel.org -S: Maintained -F: net/ipv4/gre.c -F: include/net/gre.h - -PPTP DRIVER -M: Dmitry Kozlov -L: netdev@vger.kernel.org -S: Maintained -F: drivers/net/pptp.c -W: http://sourceforge.net/projects/accel-pptp - THE REST M: Linus Torvalds L: linux-kernel@vger.kernel.org diff --git a/trunk/arch/alpha/include/asm/unistd.h b/trunk/arch/alpha/include/asm/unistd.h index 058937bf5a77..b1834166922d 100644 --- a/trunk/arch/alpha/include/asm/unistd.h +++ b/trunk/arch/alpha/include/asm/unistd.h @@ -452,10 +452,14 @@ #define __NR_fanotify_init 494 #define __NR_fanotify_mark 495 #define __NR_prlimit64 496 +#define __NR_name_to_handle_at 497 +#define __NR_open_by_handle_at 498 +#define __NR_clock_adjtime 499 +#define __NR_syncfs 500 #ifdef __KERNEL__ -#define NR_SYSCALLS 497 +#define NR_SYSCALLS 501 #define __ARCH_WANT_IPC_PARSE_VERSION #define __ARCH_WANT_OLD_READDIR diff --git a/trunk/arch/alpha/kernel/smp.c b/trunk/arch/alpha/kernel/smp.c index 5a621c6d22ab..42aa078a5e4d 100644 --- a/trunk/arch/alpha/kernel/smp.c +++ b/trunk/arch/alpha/kernel/smp.c @@ -585,7 +585,8 @@ handle_ipi(struct pt_regs *regs) switch (which) { case IPI_RESCHEDULE: - scheduler_ipi(); + /* Reschedule callback. Everything to be done + is done by the interrupt return path. */ break; case IPI_CALL_FUNC: diff --git a/trunk/arch/alpha/kernel/systbls.S b/trunk/arch/alpha/kernel/systbls.S index a6a1de9db16f..15f999d41c75 100644 --- a/trunk/arch/alpha/kernel/systbls.S +++ b/trunk/arch/alpha/kernel/systbls.S @@ -498,23 +498,27 @@ sys_call_table: .quad sys_ni_syscall /* sys_timerfd */ .quad sys_eventfd .quad sys_recvmmsg - .quad sys_fallocate /* 480 */ + .quad sys_fallocate /* 480 */ .quad sys_timerfd_create .quad sys_timerfd_settime .quad sys_timerfd_gettime .quad sys_signalfd4 - .quad sys_eventfd2 /* 485 */ + .quad sys_eventfd2 /* 485 */ .quad sys_epoll_create1 .quad sys_dup3 .quad sys_pipe2 .quad sys_inotify_init1 - .quad sys_preadv /* 490 */ + .quad sys_preadv /* 490 */ .quad sys_pwritev .quad sys_rt_tgsigqueueinfo .quad sys_perf_event_open .quad sys_fanotify_init - .quad sys_fanotify_mark /* 495 */ + .quad sys_fanotify_mark /* 495 */ .quad sys_prlimit64 + .quad sys_name_to_handle_at + .quad sys_open_by_handle_at + .quad sys_clock_adjtime + .quad sys_syncfs /* 500 */ .size sys_call_table, . - sys_call_table .type sys_call_table, @object diff --git a/trunk/arch/alpha/kernel/time.c b/trunk/arch/alpha/kernel/time.c index 918e8e0b72ff..818e74ed45dc 100644 --- a/trunk/arch/alpha/kernel/time.c +++ b/trunk/arch/alpha/kernel/time.c @@ -375,8 +375,7 @@ static struct clocksource clocksource_rpcc = { static inline void register_rpcc_clocksource(long cycle_freq) { - clocksource_calc_mult_shift(&clocksource_rpcc, cycle_freq, 4); - clocksource_register(&clocksource_rpcc); + clocksource_register_hz(&clocksource_rpcc, cycle_freq); } #else /* !CONFIG_SMP */ static inline void register_rpcc_clocksource(long cycle_freq) diff --git a/trunk/arch/arm/boot/compressed/Makefile b/trunk/arch/arm/boot/compressed/Makefile index 8ebbb511c783..0c6852d93506 100644 --- a/trunk/arch/arm/boot/compressed/Makefile +++ b/trunk/arch/arm/boot/compressed/Makefile @@ -74,7 +74,7 @@ ZTEXTADDR := $(CONFIG_ZBOOT_ROM_TEXT) ZBSSADDR := $(CONFIG_ZBOOT_ROM_BSS) else ZTEXTADDR := 0 -ZBSSADDR := ALIGN(4) +ZBSSADDR := ALIGN(8) endif SEDFLAGS = s/TEXT_START/$(ZTEXTADDR)/;s/BSS_START/$(ZBSSADDR)/ diff --git a/trunk/arch/arm/boot/compressed/head.S b/trunk/arch/arm/boot/compressed/head.S index adf583cd0c35..49f5b2eaaa87 100644 --- a/trunk/arch/arm/boot/compressed/head.S +++ b/trunk/arch/arm/boot/compressed/head.S @@ -179,15 +179,14 @@ not_angel: bl cache_on restart: adr r0, LC0 - ldmia r0, {r1, r2, r3, r5, r6, r9, r11, r12} - ldr sp, [r0, #32] + ldmia r0, {r1, r2, r3, r6, r9, r11, r12} + ldr sp, [r0, #28] /* * We might be running at a different address. We need * to fix up various pointers. */ sub r0, r0, r1 @ calculate the delta offset - add r5, r5, r0 @ _start add r6, r6, r0 @ _edata #ifndef CONFIG_ZBOOT_ROM @@ -206,31 +205,40 @@ restart: adr r0, LC0 /* * Check to see if we will overwrite ourselves. * r4 = final kernel address - * r5 = start of this image * r9 = size of decompressed image * r10 = end of this image, including bss/stack/malloc space if non XIP * We basically want: - * r4 >= r10 -> OK - * r4 + image length <= r5 -> OK + * r4 - 16k page directory >= r10 -> OK + * r4 + image length <= current position (pc) -> OK */ + add r10, r10, #16384 cmp r4, r10 bhs wont_overwrite add r10, r4, r9 - cmp r10, r5 + ARM( cmp r10, pc ) + THUMB( mov lr, pc ) + THUMB( cmp r10, lr ) bls wont_overwrite /* * Relocate ourselves past the end of the decompressed kernel. - * r5 = start of this image * r6 = _edata * r10 = end of the decompressed kernel * Because we always copy ahead, we need to do it from the end and go * backward in case the source and destination overlap. */ - /* Round up to next 256-byte boundary. */ - add r10, r10, #256 + /* + * Bump to the next 256-byte boundary with the size of + * the relocation code added. This avoids overwriting + * ourself when the offset is small. + */ + add r10, r10, #((reloc_code_end - restart + 256) & ~255) bic r10, r10, #255 + /* Get start of code we want to copy and align it down. */ + adr r5, restart + bic r5, r5, #31 + sub r9, r6, r5 @ size to copy add r9, r9, #31 @ rounded up to a multiple bic r9, r9, #31 @ ... of 32 bytes @@ -245,6 +253,11 @@ restart: adr r0, LC0 /* Preserve offset to relocated code. */ sub r6, r9, r6 +#ifndef CONFIG_ZBOOT_ROM + /* cache_clean_flush may use the stack, so relocate it */ + add sp, sp, r6 +#endif + bl cache_clean_flush adr r0, BSYM(restart) @@ -333,7 +346,6 @@ not_relocated: mov r0, #0 LC0: .word LC0 @ r1 .word __bss_start @ r2 .word _end @ r3 - .word _start @ r5 .word _edata @ r6 .word _image_size @ r9 .word _got_start @ r11 @@ -1062,6 +1074,7 @@ memdump: mov r12, r0 #endif .ltorg +reloc_code_end: .align .section ".stack", "aw", %nobits diff --git a/trunk/arch/arm/boot/compressed/vmlinux.lds.in b/trunk/arch/arm/boot/compressed/vmlinux.lds.in index 5309909d7282..ea80abe78844 100644 --- a/trunk/arch/arm/boot/compressed/vmlinux.lds.in +++ b/trunk/arch/arm/boot/compressed/vmlinux.lds.in @@ -54,6 +54,7 @@ SECTIONS .bss : { *(.bss) } _end = .; + . = ALIGN(8); /* the stack must be 64-bit aligned */ .stack : { *(.stack) } .stab 0 : { *(.stab) } diff --git a/trunk/arch/arm/include/asm/system.h b/trunk/arch/arm/include/asm/system.h index 885be097769d..832888d0c20c 100644 --- a/trunk/arch/arm/include/asm/system.h +++ b/trunk/arch/arm/include/asm/system.h @@ -159,7 +159,7 @@ extern unsigned int user_debug; #include #elif defined(CONFIG_ARM_DMA_MEM_BUFFERABLE) || defined(CONFIG_SMP) #define mb() do { dsb(); outer_sync(); } while (0) -#define rmb() dmb() +#define rmb() dsb() #define wmb() mb() #else #include diff --git a/trunk/arch/arm/kernel/signal.c b/trunk/arch/arm/kernel/signal.c index cb8398317644..0340224cf73c 100644 --- a/trunk/arch/arm/kernel/signal.c +++ b/trunk/arch/arm/kernel/signal.c @@ -597,45 +597,19 @@ setup_rt_frame(int usig, struct k_sigaction *ka, siginfo_t *info, return err; } -static inline void setup_syscall_restart(struct pt_regs *regs) -{ - regs->ARM_r0 = regs->ARM_ORIG_r0; - regs->ARM_pc -= thumb_mode(regs) ? 2 : 4; -} - /* * OK, we're invoking a handler */ static int handle_signal(unsigned long sig, struct k_sigaction *ka, siginfo_t *info, sigset_t *oldset, - struct pt_regs * regs, int syscall) + struct pt_regs * regs) { struct thread_info *thread = current_thread_info(); struct task_struct *tsk = current; int usig = sig; int ret; - /* - * If we were from a system call, check for system call restarting... - */ - if (syscall) { - switch (regs->ARM_r0) { - case -ERESTART_RESTARTBLOCK: - case -ERESTARTNOHAND: - regs->ARM_r0 = -EINTR; - break; - case -ERESTARTSYS: - if (!(ka->sa.sa_flags & SA_RESTART)) { - regs->ARM_r0 = -EINTR; - break; - } - /* fallthrough */ - case -ERESTARTNOINTR: - setup_syscall_restart(regs); - } - } - /* * translate the signal */ @@ -685,6 +659,7 @@ handle_signal(unsigned long sig, struct k_sigaction *ka, */ static void do_signal(struct pt_regs *regs, int syscall) { + unsigned int retval = 0, continue_addr = 0, restart_addr = 0; struct k_sigaction ka; siginfo_t info; int signr; @@ -698,18 +673,61 @@ static void do_signal(struct pt_regs *regs, int syscall) if (!user_mode(regs)) return; + /* + * If we were from a system call, check for system call restarting... + */ + if (syscall) { + continue_addr = regs->ARM_pc; + restart_addr = continue_addr - (thumb_mode(regs) ? 2 : 4); + retval = regs->ARM_r0; + + /* + * Prepare for system call restart. We do this here so that a + * debugger will see the already changed PSW. + */ + switch (retval) { + case -ERESTARTNOHAND: + case -ERESTARTSYS: + case -ERESTARTNOINTR: + regs->ARM_r0 = regs->ARM_ORIG_r0; + regs->ARM_pc = restart_addr; + break; + case -ERESTART_RESTARTBLOCK: + regs->ARM_r0 = -EINTR; + break; + } + } + if (try_to_freeze()) goto no_signal; + /* + * Get the signal to deliver. When running under ptrace, at this + * point the debugger may change all our registers ... + */ signr = get_signal_to_deliver(&info, &ka, regs, NULL); if (signr > 0) { sigset_t *oldset; + /* + * Depending on the signal settings we may need to revert the + * decision to restart the system call. But skip this if a + * debugger has chosen to restart at a different PC. + */ + if (regs->ARM_pc == restart_addr) { + if (retval == -ERESTARTNOHAND + || (retval == -ERESTARTSYS + && !(ka.sa.sa_flags & SA_RESTART))) { + regs->ARM_r0 = -EINTR; + regs->ARM_pc = continue_addr; + } + } + if (test_thread_flag(TIF_RESTORE_SIGMASK)) oldset = ¤t->saved_sigmask; else oldset = ¤t->blocked; - if (handle_signal(signr, &ka, &info, oldset, regs, syscall) == 0) { + if (handle_signal(signr, &ka, &info, oldset, regs) == 0) { /* * A signal was successfully delivered; the saved * sigmask will have been stored in the signal frame, @@ -723,11 +741,14 @@ static void do_signal(struct pt_regs *regs, int syscall) } no_signal: - /* - * No signal to deliver to the process - restart the syscall. - */ if (syscall) { - if (regs->ARM_r0 == -ERESTART_RESTARTBLOCK) { + /* + * Handle restarting a different system call. As above, + * if a debugger has chosen to restart at a different PC, + * ignore the restart. + */ + if (retval == -ERESTART_RESTARTBLOCK + && regs->ARM_pc == continue_addr) { if (thumb_mode(regs)) { regs->ARM_r7 = __NR_restart_syscall - __NR_SYSCALL_BASE; regs->ARM_pc -= 2; @@ -750,11 +771,6 @@ static void do_signal(struct pt_regs *regs, int syscall) #endif } } - if (regs->ARM_r0 == -ERESTARTNOHAND || - regs->ARM_r0 == -ERESTARTSYS || - regs->ARM_r0 == -ERESTARTNOINTR) { - setup_syscall_restart(regs); - } /* If there's no signal to deliver, we just put the saved sigmask * back. diff --git a/trunk/arch/arm/kernel/smp.c b/trunk/arch/arm/kernel/smp.c index 007a0a950e75..f29b8a29b174 100644 --- a/trunk/arch/arm/kernel/smp.c +++ b/trunk/arch/arm/kernel/smp.c @@ -560,7 +560,10 @@ asmlinkage void __exception_irq_entry do_IPI(int ipinr, struct pt_regs *regs) break; case IPI_RESCHEDULE: - scheduler_ipi(); + /* + * nothing more to do - eveything is + * done on the interrupt return path + */ break; case IPI_CALL_FUNC: diff --git a/trunk/arch/arm/mach-realview/include/mach/barriers.h b/trunk/arch/arm/mach-realview/include/mach/barriers.h index 0c5d749d7b5f..9a732195aa1c 100644 --- a/trunk/arch/arm/mach-realview/include/mach/barriers.h +++ b/trunk/arch/arm/mach-realview/include/mach/barriers.h @@ -4,5 +4,5 @@ * operation to deadlock the system. */ #define mb() dsb() -#define rmb() dmb() +#define rmb() dsb() #define wmb() mb() diff --git a/trunk/arch/arm/mach-tegra/include/mach/barriers.h b/trunk/arch/arm/mach-tegra/include/mach/barriers.h index cc115174899b..425b42e91ef6 100644 --- a/trunk/arch/arm/mach-tegra/include/mach/barriers.h +++ b/trunk/arch/arm/mach-tegra/include/mach/barriers.h @@ -23,7 +23,7 @@ #include -#define rmb() dmb() +#define rmb() dsb() #define wmb() do { dsb(); outer_sync(); } while (0) #define mb() wmb() diff --git a/trunk/arch/arm/mm/init.c b/trunk/arch/arm/mm/init.c index e5f6fc428348..e591513bb53e 100644 --- a/trunk/arch/arm/mm/init.c +++ b/trunk/arch/arm/mm/init.c @@ -392,7 +392,7 @@ free_memmap(unsigned long start_pfn, unsigned long end_pfn) * Convert start_pfn/end_pfn to a struct page pointer. */ start_pg = pfn_to_page(start_pfn - 1) + 1; - end_pg = pfn_to_page(end_pfn); + end_pg = pfn_to_page(end_pfn - 1) + 1; /* * Convert to physical addresses, and @@ -426,6 +426,14 @@ static void __init free_unused_memmap(struct meminfo *mi) bank_start = bank_pfn_start(bank); +#ifdef CONFIG_SPARSEMEM + /* + * Take care not to free memmap entries that don't exist + * due to SPARSEMEM sections which aren't present. + */ + bank_start = min(bank_start, + ALIGN(prev_bank_end, PAGES_PER_SECTION)); +#endif /* * If we had a previous bank, and there is a space * between the current bank and the previous, free it. @@ -440,6 +448,12 @@ static void __init free_unused_memmap(struct meminfo *mi) */ prev_bank_end = ALIGN(bank_pfn_end(bank), MAX_ORDER_NR_PAGES); } + +#ifdef CONFIG_SPARSEMEM + if (!IS_ALIGNED(prev_bank_end, PAGES_PER_SECTION)) + free_memmap(prev_bank_end, + ALIGN(prev_bank_end, PAGES_PER_SECTION)); +#endif } static void __init free_highpages(void) diff --git a/trunk/arch/blackfin/mach-common/smp.c b/trunk/arch/blackfin/mach-common/smp.c index 1fbd94c44457..8bce5ed031e4 100644 --- a/trunk/arch/blackfin/mach-common/smp.c +++ b/trunk/arch/blackfin/mach-common/smp.c @@ -177,9 +177,6 @@ static irqreturn_t ipi_handler_int1(int irq, void *dev_instance) while (msg_queue->count) { msg = &msg_queue->ipi_message[msg_queue->head]; switch (msg->type) { - case BFIN_IPI_RESCHEDULE: - scheduler_ipi(); - break; case BFIN_IPI_CALL_FUNC: spin_unlock_irqrestore(&msg_queue->lock, flags); ipi_call_function(cpu, msg); diff --git a/trunk/arch/cris/arch-v32/kernel/smp.c b/trunk/arch/cris/arch-v32/kernel/smp.c index 66cc75657e2f..4c9e3e1ba5d1 100644 --- a/trunk/arch/cris/arch-v32/kernel/smp.c +++ b/trunk/arch/cris/arch-v32/kernel/smp.c @@ -342,18 +342,15 @@ irqreturn_t crisv32_ipi_interrupt(int irq, void *dev_id) ipi = REG_RD(intr_vect, irq_regs[smp_processor_id()], rw_ipi); - if (ipi.vector & IPI_SCHEDULE) { - scheduler_ipi(); - } if (ipi.vector & IPI_CALL) { - func(info); + func(info); } if (ipi.vector & IPI_FLUSH_TLB) { - if (flush_mm == FLUSH_ALL) - __flush_tlb_all(); - else if (flush_vma == FLUSH_ALL) + if (flush_mm == FLUSH_ALL) + __flush_tlb_all(); + else if (flush_vma == FLUSH_ALL) __flush_tlb_mm(flush_mm); - else + else __flush_tlb_page(flush_vma, flush_addr); } diff --git a/trunk/arch/ia64/kernel/irq_ia64.c b/trunk/arch/ia64/kernel/irq_ia64.c index 782c3a357f24..5b704740f160 100644 --- a/trunk/arch/ia64/kernel/irq_ia64.c +++ b/trunk/arch/ia64/kernel/irq_ia64.c @@ -31,7 +31,6 @@ #include #include #include -#include #include #include @@ -497,7 +496,6 @@ ia64_handle_irq (ia64_vector vector, struct pt_regs *regs) smp_local_flush_tlb(); kstat_incr_irqs_this_cpu(irq, desc); } else if (unlikely(IS_RESCHEDULE(vector))) { - scheduler_ipi(); kstat_incr_irqs_this_cpu(irq, desc); } else { ia64_setreg(_IA64_REG_CR_TPR, vector); diff --git a/trunk/arch/ia64/xen/irq_xen.c b/trunk/arch/ia64/xen/irq_xen.c index b279e142c633..108bb858acf2 100644 --- a/trunk/arch/ia64/xen/irq_xen.c +++ b/trunk/arch/ia64/xen/irq_xen.c @@ -92,8 +92,6 @@ static unsigned short saved_irq_cnt; static int xen_slab_ready; #ifdef CONFIG_SMP -#include - /* Dummy stub. Though we may check XEN_RESCHEDULE_VECTOR before __do_IRQ, * it ends up to issue several memory accesses upon percpu data and * thus adds unnecessary traffic to other paths. @@ -101,13 +99,7 @@ static int xen_slab_ready; static irqreturn_t xen_dummy_handler(int irq, void *dev_id) { - return IRQ_HANDLED; -} -static irqreturn_t -xen_resched_handler(int irq, void *dev_id) -{ - scheduler_ipi(); return IRQ_HANDLED; } @@ -118,7 +110,7 @@ static struct irqaction xen_ipi_irqaction = { }; static struct irqaction xen_resched_irqaction = { - .handler = xen_resched_handler, + .handler = xen_dummy_handler, .flags = IRQF_DISABLED, .name = "resched" }; diff --git a/trunk/arch/m32r/kernel/smp.c b/trunk/arch/m32r/kernel/smp.c index fc10b39893d4..31cef20b2996 100644 --- a/trunk/arch/m32r/kernel/smp.c +++ b/trunk/arch/m32r/kernel/smp.c @@ -122,6 +122,8 @@ void smp_send_reschedule(int cpu_id) * * Description: This routine executes on CPU which received * 'RESCHEDULE_IPI'. + * Rescheduling is processed at the exit of interrupt + * operation. * * Born on Date: 2002.02.05 * @@ -136,7 +138,7 @@ void smp_send_reschedule(int cpu_id) *==========================================================================*/ void smp_reschedule_interrupt(void) { - scheduler_ipi(); + /* nothing to do */ } /*==========================================================================* diff --git a/trunk/arch/mips/Kconfig b/trunk/arch/mips/Kconfig index 8e256cc5dcd9..351c80fbba7e 100644 --- a/trunk/arch/mips/Kconfig +++ b/trunk/arch/mips/Kconfig @@ -997,9 +997,6 @@ config IRQ_GT641XX config IRQ_GIC bool -config IRQ_CPU_OCTEON - bool - config MIPS_BOARDS_GEN bool @@ -1359,8 +1356,6 @@ config CPU_SB1 config CPU_CAVIUM_OCTEON bool "Cavium Octeon processor" depends on SYS_HAS_CPU_CAVIUM_OCTEON - select IRQ_CPU - select IRQ_CPU_OCTEON select CPU_HAS_PREFETCH select CPU_SUPPORTS_64BIT_KERNEL select SYS_SUPPORTS_SMP diff --git a/trunk/arch/mips/alchemy/devboards/db1x00/board_setup.c b/trunk/arch/mips/alchemy/devboards/db1x00/board_setup.c index 05f120ff90f9..5c956fe8760f 100644 --- a/trunk/arch/mips/alchemy/devboards/db1x00/board_setup.c +++ b/trunk/arch/mips/alchemy/devboards/db1x00/board_setup.c @@ -127,13 +127,10 @@ const char *get_system_type(void) void __init board_setup(void) { unsigned long bcsr1, bcsr2; - u32 pin_func; bcsr1 = DB1000_BCSR_PHYS_ADDR; bcsr2 = DB1000_BCSR_PHYS_ADDR + DB1000_BCSR_HEXLED_OFS; - pin_func = 0; - #ifdef CONFIG_MIPS_DB1000 printk(KERN_INFO "AMD Alchemy Au1000/Db1000 Board\n"); #endif @@ -164,12 +161,16 @@ void __init board_setup(void) /* Not valid for Au1550 */ #if defined(CONFIG_IRDA) && \ (defined(CONFIG_SOC_AU1000) || defined(CONFIG_SOC_AU1100)) - /* Set IRFIRSEL instead of GPIO15 */ - pin_func = au_readl(SYS_PINFUNC) | SYS_PF_IRF; - au_writel(pin_func, SYS_PINFUNC); - /* Power off until the driver is in use */ - bcsr_mod(BCSR_RESETS, BCSR_RESETS_IRDA_MODE_MASK, - BCSR_RESETS_IRDA_MODE_OFF); + { + u32 pin_func; + + /* Set IRFIRSEL instead of GPIO15 */ + pin_func = au_readl(SYS_PINFUNC) | SYS_PF_IRF; + au_writel(pin_func, SYS_PINFUNC); + /* Power off until the driver is in use */ + bcsr_mod(BCSR_RESETS, BCSR_RESETS_IRDA_MODE_MASK, + BCSR_RESETS_IRDA_MODE_OFF); + } #endif bcsr_write(BCSR_PCMCIA, 0); /* turn off PCMCIA power */ @@ -177,31 +178,35 @@ void __init board_setup(void) alchemy_gpio1_input_enable(); #ifdef CONFIG_MIPS_MIRAGE - /* GPIO[20] is output */ - alchemy_gpio_direction_output(20, 0); + { + u32 pin_func; - /* Set GPIO[210:208] instead of SSI_0 */ - pin_func = au_readl(SYS_PINFUNC) | SYS_PF_S0; + /* GPIO[20] is output */ + alchemy_gpio_direction_output(20, 0); - /* Set GPIO[215:211] for LEDs */ - pin_func |= 5 << 2; + /* Set GPIO[210:208] instead of SSI_0 */ + pin_func = au_readl(SYS_PINFUNC) | SYS_PF_S0; - /* Set GPIO[214:213] for more LEDs */ - pin_func |= 5 << 12; + /* Set GPIO[215:211] for LEDs */ + pin_func |= 5 << 2; - /* Set GPIO[207:200] instead of PCMCIA/LCD */ - pin_func |= SYS_PF_LCD | SYS_PF_PC; - au_writel(pin_func, SYS_PINFUNC); + /* Set GPIO[214:213] for more LEDs */ + pin_func |= 5 << 12; - /* - * Enable speaker amplifier. This should - * be part of the audio driver. - */ - alchemy_gpio_direction_output(209, 1); + /* Set GPIO[207:200] instead of PCMCIA/LCD */ + pin_func |= SYS_PF_LCD | SYS_PF_PC; + au_writel(pin_func, SYS_PINFUNC); - pm_power_off = mirage_power_off; - _machine_halt = mirage_power_off; - _machine_restart = (void(*)(char *))mips_softreset; + /* + * Enable speaker amplifier. This should + * be part of the audio driver. + */ + alchemy_gpio_direction_output(209, 1); + + pm_power_off = mirage_power_off; + _machine_halt = mirage_power_off; + _machine_restart = (void(*)(char *))mips_softreset; + } #endif #ifdef CONFIG_MIPS_BOSPORUS diff --git a/trunk/arch/mips/alchemy/xxs1500/init.c b/trunk/arch/mips/alchemy/xxs1500/init.c index 15125c2fda7d..34a90a4bb6f4 100644 --- a/trunk/arch/mips/alchemy/xxs1500/init.c +++ b/trunk/arch/mips/alchemy/xxs1500/init.c @@ -51,10 +51,9 @@ void __init prom_init(void) prom_init_cmdline(); memsize_str = prom_getenv("memsize"); - if (!memsize_str) + if (!memsize_str || strict_strtoul(memsize_str, 0, &memsize)) memsize = 0x04000000; - else - strict_strtoul(memsize_str, 0, &memsize); + add_memory_region(0, memsize, BOOT_MEM_RAM); } diff --git a/trunk/arch/mips/boot/compressed/calc_vmlinuz_load_addr.c b/trunk/arch/mips/boot/compressed/calc_vmlinuz_load_addr.c index 88c9d963be88..9a6243676e22 100644 --- a/trunk/arch/mips/boot/compressed/calc_vmlinuz_load_addr.c +++ b/trunk/arch/mips/boot/compressed/calc_vmlinuz_load_addr.c @@ -16,8 +16,8 @@ int main(int argc, char *argv[]) { + unsigned long long vmlinux_size, vmlinux_load_addr, vmlinuz_load_addr; struct stat sb; - uint64_t vmlinux_size, vmlinux_load_addr, vmlinuz_load_addr; if (argc != 3) { fprintf(stderr, "Usage: %s \n", diff --git a/trunk/arch/mips/cavium-octeon/Kconfig b/trunk/arch/mips/cavium-octeon/Kconfig index caae22858163..cad555ebeca3 100644 --- a/trunk/arch/mips/cavium-octeon/Kconfig +++ b/trunk/arch/mips/cavium-octeon/Kconfig @@ -1,11 +1,7 @@ -config CAVIUM_OCTEON_SPECIFIC_OPTIONS - bool "Enable Octeon specific options" - depends on CPU_CAVIUM_OCTEON - default "y" +if CPU_CAVIUM_OCTEON config CAVIUM_CN63XXP1 bool "Enable CN63XXP1 errata worarounds" - depends on CAVIUM_OCTEON_SPECIFIC_OPTIONS default "n" help The CN63XXP1 chip requires build time workarounds to @@ -16,7 +12,6 @@ config CAVIUM_CN63XXP1 config CAVIUM_OCTEON_2ND_KERNEL bool "Build the kernel to be used as a 2nd kernel on the same chip" - depends on CAVIUM_OCTEON_SPECIFIC_OPTIONS default "n" help This option configures this kernel to be linked at a different @@ -26,7 +21,6 @@ config CAVIUM_OCTEON_2ND_KERNEL config CAVIUM_OCTEON_HW_FIX_UNALIGNED bool "Enable hardware fixups of unaligned loads and stores" - depends on CAVIUM_OCTEON_SPECIFIC_OPTIONS default "y" help Configure the Octeon hardware to automatically fix unaligned loads @@ -38,7 +32,6 @@ config CAVIUM_OCTEON_HW_FIX_UNALIGNED config CAVIUM_OCTEON_CVMSEG_SIZE int "Number of L1 cache lines reserved for CVMSEG memory" - depends on CAVIUM_OCTEON_SPECIFIC_OPTIONS range 0 54 default 1 help @@ -50,7 +43,6 @@ config CAVIUM_OCTEON_CVMSEG_SIZE config CAVIUM_OCTEON_LOCK_L2 bool "Lock often used kernel code in the L2" - depends on CAVIUM_OCTEON_SPECIFIC_OPTIONS default "y" help Enable locking parts of the kernel into the L2 cache. @@ -93,7 +85,6 @@ config CAVIUM_OCTEON_LOCK_L2_MEMCPY config ARCH_SPARSEMEM_ENABLE def_bool y select SPARSEMEM_STATIC - depends on CPU_CAVIUM_OCTEON config CAVIUM_OCTEON_HELPER def_bool y @@ -107,6 +98,8 @@ config NEED_SG_DMA_LENGTH config SWIOTLB def_bool y - depends on CPU_CAVIUM_OCTEON select IOMMU_HELPER select NEED_SG_DMA_LENGTH + + +endif # CPU_CAVIUM_OCTEON diff --git a/trunk/arch/mips/cavium-octeon/smp.c b/trunk/arch/mips/cavium-octeon/smp.c index 76923eeb58b9..ba78b21cc8d0 100644 --- a/trunk/arch/mips/cavium-octeon/smp.c +++ b/trunk/arch/mips/cavium-octeon/smp.c @@ -44,8 +44,6 @@ static irqreturn_t mailbox_interrupt(int irq, void *dev_id) if (action & SMP_CALL_FUNCTION) smp_call_function_interrupt(); - if (action & SMP_RESCHEDULE_YOURSELF) - scheduler_ipi(); /* Check if we've been told to flush the icache */ if (action & SMP_ICACHE_FLUSH) diff --git a/trunk/arch/mips/include/asm/cache.h b/trunk/arch/mips/include/asm/cache.h index 650ac9ba734c..b4db69fbc40c 100644 --- a/trunk/arch/mips/include/asm/cache.h +++ b/trunk/arch/mips/include/asm/cache.h @@ -17,6 +17,6 @@ #define SMP_CACHE_SHIFT L1_CACHE_SHIFT #define SMP_CACHE_BYTES L1_CACHE_BYTES -#define __read_mostly __attribute__((__section__(".data.read_mostly"))) +#define __read_mostly __attribute__((__section__(".data..read_mostly"))) #endif /* _ASM_CACHE_H */ diff --git a/trunk/arch/mips/include/asm/cevt-r4k.h b/trunk/arch/mips/include/asm/cevt-r4k.h index fa4328f9124f..65f9bdd02f1f 100644 --- a/trunk/arch/mips/include/asm/cevt-r4k.h +++ b/trunk/arch/mips/include/asm/cevt-r4k.h @@ -14,6 +14,9 @@ #ifndef __ASM_CEVT_R4K_H #define __ASM_CEVT_R4K_H +#include +#include + DECLARE_PER_CPU(struct clock_event_device, mips_clockevent_device); void mips_event_handler(struct clock_event_device *dev); diff --git a/trunk/arch/mips/include/asm/hugetlb.h b/trunk/arch/mips/include/asm/hugetlb.h index f5e856015329..c565b7c3f0b5 100644 --- a/trunk/arch/mips/include/asm/hugetlb.h +++ b/trunk/arch/mips/include/asm/hugetlb.h @@ -70,6 +70,7 @@ static inline pte_t huge_ptep_get_and_clear(struct mm_struct *mm, static inline void huge_ptep_clear_flush(struct vm_area_struct *vma, unsigned long addr, pte_t *ptep) { + flush_tlb_mm(vma->vm_mm); } static inline int huge_pte_none(pte_t pte) diff --git a/trunk/arch/mips/include/asm/mach-bcm63xx/bcm963xx_tag.h b/trunk/arch/mips/include/asm/mach-bcm63xx/bcm963xx_tag.h index 32978d32561a..ed72e6a26b73 100644 --- a/trunk/arch/mips/include/asm/mach-bcm63xx/bcm963xx_tag.h +++ b/trunk/arch/mips/include/asm/mach-bcm63xx/bcm963xx_tag.h @@ -88,7 +88,7 @@ struct bcm_tag { char kernel_crc[CRC_LEN]; /* 228-235: Unused at present */ char reserved1[8]; - /* 236-239: CRC32 of header excluding tagVersion */ + /* 236-239: CRC32 of header excluding last 20 bytes */ char header_crc[CRC_LEN]; /* 240-255: Unused at present */ char reserved2[16]; diff --git a/trunk/arch/mips/jazz/jazzdma.c b/trunk/arch/mips/jazz/jazzdma.c index 9ce9f64cb76f..2d8e447cb828 100644 --- a/trunk/arch/mips/jazz/jazzdma.c +++ b/trunk/arch/mips/jazz/jazzdma.c @@ -211,7 +211,7 @@ EXPORT_SYMBOL(vdma_free); */ int vdma_remap(unsigned long laddr, unsigned long paddr, unsigned long size) { - int first, pages, npages; + int first, pages; if (laddr > 0xffffff) { if (vdma_debug) @@ -228,8 +228,7 @@ int vdma_remap(unsigned long laddr, unsigned long paddr, unsigned long size) return -EINVAL; /* invalid physical address */ } - npages = pages = - (((paddr & (VDMA_PAGESIZE - 1)) + size) >> 12) + 1; + pages = (((paddr & (VDMA_PAGESIZE - 1)) + size) >> 12) + 1; first = laddr >> 12; if (vdma_debug) printk("vdma_remap: first=%x, pages=%x\n", first, pages); diff --git a/trunk/arch/mips/jz4740/dma.c b/trunk/arch/mips/jz4740/dma.c index 5ebe75a68350..d7feb898692c 100644 --- a/trunk/arch/mips/jz4740/dma.c +++ b/trunk/arch/mips/jz4740/dma.c @@ -242,9 +242,7 @@ EXPORT_SYMBOL_GPL(jz4740_dma_get_residue); static void jz4740_dma_chan_irq(struct jz4740_dma_chan *dma) { - uint32_t status; - - status = jz4740_dma_read(JZ_REG_DMA_STATUS_CTRL(dma->id)); + (void) jz4740_dma_read(JZ_REG_DMA_STATUS_CTRL(dma->id)); jz4740_dma_write_mask(JZ_REG_DMA_STATUS_CTRL(dma->id), 0, JZ_DMA_STATUS_CTRL_ENABLE | JZ_DMA_STATUS_CTRL_TRANSFER_DONE); diff --git a/trunk/arch/mips/jz4740/time.c b/trunk/arch/mips/jz4740/time.c index fe01678d94fd..eaa853a54af6 100644 --- a/trunk/arch/mips/jz4740/time.c +++ b/trunk/arch/mips/jz4740/time.c @@ -89,7 +89,7 @@ static int jz4740_clockevent_set_next(unsigned long evt, static struct clock_event_device jz4740_clockevent = { .name = "jz4740-timer", - .features = CLOCK_EVT_FEAT_PERIODIC, + .features = CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_ONESHOT, .set_next_event = jz4740_clockevent_set_next, .set_mode = jz4740_clockevent_set_mode, .rating = 200, diff --git a/trunk/arch/mips/jz4740/timer.c b/trunk/arch/mips/jz4740/timer.c index b2c015129055..654d5c3900b6 100644 --- a/trunk/arch/mips/jz4740/timer.c +++ b/trunk/arch/mips/jz4740/timer.c @@ -27,11 +27,13 @@ void jz4740_timer_enable_watchdog(void) { writel(BIT(16), jz4740_timer_base + JZ_REG_TIMER_STOP_CLEAR); } +EXPORT_SYMBOL_GPL(jz4740_timer_enable_watchdog); void jz4740_timer_disable_watchdog(void) { writel(BIT(16), jz4740_timer_base + JZ_REG_TIMER_STOP_SET); } +EXPORT_SYMBOL_GPL(jz4740_timer_disable_watchdog); void __init jz4740_timer_init(void) { diff --git a/trunk/arch/mips/kernel/ftrace.c b/trunk/arch/mips/kernel/ftrace.c index 94ca2b018af7..feb8021a305f 100644 --- a/trunk/arch/mips/kernel/ftrace.c +++ b/trunk/arch/mips/kernel/ftrace.c @@ -23,6 +23,7 @@ #define JAL 0x0c000000 /* jump & link: ip --> ra, jump to target */ #define ADDR_MASK 0x03ffffff /* op_code|addr : 31...26|25 ....0 */ +#define JUMP_RANGE_MASK ((1UL << 28) - 1) #define INSN_NOP 0x00000000 /* nop */ #define INSN_JAL(addr) \ @@ -44,12 +45,12 @@ static inline void ftrace_dyn_arch_init_insns(void) /* jal (ftrace_caller + 8), jump over the first two instruction */ buf = (u32 *)&insn_jal_ftrace_caller; - uasm_i_jal(&buf, (FTRACE_ADDR + 8)); + uasm_i_jal(&buf, (FTRACE_ADDR + 8) & JUMP_RANGE_MASK); #ifdef CONFIG_FUNCTION_GRAPH_TRACER /* j ftrace_graph_caller */ buf = (u32 *)&insn_j_ftrace_graph_caller; - uasm_i_j(&buf, (unsigned long)ftrace_graph_caller); + uasm_i_j(&buf, (unsigned long)ftrace_graph_caller & JUMP_RANGE_MASK); #endif } diff --git a/trunk/arch/mips/kernel/ptrace.c b/trunk/arch/mips/kernel/ptrace.c index d21c388c0116..584e6b55c865 100644 --- a/trunk/arch/mips/kernel/ptrace.c +++ b/trunk/arch/mips/kernel/ptrace.c @@ -540,8 +540,8 @@ asmlinkage void do_syscall_trace(struct pt_regs *regs, int entryexit) secure_computing(regs->regs[2]); if (unlikely(current->audit_context) && entryexit) - audit_syscall_exit(AUDITSC_RESULT(regs->regs[2]), - regs->regs[2]); + audit_syscall_exit(AUDITSC_RESULT(regs->regs[7]), + -regs->regs[2]); if (!(current->ptrace & PT_PTRACED)) goto out; diff --git a/trunk/arch/mips/kernel/scall32-o32.S b/trunk/arch/mips/kernel/scall32-o32.S index 7f5468b38d4c..7f1377eb22d3 100644 --- a/trunk/arch/mips/kernel/scall32-o32.S +++ b/trunk/arch/mips/kernel/scall32-o32.S @@ -565,7 +565,7 @@ einval: li v0, -ENOSYS sys sys_ioprio_get 2 /* 4315 */ sys sys_utimensat 4 sys sys_signalfd 3 - sys sys_ni_syscall 0 + sys sys_ni_syscall 0 /* was timerfd */ sys sys_eventfd 1 sys sys_fallocate 6 /* 4320 */ sys sys_timerfd_create 2 diff --git a/trunk/arch/mips/kernel/scall64-64.S b/trunk/arch/mips/kernel/scall64-64.S index a2e1fcbc41dc..7c0ef7f128bf 100644 --- a/trunk/arch/mips/kernel/scall64-64.S +++ b/trunk/arch/mips/kernel/scall64-64.S @@ -404,7 +404,7 @@ sys_call_table: PTR sys_ioprio_get PTR sys_utimensat /* 5275 */ PTR sys_signalfd - PTR sys_ni_syscall + PTR sys_ni_syscall /* was timerfd */ PTR sys_eventfd PTR sys_fallocate PTR sys_timerfd_create /* 5280 */ diff --git a/trunk/arch/mips/kernel/scall64-n32.S b/trunk/arch/mips/kernel/scall64-n32.S index b2c7624995b8..de6c5563beab 100644 --- a/trunk/arch/mips/kernel/scall64-n32.S +++ b/trunk/arch/mips/kernel/scall64-n32.S @@ -403,7 +403,7 @@ EXPORT(sysn32_call_table) PTR sys_ioprio_get PTR compat_sys_utimensat PTR compat_sys_signalfd /* 6280 */ - PTR sys_ni_syscall + PTR sys_ni_syscall /* was timerfd */ PTR sys_eventfd PTR sys_fallocate PTR sys_timerfd_create diff --git a/trunk/arch/mips/kernel/scall64-o32.S b/trunk/arch/mips/kernel/scall64-o32.S index 049a9c8c49a0..b0541dda8830 100644 --- a/trunk/arch/mips/kernel/scall64-o32.S +++ b/trunk/arch/mips/kernel/scall64-o32.S @@ -522,7 +522,7 @@ sys_call_table: PTR sys_ioprio_get /* 4315 */ PTR compat_sys_utimensat PTR compat_sys_signalfd - PTR sys_ni_syscall + PTR sys_ni_syscall /* was timerfd */ PTR sys_eventfd PTR sys32_fallocate /* 4320 */ PTR sys_timerfd_create diff --git a/trunk/arch/mips/kernel/smtc.c b/trunk/arch/mips/kernel/smtc.c index cedac4633741..5a88cc4ccd5a 100644 --- a/trunk/arch/mips/kernel/smtc.c +++ b/trunk/arch/mips/kernel/smtc.c @@ -929,7 +929,7 @@ static void post_direct_ipi(int cpu, struct smtc_ipi *pipi) static void ipi_resched_interrupt(void) { - scheduler_ipi(); + /* Return from interrupt should be enough to cause scheduler check */ } static void ipi_call_interrupt(void) diff --git a/trunk/arch/mips/kernel/vmlinux.lds.S b/trunk/arch/mips/kernel/vmlinux.lds.S index 832afbb87588..e4b0b0bec039 100644 --- a/trunk/arch/mips/kernel/vmlinux.lds.S +++ b/trunk/arch/mips/kernel/vmlinux.lds.S @@ -74,6 +74,7 @@ SECTIONS INIT_TASK_DATA(PAGE_SIZE) NOSAVE_DATA CACHELINE_ALIGNED_DATA(1 << CONFIG_MIPS_L1_CACHE_SHIFT) + READ_MOSTLY_DATA(1 << CONFIG_MIPS_L1_CACHE_SHIFT) DATA_DATA CONSTRUCTORS } diff --git a/trunk/arch/mips/loongson/common/env.c b/trunk/arch/mips/loongson/common/env.c index 11b193f848f8..d93830ad6113 100644 --- a/trunk/arch/mips/loongson/common/env.c +++ b/trunk/arch/mips/loongson/common/env.c @@ -29,9 +29,10 @@ unsigned long memsize, highmemsize; #define parse_even_earlier(res, option, p) \ do { \ - int ret; \ + unsigned int tmp __maybe_unused; \ + \ if (strncmp(option, (char *)p, strlen(option)) == 0) \ - ret = strict_strtol((char *)p + strlen(option"="), 10, &res); \ + tmp = strict_strtol((char *)p + strlen(option"="), 10, &res); \ } while (0) void __init prom_init_env(void) diff --git a/trunk/arch/mips/mm/c-r4k.c b/trunk/arch/mips/mm/c-r4k.c index b4923a75cb4b..71bddf8f7d25 100644 --- a/trunk/arch/mips/mm/c-r4k.c +++ b/trunk/arch/mips/mm/c-r4k.c @@ -1075,7 +1075,6 @@ static int __cpuinit probe_scache(void) unsigned long flags, addr, begin, end, pow2; unsigned int config = read_c0_config(); struct cpuinfo_mips *c = ¤t_cpu_data; - int tmp; if (config & CONF_SC) return 0; @@ -1108,7 +1107,6 @@ static int __cpuinit probe_scache(void) /* Now search for the wrap around point. */ pow2 = (128 * 1024); - tmp = 0; for (addr = begin + (128 * 1024); addr < end; addr = begin + pow2) { cache_op(Index_Load_Tag_SD, addr); __asm__ __volatile__("nop; nop; nop; nop;"); /* hazard... */ diff --git a/trunk/arch/mips/mm/tlbex.c b/trunk/arch/mips/mm/tlbex.c index 5ef294fbb6e7..f5734c2c8097 100644 --- a/trunk/arch/mips/mm/tlbex.c +++ b/trunk/arch/mips/mm/tlbex.c @@ -1151,8 +1151,8 @@ static void __cpuinit build_r4000_tlb_refill_handler(void) struct uasm_reloc *r = relocs; u32 *f; unsigned int final_len; - struct mips_huge_tlb_info htlb_info; - enum vmalloc64_mode vmalloc_mode; + struct mips_huge_tlb_info htlb_info __maybe_unused; + enum vmalloc64_mode vmalloc_mode __maybe_unused; memset(tlb_handler, 0, sizeof(tlb_handler)); memset(labels, 0, sizeof(labels)); diff --git a/trunk/arch/mips/mti-malta/malta-init.c b/trunk/arch/mips/mti-malta/malta-init.c index 414f0c99b196..31180c321a1a 100644 --- a/trunk/arch/mips/mti-malta/malta-init.c +++ b/trunk/arch/mips/mti-malta/malta-init.c @@ -193,8 +193,6 @@ extern struct plat_smp_ops msmtc_smp_ops; void __init prom_init(void) { - int result; - prom_argc = fw_arg0; _prom_argv = (int *) fw_arg1; _prom_envp = (int *) fw_arg2; @@ -360,20 +358,14 @@ void __init prom_init(void) #ifdef CONFIG_SERIAL_8250_CONSOLE console_config(); #endif - /* Early detection of CMP support */ - result = gcmp_probe(GCMP_BASE_ADDR, GCMP_ADDRSPACE_SZ); - #ifdef CONFIG_MIPS_CMP - if (result) + /* Early detection of CMP support */ + if (gcmp_probe(GCMP_BASE_ADDR, GCMP_ADDRSPACE_SZ)) register_smp_ops(&cmp_smp_ops); + else #endif #ifdef CONFIG_MIPS_MT_SMP -#ifdef CONFIG_MIPS_CMP - if (!result) register_smp_ops(&vsmp_smp_ops); -#else - register_smp_ops(&vsmp_smp_ops); -#endif #endif #ifdef CONFIG_MIPS_MT_SMTC register_smp_ops(&msmtc_smp_ops); diff --git a/trunk/arch/mips/mti-malta/malta-int.c b/trunk/arch/mips/mti-malta/malta-int.c index 7d93e6fbfa5a..e85c977328da 100644 --- a/trunk/arch/mips/mti-malta/malta-int.c +++ b/trunk/arch/mips/mti-malta/malta-int.c @@ -56,7 +56,6 @@ static DEFINE_RAW_SPINLOCK(mips_irq_lock); static inline int mips_pcibios_iack(void) { int irq; - u32 dummy; /* * Determine highest priority pending interrupt by performing @@ -83,7 +82,7 @@ static inline int mips_pcibios_iack(void) BONITO_PCIMAP_CFG = 0x20000; /* Flush Bonito register block */ - dummy = BONITO_PCIMAP_CFG; + (void) BONITO_PCIMAP_CFG; iob(); /* sync */ irq = __raw_readl((u32 *)_pcictrl_bonito_pcicfg); @@ -309,8 +308,6 @@ static void ipi_call_dispatch(void) static irqreturn_t ipi_resched_interrupt(int irq, void *dev_id) { - scheduler_ipi(); - return IRQ_HANDLED; } diff --git a/trunk/arch/mips/pmc-sierra/msp71xx/msp_irq_per.c b/trunk/arch/mips/pmc-sierra/msp71xx/msp_irq_per.c index f9b9dcdfa9dd..98fd0099d964 100644 --- a/trunk/arch/mips/pmc-sierra/msp71xx/msp_irq_per.c +++ b/trunk/arch/mips/pmc-sierra/msp71xx/msp_irq_per.c @@ -97,7 +97,7 @@ static int msp_per_irq_set_affinity(struct irq_data *d, static struct irq_chip msp_per_irq_controller = { .name = "MSP_PER", - .irq_enable = unmask_per_irq. + .irq_enable = unmask_per_irq, .irq_disable = mask_per_irq, .irq_ack = msp_per_irq_ack, #ifdef CONFIG_SMP diff --git a/trunk/arch/mips/pmc-sierra/yosemite/smp.c b/trunk/arch/mips/pmc-sierra/yosemite/smp.c index 2608752898c0..efc9e889b349 100644 --- a/trunk/arch/mips/pmc-sierra/yosemite/smp.c +++ b/trunk/arch/mips/pmc-sierra/yosemite/smp.c @@ -55,8 +55,6 @@ void titan_mailbox_irq(void) if (status & 0x2) smp_call_function_interrupt(); - if (status & 0x4) - scheduler_ipi(); break; case 1: @@ -65,8 +63,6 @@ void titan_mailbox_irq(void) if (status & 0x2) smp_call_function_interrupt(); - if (status & 0x4) - scheduler_ipi(); break; } } diff --git a/trunk/arch/mips/power/hibernate.S b/trunk/arch/mips/power/hibernate.S index dbb5c7b4b70f..f8a751c03282 100644 --- a/trunk/arch/mips/power/hibernate.S +++ b/trunk/arch/mips/power/hibernate.S @@ -35,7 +35,7 @@ LEAF(swsusp_arch_resume) 0: PTR_L t1, PBE_ADDRESS(t0) /* source */ PTR_L t2, PBE_ORIG_ADDRESS(t0) /* destination */ - PTR_ADDIU t3, t1, PAGE_SIZE + PTR_ADDU t3, t1, PAGE_SIZE 1: REG_L t8, (t1) REG_S t8, (t2) diff --git a/trunk/arch/mips/sgi-ip22/ip22-platform.c b/trunk/arch/mips/sgi-ip22/ip22-platform.c index deddbf0ebe5c..698904daf901 100644 --- a/trunk/arch/mips/sgi-ip22/ip22-platform.c +++ b/trunk/arch/mips/sgi-ip22/ip22-platform.c @@ -132,7 +132,7 @@ static struct platform_device eth1_device = { */ static int __init sgiseeq_devinit(void) { - unsigned int tmp; + unsigned int pbdma __maybe_unused; int res, i; eth0_pd.hpc = hpc3c0; @@ -151,7 +151,7 @@ static int __init sgiseeq_devinit(void) /* Second HPC is missing? */ if (ip22_is_fullhouse() || - get_dbe(tmp, (unsigned int *)&hpc3c1->pbdma[1])) + get_dbe(pbdma, (unsigned int *)&hpc3c1->pbdma[1])) return 0; sgimc->giopar |= SGIMC_GIOPAR_MASTEREXP1 | SGIMC_GIOPAR_EXP164 | diff --git a/trunk/arch/mips/sgi-ip22/ip22-time.c b/trunk/arch/mips/sgi-ip22/ip22-time.c index 603fc91c1030..1a94c9894188 100644 --- a/trunk/arch/mips/sgi-ip22/ip22-time.c +++ b/trunk/arch/mips/sgi-ip22/ip22-time.c @@ -32,7 +32,7 @@ static unsigned long dosample(void) { u32 ct0, ct1; - u8 msb, lsb; + u8 msb; /* Start the counter. */ sgint->tcword = (SGINT_TCWORD_CNT2 | SGINT_TCWORD_CALL | @@ -46,7 +46,7 @@ static unsigned long dosample(void) /* Latch and spin until top byte of counter2 is zero */ do { writeb(SGINT_TCWORD_CNT2 | SGINT_TCWORD_CLAT, &sgint->tcword); - lsb = readb(&sgint->tcnt2); + (void) readb(&sgint->tcnt2); msb = readb(&sgint->tcnt2); ct1 = read_c0_count(); } while (msb); diff --git a/trunk/arch/mips/sgi-ip27/ip27-hubio.c b/trunk/arch/mips/sgi-ip27/ip27-hubio.c index a1fa4abb3f6a..cd0d5b06cd83 100644 --- a/trunk/arch/mips/sgi-ip27/ip27-hubio.c +++ b/trunk/arch/mips/sgi-ip27/ip27-hubio.c @@ -29,7 +29,6 @@ unsigned long hub_pio_map(cnodeid_t cnode, xwidgetnum_t widget, unsigned long xtalk_addr, size_t size) { nasid_t nasid = COMPACT_TO_NASID_NODEID(cnode); - volatile hubreg_t junk; unsigned i; /* use small-window mapping if possible */ @@ -64,7 +63,7 @@ unsigned long hub_pio_map(cnodeid_t cnode, xwidgetnum_t widget, * after we write it. */ IIO_ITTE_PUT(nasid, i, HUB_PIO_MAP_TO_MEM, widget, xtalk_addr); - junk = HUB_L(IIO_ITTE_GET(nasid, i)); + (void) HUB_L(IIO_ITTE_GET(nasid, i)); return NODE_BWIN_BASE(nasid, widget) + (xtalk_addr % BWIN_SIZE); } diff --git a/trunk/arch/mips/sgi-ip27/ip27-irq.c b/trunk/arch/mips/sgi-ip27/ip27-irq.c index b18b04e48577..0a04603d577c 100644 --- a/trunk/arch/mips/sgi-ip27/ip27-irq.c +++ b/trunk/arch/mips/sgi-ip27/ip27-irq.c @@ -147,10 +147,8 @@ static void ip27_do_irq_mask0(void) #ifdef CONFIG_SMP if (pend0 & (1UL << CPU_RESCHED_A_IRQ)) { LOCAL_HUB_CLR_INTR(CPU_RESCHED_A_IRQ); - scheduler_ipi(); } else if (pend0 & (1UL << CPU_RESCHED_B_IRQ)) { LOCAL_HUB_CLR_INTR(CPU_RESCHED_B_IRQ); - scheduler_ipi(); } else if (pend0 & (1UL << CPU_CALL_A_IRQ)) { LOCAL_HUB_CLR_INTR(CPU_CALL_A_IRQ); smp_call_function_interrupt(); diff --git a/trunk/arch/mips/sgi-ip27/ip27-klnuma.c b/trunk/arch/mips/sgi-ip27/ip27-klnuma.c index c3d30a88daf3..1d1919a44e88 100644 --- a/trunk/arch/mips/sgi-ip27/ip27-klnuma.c +++ b/trunk/arch/mips/sgi-ip27/ip27-klnuma.c @@ -54,11 +54,8 @@ void __init setup_replication_mask(void) static __init void set_ktext_source(nasid_t client_nasid, nasid_t server_nasid) { - cnodeid_t client_cnode; kern_vars_t *kvp; - client_cnode = NASID_TO_COMPACT_NODEID(client_nasid); - kvp = &hub_data(client_nasid)->kern_vars; KERN_VARS_ADDR(client_nasid) = (unsigned long)kvp; diff --git a/trunk/arch/mips/sibyte/bcm1480/smp.c b/trunk/arch/mips/sibyte/bcm1480/smp.c index d667875be564..47b347c992ea 100644 --- a/trunk/arch/mips/sibyte/bcm1480/smp.c +++ b/trunk/arch/mips/sibyte/bcm1480/smp.c @@ -20,7 +20,6 @@ #include #include #include -#include #include #include @@ -190,8 +189,10 @@ void bcm1480_mailbox_interrupt(void) /* Clear the mailbox to clear the interrupt */ __raw_writeq(((u64)action)<<48, mailbox_0_clear_regs[cpu]); - if (action & SMP_RESCHEDULE_YOURSELF) - scheduler_ipi(); + /* + * Nothing to do for SMP_RESCHEDULE_YOURSELF; returning from the + * interrupt will do the reschedule for us + */ if (action & SMP_CALL_FUNCTION) smp_call_function_interrupt(); diff --git a/trunk/arch/mips/sibyte/sb1250/smp.c b/trunk/arch/mips/sibyte/sb1250/smp.c index 38e7f6bd7922..c00a5cb1128d 100644 --- a/trunk/arch/mips/sibyte/sb1250/smp.c +++ b/trunk/arch/mips/sibyte/sb1250/smp.c @@ -21,7 +21,6 @@ #include #include #include -#include #include #include @@ -178,8 +177,10 @@ void sb1250_mailbox_interrupt(void) /* Clear the mailbox to clear the interrupt */ ____raw_writeq(((u64)action) << 48, mailbox_clear_regs[cpu]); - if (action & SMP_RESCHEDULE_YOURSELF) - scheduler_ipi(); + /* + * Nothing to do for SMP_RESCHEDULE_YOURSELF; returning from the + * interrupt will do the reschedule for us + */ if (action & SMP_CALL_FUNCTION) smp_call_function_interrupt(); diff --git a/trunk/arch/mips/sni/time.c b/trunk/arch/mips/sni/time.c index c76151b56568..0904d4d30cb3 100644 --- a/trunk/arch/mips/sni/time.c +++ b/trunk/arch/mips/sni/time.c @@ -95,7 +95,7 @@ static void __init sni_a20r_timer_setup(void) static __init unsigned long dosample(void) { u32 ct0, ct1; - volatile u8 msb, lsb; + volatile u8 msb; /* Start the counter. */ outb_p(0x34, 0x43); @@ -108,7 +108,7 @@ static __init unsigned long dosample(void) /* Latch and spin until top byte of counter0 is zero */ do { outb(0x00, 0x43); - lsb = inb(0x40); + (void) inb(0x40); msb = inb(0x40); ct1 = read_c0_count(); } while (msb); diff --git a/trunk/arch/mn10300/kernel/smp.c b/trunk/arch/mn10300/kernel/smp.c index 83fb27912231..226c826a2194 100644 --- a/trunk/arch/mn10300/kernel/smp.c +++ b/trunk/arch/mn10300/kernel/smp.c @@ -494,11 +494,14 @@ void smp_send_stop(void) * @irq: The interrupt number. * @dev_id: The device ID. * + * We need do nothing here, since the scheduling will be effected on our way + * back through entry.S. + * * Returns IRQ_HANDLED to indicate we handled the interrupt successfully. */ static irqreturn_t smp_reschedule_interrupt(int irq, void *dev_id) { - scheduler_ipi(); + /* do nothing */ return IRQ_HANDLED; } diff --git a/trunk/arch/parisc/kernel/smp.c b/trunk/arch/parisc/kernel/smp.c index 828305f19cff..69d63d354ef0 100644 --- a/trunk/arch/parisc/kernel/smp.c +++ b/trunk/arch/parisc/kernel/smp.c @@ -155,7 +155,10 @@ ipi_interrupt(int irq, void *dev_id) case IPI_RESCHEDULE: smp_debug(100, KERN_DEBUG "CPU%d IPI_RESCHEDULE\n", this_cpu); - scheduler_ipi(); + /* + * Reschedule callback. Everything to be + * done is done by the interrupt return path. + */ break; case IPI_CALL_FUNC: diff --git a/trunk/arch/powerpc/kernel/smp.c b/trunk/arch/powerpc/kernel/smp.c index 9f9c204bef69..cbdbb14be4b0 100644 --- a/trunk/arch/powerpc/kernel/smp.c +++ b/trunk/arch/powerpc/kernel/smp.c @@ -116,7 +116,7 @@ void smp_message_recv(int msg) generic_smp_call_function_interrupt(); break; case PPC_MSG_RESCHEDULE: - scheduler_ipi(); + /* we notice need_resched on exit */ break; case PPC_MSG_CALL_FUNC_SINGLE: generic_smp_call_function_single_interrupt(); @@ -146,7 +146,7 @@ static irqreturn_t call_function_action(int irq, void *data) static irqreturn_t reschedule_action(int irq, void *data) { - scheduler_ipi(); + /* we just need the return path side effect of checking need_resched */ return IRQ_HANDLED; } diff --git a/trunk/arch/s390/include/asm/diag.h b/trunk/arch/s390/include/asm/diag.h index 72b2e2f2d32d..7e91c58072e2 100644 --- a/trunk/arch/s390/include/asm/diag.h +++ b/trunk/arch/s390/include/asm/diag.h @@ -9,9 +9,22 @@ #define _ASM_S390_DIAG_H /* - * Diagnose 10: Release pages + * Diagnose 10: Release page range */ -extern void diag10(unsigned long addr); +static inline void diag10_range(unsigned long start_pfn, unsigned long num_pfn) +{ + unsigned long start_addr, end_addr; + + start_addr = start_pfn << PAGE_SHIFT; + end_addr = (start_pfn + num_pfn - 1) << PAGE_SHIFT; + + asm volatile( + "0: diag %0,%1,0x10\n" + "1:\n" + EX_TABLE(0b, 1b) + EX_TABLE(1b, 1b) + : : "a" (start_addr), "a" (end_addr)); +} /* * Diagnose 14: Input spool file manipulation diff --git a/trunk/arch/s390/include/asm/mmu_context.h b/trunk/arch/s390/include/asm/mmu_context.h index a6f0e7cc9cde..8c277caa8d3a 100644 --- a/trunk/arch/s390/include/asm/mmu_context.h +++ b/trunk/arch/s390/include/asm/mmu_context.h @@ -23,7 +23,7 @@ static inline int init_new_context(struct task_struct *tsk, #ifdef CONFIG_64BIT mm->context.asce_bits |= _ASCE_TYPE_REGION3; #endif - if (current->mm->context.alloc_pgste) { + if (current->mm && current->mm->context.alloc_pgste) { /* * alloc_pgste indicates, that any NEW context will be created * with extended page tables. The old context is unchanged. The diff --git a/trunk/arch/s390/kernel/diag.c b/trunk/arch/s390/kernel/diag.c index c032d11da8a1..8237fc07ac79 100644 --- a/trunk/arch/s390/kernel/diag.c +++ b/trunk/arch/s390/kernel/diag.c @@ -8,27 +8,6 @@ #include #include -/* - * Diagnose 10: Release pages - */ -void diag10(unsigned long addr) -{ - if (addr >= 0x7ff00000) - return; - asm volatile( -#ifdef CONFIG_64BIT - " sam31\n" - " diag %0,%0,0x10\n" - "0: sam64\n" -#else - " diag %0,%0,0x10\n" - "0:\n" -#endif - EX_TABLE(0b, 0b) - : : "a" (addr)); -} -EXPORT_SYMBOL(diag10); - /* * Diagnose 14: Input spool file manipulation */ diff --git a/trunk/arch/s390/kernel/dis.c b/trunk/arch/s390/kernel/dis.c index c83726c9fe03..3d4a78fc1adc 100644 --- a/trunk/arch/s390/kernel/dis.c +++ b/trunk/arch/s390/kernel/dis.c @@ -672,6 +672,7 @@ static struct insn opcode_b2[] = { { "rp", 0x77, INSTR_S_RD }, { "stcke", 0x78, INSTR_S_RD }, { "sacf", 0x79, INSTR_S_RD }, + { "spp", 0x80, INSTR_S_RD }, { "stsi", 0x7d, INSTR_S_RD }, { "srnm", 0x99, INSTR_S_RD }, { "stfpc", 0x9c, INSTR_S_RD }, diff --git a/trunk/arch/s390/kernel/entry.S b/trunk/arch/s390/kernel/entry.S index 648f64239a9d..1b67fc6ebdc2 100644 --- a/trunk/arch/s390/kernel/entry.S +++ b/trunk/arch/s390/kernel/entry.S @@ -836,7 +836,7 @@ restart_base: stosm __SF_EMPTY(%r15),0x04 # now we can turn dat on basr %r14,0 l %r14,restart_addr-.(%r14) - br %r14 # branch to start_secondary + basr %r14,%r14 # branch to start_secondary restart_addr: .long start_secondary .align 8 diff --git a/trunk/arch/s390/kernel/entry64.S b/trunk/arch/s390/kernel/entry64.S index 9d3603d6c511..9fd864563499 100644 --- a/trunk/arch/s390/kernel/entry64.S +++ b/trunk/arch/s390/kernel/entry64.S @@ -841,7 +841,7 @@ restart_base: mvc __LC_SYSTEM_TIMER(8),__TI_system_timer(%r1) xc __LC_STEAL_TIMER(8),__LC_STEAL_TIMER stosm __SF_EMPTY(%r15),0x04 # now we can turn dat on - jg start_secondary + brasl %r14,start_secondary .align 8 restart_vtime: .long 0x7fffffff,0xffffffff diff --git a/trunk/arch/s390/kernel/smp.c b/trunk/arch/s390/kernel/smp.c index 63c7d9ff220d..63a97db83f96 100644 --- a/trunk/arch/s390/kernel/smp.c +++ b/trunk/arch/s390/kernel/smp.c @@ -165,12 +165,12 @@ static void do_ext_call_interrupt(unsigned int ext_int_code, kstat_cpu(smp_processor_id()).irqs[EXTINT_IPI]++; /* * handle bit signal external calls + * + * For the ec_schedule signal we have to do nothing. All the work + * is done automatically when we return from the interrupt. */ bits = xchg(&S390_lowcore.ext_call_fast, 0); - if (test_bit(ec_schedule, &bits)) - scheduler_ipi(); - if (test_bit(ec_call_function, &bits)) generic_smp_call_function_interrupt(); diff --git a/trunk/arch/s390/mm/cmm.c b/trunk/arch/s390/mm/cmm.c index c66ffd8dbbb7..1f1dba9dcf58 100644 --- a/trunk/arch/s390/mm/cmm.c +++ b/trunk/arch/s390/mm/cmm.c @@ -91,7 +91,7 @@ static long cmm_alloc_pages(long nr, long *counter, } else free_page((unsigned long) npa); } - diag10(addr); + diag10_range(addr >> PAGE_SHIFT, 1); pa->pages[pa->index++] = addr; (*counter)++; spin_unlock(&cmm_lock); diff --git a/trunk/arch/s390/oprofile/hwsampler.c b/trunk/arch/s390/oprofile/hwsampler.c index 4952872d6f0a..33cbd373cce4 100644 --- a/trunk/arch/s390/oprofile/hwsampler.c +++ b/trunk/arch/s390/oprofile/hwsampler.c @@ -1021,20 +1021,14 @@ int hwsampler_deallocate() return rc; } -long hwsampler_query_min_interval(void) +unsigned long hwsampler_query_min_interval(void) { - if (min_sampler_rate) - return min_sampler_rate; - else - return -EINVAL; + return min_sampler_rate; } -long hwsampler_query_max_interval(void) +unsigned long hwsampler_query_max_interval(void) { - if (max_sampler_rate) - return max_sampler_rate; - else - return -EINVAL; + return max_sampler_rate; } unsigned long hwsampler_get_sample_overflow_count(unsigned int cpu) diff --git a/trunk/arch/s390/oprofile/hwsampler.h b/trunk/arch/s390/oprofile/hwsampler.h index 8c72b59316b5..1912f3bb190c 100644 --- a/trunk/arch/s390/oprofile/hwsampler.h +++ b/trunk/arch/s390/oprofile/hwsampler.h @@ -102,8 +102,8 @@ int hwsampler_setup(void); int hwsampler_shutdown(void); int hwsampler_allocate(unsigned long sdbt, unsigned long sdb); int hwsampler_deallocate(void); -long hwsampler_query_min_interval(void); -long hwsampler_query_max_interval(void); +unsigned long hwsampler_query_min_interval(void); +unsigned long hwsampler_query_max_interval(void); int hwsampler_start_all(unsigned long interval); int hwsampler_stop_all(void); int hwsampler_deactivate(unsigned int cpu); diff --git a/trunk/arch/s390/oprofile/init.c b/trunk/arch/s390/oprofile/init.c index c63d7e58352b..5995e9bc72d9 100644 --- a/trunk/arch/s390/oprofile/init.c +++ b/trunk/arch/s390/oprofile/init.c @@ -145,15 +145,11 @@ static int oprofile_hwsampler_init(struct oprofile_operations *ops) * create hwsampler files only if hwsampler_setup() succeeds. */ oprofile_min_interval = hwsampler_query_min_interval(); - if (oprofile_min_interval < 0) { - oprofile_min_interval = 0; + if (oprofile_min_interval == 0) return -ENODEV; - } oprofile_max_interval = hwsampler_query_max_interval(); - if (oprofile_max_interval < 0) { - oprofile_max_interval = 0; + if (oprofile_max_interval == 0) return -ENODEV; - } if (oprofile_timer_init(ops)) return -ENODEV; diff --git a/trunk/arch/sh/kernel/smp.c b/trunk/arch/sh/kernel/smp.c index 6207561ea34a..509b36b45115 100644 --- a/trunk/arch/sh/kernel/smp.c +++ b/trunk/arch/sh/kernel/smp.c @@ -20,7 +20,6 @@ #include #include #include -#include #include #include #include @@ -324,7 +323,6 @@ void smp_message_recv(unsigned int msg) generic_smp_call_function_interrupt(); break; case SMP_MSG_RESCHEDULE: - scheduler_ipi(); break; case SMP_MSG_FUNCTION_SINGLE: generic_smp_call_function_single_interrupt(); diff --git a/trunk/arch/sparc/include/asm/topology_64.h b/trunk/arch/sparc/include/asm/topology_64.h index 8b9c556d630b..1c79f32734a0 100644 --- a/trunk/arch/sparc/include/asm/topology_64.h +++ b/trunk/arch/sparc/include/asm/topology_64.h @@ -65,10 +65,6 @@ static inline int pcibus_to_node(struct pci_bus *pbus) #define smt_capable() (sparc64_multi_core) #endif /* CONFIG_SMP */ -extern cpumask_t cpu_core_map[NR_CPUS]; -static inline const struct cpumask *cpu_coregroup_mask(int cpu) -{ - return &cpu_core_map[cpu]; -} +#define cpu_coregroup_mask(cpu) (&cpu_core_map[cpu]) #endif /* _ASM_SPARC64_TOPOLOGY_H */ diff --git a/trunk/arch/sparc/kernel/apc.c b/trunk/arch/sparc/kernel/apc.c index f679c57644d5..1e34f29e58bb 100644 --- a/trunk/arch/sparc/kernel/apc.c +++ b/trunk/arch/sparc/kernel/apc.c @@ -165,7 +165,7 @@ static int __devinit apc_probe(struct platform_device *op) return 0; } -static struct of_device_id __initdata apc_match[] = { +static struct of_device_id apc_match[] = { { .name = APC_OBPNAME, }, diff --git a/trunk/arch/sparc/kernel/pmc.c b/trunk/arch/sparc/kernel/pmc.c index 93d7b4465f8d..6a585d393580 100644 --- a/trunk/arch/sparc/kernel/pmc.c +++ b/trunk/arch/sparc/kernel/pmc.c @@ -69,7 +69,7 @@ static int __devinit pmc_probe(struct platform_device *op) return 0; } -static struct of_device_id __initdata pmc_match[] = { +static struct of_device_id pmc_match[] = { { .name = PMC_OBPNAME, }, diff --git a/trunk/arch/sparc/kernel/smp_32.c b/trunk/arch/sparc/kernel/smp_32.c index f95690c167b6..850a1360c0d6 100644 --- a/trunk/arch/sparc/kernel/smp_32.c +++ b/trunk/arch/sparc/kernel/smp_32.c @@ -53,6 +53,7 @@ cpumask_t smp_commenced_mask = CPU_MASK_NONE; void __cpuinit smp_store_cpu_info(int id) { int cpu_node; + int mid; cpu_data(id).udelay_val = loops_per_jiffy; @@ -60,10 +61,13 @@ void __cpuinit smp_store_cpu_info(int id) cpu_data(id).clock_tick = prom_getintdefault(cpu_node, "clock-frequency", 0); cpu_data(id).prom_node = cpu_node; - cpu_data(id).mid = cpu_get_hwmid(cpu_node); + mid = cpu_get_hwmid(cpu_node); - if (cpu_data(id).mid < 0) - panic("No MID found for CPU%d at node 0x%08d", id, cpu_node); + if (mid < 0) { + printk(KERN_NOTICE "No MID found for CPU%d at node 0x%08d", id, cpu_node); + mid = 0; + } + cpu_data(id).mid = mid; } void __init smp_cpus_done(unsigned int max_cpus) @@ -125,9 +129,7 @@ struct linux_prom_registers smp_penguin_ctable __cpuinitdata = { 0 }; void smp_send_reschedule(int cpu) { - /* - * XXX missing reschedule IPI, see scheduler_ipi() - */ + /* See sparc64 */ } void smp_send_stop(void) diff --git a/trunk/arch/sparc/kernel/smp_64.c b/trunk/arch/sparc/kernel/smp_64.c index 9478da7fdb3e..3e94a8c23238 100644 --- a/trunk/arch/sparc/kernel/smp_64.c +++ b/trunk/arch/sparc/kernel/smp_64.c @@ -1368,7 +1368,6 @@ void smp_send_reschedule(int cpu) void __irq_entry smp_receive_signal_client(int irq, struct pt_regs *regs) { clear_softint(1 << irq); - scheduler_ipi(); } /* This is a nop because we capture all other cpus diff --git a/trunk/arch/sparc/kernel/time_32.c b/trunk/arch/sparc/kernel/time_32.c index 4e236391b635..96046a4024c2 100644 --- a/trunk/arch/sparc/kernel/time_32.c +++ b/trunk/arch/sparc/kernel/time_32.c @@ -168,7 +168,7 @@ static int __devinit clock_probe(struct platform_device *op) return 0; } -static struct of_device_id __initdata clock_match[] = { +static struct of_device_id clock_match[] = { { .name = "eeprom", }, diff --git a/trunk/arch/sparc/lib/checksum_32.S b/trunk/arch/sparc/lib/checksum_32.S index 3632cb34e914..0084c3361e15 100644 --- a/trunk/arch/sparc/lib/checksum_32.S +++ b/trunk/arch/sparc/lib/checksum_32.S @@ -289,10 +289,16 @@ cc_end_cruft: /* Also, handle the alignment code out of band. */ cc_dword_align: - cmp %g1, 6 - bl,a ccte + cmp %g1, 16 + bge 1f + srl %g1, 1, %o3 +2: cmp %o3, 0 + be,a ccte andcc %g1, 0xf, %o3 - andcc %o0, 0x1, %g0 + andcc %o3, %o0, %g0 ! Check %o0 only (%o1 has the same last 2 bits) + be,a 2b + srl %o3, 1, %o3 +1: andcc %o0, 0x1, %g0 bne ccslow andcc %o0, 0x2, %g0 be 1f diff --git a/trunk/arch/tile/kernel/smp.c b/trunk/arch/tile/kernel/smp.c index c52224d5ed45..a4293102ef81 100644 --- a/trunk/arch/tile/kernel/smp.c +++ b/trunk/arch/tile/kernel/smp.c @@ -189,8 +189,12 @@ void flush_icache_range(unsigned long start, unsigned long end) /* Called when smp_send_reschedule() triggers IRQ_RESCHEDULE. */ static irqreturn_t handle_reschedule_ipi(int irq, void *token) { + /* + * Nothing to do here; when we return from interrupt, the + * rescheduling will occur there. But do bump the interrupt + * profiler count in the meantime. + */ __get_cpu_var(irq_stat).irq_resched_count++; - scheduler_ipi(); return IRQ_HANDLED; } diff --git a/trunk/arch/um/kernel/smp.c b/trunk/arch/um/kernel/smp.c index eefb107d2d73..106bf27e2a9a 100644 --- a/trunk/arch/um/kernel/smp.c +++ b/trunk/arch/um/kernel/smp.c @@ -173,7 +173,7 @@ void IPI_handler(int cpu) break; case 'R': - scheduler_ipi(); + set_tsk_need_resched(current); break; case 'S': diff --git a/trunk/arch/x86/include/asm/pgtable_types.h b/trunk/arch/x86/include/asm/pgtable_types.h index 7db7723d1f32..d56187c6b838 100644 --- a/trunk/arch/x86/include/asm/pgtable_types.h +++ b/trunk/arch/x86/include/asm/pgtable_types.h @@ -299,6 +299,7 @@ int phys_mem_access_prot_allowed(struct file *file, unsigned long pfn, /* Install a pte for a particular vaddr in kernel space. */ void set_pte_vaddr(unsigned long vaddr, pte_t pte); +extern void native_pagetable_reserve(u64 start, u64 end); #ifdef CONFIG_X86_32 extern void native_pagetable_setup_start(pgd_t *base); extern void native_pagetable_setup_done(pgd_t *base); diff --git a/trunk/arch/x86/include/asm/x86_init.h b/trunk/arch/x86/include/asm/x86_init.h index 643ebf2e2ad8..d3d859035af9 100644 --- a/trunk/arch/x86/include/asm/x86_init.h +++ b/trunk/arch/x86/include/asm/x86_init.h @@ -67,6 +67,17 @@ struct x86_init_oem { void (*banner)(void); }; +/** + * struct x86_init_mapping - platform specific initial kernel pagetable setup + * @pagetable_reserve: reserve a range of addresses for kernel pagetable usage + * + * For more details on the purpose of this hook, look in + * init_memory_mapping and the commit that added it. + */ +struct x86_init_mapping { + void (*pagetable_reserve)(u64 start, u64 end); +}; + /** * struct x86_init_paging - platform specific paging functions * @pagetable_setup_start: platform specific pre paging_init() call @@ -123,6 +134,7 @@ struct x86_init_ops { struct x86_init_mpparse mpparse; struct x86_init_irqs irqs; struct x86_init_oem oem; + struct x86_init_mapping mapping; struct x86_init_paging paging; struct x86_init_timers timers; struct x86_init_iommu iommu; diff --git a/trunk/arch/x86/kernel/smp.c b/trunk/arch/x86/kernel/smp.c index 013e7eba83bb..513deac7228d 100644 --- a/trunk/arch/x86/kernel/smp.c +++ b/trunk/arch/x86/kernel/smp.c @@ -194,13 +194,14 @@ static void native_stop_other_cpus(int wait) } /* - * Reschedule call back. + * Reschedule call back. Nothing to do, + * all the work is done automatically when + * we return from the interrupt. */ void smp_reschedule_interrupt(struct pt_regs *regs) { ack_APIC_irq(); inc_irq_stat(irq_resched_count); - scheduler_ipi(); /* * KVM uses this interrupt to force a cpu out of guest mode */ diff --git a/trunk/arch/x86/kernel/x86_init.c b/trunk/arch/x86/kernel/x86_init.c index c11514e9128b..75ef4b18e9b7 100644 --- a/trunk/arch/x86/kernel/x86_init.c +++ b/trunk/arch/x86/kernel/x86_init.c @@ -61,6 +61,10 @@ struct x86_init_ops x86_init __initdata = { .banner = default_banner, }, + .mapping = { + .pagetable_reserve = native_pagetable_reserve, + }, + .paging = { .pagetable_setup_start = native_pagetable_setup_start, .pagetable_setup_done = native_pagetable_setup_done, diff --git a/trunk/arch/x86/mm/init.c b/trunk/arch/x86/mm/init.c index 286d289b039b..37b8b0fe8320 100644 --- a/trunk/arch/x86/mm/init.c +++ b/trunk/arch/x86/mm/init.c @@ -81,6 +81,11 @@ static void __init find_early_table_space(unsigned long end, int use_pse, end, pgt_buf_start << PAGE_SHIFT, pgt_buf_top << PAGE_SHIFT); } +void __init native_pagetable_reserve(u64 start, u64 end) +{ + memblock_x86_reserve_range(start, end, "PGTABLE"); +} + struct map_range { unsigned long start; unsigned long end; @@ -272,9 +277,24 @@ unsigned long __init_refok init_memory_mapping(unsigned long start, __flush_tlb_all(); + /* + * Reserve the kernel pagetable pages we used (pgt_buf_start - + * pgt_buf_end) and free the other ones (pgt_buf_end - pgt_buf_top) + * so that they can be reused for other purposes. + * + * On native it just means calling memblock_x86_reserve_range, on Xen it + * also means marking RW the pagetable pages that we allocated before + * but that haven't been used. + * + * In fact on xen we mark RO the whole range pgt_buf_start - + * pgt_buf_top, because we have to make sure that when + * init_memory_mapping reaches the pagetable pages area, it maps + * RO all the pagetable pages, including the ones that are beyond + * pgt_buf_end at that time. + */ if (!after_bootmem && pgt_buf_end > pgt_buf_start) - memblock_x86_reserve_range(pgt_buf_start << PAGE_SHIFT, - pgt_buf_end << PAGE_SHIFT, "PGTABLE"); + x86_init.mapping.pagetable_reserve(PFN_PHYS(pgt_buf_start), + PFN_PHYS(pgt_buf_end)); if (!after_bootmem) early_memtest(start, end); diff --git a/trunk/arch/x86/xen/mmu.c b/trunk/arch/x86/xen/mmu.c index 55c965b38c27..0684f3c74d53 100644 --- a/trunk/arch/x86/xen/mmu.c +++ b/trunk/arch/x86/xen/mmu.c @@ -1275,6 +1275,20 @@ static __init void xen_pagetable_setup_start(pgd_t *base) { } +static __init void xen_mapping_pagetable_reserve(u64 start, u64 end) +{ + /* reserve the range used */ + native_pagetable_reserve(start, end); + + /* set as RW the rest */ + printk(KERN_DEBUG "xen: setting RW the range %llx - %llx\n", end, + PFN_PHYS(pgt_buf_top)); + while (end < PFN_PHYS(pgt_buf_top)) { + make_lowmem_page_readwrite(__va(end)); + end += PAGE_SIZE; + } +} + static void xen_post_allocator_init(void); static __init void xen_pagetable_setup_done(pgd_t *base) @@ -1463,119 +1477,6 @@ static int xen_pgd_alloc(struct mm_struct *mm) return ret; } -#ifdef CONFIG_X86_64 -static __initdata u64 __last_pgt_set_rw = 0; -static __initdata u64 __pgt_buf_start = 0; -static __initdata u64 __pgt_buf_end = 0; -static __initdata u64 __pgt_buf_top = 0; -/* - * As a consequence of the commit: - * - * commit 4b239f458c229de044d6905c2b0f9fe16ed9e01e - * Author: Yinghai Lu - * Date: Fri Dec 17 16:58:28 2010 -0800 - * - * x86-64, mm: Put early page table high - * - * at some point init_memory_mapping is going to reach the pagetable pages - * area and map those pages too (mapping them as normal memory that falls - * in the range of addresses passed to init_memory_mapping as argument). - * Some of those pages are already pagetable pages (they are in the range - * pgt_buf_start-pgt_buf_end) therefore they are going to be mapped RO and - * everything is fine. - * Some of these pages are not pagetable pages yet (they fall in the range - * pgt_buf_end-pgt_buf_top; for example the page at pgt_buf_end) so they - * are going to be mapped RW. When these pages become pagetable pages and - * are hooked into the pagetable, xen will find that the guest has already - * a RW mapping of them somewhere and fail the operation. - * The reason Xen requires pagetables to be RO is that the hypervisor needs - * to verify that the pagetables are valid before using them. The validation - * operations are called "pinning". - * - * In order to fix the issue we mark all the pages in the entire range - * pgt_buf_start-pgt_buf_top as RO, however when the pagetable allocation - * is completed only the range pgt_buf_start-pgt_buf_end is reserved by - * init_memory_mapping. Hence the kernel is going to crash as soon as one - * of the pages in the range pgt_buf_end-pgt_buf_top is reused (b/c those - * ranges are RO). - * - * For this reason, 'mark_rw_past_pgt' is introduced which is called _after_ - * the init_memory_mapping has completed (in a perfect world we would - * call this function from init_memory_mapping, but lets ignore that). - * - * Because we are called _after_ init_memory_mapping the pgt_buf_[start, - * end,top] have all changed to new values (b/c init_memory_mapping - * is called and setting up another new page-table). Hence, the first time - * we enter this function, we save away the pgt_buf_start value and update - * the pgt_buf_[end,top]. - * - * When we detect that the "old" pgt_buf_start through pgt_buf_end - * PFNs have been reserved (so memblock_x86_reserve_range has been called), - * we immediately set out to RW the "old" pgt_buf_end through pgt_buf_top. - * - * And then we update those "old" pgt_buf_[end|top] with the new ones - * so that we can redo this on the next pagetable. - */ -static __init void mark_rw_past_pgt(void) { - - if (pgt_buf_end > pgt_buf_start) { - u64 addr, size; - - /* Save it away. */ - if (!__pgt_buf_start) { - __pgt_buf_start = pgt_buf_start; - __pgt_buf_end = pgt_buf_end; - __pgt_buf_top = pgt_buf_top; - return; - } - /* If we get the range that starts at __pgt_buf_end that means - * the range is reserved, and that in 'init_memory_mapping' - * the 'memblock_x86_reserve_range' has been called with the - * outdated __pgt_buf_start, __pgt_buf_end (the "new" - * pgt_buf_[start|end|top] refer now to a new pagetable. - * Note: we are called _after_ the pgt_buf_[..] have been - * updated.*/ - - addr = memblock_x86_find_in_range_size(PFN_PHYS(__pgt_buf_start), - &size, PAGE_SIZE); - - /* Still not reserved, meaning 'memblock_x86_reserve_range' - * hasn't been called yet. Update the _end and _top.*/ - if (addr == PFN_PHYS(__pgt_buf_start)) { - __pgt_buf_end = pgt_buf_end; - __pgt_buf_top = pgt_buf_top; - return; - } - - /* OK, the area is reserved, meaning it is time for us to - * set RW for the old end->top PFNs. */ - - /* ..unless we had already done this. */ - if (__pgt_buf_end == __last_pgt_set_rw) - return; - - addr = PFN_PHYS(__pgt_buf_end); - - /* set as RW the rest */ - printk(KERN_DEBUG "xen: setting RW the range %llx - %llx\n", - PFN_PHYS(__pgt_buf_end), PFN_PHYS(__pgt_buf_top)); - - while (addr < PFN_PHYS(__pgt_buf_top)) { - make_lowmem_page_readwrite(__va(addr)); - addr += PAGE_SIZE; - } - /* And update everything so that we are ready for the next - * pagetable (the one created for regions past 4GB) */ - __last_pgt_set_rw = __pgt_buf_end; - __pgt_buf_start = pgt_buf_start; - __pgt_buf_end = pgt_buf_end; - __pgt_buf_top = pgt_buf_top; - } - return; -} -#else -static __init void mark_rw_past_pgt(void) { } -#endif static void xen_pgd_free(struct mm_struct *mm, pgd_t *pgd) { #ifdef CONFIG_X86_64 @@ -1601,14 +1502,6 @@ static __init pte_t mask_rw_pte(pte_t *ptep, pte_t pte) { unsigned long pfn = pte_pfn(pte); - /* - * A bit of optimization. We do not need to call the workaround - * when xen_set_pte_init is called with a PTE with 0 as PFN. - * That is b/c the pagetable at that point are just being populated - * with empty values and we can save some cycles by not calling - * the 'memblock' code.*/ - if (pfn) - mark_rw_past_pgt(); /* * If the new pfn is within the range of the newly allocated * kernel pagetable, and it isn't being mapped into an @@ -2118,8 +2011,6 @@ __init void xen_ident_map_ISA(void) static __init void xen_post_allocator_init(void) { - mark_rw_past_pgt(); - #ifdef CONFIG_XEN_DEBUG pv_mmu_ops.make_pte = PV_CALLEE_SAVE(xen_make_pte_debug); #endif @@ -2228,6 +2119,7 @@ static const struct pv_mmu_ops xen_mmu_ops __initdata = { void __init xen_init_mmu_ops(void) { + x86_init.mapping.pagetable_reserve = xen_mapping_pagetable_reserve; x86_init.paging.pagetable_setup_start = xen_pagetable_setup_start; x86_init.paging.pagetable_setup_done = xen_pagetable_setup_done; pv_mmu_ops = xen_mmu_ops; diff --git a/trunk/arch/x86/xen/smp.c b/trunk/arch/x86/xen/smp.c index 762b46ab14d5..30612441ed99 100644 --- a/trunk/arch/x86/xen/smp.c +++ b/trunk/arch/x86/xen/smp.c @@ -46,12 +46,13 @@ static irqreturn_t xen_call_function_interrupt(int irq, void *dev_id); static irqreturn_t xen_call_function_single_interrupt(int irq, void *dev_id); /* - * Reschedule call back. + * Reschedule call back. Nothing to do, + * all the work is done automatically when + * we return from the interrupt. */ static irqreturn_t xen_reschedule_interrupt(int irq, void *dev_id) { inc_irq_stat(irq_resched_count); - scheduler_ipi(); return IRQ_HANDLED; } diff --git a/trunk/drivers/ata/libahci.c b/trunk/drivers/ata/libahci.c index ff9d832a163d..d38c40fe4ddb 100644 --- a/trunk/drivers/ata/libahci.c +++ b/trunk/drivers/ata/libahci.c @@ -561,27 +561,6 @@ void ahci_start_engine(struct ata_port *ap) { void __iomem *port_mmio = ahci_port_base(ap); u32 tmp; - u8 status; - - status = readl(port_mmio + PORT_TFDATA) & 0xFF; - - /* - * At end of section 10.1 of AHCI spec (rev 1.3), it states - * Software shall not set PxCMD.ST to 1 until it is determined - * that a functoinal device is present on the port as determined by - * PxTFD.STS.BSY=0, PxTFD.STS.DRQ=0 and PxSSTS.DET=3h - * - * Even though most AHCI host controllers work without this check, - * specific controller will fail under this condition - */ - if (status & (ATA_BUSY | ATA_DRQ)) - return; - else { - ahci_scr_read(&ap->link, SCR_STATUS, &tmp); - - if ((tmp & 0xf) != 0x3) - return; - } /* start DMA */ tmp = readl(port_mmio + PORT_CMD); diff --git a/trunk/drivers/ata/libata-eh.c b/trunk/drivers/ata/libata-eh.c index f26f2fe3480a..dad9fd660f37 100644 --- a/trunk/drivers/ata/libata-eh.c +++ b/trunk/drivers/ata/libata-eh.c @@ -3316,7 +3316,7 @@ static int ata_eh_set_lpm(struct ata_link *link, enum ata_lpm_policy policy, struct ata_eh_context *ehc = &link->eh_context; struct ata_device *dev, *link_dev = NULL, *lpm_dev = NULL; enum ata_lpm_policy old_policy = link->lpm_policy; - bool no_dipm = ap->flags & ATA_FLAG_NO_DIPM; + bool no_dipm = link->ap->flags & ATA_FLAG_NO_DIPM; unsigned int hints = ATA_LPM_EMPTY | ATA_LPM_HIPM; unsigned int err_mask; int rc; diff --git a/trunk/drivers/block/rbd.c b/trunk/drivers/block/rbd.c index 3e904717c1c0..9712fad82bc6 100644 --- a/trunk/drivers/block/rbd.c +++ b/trunk/drivers/block/rbd.c @@ -92,6 +92,8 @@ struct rbd_client { struct list_head node; }; +struct rbd_req_coll; + /* * a single io request */ @@ -100,6 +102,24 @@ struct rbd_request { struct bio *bio; /* cloned bio */ struct page **pages; /* list of used pages */ u64 len; + int coll_index; + struct rbd_req_coll *coll; +}; + +struct rbd_req_status { + int done; + int rc; + u64 bytes; +}; + +/* + * a collection of requests + */ +struct rbd_req_coll { + int total; + int num_done; + struct kref kref; + struct rbd_req_status status[0]; }; struct rbd_snap { @@ -416,6 +436,17 @@ static void rbd_put_client(struct rbd_device *rbd_dev) rbd_dev->client = NULL; } +/* + * Destroy requests collection + */ +static void rbd_coll_release(struct kref *kref) +{ + struct rbd_req_coll *coll = + container_of(kref, struct rbd_req_coll, kref); + + dout("rbd_coll_release %p\n", coll); + kfree(coll); +} /* * Create a new header structure, translate header format from the on-disk @@ -590,6 +621,14 @@ static u64 rbd_get_segment(struct rbd_image_header *header, return len; } +static int rbd_get_num_segments(struct rbd_image_header *header, + u64 ofs, u64 len) +{ + u64 start_seg = ofs >> header->obj_order; + u64 end_seg = (ofs + len - 1) >> header->obj_order; + return end_seg - start_seg + 1; +} + /* * bio helpers */ @@ -735,6 +774,50 @@ static void rbd_destroy_ops(struct ceph_osd_req_op *ops) kfree(ops); } +static void rbd_coll_end_req_index(struct request *rq, + struct rbd_req_coll *coll, + int index, + int ret, u64 len) +{ + struct request_queue *q; + int min, max, i; + + dout("rbd_coll_end_req_index %p index %d ret %d len %lld\n", + coll, index, ret, len); + + if (!rq) + return; + + if (!coll) { + blk_end_request(rq, ret, len); + return; + } + + q = rq->q; + + spin_lock_irq(q->queue_lock); + coll->status[index].done = 1; + coll->status[index].rc = ret; + coll->status[index].bytes = len; + max = min = coll->num_done; + while (max < coll->total && coll->status[max].done) + max++; + + for (i = min; istatus[i].rc, + coll->status[i].bytes); + coll->num_done++; + kref_put(&coll->kref, rbd_coll_release); + } + spin_unlock_irq(q->queue_lock); +} + +static void rbd_coll_end_req(struct rbd_request *req, + int ret, u64 len) +{ + rbd_coll_end_req_index(req->rq, req->coll, req->coll_index, ret, len); +} + /* * Send ceph osd request */ @@ -749,6 +832,8 @@ static int rbd_do_request(struct request *rq, int flags, struct ceph_osd_req_op *ops, int num_reply, + struct rbd_req_coll *coll, + int coll_index, void (*rbd_cb)(struct ceph_osd_request *req, struct ceph_msg *msg), struct ceph_osd_request **linger_req, @@ -763,12 +848,20 @@ static int rbd_do_request(struct request *rq, struct ceph_osd_request_head *reqhead; struct rbd_image_header *header = &dev->header; - ret = -ENOMEM; req_data = kzalloc(sizeof(*req_data), GFP_NOIO); - if (!req_data) - goto done; + if (!req_data) { + if (coll) + rbd_coll_end_req_index(rq, coll, coll_index, + -ENOMEM, len); + return -ENOMEM; + } - dout("rbd_do_request len=%lld ofs=%lld\n", len, ofs); + if (coll) { + req_data->coll = coll; + req_data->coll_index = coll_index; + } + + dout("rbd_do_request obj=%s ofs=%lld len=%lld\n", obj, len, ofs); down_read(&header->snap_rwsem); @@ -828,7 +921,8 @@ static int rbd_do_request(struct request *rq, ret = ceph_osdc_wait_request(&dev->client->osdc, req); if (ver) *ver = le64_to_cpu(req->r_reassert_version.version); - dout("reassert_ver=%lld\n", le64_to_cpu(req->r_reassert_version.version)); + dout("reassert_ver=%lld\n", + le64_to_cpu(req->r_reassert_version.version)); ceph_osdc_put_request(req); } return ret; @@ -837,10 +931,8 @@ static int rbd_do_request(struct request *rq, bio_chain_put(req_data->bio); ceph_osdc_put_request(req); done_pages: + rbd_coll_end_req(req_data, ret, len); kfree(req_data); -done: - if (rq) - blk_end_request(rq, ret, len); return ret; } @@ -874,7 +966,7 @@ static void rbd_req_cb(struct ceph_osd_request *req, struct ceph_msg *msg) bytes = req_data->len; } - blk_end_request(req_data->rq, rc, bytes); + rbd_coll_end_req(req_data, rc, bytes); if (req_data->bio) bio_chain_put(req_data->bio); @@ -934,6 +1026,7 @@ static int rbd_req_sync_op(struct rbd_device *dev, flags, ops, 2, + NULL, 0, NULL, linger_req, ver); if (ret < 0) @@ -959,7 +1052,9 @@ static int rbd_do_op(struct request *rq, u64 snapid, int opcode, int flags, int num_reply, u64 ofs, u64 len, - struct bio *bio) + struct bio *bio, + struct rbd_req_coll *coll, + int coll_index) { char *seg_name; u64 seg_ofs; @@ -995,7 +1090,10 @@ static int rbd_do_op(struct request *rq, flags, ops, num_reply, + coll, coll_index, rbd_req_cb, 0, NULL); + + rbd_destroy_ops(ops); done: kfree(seg_name); return ret; @@ -1008,13 +1106,15 @@ static int rbd_req_write(struct request *rq, struct rbd_device *rbd_dev, struct ceph_snap_context *snapc, u64 ofs, u64 len, - struct bio *bio) + struct bio *bio, + struct rbd_req_coll *coll, + int coll_index) { return rbd_do_op(rq, rbd_dev, snapc, CEPH_NOSNAP, CEPH_OSD_OP_WRITE, CEPH_OSD_FLAG_WRITE | CEPH_OSD_FLAG_ONDISK, 2, - ofs, len, bio); + ofs, len, bio, coll, coll_index); } /* @@ -1024,14 +1124,16 @@ static int rbd_req_read(struct request *rq, struct rbd_device *rbd_dev, u64 snapid, u64 ofs, u64 len, - struct bio *bio) + struct bio *bio, + struct rbd_req_coll *coll, + int coll_index) { return rbd_do_op(rq, rbd_dev, NULL, (snapid ? snapid : CEPH_NOSNAP), CEPH_OSD_OP_READ, CEPH_OSD_FLAG_READ, 2, - ofs, len, bio); + ofs, len, bio, coll, coll_index); } /* @@ -1063,7 +1165,9 @@ static int rbd_req_sync_notify_ack(struct rbd_device *dev, { struct ceph_osd_req_op *ops; struct page **pages = NULL; - int ret = rbd_create_rw_ops(&ops, 1, CEPH_OSD_OP_NOTIFY_ACK, 0); + int ret; + + ret = rbd_create_rw_ops(&ops, 1, CEPH_OSD_OP_NOTIFY_ACK, 0); if (ret < 0) return ret; @@ -1077,6 +1181,7 @@ static int rbd_req_sync_notify_ack(struct rbd_device *dev, CEPH_OSD_FLAG_READ, ops, 1, + NULL, 0, rbd_simple_req_cb, 0, NULL); rbd_destroy_ops(ops); @@ -1274,6 +1379,20 @@ static int rbd_req_sync_exec(struct rbd_device *dev, return ret; } +static struct rbd_req_coll *rbd_alloc_coll(int num_reqs) +{ + struct rbd_req_coll *coll = + kzalloc(sizeof(struct rbd_req_coll) + + sizeof(struct rbd_req_status) * num_reqs, + GFP_ATOMIC); + + if (!coll) + return NULL; + coll->total = num_reqs; + kref_init(&coll->kref); + return coll; +} + /* * block device queue callback */ @@ -1291,6 +1410,8 @@ static void rbd_rq_fn(struct request_queue *q) bool do_write; int size, op_size = 0; u64 ofs; + int num_segs, cur_seg = 0; + struct rbd_req_coll *coll; /* peek at request from block layer */ if (!rq) @@ -1321,6 +1442,14 @@ static void rbd_rq_fn(struct request_queue *q) do_write ? "write" : "read", size, blk_rq_pos(rq) * 512ULL); + num_segs = rbd_get_num_segments(&rbd_dev->header, ofs, size); + coll = rbd_alloc_coll(num_segs); + if (!coll) { + spin_lock_irq(q->queue_lock); + __blk_end_request_all(rq, -ENOMEM); + goto next; + } + do { /* a bio clone to be passed down to OSD req */ dout("rq->bio->bi_vcnt=%d\n", rq->bio->bi_vcnt); @@ -1328,35 +1457,41 @@ static void rbd_rq_fn(struct request_queue *q) rbd_dev->header.block_name, ofs, size, NULL, NULL); + kref_get(&coll->kref); bio = bio_chain_clone(&rq_bio, &next_bio, &bp, op_size, GFP_ATOMIC); if (!bio) { - spin_lock_irq(q->queue_lock); - __blk_end_request_all(rq, -ENOMEM); - goto next; + rbd_coll_end_req_index(rq, coll, cur_seg, + -ENOMEM, op_size); + goto next_seg; } + /* init OSD command: write or read */ if (do_write) rbd_req_write(rq, rbd_dev, rbd_dev->header.snapc, ofs, - op_size, bio); + op_size, bio, + coll, cur_seg); else rbd_req_read(rq, rbd_dev, cur_snap_id(rbd_dev), ofs, - op_size, bio); + op_size, bio, + coll, cur_seg); +next_seg: size -= op_size; ofs += op_size; + cur_seg++; rq_bio = next_bio; } while (size > 0); + kref_put(&coll->kref, rbd_coll_release); if (bp) bio_pair_release(bp); - spin_lock_irq(q->queue_lock); next: rq = blk_fetch_request(q); diff --git a/trunk/drivers/gpu/drm/i915/i915_drv.c b/trunk/drivers/gpu/drm/i915/i915_drv.c index c34a8dd31d02..32d1b3e829c8 100644 --- a/trunk/drivers/gpu/drm/i915/i915_drv.c +++ b/trunk/drivers/gpu/drm/i915/i915_drv.c @@ -49,7 +49,7 @@ module_param_named(panel_ignore_lid, i915_panel_ignore_lid, int, 0600); unsigned int i915_powersave = 1; module_param_named(powersave, i915_powersave, int, 0600); -unsigned int i915_semaphores = 1; +unsigned int i915_semaphores = 0; module_param_named(semaphores, i915_semaphores, int, 0600); unsigned int i915_enable_rc6 = 0; diff --git a/trunk/drivers/gpu/drm/i915/intel_display.c b/trunk/drivers/gpu/drm/i915/intel_display.c index 373c2a005ec1..2166ee071ddb 100644 --- a/trunk/drivers/gpu/drm/i915/intel_display.c +++ b/trunk/drivers/gpu/drm/i915/intel_display.c @@ -5154,6 +5154,8 @@ static int intel_crtc_mode_set(struct drm_crtc *crtc, I915_WRITE(DSPCNTR(plane), dspcntr); POSTING_READ(DSPCNTR(plane)); + if (!HAS_PCH_SPLIT(dev)) + intel_enable_plane(dev_priv, plane, pipe); ret = intel_pipe_set_base(crtc, x, y, old_fb); diff --git a/trunk/drivers/gpu/drm/nouveau/nouveau_sgdma.c b/trunk/drivers/gpu/drm/nouveau/nouveau_sgdma.c index 4bce801bc588..c77111eca6ac 100644 --- a/trunk/drivers/gpu/drm/nouveau/nouveau_sgdma.c +++ b/trunk/drivers/gpu/drm/nouveau/nouveau_sgdma.c @@ -42,7 +42,8 @@ nouveau_sgdma_populate(struct ttm_backend *be, unsigned long num_pages, nvbe->nr_pages = 0; while (num_pages--) { - if (dma_addrs[nvbe->nr_pages] != DMA_ERROR_CODE) { + /* this code path isn't called and is incorrect anyways */ + if (0) { /*dma_addrs[nvbe->nr_pages] != DMA_ERROR_CODE)*/ nvbe->pages[nvbe->nr_pages] = dma_addrs[nvbe->nr_pages]; nvbe->ttm_alloced[nvbe->nr_pages] = true; diff --git a/trunk/drivers/gpu/drm/radeon/ni.c b/trunk/drivers/gpu/drm/radeon/ni.c index 7aade20f63a8..3d8a7634bbe9 100644 --- a/trunk/drivers/gpu/drm/radeon/ni.c +++ b/trunk/drivers/gpu/drm/radeon/ni.c @@ -674,7 +674,7 @@ static void cayman_gpu_init(struct radeon_device *rdev) cc_rb_backend_disable = RREG32(CC_RB_BACKEND_DISABLE); cc_gc_shader_pipe_config = RREG32(CC_GC_SHADER_PIPE_CONFIG); - cgts_tcc_disable = RREG32(CGTS_TCC_DISABLE); + cgts_tcc_disable = 0xff000000; gc_user_rb_backend_disable = RREG32(GC_USER_RB_BACKEND_DISABLE); gc_user_shader_pipe_config = RREG32(GC_USER_SHADER_PIPE_CONFIG); cgts_user_tcc_disable = RREG32(CGTS_USER_TCC_DISABLE); @@ -871,7 +871,7 @@ static void cayman_gpu_init(struct radeon_device *rdev) smx_dc_ctl0 = RREG32(SMX_DC_CTL0); smx_dc_ctl0 &= ~NUMBER_OF_SETS(0x1ff); - smx_dc_ctl0 |= NUMBER_OF_SETS(rdev->config.evergreen.sx_num_of_sets); + smx_dc_ctl0 |= NUMBER_OF_SETS(rdev->config.cayman.sx_num_of_sets); WREG32(SMX_DC_CTL0, smx_dc_ctl0); WREG32(SPI_CONFIG_CNTL_1, VTX_DONE_DELAY(4) | CRC_SIMD_ID_WADDR_DISABLE); @@ -887,20 +887,20 @@ static void cayman_gpu_init(struct radeon_device *rdev) WREG32(TA_CNTL_AUX, DISABLE_CUBE_ANISO); - WREG32(SX_EXPORT_BUFFER_SIZES, (COLOR_BUFFER_SIZE((rdev->config.evergreen.sx_max_export_size / 4) - 1) | - POSITION_BUFFER_SIZE((rdev->config.evergreen.sx_max_export_pos_size / 4) - 1) | - SMX_BUFFER_SIZE((rdev->config.evergreen.sx_max_export_smx_size / 4) - 1))); + WREG32(SX_EXPORT_BUFFER_SIZES, (COLOR_BUFFER_SIZE((rdev->config.cayman.sx_max_export_size / 4) - 1) | + POSITION_BUFFER_SIZE((rdev->config.cayman.sx_max_export_pos_size / 4) - 1) | + SMX_BUFFER_SIZE((rdev->config.cayman.sx_max_export_smx_size / 4) - 1))); - WREG32(PA_SC_FIFO_SIZE, (SC_PRIM_FIFO_SIZE(rdev->config.evergreen.sc_prim_fifo_size) | - SC_HIZ_TILE_FIFO_SIZE(rdev->config.evergreen.sc_hiz_tile_fifo_size) | - SC_EARLYZ_TILE_FIFO_SIZE(rdev->config.evergreen.sc_earlyz_tile_fifo_size))); + WREG32(PA_SC_FIFO_SIZE, (SC_PRIM_FIFO_SIZE(rdev->config.cayman.sc_prim_fifo_size) | + SC_HIZ_TILE_FIFO_SIZE(rdev->config.cayman.sc_hiz_tile_fifo_size) | + SC_EARLYZ_TILE_FIFO_SIZE(rdev->config.cayman.sc_earlyz_tile_fifo_size))); WREG32(VGT_NUM_INSTANCES, 1); WREG32(CP_PERFMON_CNTL, 0); - WREG32(SQ_MS_FIFO_SIZES, (CACHE_FIFO_SIZE(16 * rdev->config.evergreen.sq_num_cf_insts) | + WREG32(SQ_MS_FIFO_SIZES, (CACHE_FIFO_SIZE(16 * rdev->config.cayman.sq_num_cf_insts) | FETCH_FIFO_HIWATER(0x4) | DONE_FIFO_HIWATER(0xe0) | ALU_UPDATE_FIFO_HIWATER(0x8))); diff --git a/trunk/drivers/gpu/drm/radeon/radeon_gart.c b/trunk/drivers/gpu/drm/radeon/radeon_gart.c index 8a955bbdb608..a533f52fd163 100644 --- a/trunk/drivers/gpu/drm/radeon/radeon_gart.c +++ b/trunk/drivers/gpu/drm/radeon/radeon_gart.c @@ -181,9 +181,9 @@ int radeon_gart_bind(struct radeon_device *rdev, unsigned offset, p = t / (PAGE_SIZE / RADEON_GPU_PAGE_SIZE); for (i = 0; i < pages; i++, p++) { - /* On TTM path, we only use the DMA API if TTM_PAGE_FLAG_DMA32 - * is requested. */ - if (dma_addr[i] != DMA_ERROR_CODE) { + /* we reverted the patch using dma_addr in TTM for now but this + * code stops building on alpha so just comment it out for now */ + if (0) { /*dma_addr[i] != DMA_ERROR_CODE) */ rdev->gart.ttm_alloced[p] = true; rdev->gart.pages_addr[p] = dma_addr[i]; } else { diff --git a/trunk/drivers/i2c/busses/i2c-pnx.c b/trunk/drivers/i2c/busses/i2c-pnx.c index a97e3fec8148..04be9f82e14b 100644 --- a/trunk/drivers/i2c/busses/i2c-pnx.c +++ b/trunk/drivers/i2c/busses/i2c-pnx.c @@ -65,7 +65,7 @@ static inline void i2c_pnx_arm_timer(struct i2c_pnx_algo_data *alg_data) jiffies, expires); timer->expires = jiffies + expires; - timer->data = (unsigned long)&alg_data; + timer->data = (unsigned long)alg_data; add_timer(timer); } diff --git a/trunk/drivers/input/touchscreen/ads7846.c b/trunk/drivers/input/touchscreen/ads7846.c index c24946f51256..1de1c19dad30 100644 --- a/trunk/drivers/input/touchscreen/ads7846.c +++ b/trunk/drivers/input/touchscreen/ads7846.c @@ -281,17 +281,24 @@ struct ser_req { u8 command; u8 ref_off; u16 scratch; - __be16 sample; struct spi_message msg; struct spi_transfer xfer[6]; + /* + * DMA (thus cache coherency maintenance) requires the + * transfer buffers to live in their own cache lines. + */ + __be16 sample ____cacheline_aligned; }; struct ads7845_ser_req { u8 command[3]; - u8 pwrdown[3]; - u8 sample[3]; struct spi_message msg; struct spi_transfer xfer[2]; + /* + * DMA (thus cache coherency maintenance) requires the + * transfer buffers to live in their own cache lines. + */ + u8 sample[3] ____cacheline_aligned; }; static int ads7846_read12_ser(struct device *dev, unsigned command) diff --git a/trunk/drivers/mfd/asic3.c b/trunk/drivers/mfd/asic3.c index d4a851c6b5bf..0b4d5b23bec9 100644 --- a/trunk/drivers/mfd/asic3.c +++ b/trunk/drivers/mfd/asic3.c @@ -144,7 +144,7 @@ static void asic3_irq_demux(unsigned int irq, struct irq_desc *desc) int iter, i; unsigned long flags; - data->chip->irq_ack(irq_data); + data->chip->irq_ack(data); for (iter = 0 ; iter < MAX_ASIC_ISR_LOOPS; iter++) { u32 status; diff --git a/trunk/drivers/mfd/omap-usb-host.c b/trunk/drivers/mfd/omap-usb-host.c index 2e165117457b..3ab9ffa00aad 100644 --- a/trunk/drivers/mfd/omap-usb-host.c +++ b/trunk/drivers/mfd/omap-usb-host.c @@ -717,14 +717,14 @@ static int usbhs_enable(struct device *dev) gpio_request(pdata->ehci_data->reset_gpio_port[0], "USB1 PHY reset"); gpio_direction_output - (pdata->ehci_data->reset_gpio_port[0], 1); + (pdata->ehci_data->reset_gpio_port[0], 0); } if (gpio_is_valid(pdata->ehci_data->reset_gpio_port[1])) { gpio_request(pdata->ehci_data->reset_gpio_port[1], "USB2 PHY reset"); gpio_direction_output - (pdata->ehci_data->reset_gpio_port[1], 1); + (pdata->ehci_data->reset_gpio_port[1], 0); } /* Hold the PHY in RESET for enough time till DIR is high */ @@ -904,11 +904,11 @@ static int usbhs_enable(struct device *dev) if (gpio_is_valid(pdata->ehci_data->reset_gpio_port[0])) gpio_set_value - (pdata->ehci_data->reset_gpio_port[0], 0); + (pdata->ehci_data->reset_gpio_port[0], 1); if (gpio_is_valid(pdata->ehci_data->reset_gpio_port[1])) gpio_set_value - (pdata->ehci_data->reset_gpio_port[1], 0); + (pdata->ehci_data->reset_gpio_port[1], 1); } end_count: diff --git a/trunk/drivers/mfd/twl4030-power.c b/trunk/drivers/mfd/twl4030-power.c index 16422de0823a..2c0d4d16491a 100644 --- a/trunk/drivers/mfd/twl4030-power.c +++ b/trunk/drivers/mfd/twl4030-power.c @@ -447,12 +447,13 @@ static int __init load_twl4030_script(struct twl4030_script *tscript, if (err) goto out; } - if (tscript->flags & TWL4030_SLEEP_SCRIPT) + if (tscript->flags & TWL4030_SLEEP_SCRIPT) { if (order) pr_warning("TWL4030: Bad order of scripts (sleep "\ "script before wakeup) Leads to boot"\ "failure on some boards\n"); err = twl4030_config_sleep_sequence(address); + } out: return err; } diff --git a/trunk/drivers/net/Kconfig b/trunk/drivers/net/Kconfig index dc280bc8eba2..6c884ef1b069 100644 --- a/trunk/drivers/net/Kconfig +++ b/trunk/drivers/net/Kconfig @@ -2536,7 +2536,7 @@ config S6GMAC source "drivers/net/stmmac/Kconfig" config PCH_GBE - tristate "PCH Gigabit Ethernet" + tristate "Intel EG20T PCH / OKI SEMICONDUCTOR ML7223 IOH GbE" depends on PCI select MII ---help--- @@ -2548,6 +2548,12 @@ config PCH_GBE to Gigabit Ethernet. This driver enables Gigabit Ethernet function. + This driver also can be used for OKI SEMICONDUCTOR IOH(Input/ + Output Hub), ML7223. + ML7223 IOH is for MP(Media Phone) use. + ML7223 is companion chip for Intel Atom E6xx series. + ML7223 is completely compatible for Intel EG20T PCH. + endif # NETDEV_1000 # diff --git a/trunk/drivers/net/Makefile b/trunk/drivers/net/Makefile index 01b604ad155e..e5a7375685ad 100644 --- a/trunk/drivers/net/Makefile +++ b/trunk/drivers/net/Makefile @@ -144,7 +144,7 @@ obj-$(CONFIG_NE3210) += ne3210.o 8390.o obj-$(CONFIG_SB1250_MAC) += sb1250-mac.o obj-$(CONFIG_B44) += b44.o obj-$(CONFIG_FORCEDETH) += forcedeth.o -obj-$(CONFIG_NE_H8300) += ne-h8300.o 8390.o +obj-$(CONFIG_NE_H8300) += ne-h8300.o obj-$(CONFIG_AX88796) += ax88796.o obj-$(CONFIG_BCM63XX_ENET) += bcm63xx_enet.o obj-$(CONFIG_FTMAC100) += ftmac100.o @@ -219,7 +219,7 @@ obj-$(CONFIG_SC92031) += sc92031.o obj-$(CONFIG_LP486E) += lp486e.o obj-$(CONFIG_ETH16I) += eth16i.o -obj-$(CONFIG_ZORRO8390) += zorro8390.o 8390.o +obj-$(CONFIG_ZORRO8390) += zorro8390.o obj-$(CONFIG_HPLANCE) += hplance.o 7990.o obj-$(CONFIG_MVME147_NET) += mvme147.o 7990.o obj-$(CONFIG_EQUALIZER) += eql.o @@ -231,7 +231,7 @@ obj-$(CONFIG_SGI_IOC3_ETH) += ioc3-eth.o obj-$(CONFIG_DECLANCE) += declance.o obj-$(CONFIG_ATARILANCE) += atarilance.o obj-$(CONFIG_A2065) += a2065.o -obj-$(CONFIG_HYDRA) += hydra.o 8390.o +obj-$(CONFIG_HYDRA) += hydra.o obj-$(CONFIG_ARIADNE) += ariadne.o obj-$(CONFIG_CS89x0) += cs89x0.o obj-$(CONFIG_MACSONIC) += macsonic.o diff --git a/trunk/drivers/net/arm/etherh.c b/trunk/drivers/net/arm/etherh.c index 4af235d41fda..fbfb5b47c506 100644 --- a/trunk/drivers/net/arm/etherh.c +++ b/trunk/drivers/net/arm/etherh.c @@ -527,7 +527,7 @@ static void __init etherh_banner(void) * Read the ethernet address string from the on board rom. * This is an ascii string... */ -static int __init etherh_addr(char *addr, struct expansion_card *ec) +static int __devinit etherh_addr(char *addr, struct expansion_card *ec) { struct in_chunk_dir cd; char *s; @@ -655,7 +655,7 @@ static const struct net_device_ops etherh_netdev_ops = { static u32 etherh_regoffsets[16]; static u32 etherm_regoffsets[16]; -static int __init +static int __devinit etherh_probe(struct expansion_card *ec, const struct ecard_id *id) { const struct etherh_data *data = id->data; diff --git a/trunk/drivers/net/benet/be.h b/trunk/drivers/net/benet/be.h index 66823eded7a3..2353eca32593 100644 --- a/trunk/drivers/net/benet/be.h +++ b/trunk/drivers/net/benet/be.h @@ -213,7 +213,7 @@ struct be_rx_stats { struct be_rx_compl_info { u32 rss_hash; - u16 vid; + u16 vlan_tag; u16 pkt_size; u16 rxq_idx; u16 mac_id; diff --git a/trunk/drivers/net/benet/be_cmds.c b/trunk/drivers/net/benet/be_cmds.c index 1e2d825bb94a..9dc9394fd4ca 100644 --- a/trunk/drivers/net/benet/be_cmds.c +++ b/trunk/drivers/net/benet/be_cmds.c @@ -132,7 +132,7 @@ static void be_async_grp5_pvid_state_process(struct be_adapter *adapter, struct be_async_event_grp5_pvid_state *evt) { if (evt->enabled) - adapter->pvid = evt->tag; + adapter->pvid = le16_to_cpu(evt->tag); else adapter->pvid = 0; } diff --git a/trunk/drivers/net/benet/be_main.c b/trunk/drivers/net/benet/be_main.c index 02a0443d1821..9187fb4e08f1 100644 --- a/trunk/drivers/net/benet/be_main.c +++ b/trunk/drivers/net/benet/be_main.c @@ -1018,7 +1018,8 @@ static void be_rx_compl_process(struct be_adapter *adapter, kfree_skb(skb); return; } - vlan_hwaccel_receive_skb(skb, adapter->vlan_grp, rxcp->vid); + vlan_hwaccel_receive_skb(skb, adapter->vlan_grp, + rxcp->vlan_tag); } else { netif_receive_skb(skb); } @@ -1076,7 +1077,8 @@ static void be_rx_compl_process_gro(struct be_adapter *adapter, if (likely(!rxcp->vlanf)) napi_gro_frags(&eq_obj->napi); else - vlan_gro_frags(&eq_obj->napi, adapter->vlan_grp, rxcp->vid); + vlan_gro_frags(&eq_obj->napi, adapter->vlan_grp, + rxcp->vlan_tag); } static void be_parse_rx_compl_v1(struct be_adapter *adapter, @@ -1102,7 +1104,8 @@ static void be_parse_rx_compl_v1(struct be_adapter *adapter, rxcp->pkt_type = AMAP_GET_BITS(struct amap_eth_rx_compl_v1, cast_enc, compl); rxcp->vtm = AMAP_GET_BITS(struct amap_eth_rx_compl_v1, vtm, compl); - rxcp->vid = AMAP_GET_BITS(struct amap_eth_rx_compl_v1, vlan_tag, compl); + rxcp->vlan_tag = AMAP_GET_BITS(struct amap_eth_rx_compl_v1, vlan_tag, + compl); } static void be_parse_rx_compl_v0(struct be_adapter *adapter, @@ -1128,7 +1131,8 @@ static void be_parse_rx_compl_v0(struct be_adapter *adapter, rxcp->pkt_type = AMAP_GET_BITS(struct amap_eth_rx_compl_v0, cast_enc, compl); rxcp->vtm = AMAP_GET_BITS(struct amap_eth_rx_compl_v0, vtm, compl); - rxcp->vid = AMAP_GET_BITS(struct amap_eth_rx_compl_v0, vlan_tag, compl); + rxcp->vlan_tag = AMAP_GET_BITS(struct amap_eth_rx_compl_v0, vlan_tag, + compl); } static struct be_rx_compl_info *be_rx_compl_get(struct be_rx_obj *rxo) @@ -1155,9 +1159,11 @@ static struct be_rx_compl_info *be_rx_compl_get(struct be_rx_obj *rxo) rxcp->vlanf = 0; if (!lancer_chip(adapter)) - rxcp->vid = swab16(rxcp->vid); + rxcp->vlan_tag = swab16(rxcp->vlan_tag); - if ((adapter->pvid == rxcp->vid) && !adapter->vlan_tag[rxcp->vid]) + if (((adapter->pvid & VLAN_VID_MASK) == + (rxcp->vlan_tag & VLAN_VID_MASK)) && + !adapter->vlan_tag[rxcp->vlan_tag]) rxcp->vlanf = 0; /* As the compl has been parsed, reset it; we wont touch it again */ diff --git a/trunk/drivers/net/bonding/bond_3ad.h b/trunk/drivers/net/bonding/bond_3ad.h index b28baff70864..01b8a6af275b 100644 --- a/trunk/drivers/net/bonding/bond_3ad.h +++ b/trunk/drivers/net/bonding/bond_3ad.h @@ -39,7 +39,7 @@ typedef struct mac_addr { u8 mac_addr_value[ETH_ALEN]; -} mac_addr_t; +} __packed mac_addr_t; enum { BOND_AD_STABLE = 0, @@ -134,12 +134,12 @@ typedef struct lacpdu { u8 tlv_type_terminator; // = terminator u8 terminator_length; // = 0 u8 reserved_50[50]; // = 0 -} lacpdu_t; +} __packed lacpdu_t; typedef struct lacpdu_header { struct ethhdr hdr; struct lacpdu lacpdu; -} lacpdu_header_t; +} __packed lacpdu_header_t; // Marker Protocol Data Unit(PDU) structure(43.5.3.2 in the 802.3ad standard) typedef struct bond_marker { @@ -155,12 +155,12 @@ typedef struct bond_marker { u8 tlv_type_terminator; // = 0x00 u8 terminator_length; // = 0x00 u8 reserved_90[90]; // = 0 -} bond_marker_t; +} __packed bond_marker_t; typedef struct bond_marker_header { struct ethhdr hdr; struct bond_marker marker; -} bond_marker_header_t; +} __packed bond_marker_header_t; #pragma pack() diff --git a/trunk/drivers/net/can/sja1000/sja1000.c b/trunk/drivers/net/can/sja1000/sja1000.c index a358ea9445a2..f501bba1fc6f 100644 --- a/trunk/drivers/net/can/sja1000/sja1000.c +++ b/trunk/drivers/net/can/sja1000/sja1000.c @@ -346,10 +346,10 @@ static void sja1000_rx(struct net_device *dev) | (priv->read_reg(priv, REG_ID2) >> 5); } + cf->can_dlc = get_can_dlc(fi & 0x0F); if (fi & FI_RTR) { id |= CAN_RTR_FLAG; } else { - cf->can_dlc = get_can_dlc(fi & 0x0F); for (i = 0; i < cf->can_dlc; i++) cf->data[i] = priv->read_reg(priv, dreg++); } diff --git a/trunk/drivers/net/can/slcan.c b/trunk/drivers/net/can/slcan.c index b423965a78d1..1b49df6b2470 100644 --- a/trunk/drivers/net/can/slcan.c +++ b/trunk/drivers/net/can/slcan.c @@ -583,7 +583,9 @@ static int slcan_open(struct tty_struct *tty) /* Done. We have linked the TTY line to a channel. */ rtnl_unlock(); tty->receive_room = 65536; /* We don't flow control */ - return sl->dev->base_addr; + + /* TTY layer expects 0 on success */ + return 0; err_free_chan: sl->tty = NULL; diff --git a/trunk/drivers/net/ehea/ehea_ethtool.c b/trunk/drivers/net/ehea/ehea_ethtool.c index 3e2e734fecb7..f3bbdcef338c 100644 --- a/trunk/drivers/net/ehea/ehea_ethtool.c +++ b/trunk/drivers/net/ehea/ehea_ethtool.c @@ -55,15 +55,20 @@ static int ehea_get_settings(struct net_device *dev, struct ethtool_cmd *cmd) cmd->duplex = -1; } - cmd->supported = (SUPPORTED_10000baseT_Full | SUPPORTED_1000baseT_Full - | SUPPORTED_100baseT_Full | SUPPORTED_100baseT_Half - | SUPPORTED_10baseT_Full | SUPPORTED_10baseT_Half - | SUPPORTED_Autoneg | SUPPORTED_FIBRE); - - cmd->advertising = (ADVERTISED_10000baseT_Full | ADVERTISED_Autoneg - | ADVERTISED_FIBRE); + if (cmd->speed == SPEED_10000) { + cmd->supported = (SUPPORTED_10000baseT_Full | SUPPORTED_FIBRE); + cmd->advertising = (ADVERTISED_10000baseT_Full | ADVERTISED_FIBRE); + cmd->port = PORT_FIBRE; + } else { + cmd->supported = (SUPPORTED_1000baseT_Full | SUPPORTED_100baseT_Full + | SUPPORTED_100baseT_Half | SUPPORTED_10baseT_Full + | SUPPORTED_10baseT_Half | SUPPORTED_Autoneg + | SUPPORTED_TP); + cmd->advertising = (ADVERTISED_1000baseT_Full | ADVERTISED_Autoneg + | ADVERTISED_TP); + cmd->port = PORT_TP; + } - cmd->port = PORT_FIBRE; cmd->autoneg = port->autoneg == 1 ? AUTONEG_ENABLE : AUTONEG_DISABLE; return 0; diff --git a/trunk/drivers/net/ehea/ehea_main.c b/trunk/drivers/net/ehea/ehea_main.c index 53c0f04b1b23..cf79cf759e13 100644 --- a/trunk/drivers/net/ehea/ehea_main.c +++ b/trunk/drivers/net/ehea/ehea_main.c @@ -2688,9 +2688,6 @@ static int ehea_open(struct net_device *dev) netif_start_queue(dev); } - init_waitqueue_head(&port->swqe_avail_wq); - init_waitqueue_head(&port->restart_wq); - mutex_unlock(&port->port_lock); return ret; @@ -3276,6 +3273,9 @@ struct ehea_port *ehea_setup_single_port(struct ehea_adapter *adapter, INIT_WORK(&port->reset_task, ehea_reset_port); + init_waitqueue_head(&port->swqe_avail_wq); + init_waitqueue_head(&port->restart_wq); + ret = register_netdev(dev); if (ret) { pr_err("register_netdev failed. ret=%d\n", ret); diff --git a/trunk/drivers/net/hydra.c b/trunk/drivers/net/hydra.c index c5ef62ceb840..1cd481c04202 100644 --- a/trunk/drivers/net/hydra.c +++ b/trunk/drivers/net/hydra.c @@ -98,15 +98,15 @@ static const struct net_device_ops hydra_netdev_ops = { .ndo_open = hydra_open, .ndo_stop = hydra_close, - .ndo_start_xmit = ei_start_xmit, - .ndo_tx_timeout = ei_tx_timeout, - .ndo_get_stats = ei_get_stats, - .ndo_set_multicast_list = ei_set_multicast_list, + .ndo_start_xmit = __ei_start_xmit, + .ndo_tx_timeout = __ei_tx_timeout, + .ndo_get_stats = __ei_get_stats, + .ndo_set_multicast_list = __ei_set_multicast_list, .ndo_validate_addr = eth_validate_addr, - .ndo_set_mac_address = eth_mac_addr, + .ndo_set_mac_address = eth_mac_addr, .ndo_change_mtu = eth_change_mtu, #ifdef CONFIG_NET_POLL_CONTROLLER - .ndo_poll_controller = ei_poll, + .ndo_poll_controller = __ei_poll, #endif }; @@ -125,7 +125,7 @@ static int __devinit hydra_init(struct zorro_dev *z) 0x10, 0x12, 0x14, 0x16, 0x18, 0x1a, 0x1c, 0x1e, }; - dev = alloc_ei_netdev(); + dev = ____alloc_ei_netdev(0); if (!dev) return -ENOMEM; diff --git a/trunk/drivers/net/ne-h8300.c b/trunk/drivers/net/ne-h8300.c index 30be8c634ebd..7298a34bc795 100644 --- a/trunk/drivers/net/ne-h8300.c +++ b/trunk/drivers/net/ne-h8300.c @@ -167,7 +167,7 @@ static void cleanup_card(struct net_device *dev) #ifndef MODULE struct net_device * __init ne_probe(int unit) { - struct net_device *dev = alloc_ei_netdev(); + struct net_device *dev = ____alloc_ei_netdev(0); int err; if (!dev) @@ -197,15 +197,15 @@ static const struct net_device_ops ne_netdev_ops = { .ndo_open = ne_open, .ndo_stop = ne_close, - .ndo_start_xmit = ei_start_xmit, - .ndo_tx_timeout = ei_tx_timeout, - .ndo_get_stats = ei_get_stats, - .ndo_set_multicast_list = ei_set_multicast_list, + .ndo_start_xmit = __ei_start_xmit, + .ndo_tx_timeout = __ei_tx_timeout, + .ndo_get_stats = __ei_get_stats, + .ndo_set_multicast_list = __ei_set_multicast_list, .ndo_validate_addr = eth_validate_addr, - .ndo_set_mac_address = eth_mac_addr, + .ndo_set_mac_address = eth_mac_addr, .ndo_change_mtu = eth_change_mtu, #ifdef CONFIG_NET_POLL_CONTROLLER - .ndo_poll_controller = ei_poll, + .ndo_poll_controller = __ei_poll, #endif }; @@ -637,7 +637,7 @@ int init_module(void) int err; for (this_dev = 0; this_dev < MAX_NE_CARDS; this_dev++) { - struct net_device *dev = alloc_ei_netdev(); + struct net_device *dev = ____alloc_ei_netdev(0); if (!dev) break; if (io[this_dev]) { diff --git a/trunk/drivers/net/pch_gbe/pch_gbe_main.c b/trunk/drivers/net/pch_gbe/pch_gbe_main.c index 2ef2f9cdefa6..56d049a472da 100644 --- a/trunk/drivers/net/pch_gbe/pch_gbe_main.c +++ b/trunk/drivers/net/pch_gbe/pch_gbe_main.c @@ -34,6 +34,10 @@ const char pch_driver_version[] = DRV_VERSION; #define PCH_GBE_COPYBREAK_DEFAULT 256 #define PCH_GBE_PCI_BAR 1 +/* Macros for ML7223 */ +#define PCI_VENDOR_ID_ROHM 0x10db +#define PCI_DEVICE_ID_ROHM_ML7223_GBE 0x8013 + #define PCH_GBE_TX_WEIGHT 64 #define PCH_GBE_RX_WEIGHT 64 #define PCH_GBE_RX_BUFFER_WRITE 16 @@ -43,8 +47,7 @@ const char pch_driver_version[] = DRV_VERSION; #define PCH_GBE_MAC_RGMII_CTRL_SETTING ( \ PCH_GBE_CHIP_TYPE_INTERNAL | \ - PCH_GBE_RGMII_MODE_RGMII | \ - PCH_GBE_CRS_SEL \ + PCH_GBE_RGMII_MODE_RGMII \ ) /* Ethertype field values */ @@ -1494,12 +1497,11 @@ pch_gbe_clean_rx(struct pch_gbe_adapter *adapter, /* Write meta date of skb */ skb_put(skb, length); skb->protocol = eth_type_trans(skb, netdev); - if ((tcp_ip_status & PCH_GBE_RXD_ACC_STAT_TCPIPOK) == - PCH_GBE_RXD_ACC_STAT_TCPIPOK) { - skb->ip_summed = CHECKSUM_UNNECESSARY; - } else { + if (tcp_ip_status & PCH_GBE_RXD_ACC_STAT_TCPIPOK) skb->ip_summed = CHECKSUM_NONE; - } + else + skb->ip_summed = CHECKSUM_UNNECESSARY; + napi_gro_receive(&adapter->napi, skb); (*work_done)++; pr_debug("Receive skb->ip_summed: %d length: %d\n", @@ -2420,6 +2422,13 @@ static DEFINE_PCI_DEVICE_TABLE(pch_gbe_pcidev_id) = { .class = (PCI_CLASS_NETWORK_ETHERNET << 8), .class_mask = (0xFFFF00) }, + {.vendor = PCI_VENDOR_ID_ROHM, + .device = PCI_DEVICE_ID_ROHM_ML7223_GBE, + .subvendor = PCI_ANY_ID, + .subdevice = PCI_ANY_ID, + .class = (PCI_CLASS_NETWORK_ETHERNET << 8), + .class_mask = (0xFFFF00) + }, /* required last entry */ {0} }; diff --git a/trunk/drivers/net/sfc/mcdi.c b/trunk/drivers/net/sfc/mcdi.c index d98479030ef2..3dd45ed61f0a 100644 --- a/trunk/drivers/net/sfc/mcdi.c +++ b/trunk/drivers/net/sfc/mcdi.c @@ -50,6 +50,20 @@ static inline struct efx_mcdi_iface *efx_mcdi(struct efx_nic *efx) return &nic_data->mcdi; } +static inline void +efx_mcdi_readd(struct efx_nic *efx, efx_dword_t *value, unsigned reg) +{ + struct siena_nic_data *nic_data = efx->nic_data; + value->u32[0] = (__force __le32)__raw_readl(nic_data->mcdi_smem + reg); +} + +static inline void +efx_mcdi_writed(struct efx_nic *efx, const efx_dword_t *value, unsigned reg) +{ + struct siena_nic_data *nic_data = efx->nic_data; + __raw_writel((__force u32)value->u32[0], nic_data->mcdi_smem + reg); +} + void efx_mcdi_init(struct efx_nic *efx) { struct efx_mcdi_iface *mcdi; @@ -70,8 +84,8 @@ static void efx_mcdi_copyin(struct efx_nic *efx, unsigned cmd, const u8 *inbuf, size_t inlen) { struct efx_mcdi_iface *mcdi = efx_mcdi(efx); - unsigned pdu = FR_CZ_MC_TREG_SMEM + MCDI_PDU(efx); - unsigned doorbell = FR_CZ_MC_TREG_SMEM + MCDI_DOORBELL(efx); + unsigned pdu = MCDI_PDU(efx); + unsigned doorbell = MCDI_DOORBELL(efx); unsigned int i; efx_dword_t hdr; u32 xflags, seqno; @@ -92,30 +106,28 @@ static void efx_mcdi_copyin(struct efx_nic *efx, unsigned cmd, MCDI_HEADER_SEQ, seqno, MCDI_HEADER_XFLAGS, xflags); - efx_writed(efx, &hdr, pdu); + efx_mcdi_writed(efx, &hdr, pdu); - for (i = 0; i < inlen; i += 4) { - _efx_writed(efx, *((__le32 *)(inbuf + i)), pdu + 4 + i); - /* use wmb() within loop to inhibit write combining */ - wmb(); - } + for (i = 0; i < inlen; i += 4) + efx_mcdi_writed(efx, (const efx_dword_t *)(inbuf + i), + pdu + 4 + i); /* ring the doorbell with a distinctive value */ - _efx_writed(efx, (__force __le32) 0x45789abc, doorbell); - wmb(); + EFX_POPULATE_DWORD_1(hdr, EFX_DWORD_0, 0x45789abc); + efx_mcdi_writed(efx, &hdr, doorbell); } static void efx_mcdi_copyout(struct efx_nic *efx, u8 *outbuf, size_t outlen) { struct efx_mcdi_iface *mcdi = efx_mcdi(efx); - unsigned int pdu = FR_CZ_MC_TREG_SMEM + MCDI_PDU(efx); + unsigned int pdu = MCDI_PDU(efx); int i; BUG_ON(atomic_read(&mcdi->state) == MCDI_STATE_QUIESCENT); BUG_ON(outlen & 3 || outlen >= 0x100); for (i = 0; i < outlen; i += 4) - *((__le32 *)(outbuf + i)) = _efx_readd(efx, pdu + 4 + i); + efx_mcdi_readd(efx, (efx_dword_t *)(outbuf + i), pdu + 4 + i); } static int efx_mcdi_poll(struct efx_nic *efx) @@ -123,7 +135,7 @@ static int efx_mcdi_poll(struct efx_nic *efx) struct efx_mcdi_iface *mcdi = efx_mcdi(efx); unsigned int time, finish; unsigned int respseq, respcmd, error; - unsigned int pdu = FR_CZ_MC_TREG_SMEM + MCDI_PDU(efx); + unsigned int pdu = MCDI_PDU(efx); unsigned int rc, spins; efx_dword_t reg; @@ -149,8 +161,7 @@ static int efx_mcdi_poll(struct efx_nic *efx) time = get_seconds(); - rmb(); - efx_readd(efx, ®, pdu); + efx_mcdi_readd(efx, ®, pdu); /* All 1's indicates that shared memory is in reset (and is * not a valid header). Wait for it to come out reset before @@ -177,7 +188,7 @@ static int efx_mcdi_poll(struct efx_nic *efx) respseq, mcdi->seqno); rc = EIO; } else if (error) { - efx_readd(efx, ®, pdu + 4); + efx_mcdi_readd(efx, ®, pdu + 4); switch (EFX_DWORD_FIELD(reg, EFX_DWORD_0)) { #define TRANSLATE_ERROR(name) \ case MC_CMD_ERR_ ## name: \ @@ -211,21 +222,21 @@ static int efx_mcdi_poll(struct efx_nic *efx) /* Test and clear MC-rebooted flag for this port/function */ int efx_mcdi_poll_reboot(struct efx_nic *efx) { - unsigned int addr = FR_CZ_MC_TREG_SMEM + MCDI_REBOOT_FLAG(efx); + unsigned int addr = MCDI_REBOOT_FLAG(efx); efx_dword_t reg; uint32_t value; if (efx_nic_rev(efx) < EFX_REV_SIENA_A0) return false; - efx_readd(efx, ®, addr); + efx_mcdi_readd(efx, ®, addr); value = EFX_DWORD_FIELD(reg, EFX_DWORD_0); if (value == 0) return 0; EFX_ZERO_DWORD(reg); - efx_writed(efx, ®, addr); + efx_mcdi_writed(efx, ®, addr); if (value == MC_STATUS_DWORD_ASSERT) return -EINTR; diff --git a/trunk/drivers/net/sfc/nic.h b/trunk/drivers/net/sfc/nic.h index a42db6e35be3..d91701abd331 100644 --- a/trunk/drivers/net/sfc/nic.h +++ b/trunk/drivers/net/sfc/nic.h @@ -143,10 +143,12 @@ static inline struct falcon_board *falcon_board(struct efx_nic *efx) /** * struct siena_nic_data - Siena NIC state * @mcdi: Management-Controller-to-Driver Interface + * @mcdi_smem: MCDI shared memory mapping. The mapping is always uncacheable. * @wol_filter_id: Wake-on-LAN packet filter id */ struct siena_nic_data { struct efx_mcdi_iface mcdi; + void __iomem *mcdi_smem; int wol_filter_id; }; diff --git a/trunk/drivers/net/sfc/siena.c b/trunk/drivers/net/sfc/siena.c index e4dd8986b1fe..837869b71db9 100644 --- a/trunk/drivers/net/sfc/siena.c +++ b/trunk/drivers/net/sfc/siena.c @@ -220,12 +220,26 @@ static int siena_probe_nic(struct efx_nic *efx) efx_reado(efx, ®, FR_AZ_CS_DEBUG); efx->net_dev->dev_id = EFX_OWORD_FIELD(reg, FRF_CZ_CS_PORT_NUM) - 1; + /* Initialise MCDI */ + nic_data->mcdi_smem = ioremap_nocache(efx->membase_phys + + FR_CZ_MC_TREG_SMEM, + FR_CZ_MC_TREG_SMEM_STEP * + FR_CZ_MC_TREG_SMEM_ROWS); + if (!nic_data->mcdi_smem) { + netif_err(efx, probe, efx->net_dev, + "could not map MCDI at %llx+%x\n", + (unsigned long long)efx->membase_phys + + FR_CZ_MC_TREG_SMEM, + FR_CZ_MC_TREG_SMEM_STEP * FR_CZ_MC_TREG_SMEM_ROWS); + rc = -ENOMEM; + goto fail1; + } efx_mcdi_init(efx); /* Recover from a failed assertion before probing */ rc = efx_mcdi_handle_assertion(efx); if (rc) - goto fail1; + goto fail2; /* Let the BMC know that the driver is now in charge of link and * filter settings. We must do this before we reset the NIC */ @@ -280,6 +294,7 @@ static int siena_probe_nic(struct efx_nic *efx) fail3: efx_mcdi_drv_attach(efx, false, NULL); fail2: + iounmap(nic_data->mcdi_smem); fail1: kfree(efx->nic_data); return rc; @@ -359,6 +374,8 @@ static int siena_init_nic(struct efx_nic *efx) static void siena_remove_nic(struct efx_nic *efx) { + struct siena_nic_data *nic_data = efx->nic_data; + efx_nic_free_buffer(efx, &efx->irq_status); siena_reset_hw(efx, RESET_TYPE_ALL); @@ -368,7 +385,8 @@ static void siena_remove_nic(struct efx_nic *efx) efx_mcdi_drv_attach(efx, false, NULL); /* Tear down the private nic state */ - kfree(efx->nic_data); + iounmap(nic_data->mcdi_smem); + kfree(nic_data); efx->nic_data = NULL; } @@ -606,8 +624,7 @@ struct efx_nic_type siena_a0_nic_type = { .default_mac_ops = &efx_mcdi_mac_operations, .revision = EFX_REV_SIENA_A0, - .mem_map_size = (FR_CZ_MC_TREG_SMEM + - FR_CZ_MC_TREG_SMEM_STEP * FR_CZ_MC_TREG_SMEM_ROWS), + .mem_map_size = FR_CZ_MC_TREG_SMEM, /* MC_TREG_SMEM mapped separately */ .txd_ptr_tbl_base = FR_BZ_TX_DESC_PTR_TBL, .rxd_ptr_tbl_base = FR_BZ_RX_DESC_PTR_TBL, .buf_tbl_base = FR_BZ_BUF_FULL_TBL, diff --git a/trunk/drivers/net/slip.c b/trunk/drivers/net/slip.c index 86cbb9ea2f26..8ec1a9a0bb9a 100644 --- a/trunk/drivers/net/slip.c +++ b/trunk/drivers/net/slip.c @@ -853,7 +853,9 @@ static int slip_open(struct tty_struct *tty) /* Done. We have linked the TTY line to a channel. */ rtnl_unlock(); tty->receive_room = 65536; /* We don't flow control */ - return sl->dev->base_addr; + + /* TTY layer expects 0 on success */ + return 0; err_free_bufs: sl_free_bufs(sl); diff --git a/trunk/drivers/net/usb/cdc_ether.c b/trunk/drivers/net/usb/cdc_ether.c index a301479ecc60..c924ea2bce07 100644 --- a/trunk/drivers/net/usb/cdc_ether.c +++ b/trunk/drivers/net/usb/cdc_ether.c @@ -567,7 +567,7 @@ static const struct usb_device_id products [] = { { USB_DEVICE_AND_INTERFACE_INFO(0x1004, 0x61aa, USB_CLASS_COMM, USB_CDC_SUBCLASS_ETHERNET, USB_CDC_PROTO_NONE), - .driver_info = 0, + .driver_info = (unsigned long)&wwan_info, }, /* diff --git a/trunk/drivers/net/usb/ipheth.c b/trunk/drivers/net/usb/ipheth.c index 7d42f9a2c068..81126ff85e05 100644 --- a/trunk/drivers/net/usb/ipheth.c +++ b/trunk/drivers/net/usb/ipheth.c @@ -65,6 +65,7 @@ #define IPHETH_USBINTF_PROTO 1 #define IPHETH_BUF_SIZE 1516 +#define IPHETH_IP_ALIGN 2 /* padding at front of URB */ #define IPHETH_TX_TIMEOUT (5 * HZ) #define IPHETH_INTFNUM 2 @@ -202,18 +203,21 @@ static void ipheth_rcvbulk_callback(struct urb *urb) return; } - len = urb->actual_length; - buf = urb->transfer_buffer; + if (urb->actual_length <= IPHETH_IP_ALIGN) { + dev->net->stats.rx_length_errors++; + return; + } + len = urb->actual_length - IPHETH_IP_ALIGN; + buf = urb->transfer_buffer + IPHETH_IP_ALIGN; - skb = dev_alloc_skb(NET_IP_ALIGN + len); + skb = dev_alloc_skb(len); if (!skb) { err("%s: dev_alloc_skb: -ENOMEM", __func__); dev->net->stats.rx_dropped++; return; } - skb_reserve(skb, NET_IP_ALIGN); - memcpy(skb_put(skb, len), buf + NET_IP_ALIGN, len - NET_IP_ALIGN); + memcpy(skb_put(skb, len), buf, len); skb->dev = dev->net; skb->protocol = eth_type_trans(skb, dev->net); diff --git a/trunk/drivers/net/usb/usbnet.c b/trunk/drivers/net/usb/usbnet.c index 009bba3d753e..9ab439d144ed 100644 --- a/trunk/drivers/net/usb/usbnet.c +++ b/trunk/drivers/net/usb/usbnet.c @@ -645,6 +645,7 @@ int usbnet_stop (struct net_device *net) struct driver_info *info = dev->driver_info; int retval; + clear_bit(EVENT_DEV_OPEN, &dev->flags); netif_stop_queue (net); netif_info(dev, ifdown, dev->net, @@ -1524,9 +1525,12 @@ int usbnet_resume (struct usb_interface *intf) smp_mb(); clear_bit(EVENT_DEV_ASLEEP, &dev->flags); spin_unlock_irq(&dev->txq.lock); - if (!(dev->txq.qlen >= TX_QLEN(dev))) - netif_start_queue(dev->net); - tasklet_schedule (&dev->bh); + + if (test_bit(EVENT_DEV_OPEN, &dev->flags)) { + if (!(dev->txq.qlen >= TX_QLEN(dev))) + netif_start_queue(dev->net); + tasklet_schedule (&dev->bh); + } } return 0; } diff --git a/trunk/drivers/net/vmxnet3/vmxnet3_drv.c b/trunk/drivers/net/vmxnet3/vmxnet3_drv.c index 0d47c3a05307..c16ed961153a 100644 --- a/trunk/drivers/net/vmxnet3/vmxnet3_drv.c +++ b/trunk/drivers/net/vmxnet3/vmxnet3_drv.c @@ -178,6 +178,7 @@ static void vmxnet3_process_events(struct vmxnet3_adapter *adapter) { int i; + unsigned long flags; u32 events = le32_to_cpu(adapter->shared->ecr); if (!events) return; @@ -190,10 +191,10 @@ vmxnet3_process_events(struct vmxnet3_adapter *adapter) /* Check if there is an error on xmit/recv queues */ if (events & (VMXNET3_ECR_TQERR | VMXNET3_ECR_RQERR)) { - spin_lock(&adapter->cmd_lock); + spin_lock_irqsave(&adapter->cmd_lock, flags); VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_CMD, VMXNET3_CMD_GET_QUEUE_STATUS); - spin_unlock(&adapter->cmd_lock); + spin_unlock_irqrestore(&adapter->cmd_lock, flags); for (i = 0; i < adapter->num_tx_queues; i++) if (adapter->tqd_start[i].status.stopped) @@ -2733,13 +2734,14 @@ static void vmxnet3_alloc_intr_resources(struct vmxnet3_adapter *adapter) { u32 cfg; + unsigned long flags; /* intr settings */ - spin_lock(&adapter->cmd_lock); + spin_lock_irqsave(&adapter->cmd_lock, flags); VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_CMD, VMXNET3_CMD_GET_CONF_INTR); cfg = VMXNET3_READ_BAR1_REG(adapter, VMXNET3_REG_CMD); - spin_unlock(&adapter->cmd_lock); + spin_unlock_irqrestore(&adapter->cmd_lock, flags); adapter->intr.type = cfg & 0x3; adapter->intr.mask_mode = (cfg >> 2) & 0x3; diff --git a/trunk/drivers/net/wireless/ath/ath9k/main.c b/trunk/drivers/net/wireless/ath/ath9k/main.c index 17d04ff8d678..1482fa650833 100644 --- a/trunk/drivers/net/wireless/ath/ath9k/main.c +++ b/trunk/drivers/net/wireless/ath/ath9k/main.c @@ -2141,6 +2141,8 @@ static void ath9k_set_coverage_class(struct ieee80211_hw *hw, u8 coverage_class) static void ath9k_flush(struct ieee80211_hw *hw, bool drop) { struct ath_softc *sc = hw->priv; + struct ath_hw *ah = sc->sc_ah; + struct ath_common *common = ath9k_hw_common(ah); int timeout = 200; /* ms */ int i, j; @@ -2149,6 +2151,12 @@ static void ath9k_flush(struct ieee80211_hw *hw, bool drop) cancel_delayed_work_sync(&sc->tx_complete_work); + if (sc->sc_flags & SC_OP_INVALID) { + ath_dbg(common, ATH_DBG_ANY, "Device not present\n"); + mutex_unlock(&sc->mutex); + return; + } + if (drop) timeout = 1; diff --git a/trunk/drivers/net/wireless/iwlegacy/iwl-core.c b/trunk/drivers/net/wireless/iwlegacy/iwl-core.c index c1511b14b239..42db0fc8b921 100644 --- a/trunk/drivers/net/wireless/iwlegacy/iwl-core.c +++ b/trunk/drivers/net/wireless/iwlegacy/iwl-core.c @@ -2155,6 +2155,13 @@ int iwl_legacy_mac_config(struct ieee80211_hw *hw, u32 changed) goto set_ch_out; } + if (priv->iw_mode == NL80211_IFTYPE_ADHOC && + !iwl_legacy_is_channel_ibss(ch_info)) { + IWL_DEBUG_MAC80211(priv, "leave - not IBSS channel\n"); + ret = -EINVAL; + goto set_ch_out; + } + spin_lock_irqsave(&priv->lock, flags); for_each_context(priv, ctx) { diff --git a/trunk/drivers/net/wireless/iwlegacy/iwl-dev.h b/trunk/drivers/net/wireless/iwlegacy/iwl-dev.h index 9ee849d669f3..f43ac1eb9014 100644 --- a/trunk/drivers/net/wireless/iwlegacy/iwl-dev.h +++ b/trunk/drivers/net/wireless/iwlegacy/iwl-dev.h @@ -1411,6 +1411,12 @@ iwl_legacy_is_channel_passive(const struct iwl_channel_info *ch) return (!(ch->flags & EEPROM_CHANNEL_ACTIVE)) ? 1 : 0; } +static inline int +iwl_legacy_is_channel_ibss(const struct iwl_channel_info *ch) +{ + return (ch->flags & EEPROM_CHANNEL_IBSS) ? 1 : 0; +} + static inline void __iwl_legacy_free_pages(struct iwl_priv *priv, struct page *page) { diff --git a/trunk/drivers/net/wireless/libertas/cmd.c b/trunk/drivers/net/wireless/libertas/cmd.c index 7e8a658b7670..f3ac62431a30 100644 --- a/trunk/drivers/net/wireless/libertas/cmd.c +++ b/trunk/drivers/net/wireless/libertas/cmd.c @@ -1339,8 +1339,8 @@ int lbs_execute_next_command(struct lbs_private *priv) cpu_to_le16(PS_MODE_ACTION_EXIT_PS)) { lbs_deb_host( "EXEC_NEXT_CMD: ignore ENTER_PS cmd\n"); - list_del(&cmdnode->list); spin_lock_irqsave(&priv->driver_lock, flags); + list_del(&cmdnode->list); lbs_complete_command(priv, cmdnode, 0); spin_unlock_irqrestore(&priv->driver_lock, flags); @@ -1352,8 +1352,8 @@ int lbs_execute_next_command(struct lbs_private *priv) (priv->psstate == PS_STATE_PRE_SLEEP)) { lbs_deb_host( "EXEC_NEXT_CMD: ignore EXIT_PS cmd in sleep\n"); - list_del(&cmdnode->list); spin_lock_irqsave(&priv->driver_lock, flags); + list_del(&cmdnode->list); lbs_complete_command(priv, cmdnode, 0); spin_unlock_irqrestore(&priv->driver_lock, flags); priv->needtowakeup = 1; @@ -1366,7 +1366,9 @@ int lbs_execute_next_command(struct lbs_private *priv) "EXEC_NEXT_CMD: sending EXIT_PS\n"); } } + spin_lock_irqsave(&priv->driver_lock, flags); list_del(&cmdnode->list); + spin_unlock_irqrestore(&priv->driver_lock, flags); lbs_deb_host("EXEC_NEXT_CMD: sending command 0x%04x\n", le16_to_cpu(cmd->command)); lbs_submit_command(priv, cmdnode); diff --git a/trunk/drivers/net/zorro8390.c b/trunk/drivers/net/zorro8390.c index b78a38d9172a..8c7c522a056a 100644 --- a/trunk/drivers/net/zorro8390.c +++ b/trunk/drivers/net/zorro8390.c @@ -126,7 +126,7 @@ static int __devinit zorro8390_init_one(struct zorro_dev *z, board = z->resource.start; ioaddr = board+cards[i].offset; - dev = alloc_ei_netdev(); + dev = ____alloc_ei_netdev(0); if (!dev) return -ENOMEM; if (!request_mem_region(ioaddr, NE_IO_EXTENT*2, DRV_NAME)) { @@ -146,15 +146,15 @@ static int __devinit zorro8390_init_one(struct zorro_dev *z, static const struct net_device_ops zorro8390_netdev_ops = { .ndo_open = zorro8390_open, .ndo_stop = zorro8390_close, - .ndo_start_xmit = ei_start_xmit, - .ndo_tx_timeout = ei_tx_timeout, - .ndo_get_stats = ei_get_stats, - .ndo_set_multicast_list = ei_set_multicast_list, + .ndo_start_xmit = __ei_start_xmit, + .ndo_tx_timeout = __ei_tx_timeout, + .ndo_get_stats = __ei_get_stats, + .ndo_set_multicast_list = __ei_set_multicast_list, .ndo_validate_addr = eth_validate_addr, .ndo_set_mac_address = eth_mac_addr, .ndo_change_mtu = eth_change_mtu, #ifdef CONFIG_NET_POLL_CONTROLLER - .ndo_poll_controller = ei_poll, + .ndo_poll_controller = __ei_poll, #endif }; diff --git a/trunk/drivers/rtc/rtc-s3c.c b/trunk/drivers/rtc/rtc-s3c.c index b3466c491cd3..16512ecae31a 100644 --- a/trunk/drivers/rtc/rtc-s3c.c +++ b/trunk/drivers/rtc/rtc-s3c.c @@ -46,6 +46,7 @@ static struct clk *rtc_clk; static void __iomem *s3c_rtc_base; static int s3c_rtc_alarmno = NO_IRQ; static int s3c_rtc_tickno = NO_IRQ; +static bool wake_en; static enum s3c_cpu_type s3c_rtc_cpu_type; static DEFINE_SPINLOCK(s3c_rtc_pie_lock); @@ -562,8 +563,12 @@ static int s3c_rtc_suspend(struct platform_device *pdev, pm_message_t state) } s3c_rtc_enable(pdev, 0); - if (device_may_wakeup(&pdev->dev)) - enable_irq_wake(s3c_rtc_alarmno); + if (device_may_wakeup(&pdev->dev) && !wake_en) { + if (enable_irq_wake(s3c_rtc_alarmno) == 0) + wake_en = true; + else + dev_err(&pdev->dev, "enable_irq_wake failed\n"); + } return 0; } @@ -579,8 +584,10 @@ static int s3c_rtc_resume(struct platform_device *pdev) writew(tmp | ticnt_en_save, s3c_rtc_base + S3C2410_RTCCON); } - if (device_may_wakeup(&pdev->dev)) + if (device_may_wakeup(&pdev->dev) && wake_en) { disable_irq_wake(s3c_rtc_alarmno); + wake_en = false; + } return 0; } diff --git a/trunk/drivers/s390/block/dasd.c b/trunk/drivers/s390/block/dasd.c index 475e603fc584..86b6f1cc1b10 100644 --- a/trunk/drivers/s390/block/dasd.c +++ b/trunk/drivers/s390/block/dasd.c @@ -1742,11 +1742,20 @@ int dasd_sleep_on_interruptible(struct dasd_ccw_req *cqr) static inline int _dasd_term_running_cqr(struct dasd_device *device) { struct dasd_ccw_req *cqr; + int rc; if (list_empty(&device->ccw_queue)) return 0; cqr = list_entry(device->ccw_queue.next, struct dasd_ccw_req, devlist); - return device->discipline->term_IO(cqr); + rc = device->discipline->term_IO(cqr); + if (!rc) + /* + * CQR terminated because a more important request is pending. + * Undo decreasing of retry counter because this is + * not an error case. + */ + cqr->retries++; + return rc; } int dasd_sleep_on_immediatly(struct dasd_ccw_req *cqr) diff --git a/trunk/drivers/s390/char/sclp_cmd.c b/trunk/drivers/s390/char/sclp_cmd.c index 4b60ede07f0e..be55fb2b1b1c 100644 --- a/trunk/drivers/s390/char/sclp_cmd.c +++ b/trunk/drivers/s390/char/sclp_cmd.c @@ -518,6 +518,8 @@ static void __init insert_increment(u16 rn, int standby, int assigned) return; new_incr->rn = rn; new_incr->standby = standby; + if (!standby) + new_incr->usecount = 1; last_rn = 0; prev = &sclp_mem_list; list_for_each_entry(incr, &sclp_mem_list, list) { diff --git a/trunk/drivers/video/acornfb.c b/trunk/drivers/video/acornfb.c index 82acb8dc4aa1..6183a57eb69d 100644 --- a/trunk/drivers/video/acornfb.c +++ b/trunk/drivers/video/acornfb.c @@ -66,7 +66,7 @@ * have. Allow 1% either way on the nominal for TVs. */ #define NR_MONTYPES 6 -static struct fb_monspecs monspecs[NR_MONTYPES] __initdata = { +static struct fb_monspecs monspecs[NR_MONTYPES] __devinitdata = { { /* TV */ .hfmin = 15469, .hfmax = 15781, @@ -873,7 +873,7 @@ static struct fb_ops acornfb_ops = { /* * Everything after here is initialisation!!! */ -static struct fb_videomode modedb[] __initdata = { +static struct fb_videomode modedb[] __devinitdata = { { /* 320x256 @ 50Hz */ NULL, 50, 320, 256, 125000, 92, 62, 35, 19, 38, 2, FB_SYNC_COMP_HIGH_ACT, @@ -925,8 +925,7 @@ static struct fb_videomode modedb[] __initdata = { } }; -static struct fb_videomode __initdata -acornfb_default_mode = { +static struct fb_videomode acornfb_default_mode __devinitdata = { .name = NULL, .refresh = 60, .xres = 640, @@ -942,7 +941,7 @@ acornfb_default_mode = { .vmode = FB_VMODE_NONINTERLACED }; -static void __init acornfb_init_fbinfo(void) +static void __devinit acornfb_init_fbinfo(void) { static int first = 1; @@ -1018,8 +1017,7 @@ static void __init acornfb_init_fbinfo(void) * size can optionally be followed by 'M' or 'K' for * MB or KB respectively. */ -static void __init -acornfb_parse_mon(char *opt) +static void __devinit acornfb_parse_mon(char *opt) { char *p = opt; @@ -1066,8 +1064,7 @@ acornfb_parse_mon(char *opt) current_par.montype = -1; } -static void __init -acornfb_parse_montype(char *opt) +static void __devinit acornfb_parse_montype(char *opt) { current_par.montype = -2; @@ -1108,8 +1105,7 @@ acornfb_parse_montype(char *opt) } } -static void __init -acornfb_parse_dram(char *opt) +static void __devinit acornfb_parse_dram(char *opt) { unsigned int size; @@ -1134,15 +1130,14 @@ acornfb_parse_dram(char *opt) static struct options { char *name; void (*parse)(char *opt); -} opt_table[] __initdata = { +} opt_table[] __devinitdata = { { "mon", acornfb_parse_mon }, { "montype", acornfb_parse_montype }, { "dram", acornfb_parse_dram }, { NULL, NULL } }; -int __init -acornfb_setup(char *options) +static int __devinit acornfb_setup(char *options) { struct options *optp; char *opt; @@ -1179,8 +1174,7 @@ acornfb_setup(char *options) * Detect type of monitor connected * For now, we just assume SVGA */ -static int __init -acornfb_detect_monitortype(void) +static int __devinit acornfb_detect_monitortype(void) { return 4; } diff --git a/trunk/drivers/video/fbmem.c b/trunk/drivers/video/fbmem.c index e0c2284924b6..5aac00eb1830 100644 --- a/trunk/drivers/video/fbmem.c +++ b/trunk/drivers/video/fbmem.c @@ -42,9 +42,34 @@ #define FBPIXMAPSIZE (1024 * 8) +static DEFINE_MUTEX(registration_lock); struct fb_info *registered_fb[FB_MAX] __read_mostly; int num_registered_fb __read_mostly; +static struct fb_info *get_fb_info(unsigned int idx) +{ + struct fb_info *fb_info; + + if (idx >= FB_MAX) + return ERR_PTR(-ENODEV); + + mutex_lock(®istration_lock); + fb_info = registered_fb[idx]; + if (fb_info) + atomic_inc(&fb_info->count); + mutex_unlock(®istration_lock); + + return fb_info; +} + +static void put_fb_info(struct fb_info *fb_info) +{ + if (!atomic_dec_and_test(&fb_info->count)) + return; + if (fb_info->fbops->fb_destroy) + fb_info->fbops->fb_destroy(fb_info); +} + int lock_fb_info(struct fb_info *info) { mutex_lock(&info->lock); @@ -647,6 +672,7 @@ int fb_show_logo(struct fb_info *info, int rotate) { return 0; } static void *fb_seq_start(struct seq_file *m, loff_t *pos) { + mutex_lock(®istration_lock); return (*pos < FB_MAX) ? pos : NULL; } @@ -658,6 +684,7 @@ static void *fb_seq_next(struct seq_file *m, void *v, loff_t *pos) static void fb_seq_stop(struct seq_file *m, void *v) { + mutex_unlock(®istration_lock); } static int fb_seq_show(struct seq_file *m, void *v) @@ -690,13 +717,30 @@ static const struct file_operations fb_proc_fops = { .release = seq_release, }; -static ssize_t -fb_read(struct file *file, char __user *buf, size_t count, loff_t *ppos) +/* + * We hold a reference to the fb_info in file->private_data, + * but if the current registered fb has changed, we don't + * actually want to use it. + * + * So look up the fb_info using the inode minor number, + * and just verify it against the reference we have. + */ +static struct fb_info *file_fb_info(struct file *file) { - unsigned long p = *ppos; struct inode *inode = file->f_path.dentry->d_inode; int fbidx = iminor(inode); struct fb_info *info = registered_fb[fbidx]; + + if (info != file->private_data) + info = NULL; + return info; +} + +static ssize_t +fb_read(struct file *file, char __user *buf, size_t count, loff_t *ppos) +{ + unsigned long p = *ppos; + struct fb_info *info = file_fb_info(file); u8 *buffer, *dst; u8 __iomem *src; int c, cnt = 0, err = 0; @@ -761,9 +805,7 @@ static ssize_t fb_write(struct file *file, const char __user *buf, size_t count, loff_t *ppos) { unsigned long p = *ppos; - struct inode *inode = file->f_path.dentry->d_inode; - int fbidx = iminor(inode); - struct fb_info *info = registered_fb[fbidx]; + struct fb_info *info = file_fb_info(file); u8 *buffer, *src; u8 __iomem *dst; int c, cnt = 0, err = 0; @@ -1141,10 +1183,10 @@ static long do_fb_ioctl(struct fb_info *info, unsigned int cmd, static long fb_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { - struct inode *inode = file->f_path.dentry->d_inode; - int fbidx = iminor(inode); - struct fb_info *info = registered_fb[fbidx]; + struct fb_info *info = file_fb_info(file); + if (!info) + return -ENODEV; return do_fb_ioctl(info, cmd, arg); } @@ -1265,12 +1307,13 @@ static int fb_get_fscreeninfo(struct fb_info *info, unsigned int cmd, static long fb_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { - struct inode *inode = file->f_path.dentry->d_inode; - int fbidx = iminor(inode); - struct fb_info *info = registered_fb[fbidx]; - struct fb_ops *fb = info->fbops; + struct fb_info *info = file_fb_info(file); + struct fb_ops *fb; long ret = -ENOIOCTLCMD; + if (!info) + return -ENODEV; + fb = info->fbops; switch(cmd) { case FBIOGET_VSCREENINFO: case FBIOPUT_VSCREENINFO: @@ -1303,16 +1346,18 @@ static long fb_compat_ioctl(struct file *file, unsigned int cmd, static int fb_mmap(struct file *file, struct vm_area_struct * vma) { - int fbidx = iminor(file->f_path.dentry->d_inode); - struct fb_info *info = registered_fb[fbidx]; - struct fb_ops *fb = info->fbops; + struct fb_info *info = file_fb_info(file); + struct fb_ops *fb; unsigned long off; unsigned long start; u32 len; + if (!info) + return -ENODEV; if (vma->vm_pgoff > (~0UL >> PAGE_SHIFT)) return -EINVAL; off = vma->vm_pgoff << PAGE_SHIFT; + fb = info->fbops; if (!fb) return -ENODEV; mutex_lock(&info->mm_lock); @@ -1361,14 +1406,16 @@ __releases(&info->lock) struct fb_info *info; int res = 0; - if (fbidx >= FB_MAX) - return -ENODEV; - info = registered_fb[fbidx]; - if (!info) + info = get_fb_info(fbidx); + if (!info) { request_module("fb%d", fbidx); - info = registered_fb[fbidx]; - if (!info) - return -ENODEV; + info = get_fb_info(fbidx); + if (!info) + return -ENODEV; + } + if (IS_ERR(info)) + return PTR_ERR(info); + mutex_lock(&info->lock); if (!try_module_get(info->fbops->owner)) { res = -ENODEV; @@ -1386,6 +1433,8 @@ __releases(&info->lock) #endif out: mutex_unlock(&info->lock); + if (res) + put_fb_info(info); return res; } @@ -1401,6 +1450,7 @@ __releases(&info->lock) info->fbops->fb_release(info,1); module_put(info->fbops->owner); mutex_unlock(&info->lock); + put_fb_info(info); return 0; } @@ -1487,8 +1537,10 @@ static bool fb_do_apertures_overlap(struct apertures_struct *gena, return false; } +static int do_unregister_framebuffer(struct fb_info *fb_info); + #define VGA_FB_PHYS 0xA0000 -void remove_conflicting_framebuffers(struct apertures_struct *a, +static void do_remove_conflicting_framebuffers(struct apertures_struct *a, const char *name, bool primary) { int i; @@ -1510,43 +1562,32 @@ void remove_conflicting_framebuffers(struct apertures_struct *a, printk(KERN_INFO "fb: conflicting fb hw usage " "%s vs %s - removing generic driver\n", name, registered_fb[i]->fix.id); - unregister_framebuffer(registered_fb[i]); + do_unregister_framebuffer(registered_fb[i]); } } } -EXPORT_SYMBOL(remove_conflicting_framebuffers); -/** - * register_framebuffer - registers a frame buffer device - * @fb_info: frame buffer info structure - * - * Registers a frame buffer device @fb_info. - * - * Returns negative errno on error, or zero for success. - * - */ - -int -register_framebuffer(struct fb_info *fb_info) +static int do_register_framebuffer(struct fb_info *fb_info) { int i; struct fb_event event; struct fb_videomode mode; - if (num_registered_fb == FB_MAX) - return -ENXIO; - if (fb_check_foreignness(fb_info)) return -ENOSYS; - remove_conflicting_framebuffers(fb_info->apertures, fb_info->fix.id, + do_remove_conflicting_framebuffers(fb_info->apertures, fb_info->fix.id, fb_is_primary_device(fb_info)); + if (num_registered_fb == FB_MAX) + return -ENXIO; + num_registered_fb++; for (i = 0 ; i < FB_MAX; i++) if (!registered_fb[i]) break; fb_info->node = i; + atomic_set(&fb_info->count, 1); mutex_init(&fb_info->lock); mutex_init(&fb_info->mm_lock); @@ -1592,36 +1633,14 @@ register_framebuffer(struct fb_info *fb_info) return 0; } - -/** - * unregister_framebuffer - releases a frame buffer device - * @fb_info: frame buffer info structure - * - * Unregisters a frame buffer device @fb_info. - * - * Returns negative errno on error, or zero for success. - * - * This function will also notify the framebuffer console - * to release the driver. - * - * This is meant to be called within a driver's module_exit() - * function. If this is called outside module_exit(), ensure - * that the driver implements fb_open() and fb_release() to - * check that no processes are using the device. - */ - -int -unregister_framebuffer(struct fb_info *fb_info) +static int do_unregister_framebuffer(struct fb_info *fb_info) { struct fb_event event; int i, ret = 0; i = fb_info->node; - if (!registered_fb[i]) { - ret = -EINVAL; - goto done; - } - + if (i < 0 || i >= FB_MAX || registered_fb[i] != fb_info) + return -EINVAL; if (!lock_fb_info(fb_info)) return -ENODEV; @@ -1629,16 +1648,14 @@ unregister_framebuffer(struct fb_info *fb_info) ret = fb_notifier_call_chain(FB_EVENT_FB_UNBIND, &event); unlock_fb_info(fb_info); - if (ret) { - ret = -EINVAL; - goto done; - } + if (ret) + return -EINVAL; if (fb_info->pixmap.addr && (fb_info->pixmap.flags & FB_PIXMAP_DEFAULT)) kfree(fb_info->pixmap.addr); fb_destroy_modelist(&fb_info->modelist); - registered_fb[i]=NULL; + registered_fb[i] = NULL; num_registered_fb--; fb_cleanup_device(fb_info); device_destroy(fb_class, MKDEV(FB_MAJOR, i)); @@ -1646,9 +1663,65 @@ unregister_framebuffer(struct fb_info *fb_info) fb_notifier_call_chain(FB_EVENT_FB_UNREGISTERED, &event); /* this may free fb info */ - if (fb_info->fbops->fb_destroy) - fb_info->fbops->fb_destroy(fb_info); -done: + put_fb_info(fb_info); + return 0; +} + +void remove_conflicting_framebuffers(struct apertures_struct *a, + const char *name, bool primary) +{ + mutex_lock(®istration_lock); + do_remove_conflicting_framebuffers(a, name, primary); + mutex_unlock(®istration_lock); +} +EXPORT_SYMBOL(remove_conflicting_framebuffers); + +/** + * register_framebuffer - registers a frame buffer device + * @fb_info: frame buffer info structure + * + * Registers a frame buffer device @fb_info. + * + * Returns negative errno on error, or zero for success. + * + */ +int +register_framebuffer(struct fb_info *fb_info) +{ + int ret; + + mutex_lock(®istration_lock); + ret = do_register_framebuffer(fb_info); + mutex_unlock(®istration_lock); + + return ret; +} + +/** + * unregister_framebuffer - releases a frame buffer device + * @fb_info: frame buffer info structure + * + * Unregisters a frame buffer device @fb_info. + * + * Returns negative errno on error, or zero for success. + * + * This function will also notify the framebuffer console + * to release the driver. + * + * This is meant to be called within a driver's module_exit() + * function. If this is called outside module_exit(), ensure + * that the driver implements fb_open() and fb_release() to + * check that no processes are using the device. + */ +int +unregister_framebuffer(struct fb_info *fb_info) +{ + int ret; + + mutex_lock(®istration_lock); + ret = do_unregister_framebuffer(fb_info); + mutex_unlock(®istration_lock); + return ret; } diff --git a/trunk/fs/ceph/caps.c b/trunk/fs/ceph/caps.c index 9fa08662a88d..2a5404c1c42f 100644 --- a/trunk/fs/ceph/caps.c +++ b/trunk/fs/ceph/caps.c @@ -819,7 +819,7 @@ int __ceph_caps_used(struct ceph_inode_info *ci) used |= CEPH_CAP_FILE_CACHE; if (ci->i_wr_ref) used |= CEPH_CAP_FILE_WR; - if (ci->i_wrbuffer_ref) + if (ci->i_wb_ref || ci->i_wrbuffer_ref) used |= CEPH_CAP_FILE_BUFFER; return used; } @@ -1990,11 +1990,11 @@ static void __take_cap_refs(struct ceph_inode_info *ci, int got) if (got & CEPH_CAP_FILE_WR) ci->i_wr_ref++; if (got & CEPH_CAP_FILE_BUFFER) { - if (ci->i_wrbuffer_ref == 0) + if (ci->i_wb_ref == 0) ihold(&ci->vfs_inode); - ci->i_wrbuffer_ref++; - dout("__take_cap_refs %p wrbuffer %d -> %d (?)\n", - &ci->vfs_inode, ci->i_wrbuffer_ref-1, ci->i_wrbuffer_ref); + ci->i_wb_ref++; + dout("__take_cap_refs %p wb %d -> %d (?)\n", + &ci->vfs_inode, ci->i_wb_ref-1, ci->i_wb_ref); } } @@ -2169,12 +2169,12 @@ void ceph_put_cap_refs(struct ceph_inode_info *ci, int had) if (--ci->i_rdcache_ref == 0) last++; if (had & CEPH_CAP_FILE_BUFFER) { - if (--ci->i_wrbuffer_ref == 0) { + if (--ci->i_wb_ref == 0) { last++; put++; } - dout("put_cap_refs %p wrbuffer %d -> %d (?)\n", - inode, ci->i_wrbuffer_ref+1, ci->i_wrbuffer_ref); + dout("put_cap_refs %p wb %d -> %d (?)\n", + inode, ci->i_wb_ref+1, ci->i_wb_ref); } if (had & CEPH_CAP_FILE_WR) if (--ci->i_wr_ref == 0) { diff --git a/trunk/fs/ceph/inode.c b/trunk/fs/ceph/inode.c index 03d6dafda61f..70b6a4839c38 100644 --- a/trunk/fs/ceph/inode.c +++ b/trunk/fs/ceph/inode.c @@ -355,6 +355,7 @@ struct inode *ceph_alloc_inode(struct super_block *sb) ci->i_rd_ref = 0; ci->i_rdcache_ref = 0; ci->i_wr_ref = 0; + ci->i_wb_ref = 0; ci->i_wrbuffer_ref = 0; ci->i_wrbuffer_ref_head = 0; ci->i_shared_gen = 0; diff --git a/trunk/fs/ceph/mds_client.c b/trunk/fs/ceph/mds_client.c index f60b07b0feb0..d0fae4ce9ba5 100644 --- a/trunk/fs/ceph/mds_client.c +++ b/trunk/fs/ceph/mds_client.c @@ -3304,8 +3304,8 @@ static void con_put(struct ceph_connection *con) { struct ceph_mds_session *s = con->private; + dout("mdsc con_put %p (%d)\n", s, atomic_read(&s->s_ref) - 1); ceph_put_mds_session(s); - dout("mdsc con_put %p (%d)\n", s, atomic_read(&s->s_ref)); } /* diff --git a/trunk/fs/ceph/snap.c b/trunk/fs/ceph/snap.c index e86ec1155f8f..24067d68a554 100644 --- a/trunk/fs/ceph/snap.c +++ b/trunk/fs/ceph/snap.c @@ -206,7 +206,7 @@ void ceph_put_snap_realm(struct ceph_mds_client *mdsc, up_write(&mdsc->snap_rwsem); } else { spin_lock(&mdsc->snap_empty_lock); - list_add(&mdsc->snap_empty, &realm->empty_item); + list_add(&realm->empty_item, &mdsc->snap_empty); spin_unlock(&mdsc->snap_empty_lock); } } diff --git a/trunk/fs/ceph/super.h b/trunk/fs/ceph/super.h index b1f1b8bb1271..f5cabefa98dc 100644 --- a/trunk/fs/ceph/super.h +++ b/trunk/fs/ceph/super.h @@ -293,7 +293,7 @@ struct ceph_inode_info { /* held references to caps */ int i_pin_ref; - int i_rd_ref, i_rdcache_ref, i_wr_ref; + int i_rd_ref, i_rdcache_ref, i_wr_ref, i_wb_ref; int i_wrbuffer_ref, i_wrbuffer_ref_head; u32 i_shared_gen; /* increment each time we get FILE_SHARED */ u32 i_rdcache_gen; /* incremented each time we get FILE_CACHE. */ diff --git a/trunk/fs/fuse/dir.c b/trunk/fs/fuse/dir.c index c6ba49bd95b3..b32eb29a4e6f 100644 --- a/trunk/fs/fuse/dir.c +++ b/trunk/fs/fuse/dir.c @@ -174,7 +174,7 @@ static int fuse_dentry_revalidate(struct dentry *entry, struct nameidata *nd) if (!inode) return 0; - if (nd->flags & LOOKUP_RCU) + if (nd && (nd->flags & LOOKUP_RCU)) return -ECHILD; fc = get_fuse_conn(inode); diff --git a/trunk/fs/namei.c b/trunk/fs/namei.c index 54fc993e3027..e3c4f112ebf7 100644 --- a/trunk/fs/namei.c +++ b/trunk/fs/namei.c @@ -179,7 +179,7 @@ EXPORT_SYMBOL(putname); static int acl_permission_check(struct inode *inode, int mask, unsigned int flags, int (*check_acl)(struct inode *inode, int mask, unsigned int flags)) { - umode_t mode = inode->i_mode; + unsigned int mode = inode->i_mode; mask &= MAY_READ | MAY_WRITE | MAY_EXEC; diff --git a/trunk/fs/nfs/nfs4filelayout.c b/trunk/fs/nfs/nfs4filelayout.c index 6f8192f4cfc7..be79dc9f386d 100644 --- a/trunk/fs/nfs/nfs4filelayout.c +++ b/trunk/fs/nfs/nfs4filelayout.c @@ -117,6 +117,8 @@ static int filelayout_async_handle_error(struct rpc_task *task, case -EKEYEXPIRED: rpc_delay(task, FILELAYOUT_POLL_RETRY_MAX); break; + case -NFS4ERR_RETRY_UNCACHED_REP: + break; default: dprintk("%s DS error. Retry through MDS %d\n", __func__, task->tk_status); @@ -416,7 +418,8 @@ static int filelayout_check_layout(struct pnfs_layout_hdr *lo, struct nfs4_filelayout_segment *fl, struct nfs4_layoutget_res *lgr, - struct nfs4_deviceid *id) + struct nfs4_deviceid *id, + gfp_t gfp_flags) { struct nfs4_file_layout_dsaddr *dsaddr; int status = -EINVAL; @@ -439,7 +442,7 @@ filelayout_check_layout(struct pnfs_layout_hdr *lo, /* find and reference the deviceid */ dsaddr = nfs4_fl_find_get_deviceid(id); if (dsaddr == NULL) { - dsaddr = get_device_info(lo->plh_inode, id); + dsaddr = get_device_info(lo->plh_inode, id, gfp_flags); if (dsaddr == NULL) goto out; } @@ -500,7 +503,8 @@ static int filelayout_decode_layout(struct pnfs_layout_hdr *flo, struct nfs4_filelayout_segment *fl, struct nfs4_layoutget_res *lgr, - struct nfs4_deviceid *id) + struct nfs4_deviceid *id, + gfp_t gfp_flags) { struct xdr_stream stream; struct xdr_buf buf = { @@ -516,7 +520,7 @@ filelayout_decode_layout(struct pnfs_layout_hdr *flo, dprintk("%s: set_layout_map Begin\n", __func__); - scratch = alloc_page(GFP_KERNEL); + scratch = alloc_page(gfp_flags); if (!scratch) return -ENOMEM; @@ -554,13 +558,13 @@ filelayout_decode_layout(struct pnfs_layout_hdr *flo, goto out_err; fl->fh_array = kzalloc(fl->num_fh * sizeof(struct nfs_fh *), - GFP_KERNEL); + gfp_flags); if (!fl->fh_array) goto out_err; for (i = 0; i < fl->num_fh; i++) { /* Do we want to use a mempool here? */ - fl->fh_array[i] = kmalloc(sizeof(struct nfs_fh), GFP_KERNEL); + fl->fh_array[i] = kmalloc(sizeof(struct nfs_fh), gfp_flags); if (!fl->fh_array[i]) goto out_err_free; @@ -605,19 +609,20 @@ filelayout_free_lseg(struct pnfs_layout_segment *lseg) static struct pnfs_layout_segment * filelayout_alloc_lseg(struct pnfs_layout_hdr *layoutid, - struct nfs4_layoutget_res *lgr) + struct nfs4_layoutget_res *lgr, + gfp_t gfp_flags) { struct nfs4_filelayout_segment *fl; int rc; struct nfs4_deviceid id; dprintk("--> %s\n", __func__); - fl = kzalloc(sizeof(*fl), GFP_KERNEL); + fl = kzalloc(sizeof(*fl), gfp_flags); if (!fl) return NULL; - rc = filelayout_decode_layout(layoutid, fl, lgr, &id); - if (rc != 0 || filelayout_check_layout(layoutid, fl, lgr, &id)) { + rc = filelayout_decode_layout(layoutid, fl, lgr, &id, gfp_flags); + if (rc != 0 || filelayout_check_layout(layoutid, fl, lgr, &id, gfp_flags)) { _filelayout_free_lseg(fl); return NULL; } @@ -633,7 +638,7 @@ filelayout_alloc_lseg(struct pnfs_layout_hdr *layoutid, int size = (fl->stripe_type == STRIPE_SPARSE) ? fl->dsaddr->ds_num : fl->dsaddr->stripe_count; - fl->commit_buckets = kcalloc(size, sizeof(struct list_head), GFP_KERNEL); + fl->commit_buckets = kcalloc(size, sizeof(struct list_head), gfp_flags); if (!fl->commit_buckets) { filelayout_free_lseg(&fl->generic_hdr); return NULL; diff --git a/trunk/fs/nfs/nfs4filelayout.h b/trunk/fs/nfs/nfs4filelayout.h index 7c44579f5832..2b461d77b43a 100644 --- a/trunk/fs/nfs/nfs4filelayout.h +++ b/trunk/fs/nfs/nfs4filelayout.h @@ -104,6 +104,6 @@ extern struct nfs4_file_layout_dsaddr * nfs4_fl_find_get_deviceid(struct nfs4_deviceid *dev_id); extern void nfs4_fl_put_deviceid(struct nfs4_file_layout_dsaddr *dsaddr); struct nfs4_file_layout_dsaddr * -get_device_info(struct inode *inode, struct nfs4_deviceid *dev_id); +get_device_info(struct inode *inode, struct nfs4_deviceid *dev_id, gfp_t gfp_flags); #endif /* FS_NFS_NFS4FILELAYOUT_H */ diff --git a/trunk/fs/nfs/nfs4filelayoutdev.c b/trunk/fs/nfs/nfs4filelayoutdev.c index de5350f2b249..db07c7af1395 100644 --- a/trunk/fs/nfs/nfs4filelayoutdev.c +++ b/trunk/fs/nfs/nfs4filelayoutdev.c @@ -225,11 +225,11 @@ nfs4_fl_free_deviceid(struct nfs4_file_layout_dsaddr *dsaddr) } static struct nfs4_pnfs_ds * -nfs4_pnfs_ds_add(struct inode *inode, u32 ip_addr, u32 port) +nfs4_pnfs_ds_add(struct inode *inode, u32 ip_addr, u32 port, gfp_t gfp_flags) { struct nfs4_pnfs_ds *tmp_ds, *ds; - ds = kzalloc(sizeof(*tmp_ds), GFP_KERNEL); + ds = kzalloc(sizeof(*tmp_ds), gfp_flags); if (!ds) goto out; @@ -261,7 +261,7 @@ nfs4_pnfs_ds_add(struct inode *inode, u32 ip_addr, u32 port) * Currently only support ipv4, and one multi-path address. */ static struct nfs4_pnfs_ds * -decode_and_add_ds(struct xdr_stream *streamp, struct inode *inode) +decode_and_add_ds(struct xdr_stream *streamp, struct inode *inode, gfp_t gfp_flags) { struct nfs4_pnfs_ds *ds = NULL; char *buf; @@ -303,7 +303,7 @@ decode_and_add_ds(struct xdr_stream *streamp, struct inode *inode) rlen); goto out_err; } - buf = kmalloc(rlen + 1, GFP_KERNEL); + buf = kmalloc(rlen + 1, gfp_flags); if (!buf) { dprintk("%s: Not enough memory\n", __func__); goto out_err; @@ -333,7 +333,7 @@ decode_and_add_ds(struct xdr_stream *streamp, struct inode *inode) sscanf(pstr, "-%d-%d", &tmp[0], &tmp[1]); port = htons((tmp[0] << 8) | (tmp[1])); - ds = nfs4_pnfs_ds_add(inode, ip_addr, port); + ds = nfs4_pnfs_ds_add(inode, ip_addr, port, gfp_flags); dprintk("%s: Decoded address and port %s\n", __func__, buf); out_free: kfree(buf); @@ -343,7 +343,7 @@ decode_and_add_ds(struct xdr_stream *streamp, struct inode *inode) /* Decode opaque device data and return the result */ static struct nfs4_file_layout_dsaddr* -decode_device(struct inode *ino, struct pnfs_device *pdev) +decode_device(struct inode *ino, struct pnfs_device *pdev, gfp_t gfp_flags) { int i; u32 cnt, num; @@ -362,7 +362,7 @@ decode_device(struct inode *ino, struct pnfs_device *pdev) struct page *scratch; /* set up xdr stream */ - scratch = alloc_page(GFP_KERNEL); + scratch = alloc_page(gfp_flags); if (!scratch) goto out_err; @@ -384,7 +384,7 @@ decode_device(struct inode *ino, struct pnfs_device *pdev) } /* read stripe indices */ - stripe_indices = kcalloc(cnt, sizeof(u8), GFP_KERNEL); + stripe_indices = kcalloc(cnt, sizeof(u8), gfp_flags); if (!stripe_indices) goto out_err_free_scratch; @@ -423,7 +423,7 @@ decode_device(struct inode *ino, struct pnfs_device *pdev) dsaddr = kzalloc(sizeof(*dsaddr) + (sizeof(struct nfs4_pnfs_ds *) * (num - 1)), - GFP_KERNEL); + gfp_flags); if (!dsaddr) goto out_err_free_stripe_indices; @@ -452,7 +452,7 @@ decode_device(struct inode *ino, struct pnfs_device *pdev) for (j = 0; j < mp_count; j++) { if (j == 0) { dsaddr->ds_list[i] = decode_and_add_ds(&stream, - ino); + ino, gfp_flags); if (dsaddr->ds_list[i] == NULL) goto out_err_free_deviceid; } else { @@ -503,12 +503,12 @@ decode_device(struct inode *ino, struct pnfs_device *pdev) * available devices. */ static struct nfs4_file_layout_dsaddr * -decode_and_add_device(struct inode *inode, struct pnfs_device *dev) +decode_and_add_device(struct inode *inode, struct pnfs_device *dev, gfp_t gfp_flags) { struct nfs4_file_layout_dsaddr *d, *new; long hash; - new = decode_device(inode, dev); + new = decode_device(inode, dev, gfp_flags); if (!new) { printk(KERN_WARNING "%s: Could not decode or add device\n", __func__); @@ -537,7 +537,7 @@ decode_and_add_device(struct inode *inode, struct pnfs_device *dev) * of available devices, and return it. */ struct nfs4_file_layout_dsaddr * -get_device_info(struct inode *inode, struct nfs4_deviceid *dev_id) +get_device_info(struct inode *inode, struct nfs4_deviceid *dev_id, gfp_t gfp_flags) { struct pnfs_device *pdev = NULL; u32 max_resp_sz; @@ -556,17 +556,17 @@ get_device_info(struct inode *inode, struct nfs4_deviceid *dev_id) dprintk("%s inode %p max_resp_sz %u max_pages %d\n", __func__, inode, max_resp_sz, max_pages); - pdev = kzalloc(sizeof(struct pnfs_device), GFP_KERNEL); + pdev = kzalloc(sizeof(struct pnfs_device), gfp_flags); if (pdev == NULL) return NULL; - pages = kzalloc(max_pages * sizeof(struct page *), GFP_KERNEL); + pages = kzalloc(max_pages * sizeof(struct page *), gfp_flags); if (pages == NULL) { kfree(pdev); return NULL; } for (i = 0; i < max_pages; i++) { - pages[i] = alloc_page(GFP_KERNEL); + pages[i] = alloc_page(gfp_flags); if (!pages[i]) goto out_free; } @@ -587,7 +587,7 @@ get_device_info(struct inode *inode, struct nfs4_deviceid *dev_id) * Found new device, need to decode it and then add it to the * list of known devices for this mountpoint. */ - dsaddr = decode_and_add_device(inode, pdev); + dsaddr = decode_and_add_device(inode, pdev, gfp_flags); out_free: for (i = 0; i < max_pages; i++) __free_page(pages[i]); diff --git a/trunk/fs/nfs/nfs4proc.c b/trunk/fs/nfs/nfs4proc.c index 69c0f3c5ee7a..cf1b339c3937 100644 --- a/trunk/fs/nfs/nfs4proc.c +++ b/trunk/fs/nfs/nfs4proc.c @@ -300,6 +300,7 @@ static int nfs4_handle_exception(struct nfs_server *server, int errorcode, struc ret = nfs4_delay(server->client, &exception->timeout); if (ret != 0) break; + case -NFS4ERR_RETRY_UNCACHED_REP: case -NFS4ERR_OLD_STATEID: exception->retry = 1; break; @@ -3695,6 +3696,7 @@ nfs4_async_handle_error(struct rpc_task *task, const struct nfs_server *server, rpc_delay(task, NFS4_POLL_RETRY_MAX); task->tk_status = 0; return -EAGAIN; + case -NFS4ERR_RETRY_UNCACHED_REP: case -NFS4ERR_OLD_STATEID: task->tk_status = 0; return -EAGAIN; @@ -4844,6 +4846,8 @@ static void nfs4_get_lease_time_done(struct rpc_task *task, void *calldata) dprintk("%s Retry: tk_status %d\n", __func__, task->tk_status); rpc_delay(task, NFS4_POLL_RETRY_MIN); task->tk_status = 0; + /* fall through */ + case -NFS4ERR_RETRY_UNCACHED_REP: nfs_restart_rpc(task, data->clp); return; } @@ -5479,6 +5483,8 @@ static int nfs41_reclaim_complete_handle_errors(struct rpc_task *task, struct nf break; case -NFS4ERR_DELAY: rpc_delay(task, NFS4_POLL_RETRY_MAX); + /* fall through */ + case -NFS4ERR_RETRY_UNCACHED_REP: return -EAGAIN; default: nfs4_schedule_lease_recovery(clp); diff --git a/trunk/fs/nfs/pnfs.c b/trunk/fs/nfs/pnfs.c index ff681ab65d31..f57f5281a520 100644 --- a/trunk/fs/nfs/pnfs.c +++ b/trunk/fs/nfs/pnfs.c @@ -383,6 +383,7 @@ pnfs_destroy_all_layouts(struct nfs_client *clp) plh_layouts); dprintk("%s freeing layout for inode %lu\n", __func__, lo->plh_inode->i_ino); + list_del_init(&lo->plh_layouts); pnfs_destroy_layout(NFS_I(lo->plh_inode)); } } @@ -466,7 +467,8 @@ pnfs_choose_layoutget_stateid(nfs4_stateid *dst, struct pnfs_layout_hdr *lo, static struct pnfs_layout_segment * send_layoutget(struct pnfs_layout_hdr *lo, struct nfs_open_context *ctx, - u32 iomode) + u32 iomode, + gfp_t gfp_flags) { struct inode *ino = lo->plh_inode; struct nfs_server *server = NFS_SERVER(ino); @@ -479,7 +481,7 @@ send_layoutget(struct pnfs_layout_hdr *lo, dprintk("--> %s\n", __func__); BUG_ON(ctx == NULL); - lgp = kzalloc(sizeof(*lgp), GFP_KERNEL); + lgp = kzalloc(sizeof(*lgp), gfp_flags); if (lgp == NULL) return NULL; @@ -487,12 +489,12 @@ send_layoutget(struct pnfs_layout_hdr *lo, max_resp_sz = server->nfs_client->cl_session->fc_attrs.max_resp_sz; max_pages = max_resp_sz >> PAGE_SHIFT; - pages = kzalloc(max_pages * sizeof(struct page *), GFP_KERNEL); + pages = kzalloc(max_pages * sizeof(struct page *), gfp_flags); if (!pages) goto out_err_free; for (i = 0; i < max_pages; i++) { - pages[i] = alloc_page(GFP_KERNEL); + pages[i] = alloc_page(gfp_flags); if (!pages[i]) goto out_err_free; } @@ -508,6 +510,7 @@ send_layoutget(struct pnfs_layout_hdr *lo, lgp->args.layout.pages = pages; lgp->args.layout.pglen = max_pages * PAGE_SIZE; lgp->lsegpp = &lseg; + lgp->gfp_flags = gfp_flags; /* Synchronously retrieve layout information from server and * store in lseg. @@ -665,11 +668,11 @@ pnfs_insert_layout(struct pnfs_layout_hdr *lo, } static struct pnfs_layout_hdr * -alloc_init_layout_hdr(struct inode *ino) +alloc_init_layout_hdr(struct inode *ino, gfp_t gfp_flags) { struct pnfs_layout_hdr *lo; - lo = kzalloc(sizeof(struct pnfs_layout_hdr), GFP_KERNEL); + lo = kzalloc(sizeof(struct pnfs_layout_hdr), gfp_flags); if (!lo) return NULL; atomic_set(&lo->plh_refcount, 1); @@ -681,7 +684,7 @@ alloc_init_layout_hdr(struct inode *ino) } static struct pnfs_layout_hdr * -pnfs_find_alloc_layout(struct inode *ino) +pnfs_find_alloc_layout(struct inode *ino, gfp_t gfp_flags) { struct nfs_inode *nfsi = NFS_I(ino); struct pnfs_layout_hdr *new = NULL; @@ -696,7 +699,7 @@ pnfs_find_alloc_layout(struct inode *ino) return nfsi->layout; } spin_unlock(&ino->i_lock); - new = alloc_init_layout_hdr(ino); + new = alloc_init_layout_hdr(ino, gfp_flags); spin_lock(&ino->i_lock); if (likely(nfsi->layout == NULL)) /* Won the race? */ @@ -756,7 +759,8 @@ pnfs_find_lseg(struct pnfs_layout_hdr *lo, u32 iomode) struct pnfs_layout_segment * pnfs_update_layout(struct inode *ino, struct nfs_open_context *ctx, - enum pnfs_iomode iomode) + enum pnfs_iomode iomode, + gfp_t gfp_flags) { struct nfs_inode *nfsi = NFS_I(ino); struct nfs_client *clp = NFS_SERVER(ino)->nfs_client; @@ -767,7 +771,7 @@ pnfs_update_layout(struct inode *ino, if (!pnfs_enabled_sb(NFS_SERVER(ino))) return NULL; spin_lock(&ino->i_lock); - lo = pnfs_find_alloc_layout(ino); + lo = pnfs_find_alloc_layout(ino, gfp_flags); if (lo == NULL) { dprintk("%s ERROR: can't get pnfs_layout_hdr\n", __func__); goto out_unlock; @@ -807,7 +811,7 @@ pnfs_update_layout(struct inode *ino, spin_unlock(&clp->cl_lock); } - lseg = send_layoutget(lo, ctx, iomode); + lseg = send_layoutget(lo, ctx, iomode, gfp_flags); if (!lseg && first) { spin_lock(&clp->cl_lock); list_del_init(&lo->plh_layouts); @@ -846,7 +850,7 @@ pnfs_layout_process(struct nfs4_layoutget *lgp) goto out; } /* Inject layout blob into I/O device driver */ - lseg = NFS_SERVER(ino)->pnfs_curr_ld->alloc_lseg(lo, res); + lseg = NFS_SERVER(ino)->pnfs_curr_ld->alloc_lseg(lo, res, lgp->gfp_flags); if (!lseg || IS_ERR(lseg)) { if (!lseg) status = -ENOMEM; @@ -899,7 +903,8 @@ static int pnfs_read_pg_test(struct nfs_pageio_descriptor *pgio, /* This is first coelesce call for a series of nfs_pages */ pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode, prev->wb_context, - IOMODE_READ); + IOMODE_READ, + GFP_KERNEL); } return NFS_SERVER(pgio->pg_inode)->pnfs_curr_ld->pg_test(pgio, prev, req); } @@ -921,7 +926,8 @@ static int pnfs_write_pg_test(struct nfs_pageio_descriptor *pgio, /* This is first coelesce call for a series of nfs_pages */ pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode, prev->wb_context, - IOMODE_RW); + IOMODE_RW, + GFP_NOFS); } return NFS_SERVER(pgio->pg_inode)->pnfs_curr_ld->pg_test(pgio, prev, req); } diff --git a/trunk/fs/nfs/pnfs.h b/trunk/fs/nfs/pnfs.h index bc4827202e7a..0c015bad9e7a 100644 --- a/trunk/fs/nfs/pnfs.h +++ b/trunk/fs/nfs/pnfs.h @@ -70,7 +70,7 @@ struct pnfs_layoutdriver_type { const u32 id; const char *name; struct module *owner; - struct pnfs_layout_segment * (*alloc_lseg) (struct pnfs_layout_hdr *layoutid, struct nfs4_layoutget_res *lgr); + struct pnfs_layout_segment * (*alloc_lseg) (struct pnfs_layout_hdr *layoutid, struct nfs4_layoutget_res *lgr, gfp_t gfp_flags); void (*free_lseg) (struct pnfs_layout_segment *lseg); /* test for nfs page cache coalescing */ @@ -126,7 +126,7 @@ void get_layout_hdr(struct pnfs_layout_hdr *lo); void put_lseg(struct pnfs_layout_segment *lseg); struct pnfs_layout_segment * pnfs_update_layout(struct inode *ino, struct nfs_open_context *ctx, - enum pnfs_iomode access_type); + enum pnfs_iomode access_type, gfp_t gfp_flags); void set_pnfs_layoutdriver(struct nfs_server *, u32 id); void unset_pnfs_layoutdriver(struct nfs_server *); enum pnfs_try_status pnfs_try_to_write_data(struct nfs_write_data *, @@ -245,7 +245,7 @@ static inline void put_lseg(struct pnfs_layout_segment *lseg) static inline struct pnfs_layout_segment * pnfs_update_layout(struct inode *ino, struct nfs_open_context *ctx, - enum pnfs_iomode access_type) + enum pnfs_iomode access_type, gfp_t gfp_flags) { return NULL; } diff --git a/trunk/fs/nfs/read.c b/trunk/fs/nfs/read.c index 7cded2b12a05..2bcf0dc306a1 100644 --- a/trunk/fs/nfs/read.c +++ b/trunk/fs/nfs/read.c @@ -288,7 +288,7 @@ static int nfs_pagein_multi(struct nfs_pageio_descriptor *desc) atomic_set(&req->wb_complete, requests); BUG_ON(desc->pg_lseg != NULL); - lseg = pnfs_update_layout(desc->pg_inode, req->wb_context, IOMODE_READ); + lseg = pnfs_update_layout(desc->pg_inode, req->wb_context, IOMODE_READ, GFP_KERNEL); ClearPageError(page); offset = 0; nbytes = desc->pg_count; @@ -351,7 +351,7 @@ static int nfs_pagein_one(struct nfs_pageio_descriptor *desc) } req = nfs_list_entry(data->pages.next); if ((!lseg) && list_is_singular(&data->pages)) - lseg = pnfs_update_layout(desc->pg_inode, req->wb_context, IOMODE_READ); + lseg = pnfs_update_layout(desc->pg_inode, req->wb_context, IOMODE_READ, GFP_KERNEL); ret = nfs_read_rpcsetup(req, data, &nfs_read_full_ops, desc->pg_count, 0, lseg); diff --git a/trunk/fs/nfs/write.c b/trunk/fs/nfs/write.c index 3bd5d7e80f6c..49c715b4ac92 100644 --- a/trunk/fs/nfs/write.c +++ b/trunk/fs/nfs/write.c @@ -939,7 +939,7 @@ static int nfs_flush_multi(struct nfs_pageio_descriptor *desc) atomic_set(&req->wb_complete, requests); BUG_ON(desc->pg_lseg); - lseg = pnfs_update_layout(desc->pg_inode, req->wb_context, IOMODE_RW); + lseg = pnfs_update_layout(desc->pg_inode, req->wb_context, IOMODE_RW, GFP_NOFS); ClearPageError(page); offset = 0; nbytes = desc->pg_count; @@ -1013,7 +1013,7 @@ static int nfs_flush_one(struct nfs_pageio_descriptor *desc) } req = nfs_list_entry(data->pages.next); if ((!lseg) && list_is_singular(&data->pages)) - lseg = pnfs_update_layout(desc->pg_inode, req->wb_context, IOMODE_RW); + lseg = pnfs_update_layout(desc->pg_inode, req->wb_context, IOMODE_RW, GFP_NOFS); if ((desc->pg_ioflags & FLUSH_COND_STABLE) && (desc->pg_moreio || NFS_I(desc->pg_inode)->ncommit)) diff --git a/trunk/fs/nilfs2/alloc.c b/trunk/fs/nilfs2/alloc.c index 0a0a66d98cce..f7684483785e 100644 --- a/trunk/fs/nilfs2/alloc.c +++ b/trunk/fs/nilfs2/alloc.c @@ -646,7 +646,7 @@ int nilfs_palloc_freev(struct inode *inode, __u64 *entry_nrs, size_t nitems) unsigned long group, group_offset; int i, j, n, ret; - for (i = 0; i < nitems; i += n) { + for (i = 0; i < nitems; i = j) { group = nilfs_palloc_group(inode, entry_nrs[i], &group_offset); ret = nilfs_palloc_get_desc_block(inode, group, 0, &desc_bh); if (ret < 0) diff --git a/trunk/fs/xfs/linux-2.6/xfs_sync.c b/trunk/fs/xfs/linux-2.6/xfs_sync.c index e4f9c1b0836c..3e898a48122d 100644 --- a/trunk/fs/xfs/linux-2.6/xfs_sync.c +++ b/trunk/fs/xfs/linux-2.6/xfs_sync.c @@ -926,6 +926,7 @@ xfs_reclaim_inodes_ag( XFS_LOOKUP_BATCH, XFS_ICI_RECLAIM_TAG); if (!nr_found) { + done = 1; rcu_read_unlock(); break; } diff --git a/trunk/fs/xfs/xfs_trans_ail.c b/trunk/fs/xfs/xfs_trans_ail.c index acdb92f14d51..5fc2380092c8 100644 --- a/trunk/fs/xfs/xfs_trans_ail.c +++ b/trunk/fs/xfs/xfs_trans_ail.c @@ -346,20 +346,23 @@ xfs_ail_delete( */ STATIC void xfs_ail_worker( - struct work_struct *work) + struct work_struct *work) { - struct xfs_ail *ailp = container_of(to_delayed_work(work), + struct xfs_ail *ailp = container_of(to_delayed_work(work), struct xfs_ail, xa_work); - long tout; - xfs_lsn_t target = ailp->xa_target; - xfs_lsn_t lsn; - xfs_log_item_t *lip; - int flush_log, count, stuck; - xfs_mount_t *mp = ailp->xa_mount; + xfs_mount_t *mp = ailp->xa_mount; struct xfs_ail_cursor *cur = &ailp->xa_cursors; - int push_xfsbufd = 0; + xfs_log_item_t *lip; + xfs_lsn_t lsn; + xfs_lsn_t target; + long tout = 10; + int flush_log = 0; + int stuck = 0; + int count = 0; + int push_xfsbufd = 0; spin_lock(&ailp->xa_lock); + target = ailp->xa_target; xfs_trans_ail_cursor_init(ailp, cur); lip = xfs_trans_ail_cursor_first(ailp, cur, ailp->xa_last_pushed_lsn); if (!lip || XFS_FORCED_SHUTDOWN(mp)) { @@ -368,8 +371,7 @@ xfs_ail_worker( */ xfs_trans_ail_cursor_done(ailp, cur); spin_unlock(&ailp->xa_lock); - ailp->xa_last_pushed_lsn = 0; - return; + goto out_done; } XFS_STATS_INC(xs_push_ail); @@ -386,8 +388,7 @@ xfs_ail_worker( * lots of contention on the AIL lists. */ lsn = lip->li_lsn; - flush_log = stuck = count = 0; - while ((XFS_LSN_CMP(lip->li_lsn, target) < 0)) { + while ((XFS_LSN_CMP(lip->li_lsn, target) <= 0)) { int lock_result; /* * If we can lock the item without sleeping, unlock the AIL @@ -480,21 +481,25 @@ xfs_ail_worker( } /* assume we have more work to do in a short while */ - tout = 10; +out_done: if (!count) { /* We're past our target or empty, so idle */ ailp->xa_last_pushed_lsn = 0; /* - * Check for an updated push target before clearing the - * XFS_AIL_PUSHING_BIT. If the target changed, we've got more - * work to do. Wait a bit longer before starting that work. + * We clear the XFS_AIL_PUSHING_BIT first before checking + * whether the target has changed. If the target has changed, + * this pushes the requeue race directly onto the result of the + * atomic test/set bit, so we are guaranteed that either the + * the pusher that changed the target or ourselves will requeue + * the work (but not both). */ + clear_bit(XFS_AIL_PUSHING_BIT, &ailp->xa_flags); smp_rmb(); - if (ailp->xa_target == target) { - clear_bit(XFS_AIL_PUSHING_BIT, &ailp->xa_flags); + if (XFS_LSN_CMP(ailp->xa_target, target) == 0 || + test_and_set_bit(XFS_AIL_PUSHING_BIT, &ailp->xa_flags)) return; - } + tout = 50; } else if (XFS_LSN_CMP(lsn, target) >= 0) { /* @@ -553,7 +558,7 @@ xfs_ail_push( * the XFS_AIL_PUSHING_BIT. */ smp_wmb(); - ailp->xa_target = threshold_lsn; + xfs_trans_ail_copy_lsn(ailp, &ailp->xa_target, &threshold_lsn); if (!test_and_set_bit(XFS_AIL_PUSHING_BIT, &ailp->xa_flags)) queue_delayed_work(xfs_syncd_wq, &ailp->xa_work, 0); } diff --git a/trunk/include/linux/bootmem.h b/trunk/include/linux/bootmem.h index b8613e806aa9..01eca1794e14 100644 --- a/trunk/include/linux/bootmem.h +++ b/trunk/include/linux/bootmem.h @@ -111,6 +111,8 @@ extern void *__alloc_bootmem_low_node(pg_data_t *pgdat, __alloc_bootmem_nopanic(x, PAGE_SIZE, __pa(MAX_DMA_ADDRESS)) #define alloc_bootmem_node(pgdat, x) \ __alloc_bootmem_node(pgdat, x, SMP_CACHE_BYTES, __pa(MAX_DMA_ADDRESS)) +#define alloc_bootmem_node_nopanic(pgdat, x) \ + __alloc_bootmem_node_nopanic(pgdat, x, SMP_CACHE_BYTES, __pa(MAX_DMA_ADDRESS)) #define alloc_bootmem_pages_node(pgdat, x) \ __alloc_bootmem_node(pgdat, x, PAGE_SIZE, __pa(MAX_DMA_ADDRESS)) #define alloc_bootmem_pages_node_nopanic(pgdat, x) \ diff --git a/trunk/include/linux/capability.h b/trunk/include/linux/capability.h index 16ee8b49a200..d4675af963fa 100644 --- a/trunk/include/linux/capability.h +++ b/trunk/include/linux/capability.h @@ -546,18 +546,7 @@ extern bool has_capability_noaudit(struct task_struct *t, int cap); extern bool capable(int cap); extern bool ns_capable(struct user_namespace *ns, int cap); extern bool task_ns_capable(struct task_struct *t, int cap); - -/** - * nsown_capable - Check superior capability to one's own user_ns - * @cap: The capability in question - * - * Return true if the current task has the given superior capability - * targeted at its own user namespace. - */ -static inline bool nsown_capable(int cap) -{ - return ns_capable(current_user_ns(), cap); -} +extern bool nsown_capable(int cap); /* audit system wants to get cap info from files as well */ extern int get_vfs_caps_from_disk(const struct dentry *dentry, struct cpu_vfs_cap_data *cpu_caps); diff --git a/trunk/include/linux/cred.h b/trunk/include/linux/cred.h index 9aeeb0ba2003..be16b61283cc 100644 --- a/trunk/include/linux/cred.h +++ b/trunk/include/linux/cred.h @@ -146,6 +146,7 @@ struct cred { void *security; /* subjective LSM security */ #endif struct user_struct *user; /* real user ID subscription */ + struct user_namespace *user_ns; /* cached user->user_ns */ struct group_info *group_info; /* supplementary groups for euid/fsgid */ struct rcu_head rcu; /* RCU deletion hook */ }; @@ -354,10 +355,15 @@ static inline void put_cred(const struct cred *_cred) #define current_fsgid() (current_cred_xxx(fsgid)) #define current_cap() (current_cred_xxx(cap_effective)) #define current_user() (current_cred_xxx(user)) -#define _current_user_ns() (current_cred_xxx(user)->user_ns) #define current_security() (current_cred_xxx(security)) -extern struct user_namespace *current_user_ns(void); +#ifdef CONFIG_USER_NS +#define current_user_ns() (current_cred_xxx(user_ns)) +#else +extern struct user_namespace init_user_ns; +#define current_user_ns() (&init_user_ns) +#endif + #define current_uid_gid(_uid, _gid) \ do { \ diff --git a/trunk/include/linux/fb.h b/trunk/include/linux/fb.h index df728c1c29ed..6a8274877171 100644 --- a/trunk/include/linux/fb.h +++ b/trunk/include/linux/fb.h @@ -832,6 +832,7 @@ struct fb_tile_ops { #define FBINFO_CAN_FORCE_OUTPUT 0x200000 struct fb_info { + atomic_t count; int node; int flags; struct mutex lock; /* Lock for open/release/ioctl funcs */ diff --git a/trunk/include/linux/gfp.h b/trunk/include/linux/gfp.h index bfb8f934521e..56d8fc87fbbc 100644 --- a/trunk/include/linux/gfp.h +++ b/trunk/include/linux/gfp.h @@ -353,6 +353,8 @@ extern unsigned long get_zeroed_page(gfp_t gfp_mask); void *alloc_pages_exact(size_t size, gfp_t gfp_mask); void free_pages_exact(void *virt, size_t size); +/* This is different from alloc_pages_exact_node !!! */ +void *alloc_pages_exact_nid(int nid, size_t size, gfp_t gfp_mask); #define __get_free_page(gfp_mask) \ __get_free_pages((gfp_mask), 0) diff --git a/trunk/include/linux/init_task.h b/trunk/include/linux/init_task.h index 689496bb6654..caa151fbebb7 100644 --- a/trunk/include/linux/init_task.h +++ b/trunk/include/linux/init_task.h @@ -134,6 +134,7 @@ extern struct cred init_cred; .stack = &init_thread_info, \ .usage = ATOMIC_INIT(2), \ .flags = PF_KTHREAD, \ + .lock_depth = -1, \ .prio = MAX_PRIO-20, \ .static_prio = MAX_PRIO-20, \ .normal_prio = MAX_PRIO-20, \ diff --git a/trunk/include/linux/mutex.h b/trunk/include/linux/mutex.h index c75471db576e..94b48bd40dd7 100644 --- a/trunk/include/linux/mutex.h +++ b/trunk/include/linux/mutex.h @@ -51,7 +51,7 @@ struct mutex { spinlock_t wait_lock; struct list_head wait_list; #if defined(CONFIG_DEBUG_MUTEXES) || defined(CONFIG_SMP) - struct task_struct *owner; + struct thread_info *owner; #endif #ifdef CONFIG_DEBUG_MUTEXES const char *name; diff --git a/trunk/include/linux/nfs_xdr.h b/trunk/include/linux/nfs_xdr.h index 890dce242639..7e371f7df9c4 100644 --- a/trunk/include/linux/nfs_xdr.h +++ b/trunk/include/linux/nfs_xdr.h @@ -233,6 +233,7 @@ struct nfs4_layoutget { struct nfs4_layoutget_args args; struct nfs4_layoutget_res res; struct pnfs_layout_segment **lsegpp; + gfp_t gfp_flags; }; struct nfs4_getdeviceinfo_args { diff --git a/trunk/include/linux/sched.h b/trunk/include/linux/sched.h index 12211e1666e2..781abd137673 100644 --- a/trunk/include/linux/sched.h +++ b/trunk/include/linux/sched.h @@ -360,7 +360,7 @@ extern signed long schedule_timeout_interruptible(signed long timeout); extern signed long schedule_timeout_killable(signed long timeout); extern signed long schedule_timeout_uninterruptible(signed long timeout); asmlinkage void schedule(void); -extern int mutex_spin_on_owner(struct mutex *lock, struct task_struct *owner); +extern int mutex_spin_on_owner(struct mutex *lock, struct thread_info *owner); struct nsproxy; struct user_namespace; @@ -731,6 +731,10 @@ struct sched_info { /* timestamps */ unsigned long long last_arrival,/* when we last ran on a cpu */ last_queued; /* when we were last queued to run */ +#ifdef CONFIG_SCHEDSTATS + /* BKL stats */ + unsigned int bkl_count; +#endif }; #endif /* defined(CONFIG_SCHEDSTATS) || defined(CONFIG_TASK_DELAY_ACCT) */ @@ -864,7 +868,6 @@ static inline int sd_power_saving_flags(void) struct sched_group { struct sched_group *next; /* Must be a circular list */ - atomic_t ref; /* * CPU power of this group, SCHED_LOAD_SCALE being max power for a @@ -879,6 +882,9 @@ struct sched_group { * NOTE: this field is variable length. (Allocated dynamically * by attaching extra space to the end of the structure, * depending on how many CPUs the kernel has booted up with) + * + * It is also be embedded into static data structures at build + * time. (See 'struct static_sched_group' in kernel/sched.c) */ unsigned long cpumask[0]; }; @@ -888,6 +894,17 @@ static inline struct cpumask *sched_group_cpus(struct sched_group *sg) return to_cpumask(sg->cpumask); } +enum sched_domain_level { + SD_LV_NONE = 0, + SD_LV_SIBLING, + SD_LV_MC, + SD_LV_BOOK, + SD_LV_CPU, + SD_LV_NODE, + SD_LV_ALLNODES, + SD_LV_MAX +}; + struct sched_domain_attr { int relax_domain_level; }; @@ -896,8 +913,6 @@ struct sched_domain_attr { .relax_domain_level = -1, \ } -extern int sched_domain_level_max; - struct sched_domain { /* These fields must be setup */ struct sched_domain *parent; /* top domain must be null terminated */ @@ -915,7 +930,7 @@ struct sched_domain { unsigned int forkexec_idx; unsigned int smt_gain; int flags; /* See SD_* */ - int level; + enum sched_domain_level level; /* Runtime fields. */ unsigned long last_balance; /* init to jiffies. units in jiffies */ @@ -958,10 +973,6 @@ struct sched_domain { #ifdef CONFIG_SCHED_DEBUG char *name; #endif - union { - void *private; /* used during construction */ - struct rcu_head rcu; /* used during destruction */ - }; unsigned int span_weight; /* @@ -970,6 +981,9 @@ struct sched_domain { * NOTE: this field is variable length. (Allocated dynamically * by attaching extra space to the end of the structure, * depending on how many CPUs the kernel has booted up with) + * + * It is also be embedded into static data structures at build + * time. (See 'struct static_sched_domain' in kernel/sched.c) */ unsigned long span[0]; }; @@ -1034,12 +1048,8 @@ struct sched_domain; #define WF_FORK 0x02 /* child wakeup after fork */ #define ENQUEUE_WAKEUP 1 -#define ENQUEUE_HEAD 2 -#ifdef CONFIG_SMP -#define ENQUEUE_WAKING 4 /* sched_class::task_waking was called */ -#else -#define ENQUEUE_WAKING 0 -#endif +#define ENQUEUE_WAKING 2 +#define ENQUEUE_HEAD 4 #define DEQUEUE_SLEEP 1 @@ -1057,11 +1067,12 @@ struct sched_class { void (*put_prev_task) (struct rq *rq, struct task_struct *p); #ifdef CONFIG_SMP - int (*select_task_rq)(struct task_struct *p, int sd_flag, int flags); + int (*select_task_rq)(struct rq *rq, struct task_struct *p, + int sd_flag, int flags); void (*pre_schedule) (struct rq *this_rq, struct task_struct *task); void (*post_schedule) (struct rq *this_rq); - void (*task_waking) (struct task_struct *task); + void (*task_waking) (struct rq *this_rq, struct task_struct *task); void (*task_woken) (struct rq *this_rq, struct task_struct *task); void (*set_cpus_allowed)(struct task_struct *p, @@ -1186,11 +1197,13 @@ struct task_struct { unsigned int flags; /* per process flags, defined below */ unsigned int ptrace; + int lock_depth; /* BKL lock depth */ + #ifdef CONFIG_SMP - struct task_struct *wake_entry; - int on_cpu; +#ifdef __ARCH_WANT_UNLOCKED_CTXSW + int oncpu; +#endif #endif - int on_rq; int prio, static_prio, normal_prio; unsigned int rt_priority; @@ -1261,7 +1274,6 @@ struct task_struct { /* Revert to default priority/policy when forking */ unsigned sched_reset_on_fork:1; - unsigned sched_contributes_to_load:1; pid_t pid; pid_t tgid; @@ -2051,13 +2063,14 @@ extern void xtime_update(unsigned long ticks); extern int wake_up_state(struct task_struct *tsk, unsigned int state); extern int wake_up_process(struct task_struct *tsk); -extern void wake_up_new_task(struct task_struct *tsk); +extern void wake_up_new_task(struct task_struct *tsk, + unsigned long clone_flags); #ifdef CONFIG_SMP extern void kick_process(struct task_struct *tsk); #else static inline void kick_process(struct task_struct *tsk) { } #endif -extern void sched_fork(struct task_struct *p); +extern void sched_fork(struct task_struct *p, int clone_flags); extern void sched_dead(struct task_struct *p); extern void proc_caches_init(void); @@ -2182,10 +2195,8 @@ extern void set_task_comm(struct task_struct *tsk, char *from); extern char *get_task_comm(char *to, struct task_struct *tsk); #ifdef CONFIG_SMP -void scheduler_ipi(void); extern unsigned long wait_task_inactive(struct task_struct *, long match_state); #else -static inline void scheduler_ipi(void) { } static inline unsigned long wait_task_inactive(struct task_struct *p, long match_state) { diff --git a/trunk/include/net/inet_ecn.h b/trunk/include/net/inet_ecn.h index 88bdd010d65d..2fa8d1341a0a 100644 --- a/trunk/include/net/inet_ecn.h +++ b/trunk/include/net/inet_ecn.h @@ -38,9 +38,19 @@ static inline __u8 INET_ECN_encapsulate(__u8 outer, __u8 inner) return outer; } -#define INET_ECN_xmit(sk) do { inet_sk(sk)->tos |= INET_ECN_ECT_0; } while (0) -#define INET_ECN_dontxmit(sk) \ - do { inet_sk(sk)->tos &= ~INET_ECN_MASK; } while (0) +static inline void INET_ECN_xmit(struct sock *sk) +{ + inet_sk(sk)->tos |= INET_ECN_ECT_0; + if (inet6_sk(sk) != NULL) + inet6_sk(sk)->tclass |= INET_ECN_ECT_0; +} + +static inline void INET_ECN_dontxmit(struct sock *sk) +{ + inet_sk(sk)->tos &= ~INET_ECN_MASK; + if (inet6_sk(sk) != NULL) + inet6_sk(sk)->tclass &= ~INET_ECN_MASK; +} #define IP6_ECN_flow_init(label) do { \ (label) &= ~htonl(INET_ECN_MASK << 20); \ diff --git a/trunk/include/net/ip_vs.h b/trunk/include/net/ip_vs.h index d516f00c8e0f..86aefed6140b 100644 --- a/trunk/include/net/ip_vs.h +++ b/trunk/include/net/ip_vs.h @@ -791,6 +791,7 @@ struct ip_vs_app { /* IPVS in network namespace */ struct netns_ipvs { int gen; /* Generation */ + int enable; /* enable like nf_hooks do */ /* * Hash table: for real service lookups */ @@ -1089,6 +1090,22 @@ ip_vs_control_add(struct ip_vs_conn *cp, struct ip_vs_conn *ctl_cp) atomic_inc(&ctl_cp->n_control); } +/* + * IPVS netns init & cleanup functions + */ +extern int __ip_vs_estimator_init(struct net *net); +extern int __ip_vs_control_init(struct net *net); +extern int __ip_vs_protocol_init(struct net *net); +extern int __ip_vs_app_init(struct net *net); +extern int __ip_vs_conn_init(struct net *net); +extern int __ip_vs_sync_init(struct net *net); +extern void __ip_vs_conn_cleanup(struct net *net); +extern void __ip_vs_app_cleanup(struct net *net); +extern void __ip_vs_protocol_cleanup(struct net *net); +extern void __ip_vs_control_cleanup(struct net *net); +extern void __ip_vs_estimator_cleanup(struct net *net); +extern void __ip_vs_sync_cleanup(struct net *net); +extern void __ip_vs_service_cleanup(struct net *net); /* * IPVS application functions diff --git a/trunk/include/net/llc_pdu.h b/trunk/include/net/llc_pdu.h index 75b8e2968c9b..f57e7d46a453 100644 --- a/trunk/include/net/llc_pdu.h +++ b/trunk/include/net/llc_pdu.h @@ -199,7 +199,7 @@ struct llc_pdu_sn { u8 ssap; u8 ctrl_1; u8 ctrl_2; -}; +} __packed; static inline struct llc_pdu_sn *llc_pdu_sn_hdr(struct sk_buff *skb) { @@ -211,7 +211,7 @@ struct llc_pdu_un { u8 dsap; u8 ssap; u8 ctrl_1; -}; +} __packed; static inline struct llc_pdu_un *llc_pdu_un_hdr(struct sk_buff *skb) { @@ -359,7 +359,7 @@ struct llc_xid_info { u8 fmt_id; /* always 0x81 for LLC */ u8 type; /* different if NULL/non-NULL LSAP */ u8 rw; /* sender receive window */ -}; +} __packed; /** * llc_pdu_init_as_xid_cmd - sets bytes 3, 4 & 5 of LLC header as XID @@ -415,7 +415,7 @@ struct llc_frmr_info { u8 curr_ssv; /* current send state variable val */ u8 curr_rsv; /* current receive state variable */ u8 ind_bits; /* indicator bits set with macro */ -}; +} __packed; extern void llc_pdu_set_cmd_rsp(struct sk_buff *skb, u8 type); extern void llc_pdu_set_pf_bit(struct sk_buff *skb, u8 bit_value); diff --git a/trunk/include/net/xfrm.h b/trunk/include/net/xfrm.h index 6ae4bc5ce8a7..20afeaa39395 100644 --- a/trunk/include/net/xfrm.h +++ b/trunk/include/net/xfrm.h @@ -324,6 +324,7 @@ struct xfrm_state_afinfo { int (*tmpl_sort)(struct xfrm_tmpl **dst, struct xfrm_tmpl **src, int n); int (*state_sort)(struct xfrm_state **dst, struct xfrm_state **src, int n); int (*output)(struct sk_buff *skb); + int (*output_finish)(struct sk_buff *skb); int (*extract_input)(struct xfrm_state *x, struct sk_buff *skb); int (*extract_output)(struct xfrm_state *x, @@ -1454,6 +1455,7 @@ static inline int xfrm4_rcv_spi(struct sk_buff *skb, int nexthdr, __be32 spi) extern int xfrm4_extract_output(struct xfrm_state *x, struct sk_buff *skb); extern int xfrm4_prepare_output(struct xfrm_state *x, struct sk_buff *skb); extern int xfrm4_output(struct sk_buff *skb); +extern int xfrm4_output_finish(struct sk_buff *skb); extern int xfrm4_tunnel_register(struct xfrm_tunnel *handler, unsigned short family); extern int xfrm4_tunnel_deregister(struct xfrm_tunnel *handler, unsigned short family); extern int xfrm6_extract_header(struct sk_buff *skb); @@ -1470,6 +1472,7 @@ extern __be32 xfrm6_tunnel_spi_lookup(struct net *net, xfrm_address_t *saddr); extern int xfrm6_extract_output(struct xfrm_state *x, struct sk_buff *skb); extern int xfrm6_prepare_output(struct xfrm_state *x, struct sk_buff *skb); extern int xfrm6_output(struct sk_buff *skb); +extern int xfrm6_output_finish(struct sk_buff *skb); extern int xfrm6_find_1stfragopt(struct xfrm_state *x, struct sk_buff *skb, u8 **prevhdr); diff --git a/trunk/include/trace/events/gfpflags.h b/trunk/include/trace/events/gfpflags.h index e3615c093741..9fe3a36646e9 100644 --- a/trunk/include/trace/events/gfpflags.h +++ b/trunk/include/trace/events/gfpflags.h @@ -10,6 +10,7 @@ */ #define show_gfp_flags(flags) \ (flags) ? __print_flags(flags, "|", \ + {(unsigned long)GFP_TRANSHUGE, "GFP_TRANSHUGE"}, \ {(unsigned long)GFP_HIGHUSER_MOVABLE, "GFP_HIGHUSER_MOVABLE"}, \ {(unsigned long)GFP_HIGHUSER, "GFP_HIGHUSER"}, \ {(unsigned long)GFP_USER, "GFP_USER"}, \ @@ -32,6 +33,9 @@ {(unsigned long)__GFP_HARDWALL, "GFP_HARDWALL"}, \ {(unsigned long)__GFP_THISNODE, "GFP_THISNODE"}, \ {(unsigned long)__GFP_RECLAIMABLE, "GFP_RECLAIMABLE"}, \ - {(unsigned long)__GFP_MOVABLE, "GFP_MOVABLE"} \ + {(unsigned long)__GFP_MOVABLE, "GFP_MOVABLE"}, \ + {(unsigned long)__GFP_NOTRACK, "GFP_NOTRACK"}, \ + {(unsigned long)__GFP_NO_KSWAPD, "GFP_NO_KSWAPD"}, \ + {(unsigned long)__GFP_OTHER_NODE, "GFP_OTHER_NODE"} \ ) : "GFP_NOWAIT" diff --git a/trunk/init/Kconfig b/trunk/init/Kconfig index 9c812c5fb6e8..7a71e0a9992a 100644 --- a/trunk/init/Kconfig +++ b/trunk/init/Kconfig @@ -827,11 +827,6 @@ config SCHED_AUTOGROUP desktop applications. Task group autogeneration is currently based upon task session. -config SCHED_TTWU_QUEUE - bool - depends on !SPARC32 - default y - config MM_OWNER bool @@ -1231,7 +1226,6 @@ config SLAB per cpu and per node queues. config SLUB - depends on BROKEN || NUMA || !DISCONTIGMEM bool "SLUB (Unqueued Allocator)" help SLUB is a slab allocator that minimizes cache line usage diff --git a/trunk/kernel/capability.c b/trunk/kernel/capability.c index bf0c734d0c12..32a80e08ff4b 100644 --- a/trunk/kernel/capability.c +++ b/trunk/kernel/capability.c @@ -399,3 +399,15 @@ bool task_ns_capable(struct task_struct *t, int cap) return ns_capable(task_cred_xxx(t, user)->user_ns, cap); } EXPORT_SYMBOL(task_ns_capable); + +/** + * nsown_capable - Check superior capability to one's own user_ns + * @cap: The capability in question + * + * Return true if the current task has the given superior capability + * targeted at its own user namespace. + */ +bool nsown_capable(int cap) +{ + return ns_capable(current_user_ns(), cap); +} diff --git a/trunk/kernel/cpuset.c b/trunk/kernel/cpuset.c index 2bb8c2e98fff..33eee16addb8 100644 --- a/trunk/kernel/cpuset.c +++ b/trunk/kernel/cpuset.c @@ -1159,7 +1159,7 @@ int current_cpuset_is_being_rebound(void) static int update_relax_domain_level(struct cpuset *cs, s64 val) { #ifdef CONFIG_SMP - if (val < -1 || val >= sched_domain_level_max) + if (val < -1 || val >= SD_LV_MAX) return -EINVAL; #endif diff --git a/trunk/kernel/cred.c b/trunk/kernel/cred.c index 5557b55048df..8093c16b84b1 100644 --- a/trunk/kernel/cred.c +++ b/trunk/kernel/cred.c @@ -54,6 +54,7 @@ struct cred init_cred = { .cap_effective = CAP_INIT_EFF_SET, .cap_bset = CAP_INIT_BSET, .user = INIT_USER, + .user_ns = &init_user_ns, .group_info = &init_groups, #ifdef CONFIG_KEYS .tgcred = &init_tgcred, @@ -410,6 +411,11 @@ int copy_creds(struct task_struct *p, unsigned long clone_flags) goto error_put; } + /* cache user_ns in cred. Doesn't need a refcount because it will + * stay pinned by cred->user + */ + new->user_ns = new->user->user_ns; + #ifdef CONFIG_KEYS /* new threads get their own thread keyrings if their parent already * had one */ @@ -741,12 +747,6 @@ int set_create_files_as(struct cred *new, struct inode *inode) } EXPORT_SYMBOL(set_create_files_as); -struct user_namespace *current_user_ns(void) -{ - return _current_user_ns(); -} -EXPORT_SYMBOL(current_user_ns); - #ifdef CONFIG_DEBUG_CREDENTIALS bool creds_are_invalid(const struct cred *cred) diff --git a/trunk/kernel/fork.c b/trunk/kernel/fork.c index 2b44d82b8237..e7548dee636b 100644 --- a/trunk/kernel/fork.c +++ b/trunk/kernel/fork.c @@ -1103,6 +1103,7 @@ static struct task_struct *copy_process(unsigned long clone_flags, posix_cpu_timers_init(p); + p->lock_depth = -1; /* -1 = no lock */ do_posix_clock_monotonic_gettime(&p->start_time); p->real_start_time = p->start_time; monotonic_to_bootbased(&p->real_start_time); @@ -1152,7 +1153,7 @@ static struct task_struct *copy_process(unsigned long clone_flags, #endif /* Perform scheduler related setup. Assign this task to a CPU. */ - sched_fork(p); + sched_fork(p, clone_flags); retval = perf_event_init_task(p); if (retval) @@ -1463,7 +1464,7 @@ long do_fork(unsigned long clone_flags, */ p->flags &= ~PF_STARTING; - wake_up_new_task(p); + wake_up_new_task(p, clone_flags); tracehook_report_clone_complete(trace, regs, clone_flags, nr, p); diff --git a/trunk/kernel/mutex-debug.c b/trunk/kernel/mutex-debug.c index 73da83aff418..ec815a960b5d 100644 --- a/trunk/kernel/mutex-debug.c +++ b/trunk/kernel/mutex-debug.c @@ -75,7 +75,7 @@ void debug_mutex_unlock(struct mutex *lock) return; DEBUG_LOCKS_WARN_ON(lock->magic != lock); - DEBUG_LOCKS_WARN_ON(lock->owner != current); + DEBUG_LOCKS_WARN_ON(lock->owner != current_thread_info()); DEBUG_LOCKS_WARN_ON(!lock->wait_list.prev && !lock->wait_list.next); mutex_clear_owner(lock); } diff --git a/trunk/kernel/mutex-debug.h b/trunk/kernel/mutex-debug.h index 0799fd3e4cfa..57d527a16f9d 100644 --- a/trunk/kernel/mutex-debug.h +++ b/trunk/kernel/mutex-debug.h @@ -29,7 +29,7 @@ extern void debug_mutex_init(struct mutex *lock, const char *name, static inline void mutex_set_owner(struct mutex *lock) { - lock->owner = current; + lock->owner = current_thread_info(); } static inline void mutex_clear_owner(struct mutex *lock) diff --git a/trunk/kernel/mutex.c b/trunk/kernel/mutex.c index 2c938e2337cd..c4195fa98900 100644 --- a/trunk/kernel/mutex.c +++ b/trunk/kernel/mutex.c @@ -160,7 +160,14 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass, */ for (;;) { - struct task_struct *owner; + struct thread_info *owner; + + /* + * If we own the BKL, then don't spin. The owner of + * the mutex might be waiting on us to release the BKL. + */ + if (unlikely(current->lock_depth >= 0)) + break; /* * If there's an owner, wait for it to either diff --git a/trunk/kernel/mutex.h b/trunk/kernel/mutex.h index 4115fbf83b12..67578ca48f94 100644 --- a/trunk/kernel/mutex.h +++ b/trunk/kernel/mutex.h @@ -19,7 +19,7 @@ #ifdef CONFIG_SMP static inline void mutex_set_owner(struct mutex *lock) { - lock->owner = current; + lock->owner = current_thread_info(); } static inline void mutex_clear_owner(struct mutex *lock) diff --git a/trunk/kernel/power/suspend.c b/trunk/kernel/power/suspend.c index 8935369d503a..6275970b2189 100644 --- a/trunk/kernel/power/suspend.c +++ b/trunk/kernel/power/suspend.c @@ -216,7 +216,6 @@ int suspend_devices_and_enter(suspend_state_t state) goto Close; } suspend_console(); - pm_restrict_gfp_mask(); suspend_test_start(); error = dpm_suspend_start(PMSG_SUSPEND); if (error) { @@ -233,7 +232,6 @@ int suspend_devices_and_enter(suspend_state_t state) suspend_test_start(); dpm_resume_end(PMSG_RESUME); suspend_test_finish("resume devices"); - pm_restore_gfp_mask(); resume_console(); Close: if (suspend_ops->end) @@ -294,7 +292,9 @@ int enter_state(suspend_state_t state) goto Finish; pr_debug("PM: Entering %s sleep\n", pm_states[state]); + pm_restrict_gfp_mask(); error = suspend_devices_and_enter(state); + pm_restore_gfp_mask(); Finish: pr_debug("PM: Finishing wakeup.\n"); diff --git a/trunk/kernel/power/user.c b/trunk/kernel/power/user.c index c36c3b9e8a84..7d02d33be699 100644 --- a/trunk/kernel/power/user.c +++ b/trunk/kernel/power/user.c @@ -135,8 +135,10 @@ static int snapshot_release(struct inode *inode, struct file *filp) free_basic_memory_bitmaps(); data = filp->private_data; free_all_swap_pages(data->swap); - if (data->frozen) + if (data->frozen) { + pm_restore_gfp_mask(); thaw_processes(); + } pm_notifier_call_chain(data->mode == O_RDONLY ? PM_POST_HIBERNATION : PM_POST_RESTORE); atomic_inc(&snapshot_device_available); @@ -379,6 +381,7 @@ static long snapshot_ioctl(struct file *filp, unsigned int cmd, * PM_HIBERNATION_PREPARE */ error = suspend_devices_and_enter(PM_SUSPEND_MEM); + data->ready = 0; break; case SNAPSHOT_PLATFORM_SUPPORT: diff --git a/trunk/kernel/sched.c b/trunk/kernel/sched.c index c62acf45d3b9..312f8b95c2d4 100644 --- a/trunk/kernel/sched.c +++ b/trunk/kernel/sched.c @@ -231,7 +231,7 @@ static void destroy_rt_bandwidth(struct rt_bandwidth *rt_b) #endif /* - * sched_domains_mutex serializes calls to init_sched_domains, + * sched_domains_mutex serializes calls to arch_init_sched_domains, * detach_destroy_domains and partition_sched_domains. */ static DEFINE_MUTEX(sched_domains_mutex); @@ -312,9 +312,6 @@ struct cfs_rq { u64 exec_clock; u64 min_vruntime; -#ifndef CONFIG_64BIT - u64 min_vruntime_copy; -#endif struct rb_root tasks_timeline; struct rb_node *rb_leftmost; @@ -328,9 +325,7 @@ struct cfs_rq { */ struct sched_entity *curr, *next, *last, *skip; -#ifdef CONFIG_SCHED_DEBUG unsigned int nr_spread_over; -#endif #ifdef CONFIG_FAIR_GROUP_SCHED struct rq *rq; /* cpu runqueue to which this cfs_rq is attached */ @@ -422,7 +417,6 @@ struct rt_rq { */ struct root_domain { atomic_t refcount; - struct rcu_head rcu; cpumask_var_t span; cpumask_var_t online; @@ -466,7 +460,7 @@ struct rq { u64 nohz_stamp; unsigned char nohz_balance_kick; #endif - int skip_clock_update; + unsigned int skip_clock_update; /* capture load from *all* tasks on this cpu: */ struct load_weight load; @@ -559,10 +553,6 @@ struct rq { unsigned int ttwu_count; unsigned int ttwu_local; #endif - -#ifdef CONFIG_SMP - struct task_struct *wake_list; -#endif }; static DEFINE_PER_CPU_SHARED_ALIGNED(struct rq, runqueues); @@ -581,7 +571,7 @@ static inline int cpu_of(struct rq *rq) #define rcu_dereference_check_sched_domain(p) \ rcu_dereference_check((p), \ - rcu_read_lock_held() || \ + rcu_read_lock_sched_held() || \ lockdep_is_held(&sched_domains_mutex)) /* @@ -606,7 +596,7 @@ static inline int cpu_of(struct rq *rq) * Return the group to which this tasks belongs. * * We use task_subsys_state_check() and extend the RCU verification - * with lockdep_is_held(&p->pi_lock) because cpu_cgroup_attach() + * with lockdep_is_held(&task_rq(p)->lock) because cpu_cgroup_attach() * holds that lock for each task it moves into the cgroup. Therefore * by holding that lock, we pin the task to the current cgroup. */ @@ -616,7 +606,7 @@ static inline struct task_group *task_group(struct task_struct *p) struct cgroup_subsys_state *css; css = task_subsys_state_check(p, cpu_cgroup_subsys_id, - lockdep_is_held(&p->pi_lock)); + lockdep_is_held(&task_rq(p)->lock)); tg = container_of(css, struct task_group, css); return autogroup_task_group(p, tg); @@ -652,7 +642,7 @@ static void update_rq_clock(struct rq *rq) { s64 delta; - if (rq->skip_clock_update > 0) + if (rq->skip_clock_update) return; delta = sched_clock_cpu(cpu_of(rq)) - rq->clock; @@ -848,39 +838,18 @@ static inline int task_current(struct rq *rq, struct task_struct *p) return rq->curr == p; } +#ifndef __ARCH_WANT_UNLOCKED_CTXSW static inline int task_running(struct rq *rq, struct task_struct *p) { -#ifdef CONFIG_SMP - return p->on_cpu; -#else return task_current(rq, p); -#endif } -#ifndef __ARCH_WANT_UNLOCKED_CTXSW static inline void prepare_lock_switch(struct rq *rq, struct task_struct *next) { -#ifdef CONFIG_SMP - /* - * We can optimise this out completely for !SMP, because the - * SMP rebalancing from interrupt is the only thing that cares - * here. - */ - next->on_cpu = 1; -#endif } static inline void finish_lock_switch(struct rq *rq, struct task_struct *prev) { -#ifdef CONFIG_SMP - /* - * After ->on_cpu is cleared, the task can be moved to a different CPU. - * We must ensure this doesn't happen until the switch is completely - * finished. - */ - smp_wmb(); - prev->on_cpu = 0; -#endif #ifdef CONFIG_DEBUG_SPINLOCK /* this is a valid case when another task releases the spinlock */ rq->lock.owner = current; @@ -896,6 +865,15 @@ static inline void finish_lock_switch(struct rq *rq, struct task_struct *prev) } #else /* __ARCH_WANT_UNLOCKED_CTXSW */ +static inline int task_running(struct rq *rq, struct task_struct *p) +{ +#ifdef CONFIG_SMP + return p->oncpu; +#else + return task_current(rq, p); +#endif +} + static inline void prepare_lock_switch(struct rq *rq, struct task_struct *next) { #ifdef CONFIG_SMP @@ -904,7 +882,7 @@ static inline void prepare_lock_switch(struct rq *rq, struct task_struct *next) * SMP rebalancing from interrupt is the only thing that cares * here. */ - next->on_cpu = 1; + next->oncpu = 1; #endif #ifdef __ARCH_WANT_INTERRUPTS_ON_CTXSW raw_spin_unlock_irq(&rq->lock); @@ -917,12 +895,12 @@ static inline void finish_lock_switch(struct rq *rq, struct task_struct *prev) { #ifdef CONFIG_SMP /* - * After ->on_cpu is cleared, the task can be moved to a different CPU. + * After ->oncpu is cleared, the task can be moved to a different CPU. * We must ensure this doesn't happen until the switch is completely * finished. */ smp_wmb(); - prev->on_cpu = 0; + prev->oncpu = 0; #endif #ifndef __ARCH_WANT_INTERRUPTS_ON_CTXSW local_irq_enable(); @@ -931,15 +909,23 @@ static inline void finish_lock_switch(struct rq *rq, struct task_struct *prev) #endif /* __ARCH_WANT_UNLOCKED_CTXSW */ /* - * __task_rq_lock - lock the rq @p resides on. + * Check whether the task is waking, we use this to synchronize ->cpus_allowed + * against ttwu(). + */ +static inline int task_is_waking(struct task_struct *p) +{ + return unlikely(p->state == TASK_WAKING); +} + +/* + * __task_rq_lock - lock the runqueue a given task resides on. + * Must be called interrupts disabled. */ static inline struct rq *__task_rq_lock(struct task_struct *p) __acquires(rq->lock) { struct rq *rq; - lockdep_assert_held(&p->pi_lock); - for (;;) { rq = task_rq(p); raw_spin_lock(&rq->lock); @@ -950,22 +936,22 @@ static inline struct rq *__task_rq_lock(struct task_struct *p) } /* - * task_rq_lock - lock p->pi_lock and lock the rq @p resides on. + * task_rq_lock - lock the runqueue a given task resides on and disable + * interrupts. Note the ordering: we can safely lookup the task_rq without + * explicitly disabling preemption. */ static struct rq *task_rq_lock(struct task_struct *p, unsigned long *flags) - __acquires(p->pi_lock) __acquires(rq->lock) { struct rq *rq; for (;;) { - raw_spin_lock_irqsave(&p->pi_lock, *flags); + local_irq_save(*flags); rq = task_rq(p); raw_spin_lock(&rq->lock); if (likely(rq == task_rq(p))) return rq; - raw_spin_unlock(&rq->lock); - raw_spin_unlock_irqrestore(&p->pi_lock, *flags); + raw_spin_unlock_irqrestore(&rq->lock, *flags); } } @@ -975,13 +961,10 @@ static void __task_rq_unlock(struct rq *rq) raw_spin_unlock(&rq->lock); } -static inline void -task_rq_unlock(struct rq *rq, struct task_struct *p, unsigned long *flags) +static inline void task_rq_unlock(struct rq *rq, unsigned long *flags) __releases(rq->lock) - __releases(p->pi_lock) { - raw_spin_unlock(&rq->lock); - raw_spin_unlock_irqrestore(&p->pi_lock, *flags); + raw_spin_unlock_irqrestore(&rq->lock, *flags); } /* @@ -1210,17 +1193,11 @@ int get_nohz_timer_target(void) int i; struct sched_domain *sd; - rcu_read_lock(); for_each_domain(cpu, sd) { - for_each_cpu(i, sched_domain_span(sd)) { - if (!idle_cpu(i)) { - cpu = i; - goto unlock; - } - } + for_each_cpu(i, sched_domain_span(sd)) + if (!idle_cpu(i)) + return i; } -unlock: - rcu_read_unlock(); return cpu; } /* @@ -1330,15 +1307,15 @@ calc_delta_mine(unsigned long delta_exec, unsigned long weight, { u64 tmp; - tmp = (u64)delta_exec * weight; - if (!lw->inv_weight) { if (BITS_PER_LONG > 32 && unlikely(lw->weight >= WMULT_CONST)) lw->inv_weight = 1; else - lw->inv_weight = WMULT_CONST / lw->weight; + lw->inv_weight = 1 + (WMULT_CONST-lw->weight/2) + / (lw->weight+1); } + tmp = (u64)delta_exec * weight; /* * Check whether we'd overflow the 64-bit multiplication: */ @@ -1796,6 +1773,7 @@ static void enqueue_task(struct rq *rq, struct task_struct *p, int flags) update_rq_clock(rq); sched_info_queued(p); p->sched_class->enqueue_task(rq, p, flags); + p->se.on_rq = 1; } static void dequeue_task(struct rq *rq, struct task_struct *p, int flags) @@ -1803,6 +1781,7 @@ static void dequeue_task(struct rq *rq, struct task_struct *p, int flags) update_rq_clock(rq); sched_info_dequeued(p); p->sched_class->dequeue_task(rq, p, flags); + p->se.on_rq = 0; } /* @@ -2137,7 +2116,7 @@ static void check_preempt_curr(struct rq *rq, struct task_struct *p, int flags) * A queue event has occurred, and we're going to schedule. In * this case, we can save a useless back to back clock update. */ - if (rq->curr->on_rq && test_tsk_need_resched(rq->curr)) + if (rq->curr->se.on_rq && test_tsk_need_resched(rq->curr)) rq->skip_clock_update = 1; } @@ -2183,11 +2162,6 @@ void set_task_cpu(struct task_struct *p, unsigned int new_cpu) */ WARN_ON_ONCE(p->state != TASK_RUNNING && p->state != TASK_WAKING && !(task_thread_info(p)->preempt_count & PREEMPT_ACTIVE)); - -#ifdef CONFIG_LOCKDEP - WARN_ON_ONCE(debug_locks && !(lockdep_is_held(&p->pi_lock) || - lockdep_is_held(&task_rq(p)->lock))); -#endif #endif trace_sched_migrate_task(p, new_cpu); @@ -2207,6 +2181,19 @@ struct migration_arg { static int migration_cpu_stop(void *data); +/* + * The task's runqueue lock must be held. + * Returns true if you have to wait for migration thread. + */ +static bool migrate_task(struct task_struct *p, struct rq *rq) +{ + /* + * If the task is not on a runqueue (and not running), then + * the next wake-up will properly place the task. + */ + return p->se.on_rq || task_running(rq, p); +} + /* * wait_task_inactive - wait for a thread to unschedule. * @@ -2264,11 +2251,11 @@ unsigned long wait_task_inactive(struct task_struct *p, long match_state) rq = task_rq_lock(p, &flags); trace_sched_wait_task(p); running = task_running(rq, p); - on_rq = p->on_rq; + on_rq = p->se.on_rq; ncsw = 0; if (!match_state || p->state == match_state) ncsw = p->nvcsw | LONG_MIN; /* sets MSB */ - task_rq_unlock(rq, p, &flags); + task_rq_unlock(rq, &flags); /* * If it changed from the expected state, bail out now. @@ -2343,7 +2330,7 @@ EXPORT_SYMBOL_GPL(kick_process); #ifdef CONFIG_SMP /* - * ->cpus_allowed is protected by both rq->lock and p->pi_lock + * ->cpus_allowed is protected by either TASK_WAKING or rq->lock held. */ static int select_fallback_rq(int cpu, struct task_struct *p) { @@ -2376,12 +2363,12 @@ static int select_fallback_rq(int cpu, struct task_struct *p) } /* - * The caller (fork, wakeup) owns p->pi_lock, ->cpus_allowed is stable. + * The caller (fork, wakeup) owns TASK_WAKING, ->cpus_allowed is stable. */ static inline -int select_task_rq(struct task_struct *p, int sd_flags, int wake_flags) +int select_task_rq(struct rq *rq, struct task_struct *p, int sd_flags, int wake_flags) { - int cpu = p->sched_class->select_task_rq(p, sd_flags, wake_flags); + int cpu = p->sched_class->select_task_rq(rq, p, sd_flags, wake_flags); /* * In order not to call set_task_cpu() on a blocking task we need @@ -2407,62 +2394,27 @@ static void update_avg(u64 *avg, u64 sample) } #endif -static void -ttwu_stat(struct task_struct *p, int cpu, int wake_flags) +static inline void ttwu_activate(struct task_struct *p, struct rq *rq, + bool is_sync, bool is_migrate, bool is_local, + unsigned long en_flags) { -#ifdef CONFIG_SCHEDSTATS - struct rq *rq = this_rq(); - -#ifdef CONFIG_SMP - int this_cpu = smp_processor_id(); - - if (cpu == this_cpu) { - schedstat_inc(rq, ttwu_local); - schedstat_inc(p, se.statistics.nr_wakeups_local); - } else { - struct sched_domain *sd; - - schedstat_inc(p, se.statistics.nr_wakeups_remote); - rcu_read_lock(); - for_each_domain(this_cpu, sd) { - if (cpumask_test_cpu(cpu, sched_domain_span(sd))) { - schedstat_inc(sd, ttwu_wake_remote); - break; - } - } - rcu_read_unlock(); - } -#endif /* CONFIG_SMP */ - - schedstat_inc(rq, ttwu_count); schedstat_inc(p, se.statistics.nr_wakeups); - - if (wake_flags & WF_SYNC) + if (is_sync) schedstat_inc(p, se.statistics.nr_wakeups_sync); - - if (cpu != task_cpu(p)) + if (is_migrate) schedstat_inc(p, se.statistics.nr_wakeups_migrate); + if (is_local) + schedstat_inc(p, se.statistics.nr_wakeups_local); + else + schedstat_inc(p, se.statistics.nr_wakeups_remote); -#endif /* CONFIG_SCHEDSTATS */ -} - -static void ttwu_activate(struct rq *rq, struct task_struct *p, int en_flags) -{ activate_task(rq, p, en_flags); - p->on_rq = 1; - - /* if a worker is waking up, notify workqueue */ - if (p->flags & PF_WQ_WORKER) - wq_worker_waking_up(p, cpu_of(rq)); } -/* - * Mark the task runnable and perform wakeup-preemption. - */ -static void -ttwu_do_wakeup(struct rq *rq, struct task_struct *p, int wake_flags) +static inline void ttwu_post_activation(struct task_struct *p, struct rq *rq, + int wake_flags, bool success) { - trace_sched_wakeup(p, true); + trace_sched_wakeup(p, success); check_preempt_curr(rq, p, wake_flags); p->state = TASK_RUNNING; @@ -2481,99 +2433,9 @@ ttwu_do_wakeup(struct rq *rq, struct task_struct *p, int wake_flags) rq->idle_stamp = 0; } #endif -} - -static void -ttwu_do_activate(struct rq *rq, struct task_struct *p, int wake_flags) -{ -#ifdef CONFIG_SMP - if (p->sched_contributes_to_load) - rq->nr_uninterruptible--; -#endif - - ttwu_activate(rq, p, ENQUEUE_WAKEUP | ENQUEUE_WAKING); - ttwu_do_wakeup(rq, p, wake_flags); -} - -/* - * Called in case the task @p isn't fully descheduled from its runqueue, - * in this case we must do a remote wakeup. Its a 'light' wakeup though, - * since all we need to do is flip p->state to TASK_RUNNING, since - * the task is still ->on_rq. - */ -static int ttwu_remote(struct task_struct *p, int wake_flags) -{ - struct rq *rq; - int ret = 0; - - rq = __task_rq_lock(p); - if (p->on_rq) { - ttwu_do_wakeup(rq, p, wake_flags); - ret = 1; - } - __task_rq_unlock(rq); - - return ret; -} - -#ifdef CONFIG_SMP -static void sched_ttwu_pending(void) -{ - struct rq *rq = this_rq(); - struct task_struct *list = xchg(&rq->wake_list, NULL); - - if (!list) - return; - - raw_spin_lock(&rq->lock); - - while (list) { - struct task_struct *p = list; - list = list->wake_entry; - ttwu_do_activate(rq, p, 0); - } - - raw_spin_unlock(&rq->lock); -} - -void scheduler_ipi(void) -{ - sched_ttwu_pending(); -} - -static void ttwu_queue_remote(struct task_struct *p, int cpu) -{ - struct rq *rq = cpu_rq(cpu); - struct task_struct *next = rq->wake_list; - - for (;;) { - struct task_struct *old = next; - - p->wake_entry = next; - next = cmpxchg(&rq->wake_list, old, p); - if (next == old) - break; - } - - if (!next) - smp_send_reschedule(cpu); -} -#endif - -static void ttwu_queue(struct task_struct *p, int cpu) -{ - struct rq *rq = cpu_rq(cpu); - -#if defined(CONFIG_SMP) && defined(CONFIG_SCHED_TTWU_QUEUE) - if (sched_feat(TTWU_QUEUE) && cpu != smp_processor_id()) { - ttwu_queue_remote(p, cpu); - return; - } -#endif - - raw_spin_lock(&rq->lock); - ttwu_do_activate(rq, p, 0); - raw_spin_unlock(&rq->lock); + /* if a worker is waking up, notify workqueue */ + if ((p->flags & PF_WQ_WORKER) && success) + wq_worker_waking_up(p, cpu_of(rq)); } /** @@ -2591,64 +2453,92 @@ static void ttwu_queue(struct task_struct *p, int cpu) * Returns %true if @p was woken up, %false if it was already running * or @state didn't match @p's state. */ -static int -try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags) +static int try_to_wake_up(struct task_struct *p, unsigned int state, + int wake_flags) { + int cpu, orig_cpu, this_cpu, success = 0; unsigned long flags; - int cpu, success = 0; + unsigned long en_flags = ENQUEUE_WAKEUP; + struct rq *rq; + + this_cpu = get_cpu(); smp_wmb(); - raw_spin_lock_irqsave(&p->pi_lock, flags); + rq = task_rq_lock(p, &flags); if (!(p->state & state)) goto out; - success = 1; /* we're going to change ->state */ - cpu = task_cpu(p); + if (p->se.on_rq) + goto out_running; - if (p->on_rq && ttwu_remote(p, wake_flags)) - goto stat; + cpu = task_cpu(p); + orig_cpu = cpu; #ifdef CONFIG_SMP + if (unlikely(task_running(rq, p))) + goto out_activate; + /* - * If the owning (remote) cpu is still in the middle of schedule() with - * this task as prev, wait until its done referencing the task. + * In order to handle concurrent wakeups and release the rq->lock + * we put the task in TASK_WAKING state. + * + * First fix up the nr_uninterruptible count: */ - while (p->on_cpu) { -#ifdef __ARCH_WANT_INTERRUPTS_ON_CTXSW - /* - * If called from interrupt context we could have landed in the - * middle of schedule(), in this case we should take care not - * to spin on ->on_cpu if p is current, since that would - * deadlock. - */ - if (p == current) { - ttwu_queue(p, cpu); - goto stat; - } -#endif - cpu_relax(); + if (task_contributes_to_load(p)) { + if (likely(cpu_online(orig_cpu))) + rq->nr_uninterruptible--; + else + this_rq()->nr_uninterruptible--; } - /* - * Pairs with the smp_wmb() in finish_lock_switch(). - */ - smp_rmb(); - - p->sched_contributes_to_load = !!task_contributes_to_load(p); p->state = TASK_WAKING; - if (p->sched_class->task_waking) - p->sched_class->task_waking(p); + if (p->sched_class->task_waking) { + p->sched_class->task_waking(rq, p); + en_flags |= ENQUEUE_WAKING; + } - cpu = select_task_rq(p, SD_BALANCE_WAKE, wake_flags); - if (task_cpu(p) != cpu) + cpu = select_task_rq(rq, p, SD_BALANCE_WAKE, wake_flags); + if (cpu != orig_cpu) set_task_cpu(p, cpu); -#endif /* CONFIG_SMP */ + __task_rq_unlock(rq); + + rq = cpu_rq(cpu); + raw_spin_lock(&rq->lock); + + /* + * We migrated the task without holding either rq->lock, however + * since the task is not on the task list itself, nobody else + * will try and migrate the task, hence the rq should match the + * cpu we just moved it to. + */ + WARN_ON(task_cpu(p) != cpu); + WARN_ON(p->state != TASK_WAKING); + +#ifdef CONFIG_SCHEDSTATS + schedstat_inc(rq, ttwu_count); + if (cpu == this_cpu) + schedstat_inc(rq, ttwu_local); + else { + struct sched_domain *sd; + for_each_domain(this_cpu, sd) { + if (cpumask_test_cpu(cpu, sched_domain_span(sd))) { + schedstat_inc(sd, ttwu_wake_remote); + break; + } + } + } +#endif /* CONFIG_SCHEDSTATS */ - ttwu_queue(p, cpu); -stat: - ttwu_stat(p, cpu, wake_flags); +out_activate: +#endif /* CONFIG_SMP */ + ttwu_activate(p, rq, wake_flags & WF_SYNC, orig_cpu != cpu, + cpu == this_cpu, en_flags); + success = 1; +out_running: + ttwu_post_activation(p, rq, wake_flags, success); out: - raw_spin_unlock_irqrestore(&p->pi_lock, flags); + task_rq_unlock(rq, &flags); + put_cpu(); return success; } @@ -2657,34 +2547,31 @@ try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags) * try_to_wake_up_local - try to wake up a local task with rq lock held * @p: the thread to be awakened * - * Put @p on the run-queue if it's not already there. The caller must + * Put @p on the run-queue if it's not already there. The caller must * ensure that this_rq() is locked, @p is bound to this_rq() and not - * the current task. + * the current task. this_rq() stays locked over invocation. */ static void try_to_wake_up_local(struct task_struct *p) { struct rq *rq = task_rq(p); + bool success = false; BUG_ON(rq != this_rq()); BUG_ON(p == current); lockdep_assert_held(&rq->lock); - if (!raw_spin_trylock(&p->pi_lock)) { - raw_spin_unlock(&rq->lock); - raw_spin_lock(&p->pi_lock); - raw_spin_lock(&rq->lock); - } - if (!(p->state & TASK_NORMAL)) - goto out; - - if (!p->on_rq) - ttwu_activate(rq, p, ENQUEUE_WAKEUP); + return; - ttwu_do_wakeup(rq, p, 0); - ttwu_stat(p, smp_processor_id(), 0); -out: - raw_spin_unlock(&p->pi_lock); + if (!p->se.on_rq) { + if (likely(!task_running(rq, p))) { + schedstat_inc(rq, ttwu_count); + schedstat_inc(rq, ttwu_local); + } + ttwu_activate(p, rq, false, false, true, ENQUEUE_WAKEUP); + success = true; + } + ttwu_post_activation(p, rq, 0, success); } /** @@ -2717,21 +2604,19 @@ int wake_up_state(struct task_struct *p, unsigned int state) */ static void __sched_fork(struct task_struct *p) { - p->on_rq = 0; - - p->se.on_rq = 0; p->se.exec_start = 0; p->se.sum_exec_runtime = 0; p->se.prev_sum_exec_runtime = 0; p->se.nr_migrations = 0; p->se.vruntime = 0; - INIT_LIST_HEAD(&p->se.group_node); #ifdef CONFIG_SCHEDSTATS memset(&p->se.statistics, 0, sizeof(p->se.statistics)); #endif INIT_LIST_HEAD(&p->rt.run_list); + p->se.on_rq = 0; + INIT_LIST_HEAD(&p->se.group_node); #ifdef CONFIG_PREEMPT_NOTIFIERS INIT_HLIST_HEAD(&p->preempt_notifiers); @@ -2741,9 +2626,8 @@ static void __sched_fork(struct task_struct *p) /* * fork()/clone()-time setup: */ -void sched_fork(struct task_struct *p) +void sched_fork(struct task_struct *p, int clone_flags) { - unsigned long flags; int cpu = get_cpu(); __sched_fork(p); @@ -2794,16 +2678,16 @@ void sched_fork(struct task_struct *p) * * Silence PROVE_RCU. */ - raw_spin_lock_irqsave(&p->pi_lock, flags); + rcu_read_lock(); set_task_cpu(p, cpu); - raw_spin_unlock_irqrestore(&p->pi_lock, flags); + rcu_read_unlock(); #if defined(CONFIG_SCHEDSTATS) || defined(CONFIG_TASK_DELAY_ACCT) if (likely(sched_info_on())) memset(&p->sched_info, 0, sizeof(p->sched_info)); #endif -#if defined(CONFIG_SMP) - p->on_cpu = 0; +#if defined(CONFIG_SMP) && defined(__ARCH_WANT_UNLOCKED_CTXSW) + p->oncpu = 0; #endif #ifdef CONFIG_PREEMPT /* Want to start with kernel preemption disabled. */ @@ -2823,31 +2707,41 @@ void sched_fork(struct task_struct *p) * that must be done for every newly created context, then puts the task * on the runqueue and wakes it. */ -void wake_up_new_task(struct task_struct *p) +void wake_up_new_task(struct task_struct *p, unsigned long clone_flags) { unsigned long flags; struct rq *rq; + int cpu __maybe_unused = get_cpu(); - raw_spin_lock_irqsave(&p->pi_lock, flags); #ifdef CONFIG_SMP + rq = task_rq_lock(p, &flags); + p->state = TASK_WAKING; + /* * Fork balancing, do it here and not earlier because: * - cpus_allowed can change in the fork path * - any previously selected cpu might disappear through hotplug + * + * We set TASK_WAKING so that select_task_rq() can drop rq->lock + * without people poking at ->cpus_allowed. */ - set_task_cpu(p, select_task_rq(p, SD_BALANCE_FORK, 0)); + cpu = select_task_rq(rq, p, SD_BALANCE_FORK, 0); + set_task_cpu(p, cpu); + + p->state = TASK_RUNNING; + task_rq_unlock(rq, &flags); #endif - rq = __task_rq_lock(p); + rq = task_rq_lock(p, &flags); activate_task(rq, p, 0); - p->on_rq = 1; - trace_sched_wakeup_new(p, true); + trace_sched_wakeup_new(p, 1); check_preempt_curr(rq, p, WF_FORK); #ifdef CONFIG_SMP if (p->sched_class->task_woken) p->sched_class->task_woken(rq, p); #endif - task_rq_unlock(rq, p, &flags); + task_rq_unlock(rq, &flags); + put_cpu(); } #ifdef CONFIG_PREEMPT_NOTIFIERS @@ -3556,22 +3450,27 @@ void sched_exec(void) { struct task_struct *p = current; unsigned long flags; + struct rq *rq; int dest_cpu; - raw_spin_lock_irqsave(&p->pi_lock, flags); - dest_cpu = p->sched_class->select_task_rq(p, SD_BALANCE_EXEC, 0); + rq = task_rq_lock(p, &flags); + dest_cpu = p->sched_class->select_task_rq(rq, p, SD_BALANCE_EXEC, 0); if (dest_cpu == smp_processor_id()) goto unlock; - if (likely(cpu_active(dest_cpu))) { + /* + * select_task_rq() can race against ->cpus_allowed + */ + if (cpumask_test_cpu(dest_cpu, &p->cpus_allowed) && + likely(cpu_active(dest_cpu)) && migrate_task(p, rq)) { struct migration_arg arg = { p, dest_cpu }; - raw_spin_unlock_irqrestore(&p->pi_lock, flags); - stop_one_cpu(task_cpu(p), migration_cpu_stop, &arg); + task_rq_unlock(rq, &flags); + stop_one_cpu(cpu_of(rq), migration_cpu_stop, &arg); return; } unlock: - raw_spin_unlock_irqrestore(&p->pi_lock, flags); + task_rq_unlock(rq, &flags); } #endif @@ -3608,7 +3507,7 @@ unsigned long long task_delta_exec(struct task_struct *p) rq = task_rq_lock(p, &flags); ns = do_task_delta_exec(p, rq); - task_rq_unlock(rq, p, &flags); + task_rq_unlock(rq, &flags); return ns; } @@ -3626,7 +3525,7 @@ unsigned long long task_sched_runtime(struct task_struct *p) rq = task_rq_lock(p, &flags); ns = p->se.sum_exec_runtime + do_task_delta_exec(p, rq); - task_rq_unlock(rq, p, &flags); + task_rq_unlock(rq, &flags); return ns; } @@ -3650,7 +3549,7 @@ unsigned long long thread_group_sched_runtime(struct task_struct *p) rq = task_rq_lock(p, &flags); thread_group_cputime(p, &totals); ns = totals.sum_exec_runtime + do_task_delta_exec(p, rq); - task_rq_unlock(rq, p, &flags); + task_rq_unlock(rq, &flags); return ns; } @@ -4004,6 +3903,9 @@ void thread_group_times(struct task_struct *p, cputime_t *ut, cputime_t *st) /* * This function gets called by the timer code, with HZ frequency. * We call it with interrupts disabled. + * + * It also gets called by the fork code, when changing the parent's + * timeslices. */ void scheduler_tick(void) { @@ -4123,11 +4025,17 @@ static inline void schedule_debug(struct task_struct *prev) profile_hit(SCHED_PROFILING, __builtin_return_address(0)); schedstat_inc(this_rq(), sched_count); +#ifdef CONFIG_SCHEDSTATS + if (unlikely(prev->lock_depth >= 0)) { + schedstat_inc(this_rq(), rq_sched_info.bkl_count); + schedstat_inc(prev, sched_info.bkl_count); + } +#endif } static void put_prev_task(struct rq *rq, struct task_struct *prev) { - if (prev->on_rq || rq->skip_clock_update < 0) + if (prev->se.on_rq) update_rq_clock(rq); prev->sched_class->put_prev_task(rq, prev); } @@ -4189,13 +4097,11 @@ asmlinkage void __sched schedule(void) if (unlikely(signal_pending_state(prev->state, prev))) { prev->state = TASK_RUNNING; } else { - deactivate_task(rq, prev, DEQUEUE_SLEEP); - prev->on_rq = 0; - /* - * If a worker went to sleep, notify and ask workqueue - * whether it wants to wake up a task to maintain - * concurrency. + * If a worker is going to sleep, notify and + * ask workqueue whether it wants to wake up a + * task to maintain concurrency. If so, wake + * up the task. */ if (prev->flags & PF_WQ_WORKER) { struct task_struct *to_wakeup; @@ -4204,10 +4110,11 @@ asmlinkage void __sched schedule(void) if (to_wakeup) try_to_wake_up_local(to_wakeup); } + deactivate_task(rq, prev, DEQUEUE_SLEEP); /* - * If we are going to sleep and we have plugged IO - * queued, make sure to submit it to avoid deadlocks. + * If we are going to sleep and we have plugged IO queued, make + * sure to submit it to avoid deadlocks. */ if (blk_needs_flush_plug(prev)) { raw_spin_unlock(&rq->lock); @@ -4254,53 +4161,70 @@ asmlinkage void __sched schedule(void) EXPORT_SYMBOL(schedule); #ifdef CONFIG_MUTEX_SPIN_ON_OWNER - -static inline bool owner_running(struct mutex *lock, struct task_struct *owner) -{ - bool ret = false; - - rcu_read_lock(); - if (lock->owner != owner) - goto fail; - - /* - * Ensure we emit the owner->on_cpu, dereference _after_ checking - * lock->owner still matches owner, if that fails, owner might - * point to free()d memory, if it still matches, the rcu_read_lock() - * ensures the memory stays valid. - */ - barrier(); - - ret = owner->on_cpu; -fail: - rcu_read_unlock(); - - return ret; -} - /* * Look out! "owner" is an entirely speculative pointer * access and not reliable. */ -int mutex_spin_on_owner(struct mutex *lock, struct task_struct *owner) +int mutex_spin_on_owner(struct mutex *lock, struct thread_info *owner) { + unsigned int cpu; + struct rq *rq; + if (!sched_feat(OWNER_SPIN)) return 0; - while (owner_running(lock, owner)) { - if (need_resched()) - return 0; +#ifdef CONFIG_DEBUG_PAGEALLOC + /* + * Need to access the cpu field knowing that + * DEBUG_PAGEALLOC could have unmapped it if + * the mutex owner just released it and exited. + */ + if (probe_kernel_address(&owner->cpu, cpu)) + return 0; +#else + cpu = owner->cpu; +#endif - arch_mutex_cpu_relax(); - } + /* + * Even if the access succeeded (likely case), + * the cpu field may no longer be valid. + */ + if (cpu >= nr_cpumask_bits) + return 0; /* - * If the owner changed to another task there is likely - * heavy contention, stop spinning. + * We need to validate that we can do a + * get_cpu() and that we have the percpu area. */ - if (lock->owner) + if (!cpu_online(cpu)) return 0; + rq = cpu_rq(cpu); + + for (;;) { + /* + * Owner changed, break to re-assess state. + */ + if (lock->owner != owner) { + /* + * If the lock has switched to a different owner, + * we likely have heavy contention. Return 0 to quit + * optimistic spinning and not contend further: + */ + if (lock->owner) + return 0; + break; + } + + /* + * Is that owner really running on that cpu? + */ + if (task_thread_info(rq->curr) != owner || need_resched()) + return 0; + + arch_mutex_cpu_relax(); + } + return 1; } #endif @@ -4760,18 +4684,19 @@ EXPORT_SYMBOL(sleep_on_timeout); */ void rt_mutex_setprio(struct task_struct *p, int prio) { + unsigned long flags; int oldprio, on_rq, running; struct rq *rq; const struct sched_class *prev_class; BUG_ON(prio < 0 || prio > MAX_PRIO); - rq = __task_rq_lock(p); + rq = task_rq_lock(p, &flags); trace_sched_pi_setprio(p, prio); oldprio = p->prio; prev_class = p->sched_class; - on_rq = p->on_rq; + on_rq = p->se.on_rq; running = task_current(rq, p); if (on_rq) dequeue_task(rq, p, 0); @@ -4791,7 +4716,7 @@ void rt_mutex_setprio(struct task_struct *p, int prio) enqueue_task(rq, p, oldprio < prio ? ENQUEUE_HEAD : 0); check_class_changed(rq, p, prev_class, oldprio); - __task_rq_unlock(rq); + task_rq_unlock(rq, &flags); } #endif @@ -4819,7 +4744,7 @@ void set_user_nice(struct task_struct *p, long nice) p->static_prio = NICE_TO_PRIO(nice); goto out_unlock; } - on_rq = p->on_rq; + on_rq = p->se.on_rq; if (on_rq) dequeue_task(rq, p, 0); @@ -4839,7 +4764,7 @@ void set_user_nice(struct task_struct *p, long nice) resched_task(rq->curr); } out_unlock: - task_rq_unlock(rq, p, &flags); + task_rq_unlock(rq, &flags); } EXPORT_SYMBOL(set_user_nice); @@ -4953,6 +4878,8 @@ static struct task_struct *find_process_by_pid(pid_t pid) static void __setscheduler(struct rq *rq, struct task_struct *p, int policy, int prio) { + BUG_ON(p->se.on_rq); + p->policy = policy; p->rt_priority = prio; p->normal_prio = normal_prio(p); @@ -5067,17 +4994,20 @@ static int __sched_setscheduler(struct task_struct *p, int policy, /* * make sure no PI-waiters arrive (or leave) while we are * changing the priority of the task: - * + */ + raw_spin_lock_irqsave(&p->pi_lock, flags); + /* * To be able to change p->policy safely, the appropriate * runqueue lock must be held. */ - rq = task_rq_lock(p, &flags); + rq = __task_rq_lock(p); /* * Changing the policy of the stop threads its a very bad idea */ if (p == rq->stop) { - task_rq_unlock(rq, p, &flags); + __task_rq_unlock(rq); + raw_spin_unlock_irqrestore(&p->pi_lock, flags); return -EINVAL; } @@ -5101,7 +5031,8 @@ static int __sched_setscheduler(struct task_struct *p, int policy, if (rt_bandwidth_enabled() && rt_policy(policy) && task_group(p)->rt_bandwidth.rt_runtime == 0 && !task_group_is_autogroup(task_group(p))) { - task_rq_unlock(rq, p, &flags); + __task_rq_unlock(rq); + raw_spin_unlock_irqrestore(&p->pi_lock, flags); return -EPERM; } } @@ -5110,10 +5041,11 @@ static int __sched_setscheduler(struct task_struct *p, int policy, /* recheck policy now with rq lock held */ if (unlikely(oldpolicy != -1 && oldpolicy != p->policy)) { policy = oldpolicy = -1; - task_rq_unlock(rq, p, &flags); + __task_rq_unlock(rq); + raw_spin_unlock_irqrestore(&p->pi_lock, flags); goto recheck; } - on_rq = p->on_rq; + on_rq = p->se.on_rq; running = task_current(rq, p); if (on_rq) deactivate_task(rq, p, 0); @@ -5132,7 +5064,8 @@ static int __sched_setscheduler(struct task_struct *p, int policy, activate_task(rq, p, 0); check_class_changed(rq, p, prev_class, oldprio); - task_rq_unlock(rq, p, &flags); + __task_rq_unlock(rq); + raw_spin_unlock_irqrestore(&p->pi_lock, flags); rt_mutex_adjust_pi(p); @@ -5383,6 +5316,7 @@ long sched_getaffinity(pid_t pid, struct cpumask *mask) { struct task_struct *p; unsigned long flags; + struct rq *rq; int retval; get_online_cpus(); @@ -5397,9 +5331,9 @@ long sched_getaffinity(pid_t pid, struct cpumask *mask) if (retval) goto out_unlock; - raw_spin_lock_irqsave(&p->pi_lock, flags); + rq = task_rq_lock(p, &flags); cpumask_and(mask, &p->cpus_allowed, cpu_online_mask); - raw_spin_unlock_irqrestore(&p->pi_lock, flags); + task_rq_unlock(rq, &flags); out_unlock: rcu_read_unlock(); @@ -5724,7 +5658,7 @@ SYSCALL_DEFINE2(sched_rr_get_interval, pid_t, pid, rq = task_rq_lock(p, &flags); time_slice = p->sched_class->get_rr_interval(rq, p); - task_rq_unlock(rq, p, &flags); + task_rq_unlock(rq, &flags); rcu_read_unlock(); jiffies_to_timespec(time_slice, &t); @@ -5842,14 +5776,17 @@ void __cpuinit init_idle(struct task_struct *idle, int cpu) rcu_read_unlock(); rq->curr = rq->idle = idle; -#if defined(CONFIG_SMP) - idle->on_cpu = 1; +#if defined(CONFIG_SMP) && defined(__ARCH_WANT_UNLOCKED_CTXSW) + idle->oncpu = 1; #endif raw_spin_unlock_irqrestore(&rq->lock, flags); /* Set the preempt count _outside_ the spinlocks! */ +#if defined(CONFIG_PREEMPT) + task_thread_info(idle)->preempt_count = (idle->lock_depth >= 0); +#else task_thread_info(idle)->preempt_count = 0; - +#endif /* * The idle tasks have their own, simple scheduling class: */ @@ -5944,17 +5881,26 @@ int set_cpus_allowed_ptr(struct task_struct *p, const struct cpumask *new_mask) unsigned int dest_cpu; int ret = 0; + /* + * Serialize against TASK_WAKING so that ttwu() and wunt() can + * drop the rq->lock and still rely on ->cpus_allowed. + */ +again: + while (task_is_waking(p)) + cpu_relax(); rq = task_rq_lock(p, &flags); - - if (cpumask_equal(&p->cpus_allowed, new_mask)) - goto out; + if (task_is_waking(p)) { + task_rq_unlock(rq, &flags); + goto again; + } if (!cpumask_intersects(new_mask, cpu_active_mask)) { ret = -EINVAL; goto out; } - if (unlikely((p->flags & PF_THREAD_BOUND) && p != current)) { + if (unlikely((p->flags & PF_THREAD_BOUND) && p != current && + !cpumask_equal(&p->cpus_allowed, new_mask))) { ret = -EINVAL; goto out; } @@ -5971,16 +5917,16 @@ int set_cpus_allowed_ptr(struct task_struct *p, const struct cpumask *new_mask) goto out; dest_cpu = cpumask_any_and(cpu_active_mask, new_mask); - if (p->on_rq) { + if (migrate_task(p, rq)) { struct migration_arg arg = { p, dest_cpu }; /* Need help from migration thread: drop lock and wait. */ - task_rq_unlock(rq, p, &flags); + task_rq_unlock(rq, &flags); stop_one_cpu(cpu_of(rq), migration_cpu_stop, &arg); tlb_migrate_finish(p->mm); return 0; } out: - task_rq_unlock(rq, p, &flags); + task_rq_unlock(rq, &flags); return ret; } @@ -6008,7 +5954,6 @@ static int __migrate_task(struct task_struct *p, int src_cpu, int dest_cpu) rq_src = cpu_rq(src_cpu); rq_dest = cpu_rq(dest_cpu); - raw_spin_lock(&p->pi_lock); double_rq_lock(rq_src, rq_dest); /* Already moved. */ if (task_cpu(p) != src_cpu) @@ -6021,7 +5966,7 @@ static int __migrate_task(struct task_struct *p, int src_cpu, int dest_cpu) * If we're not on a rq, the next wake-up will ensure we're * placed properly. */ - if (p->on_rq) { + if (p->se.on_rq) { deactivate_task(rq_src, p, 0); set_task_cpu(p, dest_cpu); activate_task(rq_dest, p, 0); @@ -6031,7 +5976,6 @@ static int __migrate_task(struct task_struct *p, int src_cpu, int dest_cpu) ret = 1; fail: double_rq_unlock(rq_src, rq_dest); - raw_spin_unlock(&p->pi_lock); return ret; } @@ -6372,7 +6316,6 @@ migration_call(struct notifier_block *nfb, unsigned long action, void *hcpu) #ifdef CONFIG_HOTPLUG_CPU case CPU_DYING: - sched_ttwu_pending(); /* Update our root-domain */ raw_spin_lock_irqsave(&rq->lock, flags); if (rq->rd) { @@ -6451,8 +6394,6 @@ early_initcall(migration_init); #ifdef CONFIG_SMP -static cpumask_var_t sched_domains_tmpmask; /* sched_domains_mutex */ - #ifdef CONFIG_SCHED_DEBUG static __read_mostly int sched_domain_debug_enabled; @@ -6548,6 +6489,7 @@ static int sched_domain_debug_one(struct sched_domain *sd, int cpu, int level, static void sched_domain_debug(struct sched_domain *sd, int cpu) { + cpumask_var_t groupmask; int level = 0; if (!sched_domain_debug_enabled) @@ -6560,14 +6502,20 @@ static void sched_domain_debug(struct sched_domain *sd, int cpu) printk(KERN_DEBUG "CPU%d attaching sched-domain:\n", cpu); + if (!alloc_cpumask_var(&groupmask, GFP_KERNEL)) { + printk(KERN_DEBUG "Cannot load-balance (out of memory)\n"); + return; + } + for (;;) { - if (sched_domain_debug_one(sd, cpu, level, sched_domains_tmpmask)) + if (sched_domain_debug_one(sd, cpu, level, groupmask)) break; level++; sd = sd->parent; if (!sd) break; } + free_cpumask_var(groupmask); } #else /* !CONFIG_SCHED_DEBUG */ # define sched_domain_debug(sd, cpu) do { } while (0) @@ -6624,11 +6572,12 @@ sd_parent_degenerate(struct sched_domain *sd, struct sched_domain *parent) return 1; } -static void free_rootdomain(struct rcu_head *rcu) +static void free_rootdomain(struct root_domain *rd) { - struct root_domain *rd = container_of(rcu, struct root_domain, rcu); + synchronize_sched(); cpupri_cleanup(&rd->cpupri); + free_cpumask_var(rd->rto_mask); free_cpumask_var(rd->online); free_cpumask_var(rd->span); @@ -6669,7 +6618,7 @@ static void rq_attach_root(struct rq *rq, struct root_domain *rd) raw_spin_unlock_irqrestore(&rq->lock, flags); if (old_rd) - call_rcu_sched(&old_rd->rcu, free_rootdomain); + free_rootdomain(old_rd); } static int init_rootdomain(struct root_domain *rd) @@ -6720,25 +6669,6 @@ static struct root_domain *alloc_rootdomain(void) return rd; } -static void free_sched_domain(struct rcu_head *rcu) -{ - struct sched_domain *sd = container_of(rcu, struct sched_domain, rcu); - if (atomic_dec_and_test(&sd->groups->ref)) - kfree(sd->groups); - kfree(sd); -} - -static void destroy_sched_domain(struct sched_domain *sd, int cpu) -{ - call_rcu(&sd->rcu, free_sched_domain); -} - -static void destroy_sched_domains(struct sched_domain *sd, int cpu) -{ - for (; sd; sd = sd->parent) - destroy_sched_domain(sd, cpu); -} - /* * Attach the domain 'sd' to 'cpu' as its base domain. Callers must * hold the hotplug lock. @@ -6749,6 +6679,9 @@ cpu_attach_domain(struct sched_domain *sd, struct root_domain *rd, int cpu) struct rq *rq = cpu_rq(cpu); struct sched_domain *tmp; + for (tmp = sd; tmp; tmp = tmp->parent) + tmp->span_weight = cpumask_weight(sched_domain_span(tmp)); + /* Remove the sched domains which do not contribute to scheduling. */ for (tmp = sd; tmp; ) { struct sched_domain *parent = tmp->parent; @@ -6759,15 +6692,12 @@ cpu_attach_domain(struct sched_domain *sd, struct root_domain *rd, int cpu) tmp->parent = parent->parent; if (parent->parent) parent->parent->child = tmp; - destroy_sched_domain(parent, cpu); } else tmp = tmp->parent; } if (sd && sd_degenerate(sd)) { - tmp = sd; sd = sd->parent; - destroy_sched_domain(tmp, cpu); if (sd) sd->child = NULL; } @@ -6775,9 +6705,7 @@ cpu_attach_domain(struct sched_domain *sd, struct root_domain *rd, int cpu) sched_domain_debug(sd, cpu); rq_attach_root(rq, rd); - tmp = rq->sd; rcu_assign_pointer(rq->sd, sd); - destroy_sched_domains(tmp, cpu); } /* cpus with isolated domains */ @@ -6793,6 +6721,56 @@ static int __init isolated_cpu_setup(char *str) __setup("isolcpus=", isolated_cpu_setup); +/* + * init_sched_build_groups takes the cpumask we wish to span, and a pointer + * to a function which identifies what group(along with sched group) a CPU + * belongs to. The return value of group_fn must be a >= 0 and < nr_cpu_ids + * (due to the fact that we keep track of groups covered with a struct cpumask). + * + * init_sched_build_groups will build a circular linked list of the groups + * covered by the given span, and will set each group's ->cpumask correctly, + * and ->cpu_power to 0. + */ +static void +init_sched_build_groups(const struct cpumask *span, + const struct cpumask *cpu_map, + int (*group_fn)(int cpu, const struct cpumask *cpu_map, + struct sched_group **sg, + struct cpumask *tmpmask), + struct cpumask *covered, struct cpumask *tmpmask) +{ + struct sched_group *first = NULL, *last = NULL; + int i; + + cpumask_clear(covered); + + for_each_cpu(i, span) { + struct sched_group *sg; + int group = group_fn(i, cpu_map, &sg, tmpmask); + int j; + + if (cpumask_test_cpu(i, covered)) + continue; + + cpumask_clear(sched_group_cpus(sg)); + sg->cpu_power = 0; + + for_each_cpu(j, span) { + if (group_fn(j, cpu_map, NULL, tmpmask) != group) + continue; + + cpumask_set_cpu(j, covered); + cpumask_set_cpu(j, sched_group_cpus(sg)); + } + if (!first) + first = sg; + if (last) + last->next = sg; + last = sg; + } + last->next = first; +} + #define SD_NODES_PER_DOMAIN 16 #ifdef CONFIG_NUMA @@ -6809,7 +6787,7 @@ __setup("isolcpus=", isolated_cpu_setup); */ static int find_next_best_node(int node, nodemask_t *used_nodes) { - int i, n, val, min_val, best_node = -1; + int i, n, val, min_val, best_node = 0; min_val = INT_MAX; @@ -6833,8 +6811,7 @@ static int find_next_best_node(int node, nodemask_t *used_nodes) } } - if (best_node != -1) - node_set(best_node, *used_nodes); + node_set(best_node, *used_nodes); return best_node; } @@ -6860,130 +6837,315 @@ static void sched_domain_node_span(int node, struct cpumask *span) for (i = 1; i < SD_NODES_PER_DOMAIN; i++) { int next_node = find_next_best_node(node, &used_nodes); - if (next_node < 0) - break; + cpumask_or(span, span, cpumask_of_node(next_node)); } } - -static const struct cpumask *cpu_node_mask(int cpu) -{ - lockdep_assert_held(&sched_domains_mutex); - - sched_domain_node_span(cpu_to_node(cpu), sched_domains_tmpmask); - - return sched_domains_tmpmask; -} - -static const struct cpumask *cpu_allnodes_mask(int cpu) -{ - return cpu_possible_mask; -} #endif /* CONFIG_NUMA */ -static const struct cpumask *cpu_cpu_mask(int cpu) -{ - return cpumask_of_node(cpu_to_node(cpu)); -} - int sched_smt_power_savings = 0, sched_mc_power_savings = 0; -struct sd_data { - struct sched_domain **__percpu sd; - struct sched_group **__percpu sg; +/* + * The cpus mask in sched_group and sched_domain hangs off the end. + * + * ( See the the comments in include/linux/sched.h:struct sched_group + * and struct sched_domain. ) + */ +struct static_sched_group { + struct sched_group sg; + DECLARE_BITMAP(cpus, CONFIG_NR_CPUS); +}; + +struct static_sched_domain { + struct sched_domain sd; + DECLARE_BITMAP(span, CONFIG_NR_CPUS); }; struct s_data { - struct sched_domain ** __percpu sd; +#ifdef CONFIG_NUMA + int sd_allnodes; + cpumask_var_t domainspan; + cpumask_var_t covered; + cpumask_var_t notcovered; +#endif + cpumask_var_t nodemask; + cpumask_var_t this_sibling_map; + cpumask_var_t this_core_map; + cpumask_var_t this_book_map; + cpumask_var_t send_covered; + cpumask_var_t tmpmask; + struct sched_group **sched_group_nodes; struct root_domain *rd; }; enum s_alloc { + sa_sched_groups = 0, sa_rootdomain, - sa_sd, - sa_sd_storage, + sa_tmpmask, + sa_send_covered, + sa_this_book_map, + sa_this_core_map, + sa_this_sibling_map, + sa_nodemask, + sa_sched_group_nodes, +#ifdef CONFIG_NUMA + sa_notcovered, + sa_covered, + sa_domainspan, +#endif sa_none, }; -struct sched_domain_topology_level; - -typedef struct sched_domain *(*sched_domain_init_f)(struct sched_domain_topology_level *tl, int cpu); -typedef const struct cpumask *(*sched_domain_mask_f)(int cpu); +/* + * SMT sched-domains: + */ +#ifdef CONFIG_SCHED_SMT +static DEFINE_PER_CPU(struct static_sched_domain, cpu_domains); +static DEFINE_PER_CPU(struct static_sched_group, sched_groups); -struct sched_domain_topology_level { - sched_domain_init_f init; - sched_domain_mask_f mask; - struct sd_data data; -}; +static int +cpu_to_cpu_group(int cpu, const struct cpumask *cpu_map, + struct sched_group **sg, struct cpumask *unused) +{ + if (sg) + *sg = &per_cpu(sched_groups, cpu).sg; + return cpu; +} +#endif /* CONFIG_SCHED_SMT */ /* - * Assumes the sched_domain tree is fully constructed + * multi-core sched-domains: */ -static int get_group(int cpu, struct sd_data *sdd, struct sched_group **sg) +#ifdef CONFIG_SCHED_MC +static DEFINE_PER_CPU(struct static_sched_domain, core_domains); +static DEFINE_PER_CPU(struct static_sched_group, sched_group_core); + +static int +cpu_to_core_group(int cpu, const struct cpumask *cpu_map, + struct sched_group **sg, struct cpumask *mask) { - struct sched_domain *sd = *per_cpu_ptr(sdd->sd, cpu); - struct sched_domain *child = sd->child; + int group; +#ifdef CONFIG_SCHED_SMT + cpumask_and(mask, topology_thread_cpumask(cpu), cpu_map); + group = cpumask_first(mask); +#else + group = cpu; +#endif + if (sg) + *sg = &per_cpu(sched_group_core, group).sg; + return group; +} +#endif /* CONFIG_SCHED_MC */ - if (child) - cpu = cpumask_first(sched_domain_span(child)); +/* + * book sched-domains: + */ +#ifdef CONFIG_SCHED_BOOK +static DEFINE_PER_CPU(struct static_sched_domain, book_domains); +static DEFINE_PER_CPU(struct static_sched_group, sched_group_book); +static int +cpu_to_book_group(int cpu, const struct cpumask *cpu_map, + struct sched_group **sg, struct cpumask *mask) +{ + int group = cpu; +#ifdef CONFIG_SCHED_MC + cpumask_and(mask, cpu_coregroup_mask(cpu), cpu_map); + group = cpumask_first(mask); +#elif defined(CONFIG_SCHED_SMT) + cpumask_and(mask, topology_thread_cpumask(cpu), cpu_map); + group = cpumask_first(mask); +#endif if (sg) - *sg = *per_cpu_ptr(sdd->sg, cpu); + *sg = &per_cpu(sched_group_book, group).sg; + return group; +} +#endif /* CONFIG_SCHED_BOOK */ - return cpu; +static DEFINE_PER_CPU(struct static_sched_domain, phys_domains); +static DEFINE_PER_CPU(struct static_sched_group, sched_group_phys); + +static int +cpu_to_phys_group(int cpu, const struct cpumask *cpu_map, + struct sched_group **sg, struct cpumask *mask) +{ + int group; +#ifdef CONFIG_SCHED_BOOK + cpumask_and(mask, cpu_book_mask(cpu), cpu_map); + group = cpumask_first(mask); +#elif defined(CONFIG_SCHED_MC) + cpumask_and(mask, cpu_coregroup_mask(cpu), cpu_map); + group = cpumask_first(mask); +#elif defined(CONFIG_SCHED_SMT) + cpumask_and(mask, topology_thread_cpumask(cpu), cpu_map); + group = cpumask_first(mask); +#else + group = cpu; +#endif + if (sg) + *sg = &per_cpu(sched_group_phys, group).sg; + return group; } +#ifdef CONFIG_NUMA /* - * build_sched_groups takes the cpumask we wish to span, and a pointer - * to a function which identifies what group(along with sched group) a CPU - * belongs to. The return value of group_fn must be a >= 0 and < nr_cpu_ids - * (due to the fact that we keep track of groups covered with a struct cpumask). - * - * build_sched_groups will build a circular linked list of the groups - * covered by the given span, and will set each group's ->cpumask correctly, - * and ->cpu_power to 0. + * The init_sched_build_groups can't handle what we want to do with node + * groups, so roll our own. Now each node has its own list of groups which + * gets dynamically allocated. */ -static void -build_sched_groups(struct sched_domain *sd) +static DEFINE_PER_CPU(struct static_sched_domain, node_domains); +static struct sched_group ***sched_group_nodes_bycpu; + +static DEFINE_PER_CPU(struct static_sched_domain, allnodes_domains); +static DEFINE_PER_CPU(struct static_sched_group, sched_group_allnodes); + +static int cpu_to_allnodes_group(int cpu, const struct cpumask *cpu_map, + struct sched_group **sg, + struct cpumask *nodemask) { - struct sched_group *first = NULL, *last = NULL; - struct sd_data *sdd = sd->private; - const struct cpumask *span = sched_domain_span(sd); - struct cpumask *covered; - int i; + int group; - lockdep_assert_held(&sched_domains_mutex); - covered = sched_domains_tmpmask; + cpumask_and(nodemask, cpumask_of_node(cpu_to_node(cpu)), cpu_map); + group = cpumask_first(nodemask); - cpumask_clear(covered); + if (sg) + *sg = &per_cpu(sched_group_allnodes, group).sg; + return group; +} - for_each_cpu(i, span) { - struct sched_group *sg; - int group = get_group(i, sdd, &sg); - int j; +static void init_numa_sched_groups_power(struct sched_group *group_head) +{ + struct sched_group *sg = group_head; + int j; - if (cpumask_test_cpu(i, covered)) - continue; + if (!sg) + return; + do { + for_each_cpu(j, sched_group_cpus(sg)) { + struct sched_domain *sd; - cpumask_clear(sched_group_cpus(sg)); + sd = &per_cpu(phys_domains, j).sd; + if (j != group_first_cpu(sd->groups)) { + /* + * Only add "power" once for each + * physical package. + */ + continue; + } + + sg->cpu_power += sd->groups->cpu_power; + } + sg = sg->next; + } while (sg != group_head); +} + +static int build_numa_sched_groups(struct s_data *d, + const struct cpumask *cpu_map, int num) +{ + struct sched_domain *sd; + struct sched_group *sg, *prev; + int n, j; + + cpumask_clear(d->covered); + cpumask_and(d->nodemask, cpumask_of_node(num), cpu_map); + if (cpumask_empty(d->nodemask)) { + d->sched_group_nodes[num] = NULL; + goto out; + } + + sched_domain_node_span(num, d->domainspan); + cpumask_and(d->domainspan, d->domainspan, cpu_map); + + sg = kmalloc_node(sizeof(struct sched_group) + cpumask_size(), + GFP_KERNEL, num); + if (!sg) { + printk(KERN_WARNING "Can not alloc domain group for node %d\n", + num); + return -ENOMEM; + } + d->sched_group_nodes[num] = sg; + + for_each_cpu(j, d->nodemask) { + sd = &per_cpu(node_domains, j).sd; + sd->groups = sg; + } + + sg->cpu_power = 0; + cpumask_copy(sched_group_cpus(sg), d->nodemask); + sg->next = sg; + cpumask_or(d->covered, d->covered, d->nodemask); + + prev = sg; + for (j = 0; j < nr_node_ids; j++) { + n = (num + j) % nr_node_ids; + cpumask_complement(d->notcovered, d->covered); + cpumask_and(d->tmpmask, d->notcovered, cpu_map); + cpumask_and(d->tmpmask, d->tmpmask, d->domainspan); + if (cpumask_empty(d->tmpmask)) + break; + cpumask_and(d->tmpmask, d->tmpmask, cpumask_of_node(n)); + if (cpumask_empty(d->tmpmask)) + continue; + sg = kmalloc_node(sizeof(struct sched_group) + cpumask_size(), + GFP_KERNEL, num); + if (!sg) { + printk(KERN_WARNING + "Can not alloc domain group for node %d\n", j); + return -ENOMEM; + } sg->cpu_power = 0; + cpumask_copy(sched_group_cpus(sg), d->tmpmask); + sg->next = prev->next; + cpumask_or(d->covered, d->covered, d->tmpmask); + prev->next = sg; + prev = sg; + } +out: + return 0; +} +#endif /* CONFIG_NUMA */ - for_each_cpu(j, span) { - if (get_group(j, sdd, NULL) != group) +#ifdef CONFIG_NUMA +/* Free memory allocated for various sched_group structures */ +static void free_sched_groups(const struct cpumask *cpu_map, + struct cpumask *nodemask) +{ + int cpu, i; + + for_each_cpu(cpu, cpu_map) { + struct sched_group **sched_group_nodes + = sched_group_nodes_bycpu[cpu]; + + if (!sched_group_nodes) + continue; + + for (i = 0; i < nr_node_ids; i++) { + struct sched_group *oldsg, *sg = sched_group_nodes[i]; + + cpumask_and(nodemask, cpumask_of_node(i), cpu_map); + if (cpumask_empty(nodemask)) continue; - cpumask_set_cpu(j, covered); - cpumask_set_cpu(j, sched_group_cpus(sg)); + if (sg == NULL) + continue; + sg = sg->next; +next_sg: + oldsg = sg; + sg = sg->next; + kfree(oldsg); + if (oldsg != sched_group_nodes[i]) + goto next_sg; } - - if (!first) - first = sg; - if (last) - last->next = sg; - last = sg; + kfree(sched_group_nodes); + sched_group_nodes_bycpu[cpu] = NULL; } - last->next = first; } +#else /* !CONFIG_NUMA */ +static void free_sched_groups(const struct cpumask *cpu_map, + struct cpumask *nodemask) +{ +} +#endif /* CONFIG_NUMA */ /* * Initialize sched groups cpu_power. @@ -6997,6 +7159,11 @@ build_sched_groups(struct sched_domain *sd) */ static void init_sched_groups_power(int cpu, struct sched_domain *sd) { + struct sched_domain *child; + struct sched_group *group; + long power; + int weight; + WARN_ON(!sd || !sd->groups); if (cpu != group_first_cpu(sd->groups)) @@ -7004,7 +7171,36 @@ static void init_sched_groups_power(int cpu, struct sched_domain *sd) sd->groups->group_weight = cpumask_weight(sched_group_cpus(sd->groups)); - update_group_power(sd, cpu); + child = sd->child; + + sd->groups->cpu_power = 0; + + if (!child) { + power = SCHED_LOAD_SCALE; + weight = cpumask_weight(sched_domain_span(sd)); + /* + * SMT siblings share the power of a single core. + * Usually multiple threads get a better yield out of + * that one core than a single thread would have, + * reflect that in sd->smt_gain. + */ + if ((sd->flags & SD_SHARE_CPUPOWER) && weight > 1) { + power *= sd->smt_gain; + power /= weight; + power >>= SCHED_LOAD_SHIFT; + } + sd->groups->cpu_power += power; + return; + } + + /* + * Add cpu_power of each child group to this groups cpu_power. + */ + group = child->groups; + do { + sd->groups->cpu_power += group->cpu_power; + group = group->next; + } while (group != child->groups); } /* @@ -7018,15 +7214,15 @@ static void init_sched_groups_power(int cpu, struct sched_domain *sd) # define SD_INIT_NAME(sd, type) do { } while (0) #endif -#define SD_INIT_FUNC(type) \ -static noinline struct sched_domain * \ -sd_init_##type(struct sched_domain_topology_level *tl, int cpu) \ -{ \ - struct sched_domain *sd = *per_cpu_ptr(tl->data.sd, cpu); \ - *sd = SD_##type##_INIT; \ - SD_INIT_NAME(sd, type); \ - sd->private = &tl->data; \ - return sd; \ +#define SD_INIT(sd, type) sd_init_##type(sd) + +#define SD_INIT_FUNC(type) \ +static noinline void sd_init_##type(struct sched_domain *sd) \ +{ \ + memset(sd, 0, sizeof(*sd)); \ + *sd = SD_##type##_INIT; \ + sd->level = SD_LV_##type; \ + SD_INIT_NAME(sd, type); \ } SD_INIT_FUNC(CPU) @@ -7045,14 +7241,13 @@ SD_INIT_FUNC(CPU) #endif static int default_relax_domain_level = -1; -int sched_domain_level_max; static int __init setup_relax_domain_level(char *str) { unsigned long val; val = simple_strtoul(str, NULL, 0); - if (val < sched_domain_level_max) + if (val < SD_LV_MAX) default_relax_domain_level = val; return 1; @@ -7080,20 +7275,37 @@ static void set_domain_attribute(struct sched_domain *sd, } } -static void __sdt_free(const struct cpumask *cpu_map); -static int __sdt_alloc(const struct cpumask *cpu_map); - static void __free_domain_allocs(struct s_data *d, enum s_alloc what, const struct cpumask *cpu_map) { switch (what) { + case sa_sched_groups: + free_sched_groups(cpu_map, d->tmpmask); /* fall through */ + d->sched_group_nodes = NULL; case sa_rootdomain: - if (!atomic_read(&d->rd->refcount)) - free_rootdomain(&d->rd->rcu); /* fall through */ - case sa_sd: - free_percpu(d->sd); /* fall through */ - case sa_sd_storage: - __sdt_free(cpu_map); /* fall through */ + free_rootdomain(d->rd); /* fall through */ + case sa_tmpmask: + free_cpumask_var(d->tmpmask); /* fall through */ + case sa_send_covered: + free_cpumask_var(d->send_covered); /* fall through */ + case sa_this_book_map: + free_cpumask_var(d->this_book_map); /* fall through */ + case sa_this_core_map: + free_cpumask_var(d->this_core_map); /* fall through */ + case sa_this_sibling_map: + free_cpumask_var(d->this_sibling_map); /* fall through */ + case sa_nodemask: + free_cpumask_var(d->nodemask); /* fall through */ + case sa_sched_group_nodes: +#ifdef CONFIG_NUMA + kfree(d->sched_group_nodes); /* fall through */ + case sa_notcovered: + free_cpumask_var(d->notcovered); /* fall through */ + case sa_covered: + free_cpumask_var(d->covered); /* fall through */ + case sa_domainspan: + free_cpumask_var(d->domainspan); /* fall through */ +#endif case sa_none: break; } @@ -7102,212 +7314,308 @@ static void __free_domain_allocs(struct s_data *d, enum s_alloc what, static enum s_alloc __visit_domain_allocation_hell(struct s_data *d, const struct cpumask *cpu_map) { - memset(d, 0, sizeof(*d)); - - if (__sdt_alloc(cpu_map)) - return sa_sd_storage; - d->sd = alloc_percpu(struct sched_domain *); - if (!d->sd) - return sa_sd_storage; +#ifdef CONFIG_NUMA + if (!alloc_cpumask_var(&d->domainspan, GFP_KERNEL)) + return sa_none; + if (!alloc_cpumask_var(&d->covered, GFP_KERNEL)) + return sa_domainspan; + if (!alloc_cpumask_var(&d->notcovered, GFP_KERNEL)) + return sa_covered; + /* Allocate the per-node list of sched groups */ + d->sched_group_nodes = kcalloc(nr_node_ids, + sizeof(struct sched_group *), GFP_KERNEL); + if (!d->sched_group_nodes) { + printk(KERN_WARNING "Can not alloc sched group node list\n"); + return sa_notcovered; + } + sched_group_nodes_bycpu[cpumask_first(cpu_map)] = d->sched_group_nodes; +#endif + if (!alloc_cpumask_var(&d->nodemask, GFP_KERNEL)) + return sa_sched_group_nodes; + if (!alloc_cpumask_var(&d->this_sibling_map, GFP_KERNEL)) + return sa_nodemask; + if (!alloc_cpumask_var(&d->this_core_map, GFP_KERNEL)) + return sa_this_sibling_map; + if (!alloc_cpumask_var(&d->this_book_map, GFP_KERNEL)) + return sa_this_core_map; + if (!alloc_cpumask_var(&d->send_covered, GFP_KERNEL)) + return sa_this_book_map; + if (!alloc_cpumask_var(&d->tmpmask, GFP_KERNEL)) + return sa_send_covered; d->rd = alloc_rootdomain(); - if (!d->rd) - return sa_sd; + if (!d->rd) { + printk(KERN_WARNING "Cannot alloc root domain\n"); + return sa_tmpmask; + } return sa_rootdomain; } -/* - * NULL the sd_data elements we've used to build the sched_domain and - * sched_group structure so that the subsequent __free_domain_allocs() - * will not free the data we're using. - */ -static void claim_allocations(int cpu, struct sched_domain *sd) +static struct sched_domain *__build_numa_sched_domains(struct s_data *d, + const struct cpumask *cpu_map, struct sched_domain_attr *attr, int i) { - struct sd_data *sdd = sd->private; - struct sched_group *sg = sd->groups; + struct sched_domain *sd = NULL; +#ifdef CONFIG_NUMA + struct sched_domain *parent; + + d->sd_allnodes = 0; + if (cpumask_weight(cpu_map) > + SD_NODES_PER_DOMAIN * cpumask_weight(d->nodemask)) { + sd = &per_cpu(allnodes_domains, i).sd; + SD_INIT(sd, ALLNODES); + set_domain_attribute(sd, attr); + cpumask_copy(sched_domain_span(sd), cpu_map); + cpu_to_allnodes_group(i, cpu_map, &sd->groups, d->tmpmask); + d->sd_allnodes = 1; + } + parent = sd; + + sd = &per_cpu(node_domains, i).sd; + SD_INIT(sd, NODE); + set_domain_attribute(sd, attr); + sched_domain_node_span(cpu_to_node(i), sched_domain_span(sd)); + sd->parent = parent; + if (parent) + parent->child = sd; + cpumask_and(sched_domain_span(sd), sched_domain_span(sd), cpu_map); +#endif + return sd; +} - WARN_ON_ONCE(*per_cpu_ptr(sdd->sd, cpu) != sd); - *per_cpu_ptr(sdd->sd, cpu) = NULL; +static struct sched_domain *__build_cpu_sched_domain(struct s_data *d, + const struct cpumask *cpu_map, struct sched_domain_attr *attr, + struct sched_domain *parent, int i) +{ + struct sched_domain *sd; + sd = &per_cpu(phys_domains, i).sd; + SD_INIT(sd, CPU); + set_domain_attribute(sd, attr); + cpumask_copy(sched_domain_span(sd), d->nodemask); + sd->parent = parent; + if (parent) + parent->child = sd; + cpu_to_phys_group(i, cpu_map, &sd->groups, d->tmpmask); + return sd; +} - if (cpu == cpumask_first(sched_group_cpus(sg))) { - WARN_ON_ONCE(*per_cpu_ptr(sdd->sg, cpu) != sg); - *per_cpu_ptr(sdd->sg, cpu) = NULL; - } +static struct sched_domain *__build_book_sched_domain(struct s_data *d, + const struct cpumask *cpu_map, struct sched_domain_attr *attr, + struct sched_domain *parent, int i) +{ + struct sched_domain *sd = parent; +#ifdef CONFIG_SCHED_BOOK + sd = &per_cpu(book_domains, i).sd; + SD_INIT(sd, BOOK); + set_domain_attribute(sd, attr); + cpumask_and(sched_domain_span(sd), cpu_map, cpu_book_mask(i)); + sd->parent = parent; + parent->child = sd; + cpu_to_book_group(i, cpu_map, &sd->groups, d->tmpmask); +#endif + return sd; } -#ifdef CONFIG_SCHED_SMT -static const struct cpumask *cpu_smt_mask(int cpu) +static struct sched_domain *__build_mc_sched_domain(struct s_data *d, + const struct cpumask *cpu_map, struct sched_domain_attr *attr, + struct sched_domain *parent, int i) { - return topology_thread_cpumask(cpu); + struct sched_domain *sd = parent; +#ifdef CONFIG_SCHED_MC + sd = &per_cpu(core_domains, i).sd; + SD_INIT(sd, MC); + set_domain_attribute(sd, attr); + cpumask_and(sched_domain_span(sd), cpu_map, cpu_coregroup_mask(i)); + sd->parent = parent; + parent->child = sd; + cpu_to_core_group(i, cpu_map, &sd->groups, d->tmpmask); +#endif + return sd; } + +static struct sched_domain *__build_smt_sched_domain(struct s_data *d, + const struct cpumask *cpu_map, struct sched_domain_attr *attr, + struct sched_domain *parent, int i) +{ + struct sched_domain *sd = parent; +#ifdef CONFIG_SCHED_SMT + sd = &per_cpu(cpu_domains, i).sd; + SD_INIT(sd, SIBLING); + set_domain_attribute(sd, attr); + cpumask_and(sched_domain_span(sd), cpu_map, topology_thread_cpumask(i)); + sd->parent = parent; + parent->child = sd; + cpu_to_cpu_group(i, cpu_map, &sd->groups, d->tmpmask); #endif + return sd; +} -/* - * Topology list, bottom-up. - */ -static struct sched_domain_topology_level default_topology[] = { +static void build_sched_groups(struct s_data *d, enum sched_domain_level l, + const struct cpumask *cpu_map, int cpu) +{ + switch (l) { #ifdef CONFIG_SCHED_SMT - { sd_init_SIBLING, cpu_smt_mask, }, + case SD_LV_SIBLING: /* set up CPU (sibling) groups */ + cpumask_and(d->this_sibling_map, cpu_map, + topology_thread_cpumask(cpu)); + if (cpu == cpumask_first(d->this_sibling_map)) + init_sched_build_groups(d->this_sibling_map, cpu_map, + &cpu_to_cpu_group, + d->send_covered, d->tmpmask); + break; #endif #ifdef CONFIG_SCHED_MC - { sd_init_MC, cpu_coregroup_mask, }, + case SD_LV_MC: /* set up multi-core groups */ + cpumask_and(d->this_core_map, cpu_map, cpu_coregroup_mask(cpu)); + if (cpu == cpumask_first(d->this_core_map)) + init_sched_build_groups(d->this_core_map, cpu_map, + &cpu_to_core_group, + d->send_covered, d->tmpmask); + break; #endif #ifdef CONFIG_SCHED_BOOK - { sd_init_BOOK, cpu_book_mask, }, + case SD_LV_BOOK: /* set up book groups */ + cpumask_and(d->this_book_map, cpu_map, cpu_book_mask(cpu)); + if (cpu == cpumask_first(d->this_book_map)) + init_sched_build_groups(d->this_book_map, cpu_map, + &cpu_to_book_group, + d->send_covered, d->tmpmask); + break; #endif - { sd_init_CPU, cpu_cpu_mask, }, + case SD_LV_CPU: /* set up physical groups */ + cpumask_and(d->nodemask, cpumask_of_node(cpu), cpu_map); + if (!cpumask_empty(d->nodemask)) + init_sched_build_groups(d->nodemask, cpu_map, + &cpu_to_phys_group, + d->send_covered, d->tmpmask); + break; #ifdef CONFIG_NUMA - { sd_init_NODE, cpu_node_mask, }, - { sd_init_ALLNODES, cpu_allnodes_mask, }, + case SD_LV_ALLNODES: + init_sched_build_groups(cpu_map, cpu_map, &cpu_to_allnodes_group, + d->send_covered, d->tmpmask); + break; #endif - { NULL, }, -}; - -static struct sched_domain_topology_level *sched_domain_topology = default_topology; - -static int __sdt_alloc(const struct cpumask *cpu_map) -{ - struct sched_domain_topology_level *tl; - int j; - - for (tl = sched_domain_topology; tl->init; tl++) { - struct sd_data *sdd = &tl->data; - - sdd->sd = alloc_percpu(struct sched_domain *); - if (!sdd->sd) - return -ENOMEM; - - sdd->sg = alloc_percpu(struct sched_group *); - if (!sdd->sg) - return -ENOMEM; - - for_each_cpu(j, cpu_map) { - struct sched_domain *sd; - struct sched_group *sg; - - sd = kzalloc_node(sizeof(struct sched_domain) + cpumask_size(), - GFP_KERNEL, cpu_to_node(j)); - if (!sd) - return -ENOMEM; - - *per_cpu_ptr(sdd->sd, j) = sd; - - sg = kzalloc_node(sizeof(struct sched_group) + cpumask_size(), - GFP_KERNEL, cpu_to_node(j)); - if (!sg) - return -ENOMEM; - - *per_cpu_ptr(sdd->sg, j) = sg; - } - } - - return 0; -} - -static void __sdt_free(const struct cpumask *cpu_map) -{ - struct sched_domain_topology_level *tl; - int j; - - for (tl = sched_domain_topology; tl->init; tl++) { - struct sd_data *sdd = &tl->data; - - for_each_cpu(j, cpu_map) { - kfree(*per_cpu_ptr(sdd->sd, j)); - kfree(*per_cpu_ptr(sdd->sg, j)); - } - free_percpu(sdd->sd); - free_percpu(sdd->sg); - } -} - -struct sched_domain *build_sched_domain(struct sched_domain_topology_level *tl, - struct s_data *d, const struct cpumask *cpu_map, - struct sched_domain_attr *attr, struct sched_domain *child, - int cpu) -{ - struct sched_domain *sd = tl->init(tl, cpu); - if (!sd) - return child; - - set_domain_attribute(sd, attr); - cpumask_and(sched_domain_span(sd), cpu_map, tl->mask(cpu)); - if (child) { - sd->level = child->level + 1; - sched_domain_level_max = max(sched_domain_level_max, sd->level); - child->parent = sd; + default: + break; } - sd->child = child; - - return sd; } /* * Build sched domains for a given set of cpus and attach the sched domains * to the individual cpus */ -static int build_sched_domains(const struct cpumask *cpu_map, - struct sched_domain_attr *attr) +static int __build_sched_domains(const struct cpumask *cpu_map, + struct sched_domain_attr *attr) { enum s_alloc alloc_state = sa_none; - struct sched_domain *sd; struct s_data d; - int i, ret = -ENOMEM; + struct sched_domain *sd; + int i; +#ifdef CONFIG_NUMA + d.sd_allnodes = 0; +#endif alloc_state = __visit_domain_allocation_hell(&d, cpu_map); if (alloc_state != sa_rootdomain) goto error; + alloc_state = sa_sched_groups; + + /* + * Set up domains for cpus specified by the cpu_map. + */ + for_each_cpu(i, cpu_map) { + cpumask_and(d.nodemask, cpumask_of_node(cpu_to_node(i)), + cpu_map); + + sd = __build_numa_sched_domains(&d, cpu_map, attr, i); + sd = __build_cpu_sched_domain(&d, cpu_map, attr, sd, i); + sd = __build_book_sched_domain(&d, cpu_map, attr, sd, i); + sd = __build_mc_sched_domain(&d, cpu_map, attr, sd, i); + sd = __build_smt_sched_domain(&d, cpu_map, attr, sd, i); + } - /* Set up domains for cpus specified by the cpu_map. */ for_each_cpu(i, cpu_map) { - struct sched_domain_topology_level *tl; + build_sched_groups(&d, SD_LV_SIBLING, cpu_map, i); + build_sched_groups(&d, SD_LV_BOOK, cpu_map, i); + build_sched_groups(&d, SD_LV_MC, cpu_map, i); + } - sd = NULL; - for (tl = sched_domain_topology; tl->init; tl++) - sd = build_sched_domain(tl, &d, cpu_map, attr, sd, i); + /* Set up physical groups */ + for (i = 0; i < nr_node_ids; i++) + build_sched_groups(&d, SD_LV_CPU, cpu_map, i); - while (sd->child) - sd = sd->child; +#ifdef CONFIG_NUMA + /* Set up node groups */ + if (d.sd_allnodes) + build_sched_groups(&d, SD_LV_ALLNODES, cpu_map, 0); - *per_cpu_ptr(d.sd, i) = sd; - } + for (i = 0; i < nr_node_ids; i++) + if (build_numa_sched_groups(&d, cpu_map, i)) + goto error; +#endif - /* Build the groups for the domains */ + /* Calculate CPU power for physical packages and nodes */ +#ifdef CONFIG_SCHED_SMT for_each_cpu(i, cpu_map) { - for (sd = *per_cpu_ptr(d.sd, i); sd; sd = sd->parent) { - sd->span_weight = cpumask_weight(sched_domain_span(sd)); - get_group(i, sd->private, &sd->groups); - atomic_inc(&sd->groups->ref); - - if (i != cpumask_first(sched_domain_span(sd))) - continue; + sd = &per_cpu(cpu_domains, i).sd; + init_sched_groups_power(i, sd); + } +#endif +#ifdef CONFIG_SCHED_MC + for_each_cpu(i, cpu_map) { + sd = &per_cpu(core_domains, i).sd; + init_sched_groups_power(i, sd); + } +#endif +#ifdef CONFIG_SCHED_BOOK + for_each_cpu(i, cpu_map) { + sd = &per_cpu(book_domains, i).sd; + init_sched_groups_power(i, sd); + } +#endif - build_sched_groups(sd); - } + for_each_cpu(i, cpu_map) { + sd = &per_cpu(phys_domains, i).sd; + init_sched_groups_power(i, sd); } - /* Calculate CPU power for physical packages and nodes */ - for (i = nr_cpumask_bits-1; i >= 0; i--) { - if (!cpumask_test_cpu(i, cpu_map)) - continue; +#ifdef CONFIG_NUMA + for (i = 0; i < nr_node_ids; i++) + init_numa_sched_groups_power(d.sched_group_nodes[i]); - for (sd = *per_cpu_ptr(d.sd, i); sd; sd = sd->parent) { - claim_allocations(i, sd); - init_sched_groups_power(i, sd); - } + if (d.sd_allnodes) { + struct sched_group *sg; + + cpu_to_allnodes_group(cpumask_first(cpu_map), cpu_map, &sg, + d.tmpmask); + init_numa_sched_groups_power(sg); } +#endif /* Attach the domains */ - rcu_read_lock(); for_each_cpu(i, cpu_map) { - sd = *per_cpu_ptr(d.sd, i); +#ifdef CONFIG_SCHED_SMT + sd = &per_cpu(cpu_domains, i).sd; +#elif defined(CONFIG_SCHED_MC) + sd = &per_cpu(core_domains, i).sd; +#elif defined(CONFIG_SCHED_BOOK) + sd = &per_cpu(book_domains, i).sd; +#else + sd = &per_cpu(phys_domains, i).sd; +#endif cpu_attach_domain(sd, d.rd, i); } - rcu_read_unlock(); - ret = 0; + d.sched_group_nodes = NULL; /* don't free this we still need it */ + __free_domain_allocs(&d, sa_tmpmask, cpu_map); + return 0; + error: __free_domain_allocs(&d, alloc_state, cpu_map); - return ret; + return -ENOMEM; +} + +static int build_sched_domains(const struct cpumask *cpu_map) +{ + return __build_sched_domains(cpu_map, NULL); } static cpumask_var_t *doms_cur; /* current sched domains */ @@ -7362,7 +7670,7 @@ void free_sched_domains(cpumask_var_t doms[], unsigned int ndoms) * For now this just excludes isolated cpus, but could be used to * exclude other special cases in the future. */ -static int init_sched_domains(const struct cpumask *cpu_map) +static int arch_init_sched_domains(const struct cpumask *cpu_map) { int err; @@ -7373,24 +7681,32 @@ static int init_sched_domains(const struct cpumask *cpu_map) doms_cur = &fallback_doms; cpumask_andnot(doms_cur[0], cpu_map, cpu_isolated_map); dattr_cur = NULL; - err = build_sched_domains(doms_cur[0], NULL); + err = build_sched_domains(doms_cur[0]); register_sched_domain_sysctl(); return err; } +static void arch_destroy_sched_domains(const struct cpumask *cpu_map, + struct cpumask *tmpmask) +{ + free_sched_groups(cpu_map, tmpmask); +} + /* * Detach sched domains from a group of cpus specified in cpu_map * These cpus will now be attached to the NULL domain */ static void detach_destroy_domains(const struct cpumask *cpu_map) { + /* Save because hotplug lock held. */ + static DECLARE_BITMAP(tmpmask, CONFIG_NR_CPUS); int i; - rcu_read_lock(); for_each_cpu(i, cpu_map) cpu_attach_domain(NULL, &def_root_domain, i); - rcu_read_unlock(); + synchronize_sched(); + arch_destroy_sched_domains(cpu_map, to_cpumask(tmpmask)); } /* handle null as "default" */ @@ -7479,7 +7795,8 @@ void partition_sched_domains(int ndoms_new, cpumask_var_t doms_new[], goto match2; } /* no match - add a new doms_new */ - build_sched_domains(doms_new[i], dattr_new ? dattr_new + i : NULL); + __build_sched_domains(doms_new[i], + dattr_new ? dattr_new + i : NULL); match2: ; } @@ -7498,7 +7815,7 @@ void partition_sched_domains(int ndoms_new, cpumask_var_t doms_new[], } #if defined(CONFIG_SCHED_MC) || defined(CONFIG_SCHED_SMT) -static void reinit_sched_domains(void) +static void arch_reinit_sched_domains(void) { get_online_cpus(); @@ -7531,7 +7848,7 @@ static ssize_t sched_power_savings_store(const char *buf, size_t count, int smt) else sched_mc_power_savings = level; - reinit_sched_domains(); + arch_reinit_sched_domains(); return count; } @@ -7650,9 +7967,14 @@ void __init sched_init_smp(void) alloc_cpumask_var(&non_isolated_cpus, GFP_KERNEL); alloc_cpumask_var(&fallback_doms, GFP_KERNEL); +#if defined(CONFIG_NUMA) + sched_group_nodes_bycpu = kzalloc(nr_cpu_ids * sizeof(void **), + GFP_KERNEL); + BUG_ON(sched_group_nodes_bycpu == NULL); +#endif get_online_cpus(); mutex_lock(&sched_domains_mutex); - init_sched_domains(cpu_active_mask); + arch_init_sched_domains(cpu_active_mask); cpumask_andnot(non_isolated_cpus, cpu_possible_mask, cpu_isolated_map); if (cpumask_empty(non_isolated_cpus)) cpumask_set_cpu(smp_processor_id(), non_isolated_cpus); @@ -7959,7 +8281,6 @@ void __init sched_init(void) /* Allocate the nohz_cpu_mask if CONFIG_CPUMASK_OFFSTACK */ zalloc_cpumask_var(&nohz_cpu_mask, GFP_NOWAIT); #ifdef CONFIG_SMP - zalloc_cpumask_var(&sched_domains_tmpmask, GFP_NOWAIT); #ifdef CONFIG_NO_HZ zalloc_cpumask_var(&nohz.idle_cpus_mask, GFP_NOWAIT); alloc_cpumask_var(&nohz.grp_idle_mask, GFP_NOWAIT); @@ -8019,7 +8340,7 @@ static void normalize_task(struct rq *rq, struct task_struct *p) int old_prio = p->prio; int on_rq; - on_rq = p->on_rq; + on_rq = p->se.on_rq; if (on_rq) deactivate_task(rq, p, 0); __setscheduler(rq, p, SCHED_NORMAL, 0); @@ -8232,6 +8553,7 @@ int alloc_rt_sched_group(struct task_group *tg, struct task_group *parent) { struct rt_rq *rt_rq; struct sched_rt_entity *rt_se; + struct rq *rq; int i; tg->rt_rq = kzalloc(sizeof(rt_rq) * nr_cpu_ids, GFP_KERNEL); @@ -8245,6 +8567,8 @@ int alloc_rt_sched_group(struct task_group *tg, struct task_group *parent) ktime_to_ns(def_rt_bandwidth.rt_period), 0); for_each_possible_cpu(i) { + rq = cpu_rq(i); + rt_rq = kzalloc_node(sizeof(struct rt_rq), GFP_KERNEL, cpu_to_node(i)); if (!rt_rq) @@ -8359,7 +8683,7 @@ void sched_move_task(struct task_struct *tsk) rq = task_rq_lock(tsk, &flags); running = task_current(rq, tsk); - on_rq = tsk->on_rq; + on_rq = tsk->se.on_rq; if (on_rq) dequeue_task(rq, tsk, 0); @@ -8378,7 +8702,7 @@ void sched_move_task(struct task_struct *tsk) if (on_rq) enqueue_task(rq, tsk, 0); - task_rq_unlock(rq, tsk, &flags); + task_rq_unlock(rq, &flags); } #endif /* CONFIG_CGROUP_SCHED */ diff --git a/trunk/kernel/sched_debug.c b/trunk/kernel/sched_debug.c index a6710a112b4f..7bacd83a4158 100644 --- a/trunk/kernel/sched_debug.c +++ b/trunk/kernel/sched_debug.c @@ -152,7 +152,7 @@ static void print_rq(struct seq_file *m, struct rq *rq, int rq_cpu) read_lock_irqsave(&tasklist_lock, flags); do_each_thread(g, p) { - if (!p->on_rq || task_cpu(p) != rq_cpu) + if (!p->se.on_rq || task_cpu(p) != rq_cpu) continue; print_task(m, rq, p); @@ -296,6 +296,9 @@ static void print_cpu(struct seq_file *m, int cpu) P(ttwu_count); P(ttwu_local); + SEQ_printf(m, " .%-30s: %d\n", "bkl_count", + rq->rq_sched_info.bkl_count); + #undef P #undef P64 #endif @@ -438,6 +441,7 @@ void proc_sched_show_task(struct task_struct *p, struct seq_file *m) P(se.statistics.wait_count); PN(se.statistics.iowait_sum); P(se.statistics.iowait_count); + P(sched_info.bkl_count); P(se.nr_migrations); P(se.statistics.nr_migrations_cold); P(se.statistics.nr_failed_migrations_affine); diff --git a/trunk/kernel/sched_fair.c b/trunk/kernel/sched_fair.c index 37f22626225e..6fa833ab2cb8 100644 --- a/trunk/kernel/sched_fair.c +++ b/trunk/kernel/sched_fair.c @@ -358,10 +358,6 @@ static void update_min_vruntime(struct cfs_rq *cfs_rq) } cfs_rq->min_vruntime = max_vruntime(cfs_rq->min_vruntime, vruntime); -#ifndef CONFIG_64BIT - smp_wmb(); - cfs_rq->min_vruntime_copy = cfs_rq->min_vruntime; -#endif } /* @@ -1344,8 +1340,6 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags) hrtick_update(rq); } -static void set_next_buddy(struct sched_entity *se); - /* * The dequeue_task method is called before nr_running is * decreased. We remove the task from the rbtree and @@ -1355,22 +1349,14 @@ static void dequeue_task_fair(struct rq *rq, struct task_struct *p, int flags) { struct cfs_rq *cfs_rq; struct sched_entity *se = &p->se; - int task_sleep = flags & DEQUEUE_SLEEP; for_each_sched_entity(se) { cfs_rq = cfs_rq_of(se); dequeue_entity(cfs_rq, se, flags); /* Don't dequeue parent if it has other entities besides us */ - if (cfs_rq->load.weight) { - /* - * Bias pick_next to pick a task from this cfs_rq, as - * p is sleeping when it is within its sched_slice. - */ - if (task_sleep && parent_entity(se)) - set_next_buddy(parent_entity(se)); + if (cfs_rq->load.weight) break; - } flags |= DEQUEUE_SLEEP; } @@ -1386,25 +1372,12 @@ static void dequeue_task_fair(struct rq *rq, struct task_struct *p, int flags) #ifdef CONFIG_SMP -static void task_waking_fair(struct task_struct *p) +static void task_waking_fair(struct rq *rq, struct task_struct *p) { struct sched_entity *se = &p->se; struct cfs_rq *cfs_rq = cfs_rq_of(se); - u64 min_vruntime; - -#ifndef CONFIG_64BIT - u64 min_vruntime_copy; - do { - min_vruntime_copy = cfs_rq->min_vruntime_copy; - smp_rmb(); - min_vruntime = cfs_rq->min_vruntime; - } while (min_vruntime != min_vruntime_copy); -#else - min_vruntime = cfs_rq->min_vruntime; -#endif - - se->vruntime -= min_vruntime; + se->vruntime -= cfs_rq->min_vruntime; } #ifdef CONFIG_FAIR_GROUP_SCHED @@ -1649,7 +1622,6 @@ static int select_idle_sibling(struct task_struct *p, int target) /* * Otherwise, iterate the domains and find an elegible idle cpu. */ - rcu_read_lock(); for_each_domain(target, sd) { if (!(sd->flags & SD_SHARE_PKG_RESOURCES)) break; @@ -1669,7 +1641,6 @@ static int select_idle_sibling(struct task_struct *p, int target) cpumask_test_cpu(prev_cpu, sched_domain_span(sd))) break; } - rcu_read_unlock(); return target; } @@ -1686,7 +1657,7 @@ static int select_idle_sibling(struct task_struct *p, int target) * preempt must be disabled. */ static int -select_task_rq_fair(struct task_struct *p, int sd_flag, int wake_flags) +select_task_rq_fair(struct rq *rq, struct task_struct *p, int sd_flag, int wake_flags) { struct sched_domain *tmp, *affine_sd = NULL, *sd = NULL; int cpu = smp_processor_id(); @@ -1702,7 +1673,6 @@ select_task_rq_fair(struct task_struct *p, int sd_flag, int wake_flags) new_cpu = prev_cpu; } - rcu_read_lock(); for_each_domain(cpu, tmp) { if (!(tmp->flags & SD_LOAD_BALANCE)) continue; @@ -1753,10 +1723,9 @@ select_task_rq_fair(struct task_struct *p, int sd_flag, int wake_flags) if (affine_sd) { if (cpu == prev_cpu || wake_affine(affine_sd, p, sync)) - prev_cpu = cpu; - - new_cpu = select_idle_sibling(p, prev_cpu); - goto unlock; + return select_idle_sibling(p, cpu); + else + return select_idle_sibling(p, prev_cpu); } while (sd) { @@ -1797,8 +1766,6 @@ select_task_rq_fair(struct task_struct *p, int sd_flag, int wake_flags) } /* while loop will break here if sd == NULL */ } -unlock: - rcu_read_unlock(); return new_cpu; } @@ -1822,7 +1789,10 @@ wakeup_gran(struct sched_entity *curr, struct sched_entity *se) * This is especially important for buddies when the leftmost * task is higher priority than the buddy. */ - return calc_delta_fair(gran, se); + if (unlikely(se->load.weight != NICE_0_LOAD)) + gran = calc_delta_fair(gran, se); + + return gran; } /* @@ -1856,26 +1826,26 @@ wakeup_preempt_entity(struct sched_entity *curr, struct sched_entity *se) static void set_last_buddy(struct sched_entity *se) { - if (entity_is_task(se) && unlikely(task_of(se)->policy == SCHED_IDLE)) - return; - - for_each_sched_entity(se) - cfs_rq_of(se)->last = se; + if (likely(task_of(se)->policy != SCHED_IDLE)) { + for_each_sched_entity(se) + cfs_rq_of(se)->last = se; + } } static void set_next_buddy(struct sched_entity *se) { - if (entity_is_task(se) && unlikely(task_of(se)->policy == SCHED_IDLE)) - return; - - for_each_sched_entity(se) - cfs_rq_of(se)->next = se; + if (likely(task_of(se)->policy != SCHED_IDLE)) { + for_each_sched_entity(se) + cfs_rq_of(se)->next = se; + } } static void set_skip_buddy(struct sched_entity *se) { - for_each_sched_entity(se) - cfs_rq_of(se)->skip = se; + if (likely(task_of(se)->policy != SCHED_IDLE)) { + for_each_sched_entity(se) + cfs_rq_of(se)->skip = se; + } } /* @@ -1887,15 +1857,12 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int wake_ struct sched_entity *se = &curr->se, *pse = &p->se; struct cfs_rq *cfs_rq = task_cfs_rq(curr); int scale = cfs_rq->nr_running >= sched_nr_latency; - int next_buddy_marked = 0; if (unlikely(se == pse)) return; - if (sched_feat(NEXT_BUDDY) && scale && !(wake_flags & WF_FORK)) { + if (sched_feat(NEXT_BUDDY) && scale && !(wake_flags & WF_FORK)) set_next_buddy(pse); - next_buddy_marked = 1; - } /* * We can come here with TIF_NEED_RESCHED already set from new task @@ -1923,15 +1890,8 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int wake_ update_curr(cfs_rq); find_matching_se(&se, &pse); BUG_ON(!pse); - if (wakeup_preempt_entity(se, pse) == 1) { - /* - * Bias pick_next to pick the sched entity that is - * triggering this preemption. - */ - if (!next_buddy_marked) - set_next_buddy(pse); + if (wakeup_preempt_entity(se, pse) == 1) goto preempt; - } return; @@ -2142,7 +2102,7 @@ static unsigned long balance_tasks(struct rq *this_rq, int this_cpu, struct rq *busiest, unsigned long max_load_move, struct sched_domain *sd, enum cpu_idle_type idle, int *all_pinned, - struct cfs_rq *busiest_cfs_rq) + int *this_best_prio, struct cfs_rq *busiest_cfs_rq) { int loops = 0, pulled = 0; long rem_load_move = max_load_move; @@ -2180,6 +2140,9 @@ balance_tasks(struct rq *this_rq, int this_cpu, struct rq *busiest, */ if (rem_load_move <= 0) break; + + if (p->prio < *this_best_prio) + *this_best_prio = p->prio; } out: /* @@ -2239,7 +2202,7 @@ static unsigned long load_balance_fair(struct rq *this_rq, int this_cpu, struct rq *busiest, unsigned long max_load_move, struct sched_domain *sd, enum cpu_idle_type idle, - int *all_pinned) + int *all_pinned, int *this_best_prio) { long rem_load_move = max_load_move; int busiest_cpu = cpu_of(busiest); @@ -2264,7 +2227,7 @@ load_balance_fair(struct rq *this_rq, int this_cpu, struct rq *busiest, rem_load = div_u64(rem_load, busiest_h_load + 1); moved_load = balance_tasks(this_rq, this_cpu, busiest, - rem_load, sd, idle, all_pinned, + rem_load, sd, idle, all_pinned, this_best_prio, busiest_cfs_rq); if (!moved_load) @@ -2290,11 +2253,11 @@ static unsigned long load_balance_fair(struct rq *this_rq, int this_cpu, struct rq *busiest, unsigned long max_load_move, struct sched_domain *sd, enum cpu_idle_type idle, - int *all_pinned) + int *all_pinned, int *this_best_prio) { return balance_tasks(this_rq, this_cpu, busiest, max_load_move, sd, idle, all_pinned, - &busiest->cfs); + this_best_prio, &busiest->cfs); } #endif @@ -2311,11 +2274,12 @@ static int move_tasks(struct rq *this_rq, int this_cpu, struct rq *busiest, int *all_pinned) { unsigned long total_load_moved = 0, load_moved; + int this_best_prio = this_rq->curr->prio; do { load_moved = load_balance_fair(this_rq, this_cpu, busiest, max_load_move - total_load_moved, - sd, idle, all_pinned); + sd, idle, all_pinned, &this_best_prio); total_load_moved += load_moved; @@ -2684,7 +2648,7 @@ fix_small_capacity(struct sched_domain *sd, struct sched_group *group) /* * Only siblings can have significantly less than SCHED_LOAD_SCALE */ - if (!(sd->flags & SD_SHARE_CPUPOWER)) + if (sd->level != SD_LV_SIBLING) return 0; /* @@ -3501,7 +3465,6 @@ static void idle_balance(int this_cpu, struct rq *this_rq) raw_spin_unlock(&this_rq->lock); update_shares(this_cpu); - rcu_read_lock(); for_each_domain(this_cpu, sd) { unsigned long interval; int balance = 1; @@ -3523,7 +3486,6 @@ static void idle_balance(int this_cpu, struct rq *this_rq) break; } } - rcu_read_unlock(); raw_spin_lock(&this_rq->lock); @@ -3572,7 +3534,6 @@ static int active_load_balance_cpu_stop(void *data) double_lock_balance(busiest_rq, target_rq); /* Search for an sd spanning us and the target CPU. */ - rcu_read_lock(); for_each_domain(target_cpu, sd) { if ((sd->flags & SD_LOAD_BALANCE) && cpumask_test_cpu(busiest_cpu, sched_domain_span(sd))) @@ -3588,7 +3549,6 @@ static int active_load_balance_cpu_stop(void *data) else schedstat_inc(sd, alb_failed); } - rcu_read_unlock(); double_unlock_balance(busiest_rq, target_rq); out_unlock: busiest_rq->active_balance = 0; @@ -3715,7 +3675,6 @@ static int find_new_ilb(int cpu) { struct sched_domain *sd; struct sched_group *ilb_group; - int ilb = nr_cpu_ids; /* * Have idle load balancer selection from semi-idle packages only @@ -3731,25 +3690,20 @@ static int find_new_ilb(int cpu) if (cpumask_weight(nohz.idle_cpus_mask) < 2) goto out_done; - rcu_read_lock(); for_each_flag_domain(cpu, sd, SD_POWERSAVINGS_BALANCE) { ilb_group = sd->groups; do { - if (is_semi_idle_group(ilb_group)) { - ilb = cpumask_first(nohz.grp_idle_mask); - goto unlock; - } + if (is_semi_idle_group(ilb_group)) + return cpumask_first(nohz.grp_idle_mask); ilb_group = ilb_group->next; } while (ilb_group != sd->groups); } -unlock: - rcu_read_unlock(); out_done: - return ilb; + return nr_cpu_ids; } #else /* (CONFIG_SCHED_MC || CONFIG_SCHED_SMT) */ static inline int find_new_ilb(int call_cpu) @@ -3894,7 +3848,6 @@ static void rebalance_domains(int cpu, enum cpu_idle_type idle) update_shares(cpu); - rcu_read_lock(); for_each_domain(cpu, sd) { if (!(sd->flags & SD_LOAD_BALANCE)) continue; @@ -3940,7 +3893,6 @@ static void rebalance_domains(int cpu, enum cpu_idle_type idle) if (!balance) break; } - rcu_read_unlock(); /* * next_balance will be updated only when there is a need. diff --git a/trunk/kernel/sched_features.h b/trunk/kernel/sched_features.h index be40f7371ee1..68e69acc29b9 100644 --- a/trunk/kernel/sched_features.h +++ b/trunk/kernel/sched_features.h @@ -64,9 +64,3 @@ SCHED_FEAT(OWNER_SPIN, 1) * Decrement CPU power based on irq activity */ SCHED_FEAT(NONIRQ_POWER, 1) - -/* - * Queue remote wakeups on the target CPU and process them - * using the scheduler IPI. Reduces rq->lock contention/bounces. - */ -SCHED_FEAT(TTWU_QUEUE, 1) diff --git a/trunk/kernel/sched_idletask.c b/trunk/kernel/sched_idletask.c index 0a51882534ea..a776a6396427 100644 --- a/trunk/kernel/sched_idletask.c +++ b/trunk/kernel/sched_idletask.c @@ -7,7 +7,7 @@ #ifdef CONFIG_SMP static int -select_task_rq_idle(struct task_struct *p, int sd_flag, int flags) +select_task_rq_idle(struct rq *rq, struct task_struct *p, int sd_flag, int flags) { return task_cpu(p); /* IDLE tasks as never migrated */ } diff --git a/trunk/kernel/sched_rt.c b/trunk/kernel/sched_rt.c index 0943ed7a4038..f8fcf8297c5f 100644 --- a/trunk/kernel/sched_rt.c +++ b/trunk/kernel/sched_rt.c @@ -183,6 +183,14 @@ static inline u64 sched_rt_period(struct rt_rq *rt_rq) return ktime_to_ns(rt_rq->tg->rt_bandwidth.rt_period); } +typedef struct task_group *rt_rq_iter_t; + +#define for_each_rt_rq(rt_rq, iter, rq) \ + for (iter = list_entry_rcu(task_groups.next, typeof(*iter), list); \ + (&iter->list != &task_groups) && \ + (rt_rq = iter->rt_rq[cpu_of(rq)]); \ + iter = list_entry_rcu(iter->list.next, typeof(*iter), list)) + static inline void list_add_leaf_rt_rq(struct rt_rq *rt_rq) { list_add_rcu(&rt_rq->leaf_rt_rq_list, @@ -288,6 +296,11 @@ static inline u64 sched_rt_period(struct rt_rq *rt_rq) return ktime_to_ns(def_rt_bandwidth.rt_period); } +typedef struct rt_rq *rt_rq_iter_t; + +#define for_each_rt_rq(rt_rq, iter, rq) \ + for ((void) iter, rt_rq = &rq->rt; rt_rq; rt_rq = NULL) + static inline void list_add_leaf_rt_rq(struct rt_rq *rt_rq) { } @@ -402,12 +415,13 @@ static int do_balance_runtime(struct rt_rq *rt_rq) static void __disable_runtime(struct rq *rq) { struct root_domain *rd = rq->rd; + rt_rq_iter_t iter; struct rt_rq *rt_rq; if (unlikely(!scheduler_running)) return; - for_each_leaf_rt_rq(rt_rq, rq) { + for_each_rt_rq(rt_rq, iter, rq) { struct rt_bandwidth *rt_b = sched_rt_bandwidth(rt_rq); s64 want; int i; @@ -487,6 +501,7 @@ static void disable_runtime(struct rq *rq) static void __enable_runtime(struct rq *rq) { + rt_rq_iter_t iter; struct rt_rq *rt_rq; if (unlikely(!scheduler_running)) @@ -495,7 +510,7 @@ static void __enable_runtime(struct rq *rq) /* * Reset each runqueue's bandwidth settings */ - for_each_leaf_rt_rq(rt_rq, rq) { + for_each_rt_rq(rt_rq, iter, rq) { struct rt_bandwidth *rt_b = sched_rt_bandwidth(rt_rq); raw_spin_lock(&rt_b->rt_runtime_lock); @@ -562,13 +577,6 @@ static int do_sched_rt_period_timer(struct rt_bandwidth *rt_b, int overrun) if (rt_rq->rt_throttled && rt_rq->rt_time < runtime) { rt_rq->rt_throttled = 0; enqueue = 1; - - /* - * Force a clock update if the CPU was idle, - * lest wakeup -> unthrottle time accumulate. - */ - if (rt_rq->rt_nr_running && rq->curr == rq->idle) - rq->skip_clock_update = -1; } if (rt_rq->rt_time || rt_rq->rt_nr_running) idle = 0; @@ -984,23 +992,13 @@ static void yield_task_rt(struct rq *rq) static int find_lowest_rq(struct task_struct *task); static int -select_task_rq_rt(struct task_struct *p, int sd_flag, int flags) +select_task_rq_rt(struct rq *rq, struct task_struct *p, int sd_flag, int flags) { - struct task_struct *curr; - struct rq *rq; - int cpu; - if (sd_flag != SD_BALANCE_WAKE) return smp_processor_id(); - cpu = task_cpu(p); - rq = cpu_rq(cpu); - - rcu_read_lock(); - curr = ACCESS_ONCE(rq->curr); /* unlocked access */ - /* - * If the current task on @p's runqueue is an RT task, then + * If the current task is an RT task, then * try to see if we can wake this RT task up on another * runqueue. Otherwise simply start this RT task * on its current runqueue. @@ -1014,25 +1012,21 @@ select_task_rq_rt(struct task_struct *p, int sd_flag, int flags) * lock? * * For equal prio tasks, we just let the scheduler sort it out. - * - * Otherwise, just let it ride on the affined RQ and the - * post-schedule router will push the preempted task away - * - * This test is optimistic, if we get it wrong the load-balancer - * will have to sort it out. */ - if (curr && unlikely(rt_task(curr)) && - (curr->rt.nr_cpus_allowed < 2 || - curr->prio < p->prio) && + if (unlikely(rt_task(rq->curr)) && + (rq->curr->rt.nr_cpus_allowed < 2 || + rq->curr->prio < p->prio) && (p->rt.nr_cpus_allowed > 1)) { - int target = find_lowest_rq(p); + int cpu = find_lowest_rq(p); - if (target != -1) - cpu = target; + return (cpu == -1) ? task_cpu(p) : cpu; } - rcu_read_unlock(); - return cpu; + /* + * Otherwise, just let it ride on the affined RQ and the + * post-schedule router will push the preempted task away + */ + return task_cpu(p); } static void check_preempt_equal_prio(struct rq *rq, struct task_struct *p) @@ -1157,7 +1151,7 @@ static void put_prev_task_rt(struct rq *rq, struct task_struct *p) * The previous task needs to be made eligible for pushing * if it is still active */ - if (on_rt_rq(&p->rt) && p->rt.nr_cpus_allowed > 1) + if (p->se.on_rq && p->rt.nr_cpus_allowed > 1) enqueue_pushable_task(rq, p); } @@ -1308,7 +1302,7 @@ static struct rq *find_lock_lowest_rq(struct task_struct *task, struct rq *rq) !cpumask_test_cpu(lowest_rq->cpu, &task->cpus_allowed) || task_running(rq, task) || - !task->on_rq)) { + !task->se.on_rq)) { raw_spin_unlock(&lowest_rq->lock); lowest_rq = NULL; @@ -1342,7 +1336,7 @@ static struct task_struct *pick_next_pushable_task(struct rq *rq) BUG_ON(task_current(rq, p)); BUG_ON(p->rt.nr_cpus_allowed <= 1); - BUG_ON(!p->on_rq); + BUG_ON(!p->se.on_rq); BUG_ON(!rt_task(p)); return p; @@ -1488,7 +1482,7 @@ static int pull_rt_task(struct rq *this_rq) */ if (p && (p->prio < this_rq->rt.highest_prio.curr)) { WARN_ON(p == src_rq->curr); - WARN_ON(!p->on_rq); + WARN_ON(!p->se.on_rq); /* * There's a chance that p is higher in priority @@ -1559,7 +1553,7 @@ static void set_cpus_allowed_rt(struct task_struct *p, * Update the migration status of the RQ if we have an RT task * which is running AND changing its weight value. */ - if (p->on_rq && (weight != p->rt.nr_cpus_allowed)) { + if (p->se.on_rq && (weight != p->rt.nr_cpus_allowed)) { struct rq *rq = task_rq(p); if (!task_current(rq, p)) { @@ -1629,7 +1623,7 @@ static void switched_from_rt(struct rq *rq, struct task_struct *p) * we may need to handle the pulling of RT tasks * now. */ - if (p->on_rq && !rq->rt.rt_nr_running) + if (p->se.on_rq && !rq->rt.rt_nr_running) pull_rt_task(rq); } @@ -1659,7 +1653,7 @@ static void switched_to_rt(struct rq *rq, struct task_struct *p) * If that current running task is also an RT task * then see if we can move to another run queue. */ - if (p->on_rq && rq->curr != p) { + if (p->se.on_rq && rq->curr != p) { #ifdef CONFIG_SMP if (rq->rt.overloaded && push_rt_task(rq) && /* Don't resched if we changed runqueues */ @@ -1678,7 +1672,7 @@ static void switched_to_rt(struct rq *rq, struct task_struct *p) static void prio_changed_rt(struct rq *rq, struct task_struct *p, int oldprio) { - if (!p->on_rq) + if (!p->se.on_rq) return; if (rq->curr == p) { @@ -1817,10 +1811,11 @@ extern void print_rt_rq(struct seq_file *m, int cpu, struct rt_rq *rt_rq); static void print_rt_stats(struct seq_file *m, int cpu) { + rt_rq_iter_t iter; struct rt_rq *rt_rq; rcu_read_lock(); - for_each_leaf_rt_rq(rt_rq, cpu_rq(cpu)) + for_each_rt_rq(rt_rq, iter, cpu_rq(cpu)) print_rt_rq(m, cpu, rt_rq); rcu_read_unlock(); } diff --git a/trunk/kernel/sched_stoptask.c b/trunk/kernel/sched_stoptask.c index 6f437632afab..1ba2bd40fdac 100644 --- a/trunk/kernel/sched_stoptask.c +++ b/trunk/kernel/sched_stoptask.c @@ -9,7 +9,8 @@ #ifdef CONFIG_SMP static int -select_task_rq_stop(struct task_struct *p, int sd_flag, int flags) +select_task_rq_stop(struct rq *rq, struct task_struct *p, + int sd_flag, int flags) { return task_cpu(p); /* stop tasks as never migrate */ } @@ -25,7 +26,7 @@ static struct task_struct *pick_next_task_stop(struct rq *rq) { struct task_struct *stop = rq->stop; - if (stop && stop->on_rq) + if (stop && stop->se.on_rq) return stop; return NULL; diff --git a/trunk/kernel/trace/trace_kprobe.c b/trunk/kernel/trace/trace_kprobe.c index f925c45f0afa..35d55a386145 100644 --- a/trunk/kernel/trace/trace_kprobe.c +++ b/trunk/kernel/trace/trace_kprobe.c @@ -53,6 +53,7 @@ const char *reserved_field_names[] = { "common_preempt_count", "common_pid", "common_tgid", + "common_lock_depth", FIELD_STRING_IP, FIELD_STRING_RETIP, FIELD_STRING_FUNC, diff --git a/trunk/lib/vsprintf.c b/trunk/lib/vsprintf.c index bc0ac6b333dc..dfd60192bc2e 100644 --- a/trunk/lib/vsprintf.c +++ b/trunk/lib/vsprintf.c @@ -797,7 +797,7 @@ char *uuid_string(char *buf, char *end, const u8 *addr, return string(buf, end, uuid, spec); } -int kptr_restrict = 1; +int kptr_restrict __read_mostly; /* * Show a '%p' thing. A kernel extension is that the '%p' is followed diff --git a/trunk/mm/page_alloc.c b/trunk/mm/page_alloc.c index 9f8a97b9a350..570d944daeb5 100644 --- a/trunk/mm/page_alloc.c +++ b/trunk/mm/page_alloc.c @@ -2317,6 +2317,21 @@ void free_pages(unsigned long addr, unsigned int order) EXPORT_SYMBOL(free_pages); +static void *make_alloc_exact(unsigned long addr, unsigned order, size_t size) +{ + if (addr) { + unsigned long alloc_end = addr + (PAGE_SIZE << order); + unsigned long used = addr + PAGE_ALIGN(size); + + split_page(virt_to_page((void *)addr), order); + while (used < alloc_end) { + free_page(used); + used += PAGE_SIZE; + } + } + return (void *)addr; +} + /** * alloc_pages_exact - allocate an exact number physically-contiguous pages. * @size: the number of bytes to allocate @@ -2336,21 +2351,31 @@ void *alloc_pages_exact(size_t size, gfp_t gfp_mask) unsigned long addr; addr = __get_free_pages(gfp_mask, order); - if (addr) { - unsigned long alloc_end = addr + (PAGE_SIZE << order); - unsigned long used = addr + PAGE_ALIGN(size); - - split_page(virt_to_page((void *)addr), order); - while (used < alloc_end) { - free_page(used); - used += PAGE_SIZE; - } - } - - return (void *)addr; + return make_alloc_exact(addr, order, size); } EXPORT_SYMBOL(alloc_pages_exact); +/** + * alloc_pages_exact_nid - allocate an exact number of physically-contiguous + * pages on a node. + * @size: the number of bytes to allocate + * @gfp_mask: GFP flags for the allocation + * + * Like alloc_pages_exact(), but try to allocate on node nid first before falling + * back. + * Note this is not alloc_pages_exact_node() which allocates on a specific node, + * but is not exact. + */ +void *alloc_pages_exact_nid(int nid, size_t size, gfp_t gfp_mask) +{ + unsigned order = get_order(size); + struct page *p = alloc_pages_node(nid, gfp_mask, order); + if (!p) + return NULL; + return make_alloc_exact((unsigned long)page_address(p), order, size); +} +EXPORT_SYMBOL(alloc_pages_exact_nid); + /** * free_pages_exact - release memory allocated via alloc_pages_exact() * @virt: the value returned by alloc_pages_exact. @@ -3564,7 +3589,7 @@ int zone_wait_table_init(struct zone *zone, unsigned long zone_size_pages) if (!slab_is_available()) { zone->wait_table = (wait_queue_head_t *) - alloc_bootmem_node(pgdat, alloc_size); + alloc_bootmem_node_nopanic(pgdat, alloc_size); } else { /* * This case means that a zone whose size was 0 gets new memory @@ -4141,7 +4166,8 @@ static void __init setup_usemap(struct pglist_data *pgdat, unsigned long usemapsize = usemap_size(zonesize); zone->pageblock_flags = NULL; if (usemapsize) - zone->pageblock_flags = alloc_bootmem_node(pgdat, usemapsize); + zone->pageblock_flags = alloc_bootmem_node_nopanic(pgdat, + usemapsize); } #else static inline void setup_usemap(struct pglist_data *pgdat, @@ -4307,7 +4333,7 @@ static void __init_refok alloc_node_mem_map(struct pglist_data *pgdat) size = (end - start) * sizeof(struct page); map = alloc_remap(pgdat->node_id, size); if (!map) - map = alloc_bootmem_node(pgdat, size); + map = alloc_bootmem_node_nopanic(pgdat, size); pgdat->node_mem_map = map + (pgdat->node_start_pfn - start); } #ifndef CONFIG_NEED_MULTIPLE_NODES diff --git a/trunk/mm/page_cgroup.c b/trunk/mm/page_cgroup.c index 99055010cece..2daadc322ba6 100644 --- a/trunk/mm/page_cgroup.c +++ b/trunk/mm/page_cgroup.c @@ -134,7 +134,7 @@ static void *__init_refok alloc_page_cgroup(size_t size, int nid) { void *addr = NULL; - addr = alloc_pages_exact(size, GFP_KERNEL | __GFP_NOWARN); + addr = alloc_pages_exact_nid(nid, size, GFP_KERNEL | __GFP_NOWARN); if (addr) return addr; diff --git a/trunk/mm/shmem.c b/trunk/mm/shmem.c index 8fa27e4e582a..dfc7069102ee 100644 --- a/trunk/mm/shmem.c +++ b/trunk/mm/shmem.c @@ -852,7 +852,7 @@ static inline int shmem_find_swp(swp_entry_t entry, swp_entry_t *dir, swp_entry_ static int shmem_unuse_inode(struct shmem_inode_info *info, swp_entry_t entry, struct page *page) { - struct inode *inode; + struct address_space *mapping; unsigned long idx; unsigned long size; unsigned long limit; @@ -875,8 +875,10 @@ static int shmem_unuse_inode(struct shmem_inode_info *info, swp_entry_t entry, s if (size > SHMEM_NR_DIRECT) size = SHMEM_NR_DIRECT; offset = shmem_find_swp(entry, ptr, ptr+size); - if (offset >= 0) + if (offset >= 0) { + shmem_swp_balance_unmap(); goto found; + } if (!info->i_indirect) goto lost2; @@ -914,11 +916,11 @@ static int shmem_unuse_inode(struct shmem_inode_info *info, swp_entry_t entry, s if (size > ENTRIES_PER_PAGE) size = ENTRIES_PER_PAGE; offset = shmem_find_swp(entry, ptr, ptr+size); - shmem_swp_unmap(ptr); if (offset >= 0) { shmem_dir_unmap(dir); goto found; } + shmem_swp_unmap(ptr); } } lost1: @@ -928,8 +930,7 @@ static int shmem_unuse_inode(struct shmem_inode_info *info, swp_entry_t entry, s return 0; found: idx += offset; - inode = igrab(&info->vfs_inode); - spin_unlock(&info->lock); + ptr += offset; /* * Move _head_ to start search for next from here. @@ -940,37 +941,18 @@ static int shmem_unuse_inode(struct shmem_inode_info *info, swp_entry_t entry, s */ if (shmem_swaplist.next != &info->swaplist) list_move_tail(&shmem_swaplist, &info->swaplist); - mutex_unlock(&shmem_swaplist_mutex); - error = 1; - if (!inode) - goto out; /* - * Charge page using GFP_KERNEL while we can wait. - * Charged back to the user(not to caller) when swap account is used. - * add_to_page_cache() will be called with GFP_NOWAIT. + * We rely on shmem_swaplist_mutex, not only to protect the swaplist, + * but also to hold up shmem_evict_inode(): so inode cannot be freed + * beneath us (pagelock doesn't help until the page is in pagecache). */ - error = mem_cgroup_cache_charge(page, current->mm, GFP_KERNEL); - if (error) - goto out; - error = radix_tree_preload(GFP_KERNEL); - if (error) { - mem_cgroup_uncharge_cache_page(page); - goto out; - } - error = 1; - - spin_lock(&info->lock); - ptr = shmem_swp_entry(info, idx, NULL); - if (ptr && ptr->val == entry.val) { - error = add_to_page_cache_locked(page, inode->i_mapping, - idx, GFP_NOWAIT); - /* does mem_cgroup_uncharge_cache_page on error */ - } else /* we must compensate for our precharge above */ - mem_cgroup_uncharge_cache_page(page); + mapping = info->vfs_inode.i_mapping; + error = add_to_page_cache_locked(page, mapping, idx, GFP_NOWAIT); + /* which does mem_cgroup_uncharge_cache_page on error */ if (error == -EEXIST) { - struct page *filepage = find_get_page(inode->i_mapping, idx); + struct page *filepage = find_get_page(mapping, idx); error = 1; if (filepage) { /* @@ -990,14 +972,8 @@ static int shmem_unuse_inode(struct shmem_inode_info *info, swp_entry_t entry, s swap_free(entry); error = 1; /* not an error, but entry was found */ } - if (ptr) - shmem_swp_unmap(ptr); + shmem_swp_unmap(ptr); spin_unlock(&info->lock); - radix_tree_preload_end(); -out: - unlock_page(page); - page_cache_release(page); - iput(inode); /* allows for NULL */ return error; } @@ -1009,6 +985,26 @@ int shmem_unuse(swp_entry_t entry, struct page *page) struct list_head *p, *next; struct shmem_inode_info *info; int found = 0; + int error; + + /* + * Charge page using GFP_KERNEL while we can wait, before taking + * the shmem_swaplist_mutex which might hold up shmem_writepage(). + * Charged back to the user (not to caller) when swap account is used. + * add_to_page_cache() will be called with GFP_NOWAIT. + */ + error = mem_cgroup_cache_charge(page, current->mm, GFP_KERNEL); + if (error) + goto out; + /* + * Try to preload while we can wait, to not make a habit of + * draining atomic reserves; but don't latch on to this cpu, + * it's okay if sometimes we get rescheduled after this. + */ + error = radix_tree_preload(GFP_KERNEL); + if (error) + goto uncharge; + radix_tree_preload_end(); mutex_lock(&shmem_swaplist_mutex); list_for_each_safe(p, next, &shmem_swaplist) { @@ -1016,17 +1012,19 @@ int shmem_unuse(swp_entry_t entry, struct page *page) found = shmem_unuse_inode(info, entry, page); cond_resched(); if (found) - goto out; + break; } mutex_unlock(&shmem_swaplist_mutex); - /* - * Can some race bring us here? We've been holding page lock, - * so I think not; but would rather try again later than BUG() - */ + +uncharge: + if (!found) + mem_cgroup_uncharge_cache_page(page); + if (found < 0) + error = found; +out: unlock_page(page); page_cache_release(page); -out: - return (found < 0) ? found : 0; + return error; } /* @@ -1064,7 +1062,25 @@ static int shmem_writepage(struct page *page, struct writeback_control *wbc) else swap.val = 0; + /* + * Add inode to shmem_unuse()'s list of swapped-out inodes, + * if it's not already there. Do it now because we cannot take + * mutex while holding spinlock, and must do so before the page + * is moved to swap cache, when its pagelock no longer protects + * the inode from eviction. But don't unlock the mutex until + * we've taken the spinlock, because shmem_unuse_inode() will + * prune a !swapped inode from the swaplist under both locks. + */ + if (swap.val) { + mutex_lock(&shmem_swaplist_mutex); + if (list_empty(&info->swaplist)) + list_add_tail(&info->swaplist, &shmem_swaplist); + } + spin_lock(&info->lock); + if (swap.val) + mutex_unlock(&shmem_swaplist_mutex); + if (index >= info->next_index) { BUG_ON(!(info->flags & SHMEM_TRUNCATE)); goto unlock; @@ -1084,21 +1100,10 @@ static int shmem_writepage(struct page *page, struct writeback_control *wbc) delete_from_page_cache(page); shmem_swp_set(info, entry, swap.val); shmem_swp_unmap(entry); - if (list_empty(&info->swaplist)) - inode = igrab(inode); - else - inode = NULL; spin_unlock(&info->lock); swap_shmem_alloc(swap); BUG_ON(page_mapped(page)); swap_writepage(page, wbc); - if (inode) { - mutex_lock(&shmem_swaplist_mutex); - /* move instead of add in case we're racing */ - list_move_tail(&info->swaplist, &shmem_swaplist); - mutex_unlock(&shmem_swaplist_mutex); - iput(inode); - } return 0; } @@ -1400,20 +1405,14 @@ static int shmem_getpage(struct inode *inode, unsigned long idx, if (sbinfo->max_blocks) { if (percpu_counter_compare(&sbinfo->used_blocks, sbinfo->max_blocks) >= 0 || - shmem_acct_block(info->flags)) { - spin_unlock(&info->lock); - error = -ENOSPC; - goto failed; - } + shmem_acct_block(info->flags)) + goto nospace; percpu_counter_inc(&sbinfo->used_blocks); spin_lock(&inode->i_lock); inode->i_blocks += BLOCKS_PER_PAGE; spin_unlock(&inode->i_lock); - } else if (shmem_acct_block(info->flags)) { - spin_unlock(&info->lock); - error = -ENOSPC; - goto failed; - } + } else if (shmem_acct_block(info->flags)) + goto nospace; if (!filepage) { int ret; @@ -1493,6 +1492,24 @@ static int shmem_getpage(struct inode *inode, unsigned long idx, error = 0; goto out; +nospace: + /* + * Perhaps the page was brought in from swap between find_lock_page + * and taking info->lock? We allow for that at add_to_page_cache_lru, + * but must also avoid reporting a spurious ENOSPC while working on a + * full tmpfs. (When filepage has been passed in to shmem_getpage, it + * is already in page cache, which prevents this race from occurring.) + */ + if (!filepage) { + struct page *page = find_get_page(mapping, idx); + if (page) { + spin_unlock(&info->lock); + page_cache_release(page); + goto repeat; + } + } + spin_unlock(&info->lock); + error = -ENOSPC; failed: if (*pagep != filepage) { unlock_page(filepage); diff --git a/trunk/mm/swap.c b/trunk/mm/swap.c index a448db377cb0..5602f1a1b1e7 100644 --- a/trunk/mm/swap.c +++ b/trunk/mm/swap.c @@ -396,6 +396,9 @@ static void lru_deactivate_fn(struct page *page, void *arg) if (!PageLRU(page)) return; + if (PageUnevictable(page)) + return; + /* Some processes are using the page */ if (page_mapped(page)) return; diff --git a/trunk/net/8021q/vlan.c b/trunk/net/8021q/vlan.c index 7850412f52b7..0eb1a886b370 100644 --- a/trunk/net/8021q/vlan.c +++ b/trunk/net/8021q/vlan.c @@ -124,6 +124,9 @@ void unregister_vlan_dev(struct net_device *dev, struct list_head *head) grp->nr_vlans--; + if (vlan->flags & VLAN_FLAG_GVRP) + vlan_gvrp_request_leave(dev); + vlan_group_set_device(grp, vlan_id, NULL); if (!grp->killall) synchronize_net(); diff --git a/trunk/net/8021q/vlan_dev.c b/trunk/net/8021q/vlan_dev.c index e34ea9e5e28b..b2ff6c8d3603 100644 --- a/trunk/net/8021q/vlan_dev.c +++ b/trunk/net/8021q/vlan_dev.c @@ -487,9 +487,6 @@ static int vlan_dev_stop(struct net_device *dev) struct vlan_dev_info *vlan = vlan_dev_info(dev); struct net_device *real_dev = vlan->real_dev; - if (vlan->flags & VLAN_FLAG_GVRP) - vlan_gvrp_request_leave(dev); - dev_mc_unsync(real_dev, dev); dev_uc_unsync(real_dev, dev); if (dev->flags & IFF_ALLMULTI) diff --git a/trunk/net/9p/client.c b/trunk/net/9p/client.c index 77367745be9b..a9aa2dd66482 100644 --- a/trunk/net/9p/client.c +++ b/trunk/net/9p/client.c @@ -614,7 +614,7 @@ p9_client_rpc(struct p9_client *c, int8_t type, const char *fmt, ...) err = c->trans_mod->request(c, req); if (err < 0) { - if (err != -ERESTARTSYS) + if (err != -ERESTARTSYS && err != -EFAULT) c->status = Disconnected; goto reterr; } diff --git a/trunk/net/9p/protocol.c b/trunk/net/9p/protocol.c index b58a501cf3d1..a873277cb996 100644 --- a/trunk/net/9p/protocol.c +++ b/trunk/net/9p/protocol.c @@ -674,6 +674,7 @@ int p9dirent_read(char *buf, int len, struct p9_dirent *dirent, } strcpy(dirent->d_name, nameptr); + kfree(nameptr); out: return fake_pdu.offset; diff --git a/trunk/net/9p/trans_common.c b/trunk/net/9p/trans_common.c index e883172f9aa2..9a70ebdec56e 100644 --- a/trunk/net/9p/trans_common.c +++ b/trunk/net/9p/trans_common.c @@ -63,7 +63,7 @@ p9_payload_gup(struct p9_req_t *req, size_t *pdata_off, int *pdata_len, int nr_pages, u8 rw) { uint32_t first_page_bytes = 0; - uint32_t pdata_mapped_pages; + int32_t pdata_mapped_pages; struct trans_rpage_info *rpinfo; *pdata_off = (__force size_t)req->tc->pubuf & (PAGE_SIZE-1); @@ -75,14 +75,9 @@ p9_payload_gup(struct p9_req_t *req, size_t *pdata_off, int *pdata_len, rpinfo = req->tc->private; pdata_mapped_pages = get_user_pages_fast((unsigned long)req->tc->pubuf, nr_pages, rw, &rpinfo->rp_data[0]); + if (pdata_mapped_pages <= 0) + return pdata_mapped_pages; - if (pdata_mapped_pages < 0) { - printk(KERN_ERR "get_user_pages_fast failed:%d udata:%p" - "nr_pages:%d\n", pdata_mapped_pages, - req->tc->pubuf, nr_pages); - pdata_mapped_pages = 0; - return -EIO; - } rpinfo->rp_nr_pages = pdata_mapped_pages; if (*pdata_off) { *pdata_len = first_page_bytes; diff --git a/trunk/net/bluetooth/sco.c b/trunk/net/bluetooth/sco.c index 94954c74f6ae..42fdffd1d76c 100644 --- a/trunk/net/bluetooth/sco.c +++ b/trunk/net/bluetooth/sco.c @@ -369,15 +369,6 @@ static void __sco_sock_close(struct sock *sk) case BT_CONNECTED: case BT_CONFIG: - if (sco_pi(sk)->conn) { - sk->sk_state = BT_DISCONN; - sco_sock_set_timer(sk, SCO_DISCONN_TIMEOUT); - hci_conn_put(sco_pi(sk)->conn->hcon); - sco_pi(sk)->conn = NULL; - } else - sco_chan_del(sk, ECONNRESET); - break; - case BT_CONNECT: case BT_DISCONN: sco_chan_del(sk, ECONNRESET); diff --git a/trunk/net/bridge/br_netfilter.c b/trunk/net/bridge/br_netfilter.c index f3bc322c5891..74ef4d4846a4 100644 --- a/trunk/net/bridge/br_netfilter.c +++ b/trunk/net/bridge/br_netfilter.c @@ -737,7 +737,7 @@ static unsigned int br_nf_forward_ip(unsigned int hook, struct sk_buff *skb, nf_bridge->mask |= BRNF_PKT_TYPE; } - if (br_parse_ip_options(skb)) + if (pf == PF_INET && br_parse_ip_options(skb)) return NF_DROP; /* The physdev module checks on this */ diff --git a/trunk/net/bridge/netfilter/ebtables.c b/trunk/net/bridge/netfilter/ebtables.c index 893669caa8de..1a92b369c820 100644 --- a/trunk/net/bridge/netfilter/ebtables.c +++ b/trunk/net/bridge/netfilter/ebtables.c @@ -1766,7 +1766,7 @@ static int compat_table_info(const struct ebt_table_info *info, newinfo->entries_size = size; - xt_compat_init_offsets(AF_INET, info->nentries); + xt_compat_init_offsets(NFPROTO_BRIDGE, info->nentries); return EBT_ENTRY_ITERATE(entries, size, compat_calc_entry, info, entries, newinfo); } @@ -1882,7 +1882,7 @@ static int compat_mtw_from_user(struct compat_ebt_entry_mwt *mwt, struct xt_match *match; struct xt_target *wt; void *dst = NULL; - int off, pad = 0, ret = 0; + int off, pad = 0; unsigned int size_kern, entry_offset, match_size = mwt->match_size; strlcpy(name, mwt->u.name, sizeof(name)); @@ -1935,13 +1935,6 @@ static int compat_mtw_from_user(struct compat_ebt_entry_mwt *mwt, break; } - if (!dst) { - ret = xt_compat_add_offset(NFPROTO_BRIDGE, entry_offset, - off + ebt_compat_entry_padsize()); - if (ret < 0) - return ret; - } - state->buf_kern_offset += match_size + off; state->buf_user_offset += match_size; pad = XT_ALIGN(size_kern) - size_kern; @@ -2016,50 +2009,6 @@ static int ebt_size_mwt(struct compat_ebt_entry_mwt *match32, return growth; } -#define EBT_COMPAT_WATCHER_ITERATE(e, fn, args...) \ -({ \ - unsigned int __i; \ - int __ret = 0; \ - struct compat_ebt_entry_mwt *__watcher; \ - \ - for (__i = e->watchers_offset; \ - __i < (e)->target_offset; \ - __i += __watcher->watcher_size + \ - sizeof(struct compat_ebt_entry_mwt)) { \ - __watcher = (void *)(e) + __i; \ - __ret = fn(__watcher , ## args); \ - if (__ret != 0) \ - break; \ - } \ - if (__ret == 0) { \ - if (__i != (e)->target_offset) \ - __ret = -EINVAL; \ - } \ - __ret; \ -}) - -#define EBT_COMPAT_MATCH_ITERATE(e, fn, args...) \ -({ \ - unsigned int __i; \ - int __ret = 0; \ - struct compat_ebt_entry_mwt *__match; \ - \ - for (__i = sizeof(struct ebt_entry); \ - __i < (e)->watchers_offset; \ - __i += __match->match_size + \ - sizeof(struct compat_ebt_entry_mwt)) { \ - __match = (void *)(e) + __i; \ - __ret = fn(__match , ## args); \ - if (__ret != 0) \ - break; \ - } \ - if (__ret == 0) { \ - if (__i != (e)->watchers_offset) \ - __ret = -EINVAL; \ - } \ - __ret; \ -}) - /* called for all ebt_entry structures. */ static int size_entry_mwt(struct ebt_entry *entry, const unsigned char *base, unsigned int *total, @@ -2132,6 +2081,14 @@ static int size_entry_mwt(struct ebt_entry *entry, const unsigned char *base, } } + if (state->buf_kern_start == NULL) { + unsigned int offset = buf_start - (char *) base; + + ret = xt_compat_add_offset(NFPROTO_BRIDGE, offset, new_offset); + if (ret < 0) + return ret; + } + startoff = state->buf_user_offset - startoff; BUG_ON(*total < startoff); @@ -2240,6 +2197,7 @@ static int compat_do_replace(struct net *net, void __user *user, xt_compat_lock(NFPROTO_BRIDGE); + xt_compat_init_offsets(NFPROTO_BRIDGE, tmp.nentries); ret = compat_copy_entries(entries_tmp, tmp.entries_size, &state); if (ret < 0) goto out_unlock; diff --git a/trunk/net/core/dev.c b/trunk/net/core/dev.c index 856b6ee9a1d5..92009440d28b 100644 --- a/trunk/net/core/dev.c +++ b/trunk/net/core/dev.c @@ -1284,11 +1284,13 @@ static int dev_close_many(struct list_head *head) */ int dev_close(struct net_device *dev) { - LIST_HEAD(single); + if (dev->flags & IFF_UP) { + LIST_HEAD(single); - list_add(&dev->unreg_list, &single); - dev_close_many(&single); - list_del(&single); + list_add(&dev->unreg_list, &single); + dev_close_many(&single); + list_del(&single); + } return 0; } EXPORT_SYMBOL(dev_close); diff --git a/trunk/net/dccp/options.c b/trunk/net/dccp/options.c index f06ffcfc8d71..4b2ab657ac8e 100644 --- a/trunk/net/dccp/options.c +++ b/trunk/net/dccp/options.c @@ -123,6 +123,8 @@ int dccp_parse_options(struct sock *sk, struct dccp_request_sock *dreq, case DCCPO_CHANGE_L ... DCCPO_CONFIRM_R: if (pkt_type == DCCP_PKT_DATA) /* RFC 4340, 6 */ break; + if (len == 0) + goto out_invalid_option; rc = dccp_feat_parse_options(sk, dreq, mandatory, opt, *value, value + 1, len - 1); if (rc) diff --git a/trunk/net/ipv4/ip_fragment.c b/trunk/net/ipv4/ip_fragment.c index a1151b8adf3c..b1d282f11be7 100644 --- a/trunk/net/ipv4/ip_fragment.c +++ b/trunk/net/ipv4/ip_fragment.c @@ -223,31 +223,30 @@ static void ip_expire(unsigned long arg) if ((qp->q.last_in & INET_FRAG_FIRST_IN) && qp->q.fragments != NULL) { struct sk_buff *head = qp->q.fragments; + const struct iphdr *iph; + int err; rcu_read_lock(); head->dev = dev_get_by_index_rcu(net, qp->iif); if (!head->dev) goto out_rcu_unlock; + /* skb dst is stale, drop it, and perform route lookup again */ + skb_dst_drop(head); + iph = ip_hdr(head); + err = ip_route_input_noref(head, iph->daddr, iph->saddr, + iph->tos, head->dev); + if (err) + goto out_rcu_unlock; + /* - * Only search router table for the head fragment, - * when defraging timeout at PRE_ROUTING HOOK. + * Only an end host needs to send an ICMP + * "Fragment Reassembly Timeout" message, per RFC792. */ - if (qp->user == IP_DEFRAG_CONNTRACK_IN && !skb_dst(head)) { - const struct iphdr *iph = ip_hdr(head); - int err = ip_route_input(head, iph->daddr, iph->saddr, - iph->tos, head->dev); - if (unlikely(err)) - goto out_rcu_unlock; - - /* - * Only an end host needs to send an ICMP - * "Fragment Reassembly Timeout" message, per RFC792. - */ - if (skb_rtable(head)->rt_type != RTN_LOCAL) - goto out_rcu_unlock; + if (qp->user == IP_DEFRAG_CONNTRACK_IN && + skb_rtable(head)->rt_type != RTN_LOCAL) + goto out_rcu_unlock; - } /* Send an ICMP "Fragment Reassembly Timeout" message. */ icmp_send(head, ICMP_TIME_EXCEEDED, ICMP_EXC_FRAGTIME, 0); diff --git a/trunk/net/ipv4/tcp_cubic.c b/trunk/net/ipv4/tcp_cubic.c index 34340c9c95fa..f376b05cca81 100644 --- a/trunk/net/ipv4/tcp_cubic.c +++ b/trunk/net/ipv4/tcp_cubic.c @@ -93,6 +93,7 @@ struct bictcp { u32 ack_cnt; /* number of acks */ u32 tcp_cwnd; /* estimated tcp cwnd */ #define ACK_RATIO_SHIFT 4 +#define ACK_RATIO_LIMIT (32u << ACK_RATIO_SHIFT) u16 delayed_ack; /* estimate the ratio of Packets/ACKs << 4 */ u8 sample_cnt; /* number of samples to decide curr_rtt */ u8 found; /* the exit point is found? */ @@ -398,8 +399,12 @@ static void bictcp_acked(struct sock *sk, u32 cnt, s32 rtt_us) u32 delay; if (icsk->icsk_ca_state == TCP_CA_Open) { - cnt -= ca->delayed_ack >> ACK_RATIO_SHIFT; - ca->delayed_ack += cnt; + u32 ratio = ca->delayed_ack; + + ratio -= ca->delayed_ack >> ACK_RATIO_SHIFT; + ratio += cnt; + + ca->delayed_ack = min(ratio, ACK_RATIO_LIMIT); } /* Some calls are for duplicates without timetamps */ diff --git a/trunk/net/ipv4/xfrm4_output.c b/trunk/net/ipv4/xfrm4_output.c index 571aa96a175c..2d51840e53a1 100644 --- a/trunk/net/ipv4/xfrm4_output.c +++ b/trunk/net/ipv4/xfrm4_output.c @@ -69,7 +69,7 @@ int xfrm4_prepare_output(struct xfrm_state *x, struct sk_buff *skb) } EXPORT_SYMBOL(xfrm4_prepare_output); -static int xfrm4_output_finish(struct sk_buff *skb) +int xfrm4_output_finish(struct sk_buff *skb) { #ifdef CONFIG_NETFILTER if (!skb_dst(skb)->xfrm) { @@ -86,7 +86,11 @@ static int xfrm4_output_finish(struct sk_buff *skb) int xfrm4_output(struct sk_buff *skb) { + struct dst_entry *dst = skb_dst(skb); + struct xfrm_state *x = dst->xfrm; + return NF_HOOK_COND(NFPROTO_IPV4, NF_INET_POST_ROUTING, skb, - NULL, skb_dst(skb)->dev, xfrm4_output_finish, + NULL, dst->dev, + x->outer_mode->afinfo->output_finish, !(IPCB(skb)->flags & IPSKB_REROUTED)); } diff --git a/trunk/net/ipv4/xfrm4_state.c b/trunk/net/ipv4/xfrm4_state.c index 1717c64628d1..805d63ef4340 100644 --- a/trunk/net/ipv4/xfrm4_state.c +++ b/trunk/net/ipv4/xfrm4_state.c @@ -78,6 +78,7 @@ static struct xfrm_state_afinfo xfrm4_state_afinfo = { .init_tempsel = __xfrm4_init_tempsel, .init_temprop = xfrm4_init_temprop, .output = xfrm4_output, + .output_finish = xfrm4_output_finish, .extract_input = xfrm4_extract_input, .extract_output = xfrm4_extract_output, .transport_finish = xfrm4_transport_finish, diff --git a/trunk/net/ipv6/netfilter/ip6t_REJECT.c b/trunk/net/ipv6/netfilter/ip6t_REJECT.c index 28e74488a329..a5a4c5dd5396 100644 --- a/trunk/net/ipv6/netfilter/ip6t_REJECT.c +++ b/trunk/net/ipv6/netfilter/ip6t_REJECT.c @@ -45,6 +45,8 @@ static void send_reset(struct net *net, struct sk_buff *oldskb) int tcphoff, needs_ack; const struct ipv6hdr *oip6h = ipv6_hdr(oldskb); struct ipv6hdr *ip6h; +#define DEFAULT_TOS_VALUE 0x0U + const __u8 tclass = DEFAULT_TOS_VALUE; struct dst_entry *dst = NULL; u8 proto; struct flowi6 fl6; @@ -124,7 +126,7 @@ static void send_reset(struct net *net, struct sk_buff *oldskb) skb_put(nskb, sizeof(struct ipv6hdr)); skb_reset_network_header(nskb); ip6h = ipv6_hdr(nskb); - ip6h->version = 6; + *(__be32 *)ip6h = htonl(0x60000000 | (tclass << 20)); ip6h->hop_limit = ip6_dst_hoplimit(dst); ip6h->nexthdr = IPPROTO_TCP; ipv6_addr_copy(&ip6h->saddr, &oip6h->daddr); diff --git a/trunk/net/ipv6/xfrm6_output.c b/trunk/net/ipv6/xfrm6_output.c index 8e688b3de9ab..49a91c5f5623 100644 --- a/trunk/net/ipv6/xfrm6_output.c +++ b/trunk/net/ipv6/xfrm6_output.c @@ -79,7 +79,7 @@ int xfrm6_prepare_output(struct xfrm_state *x, struct sk_buff *skb) } EXPORT_SYMBOL(xfrm6_prepare_output); -static int xfrm6_output_finish(struct sk_buff *skb) +int xfrm6_output_finish(struct sk_buff *skb) { #ifdef CONFIG_NETFILTER IP6CB(skb)->flags |= IP6SKB_XFRM_TRANSFORMED; @@ -97,9 +97,9 @@ static int __xfrm6_output(struct sk_buff *skb) if ((x && x->props.mode == XFRM_MODE_TUNNEL) && ((skb->len > ip6_skb_dst_mtu(skb) && !skb_is_gso(skb)) || dst_allfrag(skb_dst(skb)))) { - return ip6_fragment(skb, xfrm6_output_finish); + return ip6_fragment(skb, x->outer_mode->afinfo->output_finish); } - return xfrm6_output_finish(skb); + return x->outer_mode->afinfo->output_finish(skb); } int xfrm6_output(struct sk_buff *skb) diff --git a/trunk/net/ipv6/xfrm6_state.c b/trunk/net/ipv6/xfrm6_state.c index afe941e9415c..248f0b2a7ee9 100644 --- a/trunk/net/ipv6/xfrm6_state.c +++ b/trunk/net/ipv6/xfrm6_state.c @@ -178,6 +178,7 @@ static struct xfrm_state_afinfo xfrm6_state_afinfo = { .tmpl_sort = __xfrm6_tmpl_sort, .state_sort = __xfrm6_state_sort, .output = xfrm6_output, + .output_finish = xfrm6_output_finish, .extract_input = xfrm6_extract_input, .extract_output = xfrm6_extract_output, .transport_finish = xfrm6_transport_finish, diff --git a/trunk/net/mac80211/tx.c b/trunk/net/mac80211/tx.c index ce4596ed1268..bd1224fd216a 100644 --- a/trunk/net/mac80211/tx.c +++ b/trunk/net/mac80211/tx.c @@ -237,6 +237,10 @@ ieee80211_tx_h_dynamic_ps(struct ieee80211_tx_data *tx) &local->dynamic_ps_disable_work); } + /* Don't restart the timer if we're not disassociated */ + if (!ifmgd->associated) + return TX_CONTINUE; + mod_timer(&local->dynamic_ps_timer, jiffies + msecs_to_jiffies(local->hw.conf.dynamic_ps_timeout)); diff --git a/trunk/net/netfilter/ipvs/ip_vs_app.c b/trunk/net/netfilter/ipvs/ip_vs_app.c index 2dc6de13ac18..51f3af7c4743 100644 --- a/trunk/net/netfilter/ipvs/ip_vs_app.c +++ b/trunk/net/netfilter/ipvs/ip_vs_app.c @@ -576,7 +576,7 @@ static const struct file_operations ip_vs_app_fops = { }; #endif -static int __net_init __ip_vs_app_init(struct net *net) +int __net_init __ip_vs_app_init(struct net *net) { struct netns_ipvs *ipvs = net_ipvs(net); @@ -585,26 +585,17 @@ static int __net_init __ip_vs_app_init(struct net *net) return 0; } -static void __net_exit __ip_vs_app_cleanup(struct net *net) +void __net_exit __ip_vs_app_cleanup(struct net *net) { proc_net_remove(net, "ip_vs_app"); } -static struct pernet_operations ip_vs_app_ops = { - .init = __ip_vs_app_init, - .exit = __ip_vs_app_cleanup, -}; - int __init ip_vs_app_init(void) { - int rv; - - rv = register_pernet_subsys(&ip_vs_app_ops); - return rv; + return 0; } void ip_vs_app_cleanup(void) { - unregister_pernet_subsys(&ip_vs_app_ops); } diff --git a/trunk/net/netfilter/ipvs/ip_vs_conn.c b/trunk/net/netfilter/ipvs/ip_vs_conn.c index c97bd45975be..d3fd91bbba49 100644 --- a/trunk/net/netfilter/ipvs/ip_vs_conn.c +++ b/trunk/net/netfilter/ipvs/ip_vs_conn.c @@ -1258,22 +1258,17 @@ int __net_init __ip_vs_conn_init(struct net *net) return 0; } -static void __net_exit __ip_vs_conn_cleanup(struct net *net) +void __net_exit __ip_vs_conn_cleanup(struct net *net) { /* flush all the connection entries first */ ip_vs_conn_flush(net); proc_net_remove(net, "ip_vs_conn"); proc_net_remove(net, "ip_vs_conn_sync"); } -static struct pernet_operations ipvs_conn_ops = { - .init = __ip_vs_conn_init, - .exit = __ip_vs_conn_cleanup, -}; int __init ip_vs_conn_init(void) { int idx; - int retc; /* Compute size and mask */ ip_vs_conn_tab_size = 1 << ip_vs_conn_tab_bits; @@ -1309,17 +1304,14 @@ int __init ip_vs_conn_init(void) rwlock_init(&__ip_vs_conntbl_lock_array[idx].l); } - retc = register_pernet_subsys(&ipvs_conn_ops); - /* calculate the random value for connection hash */ get_random_bytes(&ip_vs_conn_rnd, sizeof(ip_vs_conn_rnd)); - return retc; + return 0; } void ip_vs_conn_cleanup(void) { - unregister_pernet_subsys(&ipvs_conn_ops); /* Release the empty cache */ kmem_cache_destroy(ip_vs_conn_cachep); vfree(ip_vs_conn_tab); diff --git a/trunk/net/netfilter/ipvs/ip_vs_core.c b/trunk/net/netfilter/ipvs/ip_vs_core.c index 07accf6b2401..a74dae6c5dbc 100644 --- a/trunk/net/netfilter/ipvs/ip_vs_core.c +++ b/trunk/net/netfilter/ipvs/ip_vs_core.c @@ -1113,6 +1113,9 @@ ip_vs_out(unsigned int hooknum, struct sk_buff *skb, int af) return NF_ACCEPT; net = skb_net(skb); + if (!net_ipvs(net)->enable) + return NF_ACCEPT; + ip_vs_fill_iphdr(af, skb_network_header(skb), &iph); #ifdef CONFIG_IP_VS_IPV6 if (af == AF_INET6) { @@ -1343,6 +1346,7 @@ ip_vs_in_icmp(struct sk_buff *skb, int *related, unsigned int hooknum) return NF_ACCEPT; /* The packet looks wrong, ignore */ net = skb_net(skb); + pd = ip_vs_proto_data_get(net, cih->protocol); if (!pd) return NF_ACCEPT; @@ -1529,6 +1533,11 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af) IP_VS_DBG_ADDR(af, &iph.daddr), hooknum); return NF_ACCEPT; } + /* ipvs enabled in this netns ? */ + net = skb_net(skb); + if (!net_ipvs(net)->enable) + return NF_ACCEPT; + ip_vs_fill_iphdr(af, skb_network_header(skb), &iph); /* Bad... Do not break raw sockets */ @@ -1562,7 +1571,6 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af) ip_vs_fill_iphdr(af, skb_network_header(skb), &iph); } - net = skb_net(skb); /* Protocol supported? */ pd = ip_vs_proto_data_get(net, iph.protocol); if (unlikely(!pd)) @@ -1588,7 +1596,6 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af) } IP_VS_DBG_PKT(11, af, pp, skb, 0, "Incoming packet"); - net = skb_net(skb); ipvs = net_ipvs(net); /* Check the server status */ if (cp->dest && !(cp->dest->flags & IP_VS_DEST_F_AVAILABLE)) { @@ -1743,10 +1750,16 @@ ip_vs_forward_icmp(unsigned int hooknum, struct sk_buff *skb, int (*okfn)(struct sk_buff *)) { int r; + struct net *net; if (ip_hdr(skb)->protocol != IPPROTO_ICMP) return NF_ACCEPT; + /* ipvs enabled in this netns ? */ + net = skb_net(skb); + if (!net_ipvs(net)->enable) + return NF_ACCEPT; + return ip_vs_in_icmp(skb, &r, hooknum); } @@ -1757,10 +1770,16 @@ ip_vs_forward_icmp_v6(unsigned int hooknum, struct sk_buff *skb, int (*okfn)(struct sk_buff *)) { int r; + struct net *net; if (ipv6_hdr(skb)->nexthdr != IPPROTO_ICMPV6) return NF_ACCEPT; + /* ipvs enabled in this netns ? */ + net = skb_net(skb); + if (!net_ipvs(net)->enable) + return NF_ACCEPT; + return ip_vs_in_icmp_v6(skb, &r, hooknum); } #endif @@ -1884,19 +1903,70 @@ static int __net_init __ip_vs_init(struct net *net) pr_err("%s(): no memory.\n", __func__); return -ENOMEM; } + /* Hold the beast until a service is registerd */ + ipvs->enable = 0; ipvs->net = net; /* Counters used for creating unique names */ ipvs->gen = atomic_read(&ipvs_netns_cnt); atomic_inc(&ipvs_netns_cnt); net->ipvs = ipvs; + + if (__ip_vs_estimator_init(net) < 0) + goto estimator_fail; + + if (__ip_vs_control_init(net) < 0) + goto control_fail; + + if (__ip_vs_protocol_init(net) < 0) + goto protocol_fail; + + if (__ip_vs_app_init(net) < 0) + goto app_fail; + + if (__ip_vs_conn_init(net) < 0) + goto conn_fail; + + if (__ip_vs_sync_init(net) < 0) + goto sync_fail; + printk(KERN_INFO "IPVS: Creating netns size=%zu id=%d\n", sizeof(struct netns_ipvs), ipvs->gen); return 0; +/* + * Error handling + */ + +sync_fail: + __ip_vs_conn_cleanup(net); +conn_fail: + __ip_vs_app_cleanup(net); +app_fail: + __ip_vs_protocol_cleanup(net); +protocol_fail: + __ip_vs_control_cleanup(net); +control_fail: + __ip_vs_estimator_cleanup(net); +estimator_fail: + return -ENOMEM; } static void __net_exit __ip_vs_cleanup(struct net *net) { - IP_VS_DBG(10, "ipvs netns %d released\n", net_ipvs(net)->gen); + __ip_vs_service_cleanup(net); /* ip_vs_flush() with locks */ + __ip_vs_conn_cleanup(net); + __ip_vs_app_cleanup(net); + __ip_vs_protocol_cleanup(net); + __ip_vs_control_cleanup(net); + __ip_vs_estimator_cleanup(net); + IP_VS_DBG(2, "ipvs netns %d released\n", net_ipvs(net)->gen); +} + +static void __net_exit __ip_vs_dev_cleanup(struct net *net) +{ + EnterFunction(2); + net_ipvs(net)->enable = 0; /* Disable packet reception */ + __ip_vs_sync_cleanup(net); + LeaveFunction(2); } static struct pernet_operations ipvs_core_ops = { @@ -1906,6 +1976,10 @@ static struct pernet_operations ipvs_core_ops = { .size = sizeof(struct netns_ipvs), }; +static struct pernet_operations ipvs_core_dev_ops = { + .exit = __ip_vs_dev_cleanup, +}; + /* * Initialize IP Virtual Server */ @@ -1913,10 +1987,6 @@ static int __init ip_vs_init(void) { int ret; - ret = register_pernet_subsys(&ipvs_core_ops); /* Alloc ip_vs struct */ - if (ret < 0) - return ret; - ip_vs_estimator_init(); ret = ip_vs_control_init(); if (ret < 0) { @@ -1944,15 +2014,28 @@ static int __init ip_vs_init(void) goto cleanup_conn; } + ret = register_pernet_subsys(&ipvs_core_ops); /* Alloc ip_vs struct */ + if (ret < 0) + goto cleanup_sync; + + ret = register_pernet_device(&ipvs_core_dev_ops); + if (ret < 0) + goto cleanup_sub; + ret = nf_register_hooks(ip_vs_ops, ARRAY_SIZE(ip_vs_ops)); if (ret < 0) { pr_err("can't register hooks.\n"); - goto cleanup_sync; + goto cleanup_dev; } pr_info("ipvs loaded.\n"); + return ret; +cleanup_dev: + unregister_pernet_device(&ipvs_core_dev_ops); +cleanup_sub: + unregister_pernet_subsys(&ipvs_core_ops); cleanup_sync: ip_vs_sync_cleanup(); cleanup_conn: @@ -1964,20 +2047,20 @@ static int __init ip_vs_init(void) ip_vs_control_cleanup(); cleanup_estimator: ip_vs_estimator_cleanup(); - unregister_pernet_subsys(&ipvs_core_ops); /* free ip_vs struct */ return ret; } static void __exit ip_vs_cleanup(void) { nf_unregister_hooks(ip_vs_ops, ARRAY_SIZE(ip_vs_ops)); + unregister_pernet_device(&ipvs_core_dev_ops); + unregister_pernet_subsys(&ipvs_core_ops); /* free ip_vs struct */ ip_vs_sync_cleanup(); ip_vs_conn_cleanup(); ip_vs_app_cleanup(); ip_vs_protocol_cleanup(); ip_vs_control_cleanup(); ip_vs_estimator_cleanup(); - unregister_pernet_subsys(&ipvs_core_ops); /* free ip_vs struct */ pr_info("ipvs unloaded.\n"); } diff --git a/trunk/net/netfilter/ipvs/ip_vs_ctl.c b/trunk/net/netfilter/ipvs/ip_vs_ctl.c index ae47090bf45f..ea722810faf3 100644 --- a/trunk/net/netfilter/ipvs/ip_vs_ctl.c +++ b/trunk/net/netfilter/ipvs/ip_vs_ctl.c @@ -69,6 +69,11 @@ int ip_vs_get_debug_level(void) } #endif + +/* Protos */ +static void __ip_vs_del_service(struct ip_vs_service *svc); + + #ifdef CONFIG_IP_VS_IPV6 /* Taken from rt6_fill_node() in net/ipv6/route.c, is there a better way? */ static int __ip_vs_addr_is_local_v6(struct net *net, @@ -1214,6 +1219,8 @@ ip_vs_add_service(struct net *net, struct ip_vs_service_user_kern *u, write_unlock_bh(&__ip_vs_svc_lock); *svc_p = svc; + /* Now there is a service - full throttle */ + ipvs->enable = 1; return 0; @@ -1472,6 +1479,84 @@ static int ip_vs_flush(struct net *net) return 0; } +/* + * Delete service by {netns} in the service table. + * Called by __ip_vs_cleanup() + */ +void __ip_vs_service_cleanup(struct net *net) +{ + EnterFunction(2); + /* Check for "full" addressed entries */ + mutex_lock(&__ip_vs_mutex); + ip_vs_flush(net); + mutex_unlock(&__ip_vs_mutex); + LeaveFunction(2); +} +/* + * Release dst hold by dst_cache + */ +static inline void +__ip_vs_dev_reset(struct ip_vs_dest *dest, struct net_device *dev) +{ + spin_lock_bh(&dest->dst_lock); + if (dest->dst_cache && dest->dst_cache->dev == dev) { + IP_VS_DBG_BUF(3, "Reset dev:%s dest %s:%u ,dest->refcnt=%d\n", + dev->name, + IP_VS_DBG_ADDR(dest->af, &dest->addr), + ntohs(dest->port), + atomic_read(&dest->refcnt)); + ip_vs_dst_reset(dest); + } + spin_unlock_bh(&dest->dst_lock); + +} +/* + * Netdev event receiver + * Currently only NETDEV_UNREGISTER is handled, i.e. if we hold a reference to + * a device that is "unregister" it must be released. + */ +static int ip_vs_dst_event(struct notifier_block *this, unsigned long event, + void *ptr) +{ + struct net_device *dev = ptr; + struct net *net = dev_net(dev); + struct ip_vs_service *svc; + struct ip_vs_dest *dest; + unsigned int idx; + + if (event != NETDEV_UNREGISTER) + return NOTIFY_DONE; + IP_VS_DBG(3, "%s() dev=%s\n", __func__, dev->name); + EnterFunction(2); + mutex_lock(&__ip_vs_mutex); + for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) { + list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) { + if (net_eq(svc->net, net)) { + list_for_each_entry(dest, &svc->destinations, + n_list) { + __ip_vs_dev_reset(dest, dev); + } + } + } + + list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) { + if (net_eq(svc->net, net)) { + list_for_each_entry(dest, &svc->destinations, + n_list) { + __ip_vs_dev_reset(dest, dev); + } + } + + } + } + + list_for_each_entry(dest, &net_ipvs(net)->dest_trash, n_list) { + __ip_vs_dev_reset(dest, dev); + } + mutex_unlock(&__ip_vs_mutex); + LeaveFunction(2); + return NOTIFY_DONE; +} /* * Zero counters in a service or all services @@ -3588,6 +3673,10 @@ void __net_init __ip_vs_control_cleanup_sysctl(struct net *net) { } #endif +static struct notifier_block ip_vs_dst_notifier = { + .notifier_call = ip_vs_dst_event, +}; + int __net_init __ip_vs_control_init(struct net *net) { int idx; @@ -3626,7 +3715,7 @@ int __net_init __ip_vs_control_init(struct net *net) return -ENOMEM; } -static void __net_exit __ip_vs_control_cleanup(struct net *net) +void __net_exit __ip_vs_control_cleanup(struct net *net) { struct netns_ipvs *ipvs = net_ipvs(net); @@ -3639,11 +3728,6 @@ static void __net_exit __ip_vs_control_cleanup(struct net *net) free_percpu(ipvs->tot_stats.cpustats); } -static struct pernet_operations ipvs_control_ops = { - .init = __ip_vs_control_init, - .exit = __ip_vs_control_cleanup, -}; - int __init ip_vs_control_init(void) { int idx; @@ -3657,33 +3741,32 @@ int __init ip_vs_control_init(void) INIT_LIST_HEAD(&ip_vs_svc_fwm_table[idx]); } - ret = register_pernet_subsys(&ipvs_control_ops); - if (ret) { - pr_err("cannot register namespace.\n"); - goto err; - } - smp_wmb(); /* Do we really need it now ? */ ret = nf_register_sockopt(&ip_vs_sockopts); if (ret) { pr_err("cannot register sockopt.\n"); - goto err_net; + goto err_sock; } ret = ip_vs_genl_register(); if (ret) { pr_err("cannot register Generic Netlink interface.\n"); - nf_unregister_sockopt(&ip_vs_sockopts); - goto err_net; + goto err_genl; } + ret = register_netdevice_notifier(&ip_vs_dst_notifier); + if (ret < 0) + goto err_notf; + LeaveFunction(2); return 0; -err_net: - unregister_pernet_subsys(&ipvs_control_ops); -err: +err_notf: + ip_vs_genl_unregister(); +err_genl: + nf_unregister_sockopt(&ip_vs_sockopts); +err_sock: return ret; } @@ -3691,7 +3774,6 @@ int __init ip_vs_control_init(void) void ip_vs_control_cleanup(void) { EnterFunction(2); - unregister_pernet_subsys(&ipvs_control_ops); ip_vs_genl_unregister(); nf_unregister_sockopt(&ip_vs_sockopts); LeaveFunction(2); diff --git a/trunk/net/netfilter/ipvs/ip_vs_est.c b/trunk/net/netfilter/ipvs/ip_vs_est.c index 8c8766ca56ad..508cce98777c 100644 --- a/trunk/net/netfilter/ipvs/ip_vs_est.c +++ b/trunk/net/netfilter/ipvs/ip_vs_est.c @@ -192,7 +192,7 @@ void ip_vs_read_estimator(struct ip_vs_stats_user *dst, dst->outbps = (e->outbps + 0xF) >> 5; } -static int __net_init __ip_vs_estimator_init(struct net *net) +int __net_init __ip_vs_estimator_init(struct net *net) { struct netns_ipvs *ipvs = net_ipvs(net); @@ -203,24 +203,16 @@ static int __net_init __ip_vs_estimator_init(struct net *net) return 0; } -static void __net_exit __ip_vs_estimator_exit(struct net *net) +void __net_exit __ip_vs_estimator_cleanup(struct net *net) { del_timer_sync(&net_ipvs(net)->est_timer); } -static struct pernet_operations ip_vs_app_ops = { - .init = __ip_vs_estimator_init, - .exit = __ip_vs_estimator_exit, -}; int __init ip_vs_estimator_init(void) { - int rv; - - rv = register_pernet_subsys(&ip_vs_app_ops); - return rv; + return 0; } void ip_vs_estimator_cleanup(void) { - unregister_pernet_subsys(&ip_vs_app_ops); } diff --git a/trunk/net/netfilter/ipvs/ip_vs_proto.c b/trunk/net/netfilter/ipvs/ip_vs_proto.c index 17484a4416ef..eb86028536fc 100644 --- a/trunk/net/netfilter/ipvs/ip_vs_proto.c +++ b/trunk/net/netfilter/ipvs/ip_vs_proto.c @@ -316,7 +316,7 @@ ip_vs_tcpudp_debug_packet(int af, struct ip_vs_protocol *pp, /* * per network name-space init */ -static int __net_init __ip_vs_protocol_init(struct net *net) +int __net_init __ip_vs_protocol_init(struct net *net) { #ifdef CONFIG_IP_VS_PROTO_TCP register_ip_vs_proto_netns(net, &ip_vs_protocol_tcp); @@ -336,7 +336,7 @@ static int __net_init __ip_vs_protocol_init(struct net *net) return 0; } -static void __net_exit __ip_vs_protocol_cleanup(struct net *net) +void __net_exit __ip_vs_protocol_cleanup(struct net *net) { struct netns_ipvs *ipvs = net_ipvs(net); struct ip_vs_proto_data *pd; @@ -349,11 +349,6 @@ static void __net_exit __ip_vs_protocol_cleanup(struct net *net) } } -static struct pernet_operations ipvs_proto_ops = { - .init = __ip_vs_protocol_init, - .exit = __ip_vs_protocol_cleanup, -}; - int __init ip_vs_protocol_init(void) { char protocols[64]; @@ -382,7 +377,6 @@ int __init ip_vs_protocol_init(void) REGISTER_PROTOCOL(&ip_vs_protocol_esp); #endif pr_info("Registered protocols (%s)\n", &protocols[2]); - return register_pernet_subsys(&ipvs_proto_ops); return 0; } @@ -393,7 +387,6 @@ void ip_vs_protocol_cleanup(void) struct ip_vs_protocol *pp; int i; - unregister_pernet_subsys(&ipvs_proto_ops); /* unregister all the ipvs protocols */ for (i = 0; i < IP_VS_PROTO_TAB_SIZE; i++) { while ((pp = ip_vs_proto_table[i]) != NULL) diff --git a/trunk/net/netfilter/ipvs/ip_vs_sync.c b/trunk/net/netfilter/ipvs/ip_vs_sync.c index 3e7961e85e9c..e292e5bddc70 100644 --- a/trunk/net/netfilter/ipvs/ip_vs_sync.c +++ b/trunk/net/netfilter/ipvs/ip_vs_sync.c @@ -1303,13 +1303,18 @@ static struct socket *make_send_sock(struct net *net) struct socket *sock; int result; - /* First create a socket */ - result = __sock_create(net, PF_INET, SOCK_DGRAM, IPPROTO_UDP, &sock, 1); + /* First create a socket move it to right name space later */ + result = sock_create_kern(PF_INET, SOCK_DGRAM, IPPROTO_UDP, &sock); if (result < 0) { pr_err("Error during creation of socket; terminating\n"); return ERR_PTR(result); } - + /* + * Kernel sockets that are a part of a namespace, should not + * hold a reference to a namespace in order to allow to stop it. + * After sk_change_net should be released using sk_release_kernel. + */ + sk_change_net(sock->sk, net); result = set_mcast_if(sock->sk, ipvs->master_mcast_ifn); if (result < 0) { pr_err("Error setting outbound mcast interface\n"); @@ -1334,8 +1339,8 @@ static struct socket *make_send_sock(struct net *net) return sock; - error: - sock_release(sock); +error: + sk_release_kernel(sock->sk); return ERR_PTR(result); } @@ -1350,12 +1355,17 @@ static struct socket *make_receive_sock(struct net *net) int result; /* First create a socket */ - result = __sock_create(net, PF_INET, SOCK_DGRAM, IPPROTO_UDP, &sock, 1); + result = sock_create_kern(PF_INET, SOCK_DGRAM, IPPROTO_UDP, &sock); if (result < 0) { pr_err("Error during creation of socket; terminating\n"); return ERR_PTR(result); } - + /* + * Kernel sockets that are a part of a namespace, should not + * hold a reference to a namespace in order to allow to stop it. + * After sk_change_net should be released using sk_release_kernel. + */ + sk_change_net(sock->sk, net); /* it is equivalent to the REUSEADDR option in user-space */ sock->sk->sk_reuse = 1; @@ -1377,8 +1387,8 @@ static struct socket *make_receive_sock(struct net *net) return sock; - error: - sock_release(sock); +error: + sk_release_kernel(sock->sk); return ERR_PTR(result); } @@ -1473,7 +1483,7 @@ static int sync_thread_master(void *data) ip_vs_sync_buff_release(sb); /* release the sending multicast socket */ - sock_release(tinfo->sock); + sk_release_kernel(tinfo->sock->sk); kfree(tinfo); return 0; @@ -1513,7 +1523,7 @@ static int sync_thread_backup(void *data) } /* release the sending multicast socket */ - sock_release(tinfo->sock); + sk_release_kernel(tinfo->sock->sk); kfree(tinfo->buf); kfree(tinfo); @@ -1601,7 +1611,7 @@ int start_sync_thread(struct net *net, int state, char *mcast_ifn, __u8 syncid) outbuf: kfree(buf); outsocket: - sock_release(sock); + sk_release_kernel(sock->sk); out: return result; } @@ -1610,6 +1620,7 @@ int start_sync_thread(struct net *net, int state, char *mcast_ifn, __u8 syncid) int stop_sync_thread(struct net *net, int state) { struct netns_ipvs *ipvs = net_ipvs(net); + int retc = -EINVAL; IP_VS_DBG(7, "%s(): pid %d\n", __func__, task_pid_nr(current)); @@ -1629,7 +1640,7 @@ int stop_sync_thread(struct net *net, int state) spin_lock_bh(&ipvs->sync_lock); ipvs->sync_state &= ~IP_VS_STATE_MASTER; spin_unlock_bh(&ipvs->sync_lock); - kthread_stop(ipvs->master_thread); + retc = kthread_stop(ipvs->master_thread); ipvs->master_thread = NULL; } else if (state == IP_VS_STATE_BACKUP) { if (!ipvs->backup_thread) @@ -1639,22 +1650,20 @@ int stop_sync_thread(struct net *net, int state) task_pid_nr(ipvs->backup_thread)); ipvs->sync_state &= ~IP_VS_STATE_BACKUP; - kthread_stop(ipvs->backup_thread); + retc = kthread_stop(ipvs->backup_thread); ipvs->backup_thread = NULL; - } else { - return -EINVAL; } /* decrease the module use count */ ip_vs_use_count_dec(); - return 0; + return retc; } /* * Initialize data struct for each netns */ -static int __net_init __ip_vs_sync_init(struct net *net) +int __net_init __ip_vs_sync_init(struct net *net) { struct netns_ipvs *ipvs = net_ipvs(net); @@ -1668,24 +1677,24 @@ static int __net_init __ip_vs_sync_init(struct net *net) return 0; } -static void __ip_vs_sync_cleanup(struct net *net) +void __ip_vs_sync_cleanup(struct net *net) { - stop_sync_thread(net, IP_VS_STATE_MASTER); - stop_sync_thread(net, IP_VS_STATE_BACKUP); -} + int retc; -static struct pernet_operations ipvs_sync_ops = { - .init = __ip_vs_sync_init, - .exit = __ip_vs_sync_cleanup, -}; + retc = stop_sync_thread(net, IP_VS_STATE_MASTER); + if (retc && retc != -ESRCH) + pr_err("Failed to stop Master Daemon\n"); + retc = stop_sync_thread(net, IP_VS_STATE_BACKUP); + if (retc && retc != -ESRCH) + pr_err("Failed to stop Backup Daemon\n"); +} int __init ip_vs_sync_init(void) { - return register_pernet_subsys(&ipvs_sync_ops); + return 0; } void ip_vs_sync_cleanup(void) { - unregister_pernet_subsys(&ipvs_sync_ops); } diff --git a/trunk/net/netfilter/nf_conntrack_netlink.c b/trunk/net/netfilter/nf_conntrack_netlink.c index 30bf8a167fc8..482e90c61850 100644 --- a/trunk/net/netfilter/nf_conntrack_netlink.c +++ b/trunk/net/netfilter/nf_conntrack_netlink.c @@ -1334,6 +1334,7 @@ ctnetlink_create_conntrack(struct net *net, u16 zone, struct nf_conn *ct; int err = -EINVAL; struct nf_conntrack_helper *helper; + struct nf_conn_tstamp *tstamp; ct = nf_conntrack_alloc(net, zone, otuple, rtuple, GFP_ATOMIC); if (IS_ERR(ct)) @@ -1451,6 +1452,9 @@ ctnetlink_create_conntrack(struct net *net, u16 zone, __set_bit(IPS_EXPECTED_BIT, &ct->status); ct->master = master_ct; } + tstamp = nf_conn_tstamp_find(ct); + if (tstamp) + tstamp->start = ktime_to_ns(ktime_get_real()); add_timer(&ct->timeout); nf_conntrack_hash_insert(ct); diff --git a/trunk/net/netfilter/x_tables.c b/trunk/net/netfilter/x_tables.c index a9adf4c6b299..8a025a585d2f 100644 --- a/trunk/net/netfilter/x_tables.c +++ b/trunk/net/netfilter/x_tables.c @@ -455,6 +455,7 @@ void xt_compat_flush_offsets(u_int8_t af) vfree(xt[af].compat_tab); xt[af].compat_tab = NULL; xt[af].number = 0; + xt[af].cur = 0; } } EXPORT_SYMBOL_GPL(xt_compat_flush_offsets); @@ -473,8 +474,7 @@ int xt_compat_calc_jump(u_int8_t af, unsigned int offset) else return mid ? tmp[mid - 1].delta : 0; } - WARN_ON_ONCE(1); - return 0; + return left ? tmp[left - 1].delta : 0; } EXPORT_SYMBOL_GPL(xt_compat_calc_jump); diff --git a/trunk/net/netfilter/xt_DSCP.c b/trunk/net/netfilter/xt_DSCP.c index 0a229191e55b..ae8271652efa 100644 --- a/trunk/net/netfilter/xt_DSCP.c +++ b/trunk/net/netfilter/xt_DSCP.c @@ -99,7 +99,7 @@ tos_tg6(struct sk_buff *skb, const struct xt_action_param *par) u_int8_t orig, nv; orig = ipv6_get_dsfield(iph); - nv = (orig & info->tos_mask) ^ info->tos_value; + nv = (orig & ~info->tos_mask) ^ info->tos_value; if (orig != nv) { if (!skb_make_writable(skb, sizeof(struct iphdr))) diff --git a/trunk/net/netfilter/xt_conntrack.c b/trunk/net/netfilter/xt_conntrack.c index 481a86fdc409..61805d7b38aa 100644 --- a/trunk/net/netfilter/xt_conntrack.c +++ b/trunk/net/netfilter/xt_conntrack.c @@ -272,11 +272,6 @@ static int conntrack_mt_check(const struct xt_mtchk_param *par) { int ret; - if (strcmp(par->table, "raw") == 0) { - pr_info("state is undetermined at the time of raw table\n"); - return -EINVAL; - } - ret = nf_ct_l3proto_try_module_get(par->family); if (ret < 0) pr_info("cannot load conntrack support for proto=%u\n", diff --git a/trunk/net/xfrm/xfrm_policy.c b/trunk/net/xfrm/xfrm_policy.c index 15792d8b6272..b4d745ea8ee1 100644 --- a/trunk/net/xfrm/xfrm_policy.c +++ b/trunk/net/xfrm/xfrm_policy.c @@ -1406,6 +1406,7 @@ static struct dst_entry *xfrm_bundle_create(struct xfrm_policy *policy, struct net *net = xp_net(policy); unsigned long now = jiffies; struct net_device *dev; + struct xfrm_mode *inner_mode; struct dst_entry *dst_prev = NULL; struct dst_entry *dst0 = NULL; int i = 0; @@ -1436,6 +1437,17 @@ static struct dst_entry *xfrm_bundle_create(struct xfrm_policy *policy, goto put_states; } + if (xfrm[i]->sel.family == AF_UNSPEC) { + inner_mode = xfrm_ip2inner_mode(xfrm[i], + xfrm_af2proto(family)); + if (!inner_mode) { + err = -EAFNOSUPPORT; + dst_release(dst); + goto put_states; + } + } else + inner_mode = xfrm[i]->inner_mode; + if (!dst_prev) dst0 = dst1; else { @@ -1464,7 +1476,7 @@ static struct dst_entry *xfrm_bundle_create(struct xfrm_policy *policy, dst1->lastuse = now; dst1->input = dst_discard; - dst1->output = xfrm[i]->outer_mode->afinfo->output; + dst1->output = inner_mode->afinfo->output; dst1->next = dst_prev; dst_prev = dst1; diff --git a/trunk/net/xfrm/xfrm_replay.c b/trunk/net/xfrm/xfrm_replay.c index e8a781422feb..47f1b8638df9 100644 --- a/trunk/net/xfrm/xfrm_replay.c +++ b/trunk/net/xfrm/xfrm_replay.c @@ -535,6 +535,9 @@ int xfrm_init_replay(struct xfrm_state *x) replay_esn->bmp_len * sizeof(__u32) * 8) return -EINVAL; + if ((x->props.flags & XFRM_STATE_ESN) && replay_esn->replay_window == 0) + return -EINVAL; + if ((x->props.flags & XFRM_STATE_ESN) && x->replay_esn) x->repl = &xfrm_replay_esn; else diff --git a/trunk/security/selinux/ss/policydb.c b/trunk/security/selinux/ss/policydb.c index e6e7ce0d3d55..7102457661d6 100644 --- a/trunk/security/selinux/ss/policydb.c +++ b/trunk/security/selinux/ss/policydb.c @@ -1819,8 +1819,6 @@ static int filename_trans_read(struct policydb *p, void *fp) goto out; nel = le32_to_cpu(buf[0]); - printk(KERN_ERR "%s: nel=%d\n", __func__, nel); - last = p->filename_trans; while (last && last->next) last = last->next; @@ -1857,8 +1855,6 @@ static int filename_trans_read(struct policydb *p, void *fp) goto out; name[len] = 0; - printk(KERN_ERR "%s: ft=%p ft->name=%p ft->name=%s\n", __func__, ft, ft->name, ft->name); - rc = next_entry(buf, fp, sizeof(u32) * 4); if (rc) goto out; diff --git a/trunk/sound/soc/codecs/ssm2602.c b/trunk/sound/soc/codecs/ssm2602.c index 2727befd158e..b04d28039c16 100644 --- a/trunk/sound/soc/codecs/ssm2602.c +++ b/trunk/sound/soc/codecs/ssm2602.c @@ -139,7 +139,7 @@ SOC_DOUBLE_R("Capture Volume", SSM2602_LINVOL, SSM2602_RINVOL, 0, 31, 0), SOC_DOUBLE_R("Capture Switch", SSM2602_LINVOL, SSM2602_RINVOL, 7, 1, 1), SOC_SINGLE("Mic Boost (+20dB)", SSM2602_APANA, 0, 1, 0), -SOC_SINGLE("Mic Boost2 (+20dB)", SSM2602_APANA, 7, 1, 0), +SOC_SINGLE("Mic Boost2 (+20dB)", SSM2602_APANA, 8, 1, 0), SOC_SINGLE("Mic Switch", SSM2602_APANA, 1, 1, 1), SOC_SINGLE("Sidetone Playback Volume", SSM2602_APANA, 6, 3, 1), @@ -602,7 +602,7 @@ static struct snd_soc_codec_driver soc_codec_dev_ssm2602 = { .read = ssm2602_read_reg_cache, .write = ssm2602_write, .set_bias_level = ssm2602_set_bias_level, - .reg_cache_size = sizeof(ssm2602_reg), + .reg_cache_size = ARRAY_SIZE(ssm2602_reg), .reg_word_size = sizeof(u16), .reg_cache_default = ssm2602_reg, }; @@ -614,7 +614,7 @@ static struct snd_soc_codec_driver soc_codec_dev_ssm2602 = { * low = 0x1a * high = 0x1b */ -static int ssm2602_i2c_probe(struct i2c_client *i2c, +static int __devinit ssm2602_i2c_probe(struct i2c_client *i2c, const struct i2c_device_id *id) { struct ssm2602_priv *ssm2602; @@ -635,7 +635,7 @@ static int ssm2602_i2c_probe(struct i2c_client *i2c, return ret; } -static int ssm2602_i2c_remove(struct i2c_client *client) +static int __devexit ssm2602_i2c_remove(struct i2c_client *client) { snd_soc_unregister_codec(&client->dev); kfree(i2c_get_clientdata(client)); @@ -655,7 +655,7 @@ static struct i2c_driver ssm2602_i2c_driver = { .owner = THIS_MODULE, }, .probe = ssm2602_i2c_probe, - .remove = ssm2602_i2c_remove, + .remove = __devexit_p(ssm2602_i2c_remove), .id_table = ssm2602_i2c_id, }; #endif diff --git a/trunk/sound/soc/codecs/uda134x.c b/trunk/sound/soc/codecs/uda134x.c index 48ffd406a71d..a7b8f301bad3 100644 --- a/trunk/sound/soc/codecs/uda134x.c +++ b/trunk/sound/soc/codecs/uda134x.c @@ -601,9 +601,7 @@ static struct snd_soc_codec_driver soc_codec_dev_uda134x = { .reg_cache_step = 1, .read = uda134x_read_reg_cache, .write = uda134x_write, -#ifdef POWER_OFF_ON_STANDBY .set_bias_level = uda134x_set_bias_level, -#endif }; static int __devinit uda134x_codec_probe(struct platform_device *pdev) diff --git a/trunk/sound/soc/codecs/wm8903.c b/trunk/sound/soc/codecs/wm8903.c index f52b623bb692..824d1c8c8a35 100644 --- a/trunk/sound/soc/codecs/wm8903.c +++ b/trunk/sound/soc/codecs/wm8903.c @@ -692,7 +692,7 @@ SOC_ENUM("DRC Smoothing Threshold", drc_smoothing), SOC_SINGLE_TLV("DRC Startup Volume", WM8903_DRC_0, 6, 18, 0, drc_tlv_startup), SOC_DOUBLE_R_TLV("Digital Capture Volume", WM8903_ADC_DIGITAL_VOLUME_LEFT, - WM8903_ADC_DIGITAL_VOLUME_RIGHT, 1, 96, 0, digital_tlv), + WM8903_ADC_DIGITAL_VOLUME_RIGHT, 1, 120, 0, digital_tlv), SOC_ENUM("ADC Companding Mode", adc_companding), SOC_SINGLE("ADC Companding Switch", WM8903_AUDIO_INTERFACE_0, 3, 1, 0), diff --git a/trunk/sound/soc/jz4740/jz4740-i2s.c b/trunk/sound/soc/jz4740/jz4740-i2s.c index 419bf4f5534a..cd22a54b2f14 100644 --- a/trunk/sound/soc/jz4740/jz4740-i2s.c +++ b/trunk/sound/soc/jz4740/jz4740-i2s.c @@ -133,7 +133,7 @@ static void jz4740_i2s_shutdown(struct snd_pcm_substream *substream, struct jz4740_i2s *i2s = snd_soc_dai_get_drvdata(dai); uint32_t conf; - if (!dai->active) + if (dai->active) return; conf = jz4740_i2s_read(i2s, JZ_REG_AIC_CONF); diff --git a/trunk/sound/soc/mid-x86/sst_platform.c b/trunk/sound/soc/mid-x86/sst_platform.c index d567c322a2fb..6b1f9d3bf34e 100644 --- a/trunk/sound/soc/mid-x86/sst_platform.c +++ b/trunk/sound/soc/mid-x86/sst_platform.c @@ -376,6 +376,11 @@ static int sst_platform_pcm_hw_params(struct snd_pcm_substream *substream, return 0; } +static int sst_platform_pcm_hw_free(struct snd_pcm_substream *substream) +{ + return snd_pcm_lib_free_pages(substream); +} + static struct snd_pcm_ops sst_platform_ops = { .open = sst_platform_open, .close = sst_platform_close, @@ -384,6 +389,7 @@ static struct snd_pcm_ops sst_platform_ops = { .trigger = sst_platform_pcm_trigger, .pointer = sst_platform_pcm_pointer, .hw_params = sst_platform_pcm_hw_params, + .hw_free = sst_platform_pcm_hw_free, }; static void sst_pcm_free(struct snd_pcm *pcm) diff --git a/trunk/sound/soc/soc-core.c b/trunk/sound/soc/soc-core.c index d8562ce4de7a..dd55d1069468 100644 --- a/trunk/sound/soc/soc-core.c +++ b/trunk/sound/soc/soc-core.c @@ -3291,6 +3291,8 @@ int snd_soc_register_card(struct snd_soc_card *card) if (!card->name || !card->dev) return -EINVAL; + dev_set_drvdata(card->dev, card); + snd_soc_initialize_card_lists(card); soc_init_card_debugfs(card); diff --git a/trunk/tools/perf/Documentation/perf-script-perl.txt b/trunk/tools/perf/Documentation/perf-script-perl.txt index 3152cca15501..5bb41e55a3ac 100644 --- a/trunk/tools/perf/Documentation/perf-script-perl.txt +++ b/trunk/tools/perf/Documentation/perf-script-perl.txt @@ -63,6 +63,7 @@ The format file for the sched_wakep event defines the following fields field:unsigned char common_flags; field:unsigned char common_preempt_count; field:int common_pid; + field:int common_lock_depth; field:char comm[TASK_COMM_LEN]; field:pid_t pid; diff --git a/trunk/tools/perf/Documentation/perf-script-python.txt b/trunk/tools/perf/Documentation/perf-script-python.txt index 471022069119..36b38277422c 100644 --- a/trunk/tools/perf/Documentation/perf-script-python.txt +++ b/trunk/tools/perf/Documentation/perf-script-python.txt @@ -463,6 +463,7 @@ The format file for the sched_wakep event defines the following fields field:unsigned char common_flags; field:unsigned char common_preempt_count; field:int common_pid; + field:int common_lock_depth; field:char comm[TASK_COMM_LEN]; field:pid_t pid;